# fs-lawrisk/tools/audit_risks.py


import json
import os
from lawrisk.services import licensing_repo as lic_repo
from lawrisk.utils.env_loader import load_env

def clean_text(text):
    """Return ``text`` converted to ``str`` with surrounding whitespace removed.

    Any falsy input (``None``, ``""``, ``0``, ...) yields the empty string.
    """
    return str(text).strip() if text else ""

def _clean_text(text):
    """Underscore-prefixed alias that delegates to ``clean_text``."""
    cleaned = clean_text(text)
    return cleaned

def _extract_sheets(data):
    """Return the sheet-name -> sheet mapping from a parsed risk JSON document.

    Raises:
        ValueError: if no such mapping is found, so the caller can report the
            file instead of silently skipping it.
    """
    # NOTE(review): the previous code iterated an undefined name `sheets`.
    # This assumes the sheets live under a top-level "sheets" key of the JSON
    # document -- confirm against whatever produces these files.
    sheets = data.get("sheets") if isinstance(data, dict) else None
    if not isinstance(sheets, dict):
        raise ValueError('no "sheets" mapping at the top level of the JSON document')
    return sheets


def _pick_target_sheet(sheets):
    """Return the first sheet name to audit, or ``None`` if there is none.

    A sheet qualifies when its cleaned name equals '市级' or when its name
    contains '营业执照' (marked in the original code as a special case for 109).
    """
    for sname in sheets:
        if _clean_text(sname) == '市级' or '营业执照' in sname:
            return sname
    return None


def _count_permit_rows(rows):
    """Count rows per cleaned ``permit_name``; rows with an empty name are ignored."""
    counts = {}
    for row in rows:
        p_name = clean_text(row.get("permit_name"))
        if p_name:
            counts[p_name] = counts.get(p_name, 0) + 1
    return counts


def _audit_file(cur, region_id, base_dir, fname):
    """Compare one JSON file's per-permit row counts with the database.

    Returns a list of mismatch dicts (keys: file, permit, file_count,
    db_count, sheet). Returns an empty list when the file has no target sheet
    or when every count matches.
    """
    with open(os.path.join(base_dir, fname), 'r', encoding='utf-8') as f:
        data = json.load(f)

    sheets = _extract_sheets(data)
    target_sheet = _pick_target_sheet(sheets)
    if target_sheet is None:
        return []

    # `or []` also covers an explicit JSON null for "rows".
    sheet_rows = sheets[target_sheet].get("rows") or []

    found = []
    for p_name, f_count in _count_permit_rows(sheet_rows).items():
        cur.execute("""
            SELECT count(*)
            FROM region_permit_risks rpr
            JOIN permits p ON p.id = rpr.permit_id
            WHERE rpr.region_id = %s AND p.name = %s
        """, (region_id, p_name))
        db_count = cur.fetchone()[0]

        if db_count != f_count:
            found.append({
                "file": fname,
                "permit": p_name,
                "file_count": f_count,
                "db_count": db_count,
                "sheet": target_sheet,
            })
    return found


def _write_report(report_path, mismatches, errors):
    """Write the mismatch table and the list of unauditable files to ``report_path``."""
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("\n" + "=" * 60 + "\n")
        f.write("RISK COUNT MISMATCH REPORT\n")
        f.write("=" * 60 + "\n")

        if mismatches:
            f.write(f"{'Permit Name':<40} | {'File':<6} | {'DB':<6} | {'Filename'}\n")
            f.write("-" * 110 + "\n")
            for m in mismatches:
                f_short = (m['file'][:40] + '..') if len(m['file']) > 40 else m['file']
                f.write(f"{m['permit'][:38]:<40} | {m['file_count']:<6} | {m['db_count']:<6} | {f_short}\n")
        elif errors:
            # Do not claim a clean result when some files were never compared.
            f.write("No mismatches found, but some files could not be audited.\n")
        else:
            f.write("All file risk counts match the database!\n")

        if errors:
            f.write("\nFILES THAT COULD NOT BE AUDITED\n")
            f.write("-" * 110 + "\n")
            for fname, message in errors:
                f.write(f"{fname}: {message}\n")


def audit_risks(base_dir=None, report_path="audit_report.txt"):
    """Audit per-permit risk counts in JSON files against the database.

    For the region named '市级', every ``*.json`` file in ``base_dir`` is
    loaded, rows of its target sheet are counted per ``permit_name``, and each
    count is compared with the number of ``region_permit_risks`` rows joined
    to ``permits`` by that name. Differences, and files that could not be
    processed, are written to ``report_path``.

    Args:
        base_dir: Directory holding the JSON files. Defaults to
            ``市级初版-20251219/许可风险提示`` relative to the working directory.
        report_path: Destination of the text report.

    Returns:
        None. If the region or the directory is missing, a message is printed
        and no report is written.
    """
    from contextlib import closing

    load_env()
    if base_dir is None:
        # Built with os.path.join so the default also resolves on non-Windows hosts.
        base_dir = os.path.join("市级初版-20251219", "许可风险提示")

    mismatches = []
    errors = []

    # closing() releases the cursor and connection on every exit path,
    # including the early returns below.
    with closing(lic_repo._lic_pg_conn()) as conn, closing(conn.cursor()) as cur:
        # Get Region ID for '市级'
        cur.execute("SELECT id FROM regions WHERE name = '市级'")
        row = cur.fetchone()
        if not row:
            print("Region '市级' not found in DB.")
            return
        region_id = row[0]
        print(f"Auditing Region: 市级 ({region_id})")

        if not os.path.isdir(base_dir):
            print(f"Directory not found: {base_dir}")
            return

        # Sorted so that progress output and the report are deterministic.
        files = sorted(f for f in os.listdir(base_dir) if f.endswith(".json"))
        print(f"Scanning {len(files)} JSON files...")

        for processed_count, fname in enumerate(files, start=1):
            if processed_count % 5 == 0:
                print(f"Processing file {processed_count}/{len(files)}: {fname}...")
            try:
                mismatches.extend(_audit_file(cur, region_id, base_dir, fname))
            except Exception as exc:
                # Best effort per file: keep going, but surface the failure
                # instead of letting it pass as a clean audit.
                errors.append((fname, f"{type(exc).__name__}: {exc}"))
                print(f"Error auditing {fname}: {exc}")

    _write_report(report_path, mismatches, errors)
    print(f"Report written to {report_path}")

# Run the audit only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    audit_risks()