fs-lawrisk/tools/generate_unbound_report.py


import pandas as pd
import os
import sys

# Add the current working directory to the import path so `lawrisk` resolves;
# run this script from the project root.
sys.path.append(os.getcwd())

from lawrisk.services import licensing_repo as lic_repo
from lawrisk.utils.env_loader import load_env

def generate_report():
    """Mark which permit names in the input workbook exist in the database.

    Reads ``审批服务部门.xlsx`` from the current working directory, picks the
    first column whose whitespace-stripped header contains ``事项名称``, and
    checks every value in that column against the ``name`` column of the
    ``permits`` table.  A copy of the input with an extra ``匹配状态`` column
    (``已匹配`` = matched, ``未匹配`` = not matched) is written to
    ``审批服务部门_核对结果.xlsx``.

    Names are compared after stripping surrounding whitespace on both the
    workbook side and the database side.

    Returns:
        None.  Prints a message and returns early when the input file is
        missing or no ``事项名称`` column can be found.
    """
    load_env()
    file_path = "审批服务部门.xlsx"
    output_path = "审批服务部门_核对结果.xlsx"

    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return

    print("Loading Excel...")
    df = pd.read_excel(file_path)

    # Normalize headers so the column lookup tolerates stray whitespace.
    # Non-string labels (e.g. numeric headers) have no .strip(); leave them.
    df.rename(
        columns={c: c.strip() for c in df.columns if isinstance(c, str)},
        inplace=True,
    )

    # First string header that mentions the permit-name marker, if any.
    permit_col = next(
        (c for c in df.columns if isinstance(c, str) and "事项名称" in c),
        None,
    )
    if permit_col is None:
        print("Could not find '事项名称' column.")
        return

    print(f"Using '{permit_col}' as permit name column.")

    print("Fetching system permits...")
    with lic_repo._lic_pg_conn() as conn:
        cur = conn.cursor()
        try:
            cur.execute("SELECT name FROM permits")
            rows = cur.fetchall()
        finally:
            # Release the cursor even if the query fails.
            cur.close()

    # Apply the same whitespace normalization used for the workbook values,
    # otherwise a padded database name could never match.  NULL names are
    # skipped because they cannot correspond to any workbook entry.
    db_permits = set()
    for row in rows:
        name = row[0]
        if name is None:
            continue
        db_permits.add(name.strip() if isinstance(name, str) else name)

    print(f"Found {len(db_permits)} permits in database.")

    # Iterate the column itself rather than df.iterrows(): iterrows builds a
    # mixed-dtype Series per row, which can upcast values (e.g. int -> float)
    # and change their string form before comparison.
    df["匹配状态"] = [
        "已匹配" if str(value).strip() in db_permits else "未匹配"  # matched / not matched
        for value in df[permit_col]
    ]

    print(f"Saving report to {output_path}...")
    df.to_excel(output_path, index=False)
    print("Done.")

# Generate the report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    generate_report()