fs-lawrisk/tools/analyze_permits.py

111 lines
3.7 KiB
Python
Raw Permalink Normal View History

import os
import pandas as pd
import pg8000.dbapi as pg
from lawrisk.utils.env_loader import load_env
def analyze():
load_env()
conn_params = {
"host": os.getenv("LIC_PG_HOST", "172.24.240.1"),
"port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))),
"user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")),
"password": os.getenv("LIC_PG_PASSWORD", ""),
"database": os.getenv("LIC_PG_DATABASE", "licensing_risks"),
}
print("--- 1. Checking Database Bindings for '市级' ---")
try:
conn = pg.connect(**conn_params)
cur = conn.cursor()
cur.execute("SELECT id FROM regions WHERE name = '市级'")
res = cur.fetchone()
if not res:
print("Region '市级' not found.")
return
region_id = res[0]
print(f"Region ID: {region_id}")
cur.execute("SELECT COUNT(*) FROM region_theme_permits WHERE region_id = %s", (region_id,))
count = cur.fetchone()[0]
print(f"Total bindings in region_theme_permits: {count}")
cur.execute("SELECT COUNT(*) FROM region_permit_details WHERE region_id = %s", (region_id,))
count_details = cur.fetchone()[0]
print(f"Total permits in region_permit_details: {count_details}")
except Exception as e:
print(f"DB Error: {e}")
return
finally:
conn.close()
print("\n--- 2. Comparing Excel vs DB ---")
excel_path = "主题-事项绑定.xlsx"
if not os.path.exists(excel_path):
print("Excel file not found.")
return
try:
df = pd.read_excel(excel_path, header=1)
df.columns = [str(c).strip() for c in df.columns]
# Define col_theme
col_theme = '主题'
if col_theme not in df.columns:
for c in df.columns:
if '主题' in c:
col_theme = c
break
if col_theme in df.columns:
df[col_theme] = df[col_theme].fillna(method='ffill')
col_permit = '审批事项'
if col_permit not in df.columns:
for c in df.columns:
if '事项' in c and '主题' not in c:
col_permit = c
break
print(f"Using Excel Column: {col_permit}")
print("\n--- Excel Head (First 20 rows) ---")
print(df[[col_theme, col_permit]].head(20).to_string())
excel_permits = set(df[col_permit].dropna().astype(str).str.strip().tolist())
print(f"\nUnique Permits in Excel ({len(excel_permits)}):")
for p in sorted(list(excel_permits)):
print(f" [EXCEL] {p}")
# Fetch DB permits again
conn = pg.connect(**conn_params)
cur = conn.cursor()
cur.execute("""
SELECT p.name
FROM region_permit_details rpd
JOIN permits p ON p.id = rpd.permit_id
WHERE rpd.region_id = %s
""", (region_id,))
db_permits = set([row[0].strip() for row in cur.fetchall()])
conn.close()
print(f"Unique Permits in DB: {len(db_permits)}")
missing_in_db = excel_permits - db_permits
missing_in_excel = db_permits - excel_permits
print(f"\nMissing in DB ({len(missing_in_db)}):")
for p in list(missing_in_db)[:10]:
print(f" - {p}")
if len(missing_in_db) > 10: print(" ...")
print(f"\nExtra in DB (Not in Excel) ({len(missing_in_excel)}):")
for p in list(missing_in_excel)[:10]:
print(f" - {p}")
except Exception as e:
print(f"Analysis Error: {e}")
if __name__ == "__main__":
analyze()