111 lines
3.7 KiB
Python
111 lines
3.7 KiB
Python
|
|
|
||
|
|
import os
|
||
|
|
import pandas as pd
|
||
|
|
import pg8000.dbapi as pg
|
||
|
|
from lawrisk.utils.env_loader import load_env
|
||
|
|
|
||
|
|
def analyze():
|
||
|
|
load_env()
|
||
|
|
conn_params = {
|
||
|
|
"host": os.getenv("LIC_PG_HOST", "172.24.240.1"),
|
||
|
|
"port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))),
|
||
|
|
"user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")),
|
||
|
|
"password": os.getenv("LIC_PG_PASSWORD", ""),
|
||
|
|
"database": os.getenv("LIC_PG_DATABASE", "licensing_risks"),
|
||
|
|
}
|
||
|
|
|
||
|
|
print("--- 1. Checking Database Bindings for '市级' ---")
|
||
|
|
try:
|
||
|
|
conn = pg.connect(**conn_params)
|
||
|
|
cur = conn.cursor()
|
||
|
|
|
||
|
|
cur.execute("SELECT id FROM regions WHERE name = '市级'")
|
||
|
|
res = cur.fetchone()
|
||
|
|
if not res:
|
||
|
|
print("Region '市级' not found.")
|
||
|
|
return
|
||
|
|
region_id = res[0]
|
||
|
|
print(f"Region ID: {region_id}")
|
||
|
|
|
||
|
|
cur.execute("SELECT COUNT(*) FROM region_theme_permits WHERE region_id = %s", (region_id,))
|
||
|
|
count = cur.fetchone()[0]
|
||
|
|
print(f"Total bindings in region_theme_permits: {count}")
|
||
|
|
|
||
|
|
cur.execute("SELECT COUNT(*) FROM region_permit_details WHERE region_id = %s", (region_id,))
|
||
|
|
count_details = cur.fetchone()[0]
|
||
|
|
print(f"Total permits in region_permit_details: {count_details}")
|
||
|
|
|
||
|
|
except Exception as e:
|
||
|
|
print(f"DB Error: {e}")
|
||
|
|
return
|
||
|
|
finally:
|
||
|
|
conn.close()
|
||
|
|
|
||
|
|
print("\n--- 2. Comparing Excel vs DB ---")
|
||
|
|
excel_path = "主题-事项绑定.xlsx"
|
||
|
|
if not os.path.exists(excel_path):
|
||
|
|
print("Excel file not found.")
|
||
|
|
return
|
||
|
|
|
||
|
|
try:
|
||
|
|
df = pd.read_excel(excel_path, header=1)
|
||
|
|
df.columns = [str(c).strip() for c in df.columns]
|
||
|
|
|
||
|
|
# Define col_theme
|
||
|
|
col_theme = '主题'
|
||
|
|
if col_theme not in df.columns:
|
||
|
|
for c in df.columns:
|
||
|
|
if '主题' in c:
|
||
|
|
col_theme = c
|
||
|
|
break
|
||
|
|
if col_theme in df.columns:
|
||
|
|
df[col_theme] = df[col_theme].fillna(method='ffill')
|
||
|
|
|
||
|
|
col_permit = '审批事项'
|
||
|
|
if col_permit not in df.columns:
|
||
|
|
for c in df.columns:
|
||
|
|
if '事项' in c and '主题' not in c:
|
||
|
|
col_permit = c
|
||
|
|
break
|
||
|
|
print(f"Using Excel Column: {col_permit}")
|
||
|
|
|
||
|
|
print("\n--- Excel Head (First 20 rows) ---")
|
||
|
|
print(df[[col_theme, col_permit]].head(20).to_string())
|
||
|
|
|
||
|
|
excel_permits = set(df[col_permit].dropna().astype(str).str.strip().tolist())
|
||
|
|
print(f"\nUnique Permits in Excel ({len(excel_permits)}):")
|
||
|
|
for p in sorted(list(excel_permits)):
|
||
|
|
print(f" [EXCEL] {p}")
|
||
|
|
|
||
|
|
# Fetch DB permits again
|
||
|
|
conn = pg.connect(**conn_params)
|
||
|
|
cur = conn.cursor()
|
||
|
|
cur.execute("""
|
||
|
|
SELECT p.name
|
||
|
|
FROM region_permit_details rpd
|
||
|
|
JOIN permits p ON p.id = rpd.permit_id
|
||
|
|
WHERE rpd.region_id = %s
|
||
|
|
""", (region_id,))
|
||
|
|
db_permits = set([row[0].strip() for row in cur.fetchall()])
|
||
|
|
conn.close()
|
||
|
|
|
||
|
|
print(f"Unique Permits in DB: {len(db_permits)}")
|
||
|
|
|
||
|
|
missing_in_db = excel_permits - db_permits
|
||
|
|
missing_in_excel = db_permits - excel_permits
|
||
|
|
|
||
|
|
print(f"\nMissing in DB ({len(missing_in_db)}):")
|
||
|
|
for p in list(missing_in_db)[:10]:
|
||
|
|
print(f" - {p}")
|
||
|
|
if len(missing_in_db) > 10: print(" ...")
|
||
|
|
|
||
|
|
print(f"\nExtra in DB (Not in Excel) ({len(missing_in_excel)}):")
|
||
|
|
for p in list(missing_in_excel)[:10]:
|
||
|
|
print(f" - {p}")
|
||
|
|
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Analysis Error: {e}")
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
analyze()
|