169 lines
6.0 KiB
Python
169 lines
6.0 KiB
Python
|
||
import os
|
||
import pandas as pd
|
||
import pg8000.dbapi as pg
|
||
from lawrisk.utils.env_loader import load_env
|
||
|
||
def fix_bindings():
|
||
load_env()
|
||
conn_params = {
|
||
"host": os.getenv("LIC_PG_HOST", "172.24.240.1"),
|
||
"port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))),
|
||
"user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")),
|
||
"password": os.getenv("LIC_PG_PASSWORD", ""),
|
||
"database": os.getenv("LIC_PG_DATABASE", "licensing_risks"),
|
||
}
|
||
|
||
excel_path = "主题-事项绑定.xlsx"
|
||
if not os.path.exists(excel_path):
|
||
print(f"Excel file not found: {excel_path}")
|
||
return
|
||
|
||
print("Reading Excel...")
|
||
try:
|
||
# Try header at row 1 (0-indexed) which is row 2 in Excel
|
||
df = pd.read_excel(excel_path, header=1)
|
||
except Exception as e:
|
||
print(f"Read error: {e}")
|
||
return
|
||
|
||
# Normalize column names
|
||
print("Raw Columns:", df.columns.tolist())
|
||
df.columns = [str(c).strip() for c in df.columns]
|
||
print("Normalized Columns:", df.columns.tolist())
|
||
|
||
# Fill merged cells (forward fill)
|
||
col_theme = '主题'
|
||
if col_theme not in df.columns:
|
||
# Prefer "主题" only, but "主题事项" is okay if "主题" not found
|
||
candidates = [c for c in df.columns if "主题" in c]
|
||
if candidates:
|
||
# Sort by length, prefer shorter '主题'
|
||
candidates.sort(key=len)
|
||
col_theme = candidates[0]
|
||
|
||
print(f"Using Theme Column: '{col_theme}'")
|
||
if col_theme in df.columns:
|
||
df[col_theme] = df[col_theme].fillna(method='ffill')
|
||
else:
|
||
print("Theme column missing! Aborting.")
|
||
return
|
||
|
||
col_permit = '审批事项' # Default to what we saw
|
||
if col_permit not in df.columns:
|
||
# heuristic
|
||
candidates = []
|
||
for c in df.columns:
|
||
if "审批事项" in c: candidates.append(c)
|
||
elif "许可事项" in c: candidates.append(c)
|
||
elif "事项" in c and c != col_theme: candidates.append(c)
|
||
|
||
if candidates:
|
||
col_permit = candidates[0]
|
||
|
||
print(f"Using columns: Theme='{col_theme}', Permit='{col_permit}'")
|
||
|
||
excel_bindings = df[[col_theme, col_permit]].dropna(subset=[col_permit])
|
||
# Also drop if theme is null (though we filled it)
|
||
excel_bindings = excel_bindings.dropna(subset=[col_theme])
|
||
|
||
print(f"Found {len(excel_bindings)} bindings in Excel.")
|
||
|
||
try:
|
||
conn = pg.connect(**conn_params)
|
||
cur = conn.cursor()
|
||
|
||
# Get DB Permits (市级)
|
||
print("Fetching DB Permits...")
|
||
cur.execute("""
|
||
SELECT p.id, p.name
|
||
FROM region_permit_details rpd
|
||
JOIN permits p ON p.id = rpd.permit_id
|
||
JOIN regions r ON r.id = rpd.region_id
|
||
WHERE r.name = '市级'
|
||
""")
|
||
db_permits = {row[1].strip(): row[0] for row in cur.fetchall()}
|
||
print(f"Found {len(db_permits)} permits in DB for '市级'.")
|
||
|
||
# Get Themes
|
||
cur.execute("SELECT id, name FROM themes")
|
||
themes = {row[1].strip(): row[0] for row in cur.fetchall()}
|
||
print(f"Found {len(themes)} themes in DB.")
|
||
|
||
# Check if themes are populated. If only 1 themes, maybe we need to import themes?
|
||
# Assuming themes table is populated from previous sessions.
|
||
|
||
# Get Region ID
|
||
cur.execute("SELECT id FROM regions WHERE name = '市级'")
|
||
region_id = cur.fetchone()[0]
|
||
|
||
matches = 0
|
||
mismatches = 0
|
||
to_insert = []
|
||
|
||
# Helper for fuzzy matching
|
||
def normalize_name(n):
|
||
return n.replace("许可", "").replace("证", "").replace("(", "").replace(")", "").replace("(", "").replace(")", "").strip()
|
||
|
||
for idx, row in excel_bindings.iterrows():
|
||
t_name = str(row[col_theme]).strip()
|
||
p_name = str(row[col_permit]).strip()
|
||
|
||
# 1. Resolve Theme
|
||
t_id = themes.get(t_name)
|
||
if not t_id:
|
||
# Try creating theme? Or fuzzy match theme?
|
||
pass
|
||
|
||
# 2. Resolve Permit
|
||
p_id = db_permits.get(p_name)
|
||
if not p_id:
|
||
# Fuzzy
|
||
norm_p = normalize_name(p_name)
|
||
for db_name, db_pid in db_permits.items():
|
||
if normalize_name(db_name) == norm_p:
|
||
p_id = db_pid
|
||
# print(f"Fuzzy Match: '{p_name}' -> '{db_name}'")
|
||
break
|
||
# Try containment
|
||
if norm_p in normalize_name(db_name) and len(norm_p) > 4:
|
||
p_id = db_pid
|
||
break
|
||
|
||
if p_id and t_id:
|
||
to_insert.append((region_id, t_id, p_id))
|
||
matches += 1
|
||
else:
|
||
if not p_id:
|
||
print(f"Unmatched Excel Permit: '{p_name}'")
|
||
# Suggest candidates
|
||
candidates = []
|
||
for db_name in db_permits.keys():
|
||
if p_name in db_name or db_name in p_name:
|
||
candidates.append(db_name)
|
||
if candidates:
|
||
print(f" Possible DB Candidates: {candidates}")
|
||
if not t_id:
|
||
pass
|
||
mismatches += 1
|
||
|
||
print(f"\nPotential bindings: {matches}, Unmatched: {mismatches}")
|
||
|
||
if to_insert:
|
||
print(f"Inserting {len(to_insert)} bindings...")
|
||
for rid, tid, pid in to_insert:
|
||
cur.execute("""
|
||
INSERT INTO region_theme_permits (region_id, theme_id, permit_id)
|
||
VALUES (%s, %s, %s)
|
||
ON CONFLICT (region_id, theme_id, permit_id) DO NOTHING
|
||
""", (rid, tid, pid))
|
||
conn.commit()
|
||
print("Bindings inserted/updated.")
|
||
|
||
conn.close()
|
||
except Exception as e:
|
||
print(f"Error: {e}")
|
||
|
||
if __name__ == "__main__":
|
||
fix_bindings()
|