169 lines
6.0 KiB
Python
169 lines
6.0 KiB
Python
|
|
|
|||
|
|
import os
|
|||
|
|
import pandas as pd
|
|||
|
|
import pg8000.dbapi as pg
|
|||
|
|
from lawrisk.utils.env_loader import load_env
|
|||
|
|
|
|||
|
|
def fix_bindings():
|
|||
|
|
load_env()
|
|||
|
|
conn_params = {
|
|||
|
|
"host": os.getenv("LIC_PG_HOST", "172.24.240.1"),
|
|||
|
|
"port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))),
|
|||
|
|
"user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")),
|
|||
|
|
"password": os.getenv("LIC_PG_PASSWORD", ""),
|
|||
|
|
"database": os.getenv("LIC_PG_DATABASE", "licensing_risks"),
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
excel_path = "主题-事项绑定.xlsx"
|
|||
|
|
if not os.path.exists(excel_path):
|
|||
|
|
print(f"Excel file not found: {excel_path}")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
print("Reading Excel...")
|
|||
|
|
try:
|
|||
|
|
# Try header at row 1 (0-indexed) which is row 2 in Excel
|
|||
|
|
df = pd.read_excel(excel_path, header=1)
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"Read error: {e}")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
# Normalize column names
|
|||
|
|
print("Raw Columns:", df.columns.tolist())
|
|||
|
|
df.columns = [str(c).strip() for c in df.columns]
|
|||
|
|
print("Normalized Columns:", df.columns.tolist())
|
|||
|
|
|
|||
|
|
# Fill merged cells (forward fill)
|
|||
|
|
col_theme = '主题'
|
|||
|
|
if col_theme not in df.columns:
|
|||
|
|
# Prefer "主题" only, but "主题事项" is okay if "主题" not found
|
|||
|
|
candidates = [c for c in df.columns if "主题" in c]
|
|||
|
|
if candidates:
|
|||
|
|
# Sort by length, prefer shorter '主题'
|
|||
|
|
candidates.sort(key=len)
|
|||
|
|
col_theme = candidates[0]
|
|||
|
|
|
|||
|
|
print(f"Using Theme Column: '{col_theme}'")
|
|||
|
|
if col_theme in df.columns:
|
|||
|
|
df[col_theme] = df[col_theme].fillna(method='ffill')
|
|||
|
|
else:
|
|||
|
|
print("Theme column missing! Aborting.")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
col_permit = '审批事项' # Default to what we saw
|
|||
|
|
if col_permit not in df.columns:
|
|||
|
|
# heuristic
|
|||
|
|
candidates = []
|
|||
|
|
for c in df.columns:
|
|||
|
|
if "审批事项" in c: candidates.append(c)
|
|||
|
|
elif "许可事项" in c: candidates.append(c)
|
|||
|
|
elif "事项" in c and c != col_theme: candidates.append(c)
|
|||
|
|
|
|||
|
|
if candidates:
|
|||
|
|
col_permit = candidates[0]
|
|||
|
|
|
|||
|
|
print(f"Using columns: Theme='{col_theme}', Permit='{col_permit}'")
|
|||
|
|
|
|||
|
|
excel_bindings = df[[col_theme, col_permit]].dropna(subset=[col_permit])
|
|||
|
|
# Also drop if theme is null (though we filled it)
|
|||
|
|
excel_bindings = excel_bindings.dropna(subset=[col_theme])
|
|||
|
|
|
|||
|
|
print(f"Found {len(excel_bindings)} bindings in Excel.")
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
conn = pg.connect(**conn_params)
|
|||
|
|
cur = conn.cursor()
|
|||
|
|
|
|||
|
|
# Get DB Permits (市级)
|
|||
|
|
print("Fetching DB Permits...")
|
|||
|
|
cur.execute("""
|
|||
|
|
SELECT p.id, p.name
|
|||
|
|
FROM region_permit_details rpd
|
|||
|
|
JOIN permits p ON p.id = rpd.permit_id
|
|||
|
|
JOIN regions r ON r.id = rpd.region_id
|
|||
|
|
WHERE r.name = '市级'
|
|||
|
|
""")
|
|||
|
|
db_permits = {row[1].strip(): row[0] for row in cur.fetchall()}
|
|||
|
|
print(f"Found {len(db_permits)} permits in DB for '市级'.")
|
|||
|
|
|
|||
|
|
# Get Themes
|
|||
|
|
cur.execute("SELECT id, name FROM themes")
|
|||
|
|
themes = {row[1].strip(): row[0] for row in cur.fetchall()}
|
|||
|
|
print(f"Found {len(themes)} themes in DB.")
|
|||
|
|
|
|||
|
|
# Check if themes are populated. If only 1 themes, maybe we need to import themes?
|
|||
|
|
# Assuming themes table is populated from previous sessions.
|
|||
|
|
|
|||
|
|
# Get Region ID
|
|||
|
|
cur.execute("SELECT id FROM regions WHERE name = '市级'")
|
|||
|
|
region_id = cur.fetchone()[0]
|
|||
|
|
|
|||
|
|
matches = 0
|
|||
|
|
mismatches = 0
|
|||
|
|
to_insert = []
|
|||
|
|
|
|||
|
|
# Helper for fuzzy matching
|
|||
|
|
def normalize_name(n):
|
|||
|
|
return n.replace("许可", "").replace("证", "").replace("(", "").replace(")", "").replace("(", "").replace(")", "").strip()
|
|||
|
|
|
|||
|
|
for idx, row in excel_bindings.iterrows():
|
|||
|
|
t_name = str(row[col_theme]).strip()
|
|||
|
|
p_name = str(row[col_permit]).strip()
|
|||
|
|
|
|||
|
|
# 1. Resolve Theme
|
|||
|
|
t_id = themes.get(t_name)
|
|||
|
|
if not t_id:
|
|||
|
|
# Try creating theme? Or fuzzy match theme?
|
|||
|
|
pass
|
|||
|
|
|
|||
|
|
# 2. Resolve Permit
|
|||
|
|
p_id = db_permits.get(p_name)
|
|||
|
|
if not p_id:
|
|||
|
|
# Fuzzy
|
|||
|
|
norm_p = normalize_name(p_name)
|
|||
|
|
for db_name, db_pid in db_permits.items():
|
|||
|
|
if normalize_name(db_name) == norm_p:
|
|||
|
|
p_id = db_pid
|
|||
|
|
# print(f"Fuzzy Match: '{p_name}' -> '{db_name}'")
|
|||
|
|
break
|
|||
|
|
# Try containment
|
|||
|
|
if norm_p in normalize_name(db_name) and len(norm_p) > 4:
|
|||
|
|
p_id = db_pid
|
|||
|
|
break
|
|||
|
|
|
|||
|
|
if p_id and t_id:
|
|||
|
|
to_insert.append((region_id, t_id, p_id))
|
|||
|
|
matches += 1
|
|||
|
|
else:
|
|||
|
|
if not p_id:
|
|||
|
|
print(f"Unmatched Excel Permit: '{p_name}'")
|
|||
|
|
# Suggest candidates
|
|||
|
|
candidates = []
|
|||
|
|
for db_name in db_permits.keys():
|
|||
|
|
if p_name in db_name or db_name in p_name:
|
|||
|
|
candidates.append(db_name)
|
|||
|
|
if candidates:
|
|||
|
|
print(f" Possible DB Candidates: {candidates}")
|
|||
|
|
if not t_id:
|
|||
|
|
pass
|
|||
|
|
mismatches += 1
|
|||
|
|
|
|||
|
|
print(f"\nPotential bindings: {matches}, Unmatched: {mismatches}")
|
|||
|
|
|
|||
|
|
if to_insert:
|
|||
|
|
print(f"Inserting {len(to_insert)} bindings...")
|
|||
|
|
for rid, tid, pid in to_insert:
|
|||
|
|
cur.execute("""
|
|||
|
|
INSERT INTO region_theme_permits (region_id, theme_id, permit_id)
|
|||
|
|
VALUES (%s, %s, %s)
|
|||
|
|
ON CONFLICT (region_id, theme_id, permit_id) DO NOTHING
|
|||
|
|
""", (rid, tid, pid))
|
|||
|
|
conn.commit()
|
|||
|
|
print("Bindings inserted/updated.")
|
|||
|
|
|
|||
|
|
conn.close()
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"Error: {e}")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
fix_bindings()
|