import os import pandas as pd import pg8000.dbapi as pg from lawrisk.utils.env_loader import load_env def fix_bindings(): load_env() conn_params = { "host": os.getenv("LIC_PG_HOST", "172.24.240.1"), "port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))), "user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")), "password": os.getenv("LIC_PG_PASSWORD", ""), "database": os.getenv("LIC_PG_DATABASE", "licensing_risks"), } excel_path = "主题-事项绑定.xlsx" if not os.path.exists(excel_path): print(f"Excel file not found: {excel_path}") return print("Reading Excel...") try: # Try header at row 1 (0-indexed) which is row 2 in Excel df = pd.read_excel(excel_path, header=1) except Exception as e: print(f"Read error: {e}") return # Normalize column names print("Raw Columns:", df.columns.tolist()) df.columns = [str(c).strip() for c in df.columns] print("Normalized Columns:", df.columns.tolist()) # Fill merged cells (forward fill) col_theme = '主题' if col_theme not in df.columns: # Prefer "主题" only, but "主题事项" is okay if "主题" not found candidates = [c for c in df.columns if "主题" in c] if candidates: # Sort by length, prefer shorter '主题' candidates.sort(key=len) col_theme = candidates[0] print(f"Using Theme Column: '{col_theme}'") if col_theme in df.columns: df[col_theme] = df[col_theme].fillna(method='ffill') else: print("Theme column missing! Aborting.") return col_permit = '审批事项' # Default to what we saw if col_permit not in df.columns: # heuristic candidates = [] for c in df.columns: if "审批事项" in c: candidates.append(c) elif "许可事项" in c: candidates.append(c) elif "事项" in c and c != col_theme: candidates.append(c) if candidates: col_permit = candidates[0] print(f"Using columns: Theme='{col_theme}', Permit='{col_permit}'") excel_bindings = df[[col_theme, col_permit]].dropna(subset=[col_permit]) # Also drop if theme is null (though we filled it) excel_bindings = excel_bindings.dropna(subset=[col_theme]) print(f"Found {len(excel_bindings)} bindings in Excel.") try: conn = pg.connect(**conn_params) cur = conn.cursor() # Get DB Permits (市级) print("Fetching DB Permits...") cur.execute(""" SELECT p.id, p.name FROM region_permit_details rpd JOIN permits p ON p.id = rpd.permit_id JOIN regions r ON r.id = rpd.region_id WHERE r.name = '市级' """) db_permits = {row[1].strip(): row[0] for row in cur.fetchall()} print(f"Found {len(db_permits)} permits in DB for '市级'.") # Get Themes cur.execute("SELECT id, name FROM themes") themes = {row[1].strip(): row[0] for row in cur.fetchall()} print(f"Found {len(themes)} themes in DB.") # Check if themes are populated. If only 1 themes, maybe we need to import themes? # Assuming themes table is populated from previous sessions. # Get Region ID cur.execute("SELECT id FROM regions WHERE name = '市级'") region_id = cur.fetchone()[0] matches = 0 mismatches = 0 to_insert = [] # Helper for fuzzy matching def normalize_name(n): return n.replace("许可", "").replace("证", "").replace("(", "").replace(")", "").replace("(", "").replace(")", "").strip() for idx, row in excel_bindings.iterrows(): t_name = str(row[col_theme]).strip() p_name = str(row[col_permit]).strip() # 1. Resolve Theme t_id = themes.get(t_name) if not t_id: # Try creating theme? Or fuzzy match theme? pass # 2. Resolve Permit p_id = db_permits.get(p_name) if not p_id: # Fuzzy norm_p = normalize_name(p_name) for db_name, db_pid in db_permits.items(): if normalize_name(db_name) == norm_p: p_id = db_pid # print(f"Fuzzy Match: '{p_name}' -> '{db_name}'") break # Try containment if norm_p in normalize_name(db_name) and len(norm_p) > 4: p_id = db_pid break if p_id and t_id: to_insert.append((region_id, t_id, p_id)) matches += 1 else: if not p_id: print(f"Unmatched Excel Permit: '{p_name}'") # Suggest candidates candidates = [] for db_name in db_permits.keys(): if p_name in db_name or db_name in p_name: candidates.append(db_name) if candidates: print(f" Possible DB Candidates: {candidates}") if not t_id: pass mismatches += 1 print(f"\nPotential bindings: {matches}, Unmatched: {mismatches}") if to_insert: print(f"Inserting {len(to_insert)} bindings...") for rid, tid, pid in to_insert: cur.execute(""" INSERT INTO region_theme_permits (region_id, theme_id, permit_id) VALUES (%s, %s, %s) ON CONFLICT (region_id, theme_id, permit_id) DO NOTHING """, (rid, tid, pid)) conn.commit() print("Bindings inserted/updated.") conn.close() except Exception as e: print(f"Error: {e}") if __name__ == "__main__": fix_bindings()