import os import pandas as pd import pg8000.dbapi as pg from lawrisk.utils.env_loader import load_env def normalize(text): if not text: return "" return str(text).replace("(", "").replace(")", "").replace("(", "").replace(")", "").replace("许可", "").replace("证", "").replace("审批", "").replace("核发", "").replace("备案", "").replace("业务", "").replace("从事", "").replace("企业", "").replace("设立", "").replace("变更", "").replace(" ", "").strip() def force_bind(): load_env() conn_params = { "host": os.getenv("LIC_PG_HOST", "172.24.240.1"), "port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))), "user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")), "password": os.getenv("LIC_PG_PASSWORD", ""), "database": os.getenv("LIC_PG_DATABASE", "licensing_risks"), } # Manual Mappings (Excel Name -> List of DB Names) manual_map = { "食品经营许可": ["食品经营许可"], "旅客住宿服务": ["旅馆业特种行业许可证核发"], "开设旅馆": ["旅馆业特种行业许可证核发"], "旅馆业特种行业许可": ["旅馆业特种行业许可证核发"], "药品经营许可(零售)": ["药品经营许可证(零售)"], "食品生产许可": ["食品生产许可"], "建设项目环境影响报告表审批": ["建设项目环境影响评价文件审批(广东省厅事项名称) 【国家标准名:“建设项目环境影响评价审批(海洋工程、核与辐射类除外)”】"], "娱乐场所审批": [ "歌舞娱乐场所从事娱乐场所经营 activity 审批", "游艺娱乐场所从事娱乐场所经营活动审批,内资娱乐场所变更、延续、补证、注销审批" ], "第二类医疗器械经营备案": ["第二类医疗器械经营备案"], "经营高危险性体育项目许可": ["经营高危险性体育项目许可"], "互联网上网服务营业场所经营单位审批": ["互联网上网服务营业场所信息网络安全审核"], } # 1. Load Excel Map excel_path = "主题-事项绑定.xlsx" if not os.path.exists(excel_path): print(f"Excel file not found: {excel_path}") return print("Reading Excel...") df = pd.read_excel(excel_path, header=1) df.columns = [str(c).strip() for c in df.columns] col_theme = '主题' if col_theme not in df.columns: for c in df.columns: if '主题' in c: col_theme = c; break col_permit = '审批事项' if col_permit not in df.columns: for c in df.columns: if '事项' in c and c != col_theme: col_permit = c; break if col_theme not in df.columns or col_permit not in df.columns: print(f"Columns not found in Excel. Found: {df.columns.tolist()}") return df[col_theme] = df[col_theme].ffill() permit_to_themes = {} for idx, row in df.iterrows(): p = str(row[col_permit]).strip() t = str(row[col_theme]).strip() if p and t and p != 'nan' and t != 'nan': if p not in permit_to_themes: permit_to_themes[p] = [] if t not in permit_to_themes[p]: permit_to_themes[p].append(t) print(f"Loaded {len(permit_to_themes)} unique permit names from Excel.") # 2. Connect DB conn = pg.connect(**conn_params) cur = conn.cursor() # 3. Get Region ID cur.execute("SELECT id FROM regions WHERE name = '市级'") res = cur.fetchone() if not res: print("Region '市级' not found in DB.") return region_id = res[0] # 4. Get DB Themes cur.execute("SELECT id, name FROM themes") db_themes = {row[1]: row[0] for row in cur.fetchall()} # Name -> ID # Import missing themes if any all_excel_themes = set() for t_list in permit_to_themes.values(): for t in t_list: all_excel_themes.add(t) for t_name in all_excel_themes: if t_name not in db_themes: print(f"Creating missing theme: {t_name}") cur.execute("INSERT INTO themes (id, name) VALUES (%s, %s)", (str(pg.uuid.uuid4()), t_name)) cur.execute("SELECT id FROM themes WHERE name = %s", (t_name,)) tid = cur.fetchone()[0] db_themes[t_name] = tid conn.commit() # 5. Invert Manual Map for lookup inverted_manual = {} for excel_n, db_list in manual_map.items(): if isinstance(db_list, str): db_list = [db_list] for db_n in db_list: if db_n not in inverted_manual: inverted_manual[db_n] = [] inverted_manual[db_n].append(excel_n) # 6. Get DB Permits for '市级' cur.execute(""" SELECT p.id, p.name FROM region_permit_details rpd JOIN permits p ON p.id = rpd.permit_id WHERE rpd.region_id = %s """, (region_id,)) db_permits = cur.fetchall() # (id, name) print(f"Scanning {len(db_permits)} DB permits for binding...") bind_count = 0 for pid, pname in db_permits: matched_themes = [] # A. Manual Map if pname in inverted_manual: for en in inverted_manual[pname]: if en in permit_to_themes: matched_themes.extend(permit_to_themes[en]) if matched_themes: print(f"Manual Match: '{pname}'") # B. Exact Match if not matched_themes: matched_themes = permit_to_themes.get(pname, []) if matched_themes: print(f"Exact Match: '{pname}'") # C. Fuzzy Match if not matched_themes: p_norm = normalize(pname) best_match = None max_score = 0 for excel_p in permit_to_themes.keys(): e_norm = normalize(excel_p) if e_norm in p_norm or p_norm in e_norm: score = len(set(e_norm) & set(p_norm)) if score > max_score and score >= 2: max_score = score best_match = excel_p if best_match: matched_themes = permit_to_themes[best_match] print(f"Fuzzy Match: '{pname}' -> '{best_match}'") if matched_themes: for t_name in set(matched_themes): tid = db_themes.get(t_name) if tid: cur.execute(""" INSERT INTO region_theme_permits (region_id, theme_id, permit_id) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING """, (region_id, tid, pid)) bind_count += 1 else: print(f"Theme '{t_name}' not found in DB themes map.") conn.commit() conn.close() print(f"Total bindings created/verified: {bind_count}") if __name__ == "__main__": force_bind()