fs-lawrisk/tools/force_bind_themes.py

182 lines
7.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import pandas as pd
import pg8000.dbapi as pg
from lawrisk.utils.env_loader import load_env
def normalize(text):
if not text: return ""
return str(text).replace("", "").replace("", "").replace("(", "").replace(")", "").replace("许可", "").replace("", "").replace("审批", "").replace("核发", "").replace("备案", "").replace("业务", "").replace("从事", "").replace("企业", "").replace("设立", "").replace("变更", "").replace(" ", "").strip()
def force_bind():
load_env()
conn_params = {
"host": os.getenv("LIC_PG_HOST", "172.24.240.1"),
"port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))),
"user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")),
"password": os.getenv("LIC_PG_PASSWORD", ""),
"database": os.getenv("LIC_PG_DATABASE", "licensing_risks"),
}
# Manual Mappings (Excel Name -> List of DB Names)
manual_map = {
"食品经营许可": ["食品经营许可"],
"旅客住宿服务": ["旅馆业特种行业许可证核发"],
"开设旅馆": ["旅馆业特种行业许可证核发"],
"旅馆业特种行业许可": ["旅馆业特种行业许可证核发"],
"药品经营许可(零售)": ["药品经营许可证(零售)"],
"食品生产许可": ["食品生产许可"],
"建设项目环境影响报告表审批": ["建设项目环境影响评价文件审批(广东省厅事项名称) 【国家标准名:“建设项目环境影响评价审批(海洋工程、核与辐射类除外)”】"],
"娱乐场所审批": [
"歌舞娱乐场所从事娱乐场所经营 activity 审批",
"游艺娱乐场所从事娱乐场所经营活动审批,内资娱乐场所变更、延续、补证、注销审批"
],
"第二类医疗器械经营备案": ["第二类医疗器械经营备案"],
"经营高危险性体育项目许可": ["经营高危险性体育项目许可"],
"互联网上网服务营业场所经营单位审批": ["互联网上网服务营业场所信息网络安全审核"],
}
# 1. Load Excel Map
excel_path = "主题-事项绑定.xlsx"
if not os.path.exists(excel_path):
print(f"Excel file not found: {excel_path}")
return
print("Reading Excel...")
df = pd.read_excel(excel_path, header=1)
df.columns = [str(c).strip() for c in df.columns]
col_theme = '主题'
if col_theme not in df.columns:
for c in df.columns:
if '主题' in c: col_theme = c; break
col_permit = '审批事项'
if col_permit not in df.columns:
for c in df.columns:
if '事项' in c and c != col_theme: col_permit = c; break
if col_theme not in df.columns or col_permit not in df.columns:
print(f"Columns not found in Excel. Found: {df.columns.tolist()}")
return
df[col_theme] = df[col_theme].ffill()
permit_to_themes = {}
for idx, row in df.iterrows():
p = str(row[col_permit]).strip()
t = str(row[col_theme]).strip()
if p and t and p != 'nan' and t != 'nan':
if p not in permit_to_themes:
permit_to_themes[p] = []
if t not in permit_to_themes[p]:
permit_to_themes[p].append(t)
print(f"Loaded {len(permit_to_themes)} unique permit names from Excel.")
# 2. Connect DB
conn = pg.connect(**conn_params)
cur = conn.cursor()
# 3. Get Region ID
cur.execute("SELECT id FROM regions WHERE name = '市级'")
res = cur.fetchone()
if not res:
print("Region '市级' not found in DB.")
return
region_id = res[0]
# 4. Get DB Themes
cur.execute("SELECT id, name FROM themes")
db_themes = {row[1]: row[0] for row in cur.fetchall()} # Name -> ID
# Import missing themes if any
all_excel_themes = set()
for t_list in permit_to_themes.values():
for t in t_list:
all_excel_themes.add(t)
for t_name in all_excel_themes:
if t_name not in db_themes:
print(f"Creating missing theme: {t_name}")
cur.execute("INSERT INTO themes (id, name) VALUES (%s, %s)", (str(pg.uuid.uuid4()), t_name))
cur.execute("SELECT id FROM themes WHERE name = %s", (t_name,))
tid = cur.fetchone()[0]
db_themes[t_name] = tid
conn.commit()
# 5. Invert Manual Map for lookup
inverted_manual = {}
for excel_n, db_list in manual_map.items():
if isinstance(db_list, str): db_list = [db_list]
for db_n in db_list:
if db_n not in inverted_manual: inverted_manual[db_n] = []
inverted_manual[db_n].append(excel_n)
# 6. Get DB Permits for '市级'
cur.execute("""
SELECT p.id, p.name
FROM region_permit_details rpd
JOIN permits p ON p.id = rpd.permit_id
WHERE rpd.region_id = %s
""", (region_id,))
db_permits = cur.fetchall() # (id, name)
print(f"Scanning {len(db_permits)} DB permits for binding...")
bind_count = 0
for pid, pname in db_permits:
matched_themes = []
# A. Manual Map
if pname in inverted_manual:
for en in inverted_manual[pname]:
if en in permit_to_themes:
matched_themes.extend(permit_to_themes[en])
if matched_themes:
print(f"Manual Match: '{pname}'")
# B. Exact Match
if not matched_themes:
matched_themes = permit_to_themes.get(pname, [])
if matched_themes:
print(f"Exact Match: '{pname}'")
# C. Fuzzy Match
if not matched_themes:
p_norm = normalize(pname)
best_match = None
max_score = 0
for excel_p in permit_to_themes.keys():
e_norm = normalize(excel_p)
if e_norm in p_norm or p_norm in e_norm:
score = len(set(e_norm) & set(p_norm))
if score > max_score and score >= 2:
max_score = score
best_match = excel_p
if best_match:
matched_themes = permit_to_themes[best_match]
print(f"Fuzzy Match: '{pname}' -> '{best_match}'")
if matched_themes:
for t_name in set(matched_themes):
tid = db_themes.get(t_name)
if tid:
cur.execute("""
INSERT INTO region_theme_permits (region_id, theme_id, permit_id)
VALUES (%s, %s, %s)
ON CONFLICT DO NOTHING
""", (region_id, tid, pid))
bind_count += 1
else:
print(f"Theme '{t_name}' not found in DB themes map.")
conn.commit()
conn.close()
print(f"Total bindings created/verified: {bind_count}")
if __name__ == "__main__":
force_bind()