197 lines
8.0 KiB
Python
197 lines
8.0 KiB
Python
|
||
import os
|
||
import pandas as pd
|
||
import pg8000.dbapi as pg
|
||
from lawrisk.utils.env_loader import load_env
|
||
from lawrisk.services import licensing_repo as lic_repo
|
||
|
||
def bind_v2():
|
||
load_env()
|
||
|
||
# 1. Load Excel Map
|
||
excel_path = os.path.join("data", "主题-事项绑定.xlsx")
|
||
if not os.path.exists(excel_path):
|
||
# Try root as fallback
|
||
excel_path = "主题-事项绑定.xlsx"
|
||
if not os.path.exists(excel_path):
|
||
print(f"Excel file not found!")
|
||
return
|
||
|
||
print(f"Reading Excel: {excel_path}")
|
||
df = pd.read_excel(excel_path, header=1)
|
||
df.columns = [str(c).strip() for c in df.columns]
|
||
|
||
# Identify columns
|
||
col_theme = '主题'
|
||
if col_theme not in df.columns:
|
||
for c in df.columns:
|
||
if '主题' in c: col_theme = c; break
|
||
|
||
col_permit = '审批事项'
|
||
if col_permit not in df.columns:
|
||
for c in df.columns:
|
||
if '事项' in c and c != col_theme: col_permit = c; break
|
||
|
||
if col_theme not in df.columns or col_permit not in df.columns:
|
||
print(f"Columns not found in Excel. Found: {df.columns.tolist()}")
|
||
return
|
||
|
||
# Forward fill theme names as they are usually merged in Excel
|
||
df[col_theme] = df[col_theme].ffill()
|
||
|
||
permit_to_themes = {}
|
||
for idx, row in df.iterrows():
|
||
p = str(row[col_permit]).strip()
|
||
t = str(row[col_theme]).strip()
|
||
if p and t and p != 'nan' and t != 'nan':
|
||
if p not in permit_to_themes:
|
||
permit_to_themes[p] = []
|
||
if t not in permit_to_themes[p]:
|
||
permit_to_themes[p].append(t)
|
||
|
||
print(f"Loaded {len(permit_to_themes)} unique permit names from Excel mapping.")
|
||
|
||
# 2. Connect DB
|
||
conn = lic_repo._lic_pg_conn()
|
||
cur = conn.cursor()
|
||
|
||
try:
|
||
# 3. Get Region ID for '市级' (Main target)
|
||
cur.execute("SELECT id FROM regions WHERE name = '市级'")
|
||
res = cur.fetchone()
|
||
if not res:
|
||
print("Region '市级' not found in DB.")
|
||
return
|
||
region_id = res[0]
|
||
|
||
# 4. Clear existing bindings for '市级' to ensure fresh re-mapping
|
||
print(f"Clearing existing bindings for region '市级'...")
|
||
cur.execute("DELETE FROM region_theme_permits WHERE region_id = %s", (region_id,))
|
||
print(f"Deleted {cur.rowcount} old bindings.")
|
||
|
||
# 5. Ensure all themes from Excel exist in DB
|
||
cur.execute("SELECT id, name FROM themes")
|
||
db_themes = {row[1]: str(row[0]) for row in cur.fetchall()}
|
||
|
||
all_excel_themes = set()
|
||
for t_list in permit_to_themes.values():
|
||
for t in t_list:
|
||
all_excel_themes.add(t)
|
||
|
||
for t_name in all_excel_themes:
|
||
if t_name not in db_themes:
|
||
print(f"Creating missing theme: {t_name}")
|
||
new_id = str(pg.uuid.uuid4())
|
||
cur.execute("INSERT INTO themes (id, name) VALUES (%s, %s)", (new_id, t_name))
|
||
db_themes[t_name] = new_id
|
||
|
||
# 6. Define mapping overrides for messy names
|
||
manual_map = {
|
||
"食品经营许可": ["食品经营许可"],
|
||
"旅客住宿服务": ["旅馆业特种行业许可证核发"],
|
||
"开设旅馆": ["旅馆业特种行业许可证核发"],
|
||
"旅馆业特种行业许可": ["旅馆业特种行业许可证核发"],
|
||
"药品经营许可(零售)": ["药品经营许可证(零售)"],
|
||
"食品生产许可": ["食品生产许可"],
|
||
"建设项目环境影响报告表审批": ["建设项目环境影响评价文件审批(广东省厅事项名称) 【国家标准名:“建设项目环境影响评价审批(海洋工程、核与辐射类除外)”】"],
|
||
"娱乐场所审批": [
|
||
"歌舞娱乐场所从事娱乐场所经营 activity 审批",
|
||
"游艺娱乐场所从事娱乐场所经营活动审批,内资娱乐场所变更、延续、补证、注销审批"
|
||
],
|
||
"第二类医疗器械经营备案": ["第二类医疗器械经营备案"],
|
||
"经营高危险性体育项目许可": ["经营高危险性体育项目许可"],
|
||
"互联网上网服务营业场所经营单位审批": ["互联网上网服务营业场所信息网络安全审核"],
|
||
"营业执照": ["营业执照"],
|
||
}
|
||
|
||
inverted_manual = {}
|
||
for excel_n, db_list in manual_map.items():
|
||
for db_n in db_list:
|
||
inverted_manual.setdefault(db_n, []).append(excel_n)
|
||
|
||
# 7. Get DB Permits for '市级'
|
||
cur.execute("""
|
||
SELECT p.id, p.name
|
||
FROM region_permit_details rpd
|
||
JOIN permits p ON p.id = rpd.permit_id
|
||
WHERE rpd.region_id = %s
|
||
""", (region_id,))
|
||
db_permits = cur.fetchall()
|
||
print(f"Scanning {len(db_permits)} DB permits for binding...")
|
||
|
||
bind_count = 0
|
||
|
||
def normalize_func(text):
|
||
if not text: return ""
|
||
# Remove common suffixes and punctuation for better fuzzy matching
|
||
return str(text).replace("(", "").replace(")", "").replace("(", "").replace(")", "").replace("许可", "").replace("证", "").replace("审批", "").replace("核发", "").replace("备案", "").replace("业务", "").replace("从事", "").replace("企业", "").replace("设立", "").replace("变更", "").replace(" ", "").strip()
|
||
|
||
processed = 0
|
||
all_bindings = []
|
||
for pid_raw, pname in db_permits:
|
||
processed += 1
|
||
pid = str(pid_raw)
|
||
matched_excel_names = []
|
||
|
||
# A. Manual Map lookup (Case insensitive)
|
||
for db_n, excel_names in inverted_manual.items():
|
||
if db_n.lower() == pname.lower():
|
||
matched_excel_names.extend(excel_names)
|
||
|
||
# B. Exact Match (Case insensitive)
|
||
for en in permit_to_themes.keys():
|
||
if en.lower() == pname.lower():
|
||
matched_excel_names.append(en)
|
||
|
||
# C. Fuzzy Match fallback
|
||
if not matched_excel_names:
|
||
p_norm = normalize_func(pname)
|
||
best_match = None
|
||
max_score = 0
|
||
for excel_p in permit_to_themes.keys():
|
||
e_norm = normalize_func(excel_p)
|
||
if not e_norm or not p_norm: continue
|
||
if e_norm in p_norm or p_norm in e_norm:
|
||
# Character intersection score
|
||
score = len(set(e_norm) & set(p_norm))
|
||
if score > max_score and score >= 2:
|
||
max_score = score
|
||
best_match = excel_p
|
||
if best_match:
|
||
matched_excel_names.append(best_match)
|
||
|
||
# Perform binding for all unique themes associated with matched names
|
||
final_themes = set()
|
||
for en in matched_excel_names:
|
||
if en in permit_to_themes:
|
||
for t in permit_to_themes[en]:
|
||
final_themes.add(t)
|
||
|
||
for t_name in final_themes:
|
||
tid = db_themes.get(t_name)
|
||
if tid:
|
||
print(f" -> Binding to Theme: {t_name}")
|
||
all_bindings.append((region_id, tid, pid))
|
||
else:
|
||
print(f"Warning: Theme '{t_name}' not found in DB maps.")
|
||
|
||
print(f"Inserting {len(all_bindings)} new bindings...")
|
||
for b in all_bindings:
|
||
cur.execute("""
|
||
INSERT INTO region_theme_permits (region_id, theme_id, permit_id)
|
||
VALUES (%s, %s, %s)
|
||
ON CONFLICT DO NOTHING
|
||
""", b)
|
||
bind_count += (1 if cur.rowcount > 0 else 0)
|
||
|
||
conn.commit()
|
||
print(f"Update complete. Created {bind_count} bindings for '市级' region.")
|
||
except Exception as e:
|
||
conn.rollback()
|
||
print(f"Error during update: {e}")
|
||
finally:
|
||
conn.close()
|
||
|
||
if __name__ == "__main__":
|
||
bind_v2()
|