197 lines
8.0 KiB
Python
197 lines
8.0 KiB
Python
|
|
|
|||
|
|
import os
|
|||
|
|
import pandas as pd
|
|||
|
|
import pg8000.dbapi as pg
|
|||
|
|
from lawrisk.utils.env_loader import load_env
|
|||
|
|
from lawrisk.services import licensing_repo as lic_repo
|
|||
|
|
|
|||
|
|
def bind_v2():
|
|||
|
|
load_env()
|
|||
|
|
|
|||
|
|
# 1. Load Excel Map
|
|||
|
|
excel_path = os.path.join("data", "主题-事项绑定.xlsx")
|
|||
|
|
if not os.path.exists(excel_path):
|
|||
|
|
# Try root as fallback
|
|||
|
|
excel_path = "主题-事项绑定.xlsx"
|
|||
|
|
if not os.path.exists(excel_path):
|
|||
|
|
print(f"Excel file not found!")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
print(f"Reading Excel: {excel_path}")
|
|||
|
|
df = pd.read_excel(excel_path, header=1)
|
|||
|
|
df.columns = [str(c).strip() for c in df.columns]
|
|||
|
|
|
|||
|
|
# Identify columns
|
|||
|
|
col_theme = '主题'
|
|||
|
|
if col_theme not in df.columns:
|
|||
|
|
for c in df.columns:
|
|||
|
|
if '主题' in c: col_theme = c; break
|
|||
|
|
|
|||
|
|
col_permit = '审批事项'
|
|||
|
|
if col_permit not in df.columns:
|
|||
|
|
for c in df.columns:
|
|||
|
|
if '事项' in c and c != col_theme: col_permit = c; break
|
|||
|
|
|
|||
|
|
if col_theme not in df.columns or col_permit not in df.columns:
|
|||
|
|
print(f"Columns not found in Excel. Found: {df.columns.tolist()}")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
# Forward fill theme names as they are usually merged in Excel
|
|||
|
|
df[col_theme] = df[col_theme].ffill()
|
|||
|
|
|
|||
|
|
permit_to_themes = {}
|
|||
|
|
for idx, row in df.iterrows():
|
|||
|
|
p = str(row[col_permit]).strip()
|
|||
|
|
t = str(row[col_theme]).strip()
|
|||
|
|
if p and t and p != 'nan' and t != 'nan':
|
|||
|
|
if p not in permit_to_themes:
|
|||
|
|
permit_to_themes[p] = []
|
|||
|
|
if t not in permit_to_themes[p]:
|
|||
|
|
permit_to_themes[p].append(t)
|
|||
|
|
|
|||
|
|
print(f"Loaded {len(permit_to_themes)} unique permit names from Excel mapping.")
|
|||
|
|
|
|||
|
|
# 2. Connect DB
|
|||
|
|
conn = lic_repo._lic_pg_conn()
|
|||
|
|
cur = conn.cursor()
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
# 3. Get Region ID for '市级' (Main target)
|
|||
|
|
cur.execute("SELECT id FROM regions WHERE name = '市级'")
|
|||
|
|
res = cur.fetchone()
|
|||
|
|
if not res:
|
|||
|
|
print("Region '市级' not found in DB.")
|
|||
|
|
return
|
|||
|
|
region_id = res[0]
|
|||
|
|
|
|||
|
|
# 4. Clear existing bindings for '市级' to ensure fresh re-mapping
|
|||
|
|
print(f"Clearing existing bindings for region '市级'...")
|
|||
|
|
cur.execute("DELETE FROM region_theme_permits WHERE region_id = %s", (region_id,))
|
|||
|
|
print(f"Deleted {cur.rowcount} old bindings.")
|
|||
|
|
|
|||
|
|
# 5. Ensure all themes from Excel exist in DB
|
|||
|
|
cur.execute("SELECT id, name FROM themes")
|
|||
|
|
db_themes = {row[1]: str(row[0]) for row in cur.fetchall()}
|
|||
|
|
|
|||
|
|
all_excel_themes = set()
|
|||
|
|
for t_list in permit_to_themes.values():
|
|||
|
|
for t in t_list:
|
|||
|
|
all_excel_themes.add(t)
|
|||
|
|
|
|||
|
|
for t_name in all_excel_themes:
|
|||
|
|
if t_name not in db_themes:
|
|||
|
|
print(f"Creating missing theme: {t_name}")
|
|||
|
|
new_id = str(pg.uuid.uuid4())
|
|||
|
|
cur.execute("INSERT INTO themes (id, name) VALUES (%s, %s)", (new_id, t_name))
|
|||
|
|
db_themes[t_name] = new_id
|
|||
|
|
|
|||
|
|
# 6. Define mapping overrides for messy names
|
|||
|
|
manual_map = {
|
|||
|
|
"食品经营许可": ["食品经营许可"],
|
|||
|
|
"旅客住宿服务": ["旅馆业特种行业许可证核发"],
|
|||
|
|
"开设旅馆": ["旅馆业特种行业许可证核发"],
|
|||
|
|
"旅馆业特种行业许可": ["旅馆业特种行业许可证核发"],
|
|||
|
|
"药品经营许可(零售)": ["药品经营许可证(零售)"],
|
|||
|
|
"食品生产许可": ["食品生产许可"],
|
|||
|
|
"建设项目环境影响报告表审批": ["建设项目环境影响评价文件审批(广东省厅事项名称) 【国家标准名:“建设项目环境影响评价审批(海洋工程、核与辐射类除外)”】"],
|
|||
|
|
"娱乐场所审批": [
|
|||
|
|
"歌舞娱乐场所从事娱乐场所经营 activity 审批",
|
|||
|
|
"游艺娱乐场所从事娱乐场所经营活动审批,内资娱乐场所变更、延续、补证、注销审批"
|
|||
|
|
],
|
|||
|
|
"第二类医疗器械经营备案": ["第二类医疗器械经营备案"],
|
|||
|
|
"经营高危险性体育项目许可": ["经营高危险性体育项目许可"],
|
|||
|
|
"互联网上网服务营业场所经营单位审批": ["互联网上网服务营业场所信息网络安全审核"],
|
|||
|
|
"营业执照": ["营业执照"],
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
inverted_manual = {}
|
|||
|
|
for excel_n, db_list in manual_map.items():
|
|||
|
|
for db_n in db_list:
|
|||
|
|
inverted_manual.setdefault(db_n, []).append(excel_n)
|
|||
|
|
|
|||
|
|
# 7. Get DB Permits for '市级'
|
|||
|
|
cur.execute("""
|
|||
|
|
SELECT p.id, p.name
|
|||
|
|
FROM region_permit_details rpd
|
|||
|
|
JOIN permits p ON p.id = rpd.permit_id
|
|||
|
|
WHERE rpd.region_id = %s
|
|||
|
|
""", (region_id,))
|
|||
|
|
db_permits = cur.fetchall()
|
|||
|
|
print(f"Scanning {len(db_permits)} DB permits for binding...")
|
|||
|
|
|
|||
|
|
bind_count = 0
|
|||
|
|
|
|||
|
|
def normalize_func(text):
|
|||
|
|
if not text: return ""
|
|||
|
|
# Remove common suffixes and punctuation for better fuzzy matching
|
|||
|
|
return str(text).replace("(", "").replace(")", "").replace("(", "").replace(")", "").replace("许可", "").replace("证", "").replace("审批", "").replace("核发", "").replace("备案", "").replace("业务", "").replace("从事", "").replace("企业", "").replace("设立", "").replace("变更", "").replace(" ", "").strip()
|
|||
|
|
|
|||
|
|
processed = 0
|
|||
|
|
all_bindings = []
|
|||
|
|
for pid_raw, pname in db_permits:
|
|||
|
|
processed += 1
|
|||
|
|
pid = str(pid_raw)
|
|||
|
|
matched_excel_names = []
|
|||
|
|
|
|||
|
|
# A. Manual Map lookup (Case insensitive)
|
|||
|
|
for db_n, excel_names in inverted_manual.items():
|
|||
|
|
if db_n.lower() == pname.lower():
|
|||
|
|
matched_excel_names.extend(excel_names)
|
|||
|
|
|
|||
|
|
# B. Exact Match (Case insensitive)
|
|||
|
|
for en in permit_to_themes.keys():
|
|||
|
|
if en.lower() == pname.lower():
|
|||
|
|
matched_excel_names.append(en)
|
|||
|
|
|
|||
|
|
# C. Fuzzy Match fallback
|
|||
|
|
if not matched_excel_names:
|
|||
|
|
p_norm = normalize_func(pname)
|
|||
|
|
best_match = None
|
|||
|
|
max_score = 0
|
|||
|
|
for excel_p in permit_to_themes.keys():
|
|||
|
|
e_norm = normalize_func(excel_p)
|
|||
|
|
if not e_norm or not p_norm: continue
|
|||
|
|
if e_norm in p_norm or p_norm in e_norm:
|
|||
|
|
# Character intersection score
|
|||
|
|
score = len(set(e_norm) & set(p_norm))
|
|||
|
|
if score > max_score and score >= 2:
|
|||
|
|
max_score = score
|
|||
|
|
best_match = excel_p
|
|||
|
|
if best_match:
|
|||
|
|
matched_excel_names.append(best_match)
|
|||
|
|
|
|||
|
|
# Perform binding for all unique themes associated with matched names
|
|||
|
|
final_themes = set()
|
|||
|
|
for en in matched_excel_names:
|
|||
|
|
if en in permit_to_themes:
|
|||
|
|
for t in permit_to_themes[en]:
|
|||
|
|
final_themes.add(t)
|
|||
|
|
|
|||
|
|
for t_name in final_themes:
|
|||
|
|
tid = db_themes.get(t_name)
|
|||
|
|
if tid:
|
|||
|
|
print(f" -> Binding to Theme: {t_name}")
|
|||
|
|
all_bindings.append((region_id, tid, pid))
|
|||
|
|
else:
|
|||
|
|
print(f"Warning: Theme '{t_name}' not found in DB maps.")
|
|||
|
|
|
|||
|
|
print(f"Inserting {len(all_bindings)} new bindings...")
|
|||
|
|
for b in all_bindings:
|
|||
|
|
cur.execute("""
|
|||
|
|
INSERT INTO region_theme_permits (region_id, theme_id, permit_id)
|
|||
|
|
VALUES (%s, %s, %s)
|
|||
|
|
ON CONFLICT DO NOTHING
|
|||
|
|
""", b)
|
|||
|
|
bind_count += (1 if cur.rowcount > 0 else 0)
|
|||
|
|
|
|||
|
|
conn.commit()
|
|||
|
|
print(f"Update complete. Created {bind_count} bindings for '市级' region.")
|
|||
|
|
except Exception as e:
|
|||
|
|
conn.rollback()
|
|||
|
|
print(f"Error during update: {e}")
|
|||
|
|
finally:
|
|||
|
|
conn.close()
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
bind_v2()
|