182 lines
7.0 KiB
Python
182 lines
7.0 KiB
Python
|
|
|
|||
|
|
import os
|
|||
|
|
import pandas as pd
|
|||
|
|
import pg8000.dbapi as pg
|
|||
|
|
from lawrisk.utils.env_loader import load_env
|
|||
|
|
|
|||
|
|
def normalize(text):
|
|||
|
|
if not text: return ""
|
|||
|
|
return str(text).replace("(", "").replace(")", "").replace("(", "").replace(")", "").replace("许可", "").replace("证", "").replace("审批", "").replace("核发", "").replace("备案", "").replace("业务", "").replace("从事", "").replace("企业", "").replace("设立", "").replace("变更", "").replace(" ", "").strip()
|
|||
|
|
|
|||
|
|
def force_bind():
|
|||
|
|
load_env()
|
|||
|
|
conn_params = {
|
|||
|
|
"host": os.getenv("LIC_PG_HOST", "172.24.240.1"),
|
|||
|
|
"port": int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))),
|
|||
|
|
"user": os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")),
|
|||
|
|
"password": os.getenv("LIC_PG_PASSWORD", ""),
|
|||
|
|
"database": os.getenv("LIC_PG_DATABASE", "licensing_risks"),
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# Manual Mappings (Excel Name -> List of DB Names)
|
|||
|
|
manual_map = {
|
|||
|
|
"食品经营许可": ["食品经营许可"],
|
|||
|
|
"旅客住宿服务": ["旅馆业特种行业许可证核发"],
|
|||
|
|
"开设旅馆": ["旅馆业特种行业许可证核发"],
|
|||
|
|
"旅馆业特种行业许可": ["旅馆业特种行业许可证核发"],
|
|||
|
|
"药品经营许可(零售)": ["药品经营许可证(零售)"],
|
|||
|
|
"食品生产许可": ["食品生产许可"],
|
|||
|
|
"建设项目环境影响报告表审批": ["建设项目环境影响评价文件审批(广东省厅事项名称) 【国家标准名:“建设项目环境影响评价审批(海洋工程、核与辐射类除外)”】"],
|
|||
|
|
"娱乐场所审批": [
|
|||
|
|
"歌舞娱乐场所从事娱乐场所经营 activity 审批",
|
|||
|
|
"游艺娱乐场所从事娱乐场所经营活动审批,内资娱乐场所变更、延续、补证、注销审批"
|
|||
|
|
],
|
|||
|
|
"第二类医疗器械经营备案": ["第二类医疗器械经营备案"],
|
|||
|
|
"经营高危险性体育项目许可": ["经营高危险性体育项目许可"],
|
|||
|
|
"互联网上网服务营业场所经营单位审批": ["互联网上网服务营业场所信息网络安全审核"],
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# 1. Load Excel Map
|
|||
|
|
excel_path = "主题-事项绑定.xlsx"
|
|||
|
|
if not os.path.exists(excel_path):
|
|||
|
|
print(f"Excel file not found: {excel_path}")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
print("Reading Excel...")
|
|||
|
|
df = pd.read_excel(excel_path, header=1)
|
|||
|
|
df.columns = [str(c).strip() for c in df.columns]
|
|||
|
|
|
|||
|
|
col_theme = '主题'
|
|||
|
|
if col_theme not in df.columns:
|
|||
|
|
for c in df.columns:
|
|||
|
|
if '主题' in c: col_theme = c; break
|
|||
|
|
|
|||
|
|
col_permit = '审批事项'
|
|||
|
|
if col_permit not in df.columns:
|
|||
|
|
for c in df.columns:
|
|||
|
|
if '事项' in c and c != col_theme: col_permit = c; break
|
|||
|
|
|
|||
|
|
if col_theme not in df.columns or col_permit not in df.columns:
|
|||
|
|
print(f"Columns not found in Excel. Found: {df.columns.tolist()}")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
df[col_theme] = df[col_theme].ffill()
|
|||
|
|
|
|||
|
|
permit_to_themes = {}
|
|||
|
|
for idx, row in df.iterrows():
|
|||
|
|
p = str(row[col_permit]).strip()
|
|||
|
|
t = str(row[col_theme]).strip()
|
|||
|
|
if p and t and p != 'nan' and t != 'nan':
|
|||
|
|
if p not in permit_to_themes:
|
|||
|
|
permit_to_themes[p] = []
|
|||
|
|
if t not in permit_to_themes[p]:
|
|||
|
|
permit_to_themes[p].append(t)
|
|||
|
|
|
|||
|
|
print(f"Loaded {len(permit_to_themes)} unique permit names from Excel.")
|
|||
|
|
|
|||
|
|
# 2. Connect DB
|
|||
|
|
conn = pg.connect(**conn_params)
|
|||
|
|
cur = conn.cursor()
|
|||
|
|
|
|||
|
|
# 3. Get Region ID
|
|||
|
|
cur.execute("SELECT id FROM regions WHERE name = '市级'")
|
|||
|
|
res = cur.fetchone()
|
|||
|
|
if not res:
|
|||
|
|
print("Region '市级' not found in DB.")
|
|||
|
|
return
|
|||
|
|
region_id = res[0]
|
|||
|
|
|
|||
|
|
# 4. Get DB Themes
|
|||
|
|
cur.execute("SELECT id, name FROM themes")
|
|||
|
|
db_themes = {row[1]: row[0] for row in cur.fetchall()} # Name -> ID
|
|||
|
|
|
|||
|
|
# Import missing themes if any
|
|||
|
|
all_excel_themes = set()
|
|||
|
|
for t_list in permit_to_themes.values():
|
|||
|
|
for t in t_list:
|
|||
|
|
all_excel_themes.add(t)
|
|||
|
|
|
|||
|
|
for t_name in all_excel_themes:
|
|||
|
|
if t_name not in db_themes:
|
|||
|
|
print(f"Creating missing theme: {t_name}")
|
|||
|
|
cur.execute("INSERT INTO themes (id, name) VALUES (%s, %s)", (str(pg.uuid.uuid4()), t_name))
|
|||
|
|
cur.execute("SELECT id FROM themes WHERE name = %s", (t_name,))
|
|||
|
|
tid = cur.fetchone()[0]
|
|||
|
|
db_themes[t_name] = tid
|
|||
|
|
conn.commit()
|
|||
|
|
|
|||
|
|
# 5. Invert Manual Map for lookup
|
|||
|
|
inverted_manual = {}
|
|||
|
|
for excel_n, db_list in manual_map.items():
|
|||
|
|
if isinstance(db_list, str): db_list = [db_list]
|
|||
|
|
for db_n in db_list:
|
|||
|
|
if db_n not in inverted_manual: inverted_manual[db_n] = []
|
|||
|
|
inverted_manual[db_n].append(excel_n)
|
|||
|
|
|
|||
|
|
# 6. Get DB Permits for '市级'
|
|||
|
|
cur.execute("""
|
|||
|
|
SELECT p.id, p.name
|
|||
|
|
FROM region_permit_details rpd
|
|||
|
|
JOIN permits p ON p.id = rpd.permit_id
|
|||
|
|
WHERE rpd.region_id = %s
|
|||
|
|
""", (region_id,))
|
|||
|
|
db_permits = cur.fetchall() # (id, name)
|
|||
|
|
print(f"Scanning {len(db_permits)} DB permits for binding...")
|
|||
|
|
|
|||
|
|
bind_count = 0
|
|||
|
|
|
|||
|
|
for pid, pname in db_permits:
|
|||
|
|
matched_themes = []
|
|||
|
|
|
|||
|
|
# A. Manual Map
|
|||
|
|
if pname in inverted_manual:
|
|||
|
|
for en in inverted_manual[pname]:
|
|||
|
|
if en in permit_to_themes:
|
|||
|
|
matched_themes.extend(permit_to_themes[en])
|
|||
|
|
if matched_themes:
|
|||
|
|
print(f"Manual Match: '{pname}'")
|
|||
|
|
|
|||
|
|
# B. Exact Match
|
|||
|
|
if not matched_themes:
|
|||
|
|
matched_themes = permit_to_themes.get(pname, [])
|
|||
|
|
if matched_themes:
|
|||
|
|
print(f"Exact Match: '{pname}'")
|
|||
|
|
|
|||
|
|
# C. Fuzzy Match
|
|||
|
|
if not matched_themes:
|
|||
|
|
p_norm = normalize(pname)
|
|||
|
|
best_match = None
|
|||
|
|
max_score = 0
|
|||
|
|
|
|||
|
|
for excel_p in permit_to_themes.keys():
|
|||
|
|
e_norm = normalize(excel_p)
|
|||
|
|
if e_norm in p_norm or p_norm in e_norm:
|
|||
|
|
score = len(set(e_norm) & set(p_norm))
|
|||
|
|
if score > max_score and score >= 2:
|
|||
|
|
max_score = score
|
|||
|
|
best_match = excel_p
|
|||
|
|
|
|||
|
|
if best_match:
|
|||
|
|
matched_themes = permit_to_themes[best_match]
|
|||
|
|
print(f"Fuzzy Match: '{pname}' -> '{best_match}'")
|
|||
|
|
|
|||
|
|
if matched_themes:
|
|||
|
|
for t_name in set(matched_themes):
|
|||
|
|
tid = db_themes.get(t_name)
|
|||
|
|
if tid:
|
|||
|
|
cur.execute("""
|
|||
|
|
INSERT INTO region_theme_permits (region_id, theme_id, permit_id)
|
|||
|
|
VALUES (%s, %s, %s)
|
|||
|
|
ON CONFLICT DO NOTHING
|
|||
|
|
""", (region_id, tid, pid))
|
|||
|
|
bind_count += 1
|
|||
|
|
else:
|
|||
|
|
print(f"Theme '{t_name}' not found in DB themes map.")
|
|||
|
|
|
|||
|
|
conn.commit()
|
|||
|
|
conn.close()
|
|||
|
|
print(f"Total bindings created/verified: {bind_count}")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
force_bind()
|