89 lines
3.0 KiB
Python
89 lines
3.0 KiB
Python
from lawrisk.utils.env_loader import load_env
|
|
load_env()
|
|
|
|
from lawrisk.services.licensing_repo import _lic_pg_conn
|
|
import json
|
|
|
|
def _fetch_risk_rows():
    """Fetch every region/permit/risk link joined with its display names.

    Returns the result of ``fetchall()``; each row is positional in SELECT
    order: region name, permit name, risk content, serial number, risk id,
    region id, permit id.
    """
    # Join risks and region_permit_risks
    sql = """
        SELECT
            r.name as region_name,
            p.name as permit_name,
            rk.risk_content,
            rpr.serial_number,
            rpr.risk_id,
            rpr.region_id,
            rpr.permit_id
        FROM region_permit_risks rpr
        JOIN regions r ON r.id = rpr.region_id
        JOIN permits p ON p.id = rpr.permit_id
        JOIN risks rk ON rk.id = rpr.risk_id
    """
    with _lic_pg_conn() as conn:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            return cur.fetchall()
        finally:
            # Release the cursor even if the query fails.
            cur.close()


def _group_risks_by_permit(rows):
    """Group joined rows by their ``(region_id, permit_id)`` pair.

    Returns a dict mapping that pair to
    ``{"region": name, "permit": name, "risks": [...]}``, where each risk is
    a dict with ``content``, ``serial`` and ``risk_id`` keys.
    """
    grouped = {}
    for row in rows:
        region_name, permit_name, content, serial = row[0], row[1], row[2], row[3]
        risk_id, region_id, permit_id = row[4], row[5], row[6]
        entry = grouped.setdefault(
            (region_id, permit_id),
            {"region": region_name, "permit": permit_name, "risks": []},
        )
        entry["risks"].append(
            {
                # Falsy content (e.g. NULL) becomes "" so it can be sliced
                # and compared like any other content string.
                "content": content.strip() if content else "",
                "serial": serial,
                "risk_id": risk_id,
            }
        )
    return grouped


def _find_duplicates(risks):
    """Return one message per repeated content or serial within one permit.

    Every repeat is reported against the risk id that first used the value.
    """
    first_id_by_content = {}
    first_id_by_serial = {}
    messages = []
    for risk in risks:
        content = risk["content"]
        serial = risk["serial"]
        risk_id = risk["risk_id"]

        # Content duplicate check
        if content in first_id_by_content:
            messages.append(
                f"DUPE_CONTENT: {content[:50]!r} "
                f"(IDs: {risk_id} vs {first_id_by_content[content]})"
            )
        else:
            first_id_by_content[content] = risk_id

        # Serial duplicate check; falsy serials (NULL, empty) are never
        # compared, so several risks without a serial are not duplicates.
        if serial:
            if serial in first_id_by_serial:
                messages.append(
                    f"DUPE_SERIAL: {serial} "
                    f"(IDs: {risk_id} vs {first_id_by_serial[serial]})"
                )
            else:
                first_id_by_serial[serial] = risk_id
    return messages


def get_duplicates_report(*, rows=None, output_path="risk_duplicates_report.txt"):
    """Report risks that are duplicated within a single region/permit pair.

    Two risks attached to the same ``(region_id, permit_id)`` are flagged
    when their stripped content is equal or when they share a non-empty
    serial number. A summary is printed to stdout and the detailed findings
    are written to ``output_path`` (UTF-8, overwritten on each run).

    Args:
        rows: Optional pre-fetched rows, positional in the order region
            name, permit name, risk content, serial number, risk id,
            region id, permit id. When ``None`` (the default) the rows are
            queried from the database via ``_lic_pg_conn``.
        output_path: Destination of the detailed text report.

    Returns:
        None.
    """
    # ``is None`` rather than truthiness: an explicit empty list must not
    # trigger a database query.
    if rows is None:
        rows = _fetch_risk_rows()

    # Track by (Region, Permit)
    permit_risks = _group_risks_by_permit(rows)

    dupes_count = 0
    report = []
    for data in permit_risks.values():
        messages = _find_duplicates(data["risks"])
        if not messages:
            continue
        dupes_count += 1
        report.append(f"--- {data['region']} | {data['permit']} ---")
        report.extend(f" {msg}" for msg in messages)

    print(f"Checked {len(permit_risks)} permits.")
    print(f"Found {dupes_count} permits with duplicates.")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write("\n".join(report))
    print(f"Detailed report written to {output_path}")
|
|
|
|
if __name__ == "__main__":
    # Run the duplicate scan only when this file is executed as a script,
    # not when it is imported.
    get_duplicates_report()
|