fs-lawrisk/tools/v_risk_report.py

89 lines
3.0 KiB
Python

from lawrisk.utils.env_loader import load_env
load_env()
from lawrisk.services.licensing_repo import _lic_pg_conn
import json
def get_duplicates_report(*, rows=None, output_path: str = "risk_duplicates_report.txt") -> None:
    """Report duplicated risks within each (region, permit) pair.

    Two kinds of duplicates are detected among the risks attached to the
    same region/permit combination:

    * ``DUPE_CONTENT`` -- two risks whose whitespace-stripped content is equal
      (a missing/empty content is compared as the empty string).
    * ``DUPE_SERIAL`` -- two risks sharing the same serial number. Falsy
      serials (``None``, ``""``, ``0``) are treated as "no serial" and are
      never compared.

    A summary is printed to stdout and the detailed findings are written to
    ``output_path`` (UTF-8, lines joined with ``"\\n"``, no trailing newline).

    Args:
        rows: Optional iterable of 7-item sequences
            ``(region_name, permit_name, risk_content, serial_number,
            risk_id, region_id, permit_id)``. When ``None`` (the default) the
            rows are read from the database through ``_lic_pg_conn``.
        output_path: File the detailed report is written to.
    """
    if rows is None:
        rows = _fetch_region_permit_risk_rows()

    permit_risks = _group_risks_by_permit(rows)

    report = []
    dupes_count = 0
    for data in permit_risks.values():
        p_dupes = _find_duplicate_messages(data["risks"])
        if not p_dupes:
            continue
        dupes_count += 1
        report.append(f"--- {data['region']} | {data['permit']} ---")
        report.extend(f" {msg}" for msg in p_dupes)

    print(f"Checked {len(permit_risks)} permits.")
    print(f"Found {dupes_count} permits with duplicates.")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write("\n".join(report))
    print(f"Detailed report written to {output_path}")


def _fetch_region_permit_risk_rows():
    """Return every region/permit/risk link joined with its display names."""
    # ORDER BY keeps the report stable between runs: without it the database
    # may return rows in any order, which changes both the section order and
    # which risk ID is reported as the "first seen" one of a duplicate pair.
    sql = """
        SELECT
            r.name as region_name,
            p.name as permit_name,
            rk.risk_content,
            rpr.serial_number,
            rpr.risk_id,
            rpr.region_id,
            rpr.permit_id
        FROM region_permit_risks rpr
        JOIN regions r ON r.id = rpr.region_id
        JOIN permits p ON p.id = rpr.permit_id
        JOIN risks rk ON rk.id = rpr.risk_id
        ORDER BY rpr.region_id, rpr.permit_id, rpr.risk_id
    """
    with _lic_pg_conn() as conn:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            return cur.fetchall()
        finally:
            # Release the cursor even if the query fails.
            cur.close()


def _group_risks_by_permit(rows):
    """Group rows by (region_id, permit_id), keeping first-seen order."""
    permit_risks = {}
    for region_name, permit_name, content, serial, risk_id, region_id, permit_id in rows:
        entry = permit_risks.setdefault(
            (region_id, permit_id),
            {"region": region_name, "permit": permit_name, "risks": []},
        )
        entry["risks"].append({
            # NULL/empty content is normalised to "" so it can be compared.
            "content": content.strip() if content else "",
            "serial": serial,
            "risk_id": risk_id,
        })
    return permit_risks


def _find_duplicate_messages(risks):
    """Return one message per risk that repeats an earlier content or serial."""
    seen_content = {}
    seen_serial = {}
    messages = []
    for risk in risks:
        content = risk["content"]
        serial = risk["serial"]
        risk_id = risk["risk_id"]

        # Content duplicate check: always compare against the first occurrence.
        if content in seen_content:
            messages.append(
                f"DUPE_CONTENT: {content[:50]!r} (IDs: {risk_id} vs {seen_content[content]})"
            )
        else:
            seen_content[content] = risk_id

        # Serial duplicate check: falsy serials are not tracked at all.
        if not serial:
            continue
        if serial in seen_serial:
            messages.append(
                f"DUPE_SERIAL: {serial} (IDs: {risk_id} vs {seen_serial[serial]})"
            )
        else:
            seen_serial[serial] = risk_id
    return messages
# Script entry point: build the duplicates report only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    get_duplicates_report()