fs-lawrisk/tools/v_check_dupes.py

53 lines
1.7 KiB
Python

from lawrisk.utils.env_loader import load_env
load_env()
import os
import pg8000.dbapi as pg
def get_conn():
host = os.getenv("LIC_PG_HOST", "172.24.240.1")
port = int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432")))
user = os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres"))
password = os.getenv("LIC_PG_PASSWORD", "")
database = os.getenv("LIC_PG_DATABASE", "licensing_risks")
return pg.connect(host=host, port=port, user=user, password=password, database=database)
def check():
conn = get_conn()
cur = conn.cursor()
print("Checking for duplicate risk links (same region, permit, risk)...")
cur.execute("""
SELECT region_id, permit_id, risk_id, COUNT(*)
FROM region_permit_risks
GROUP BY region_id, permit_id, risk_id
HAVING COUNT(*) > 1
""")
links = cur.fetchall()
print(f"Duplicates (exact R,P,Risk ID link): {len(links)}")
for l in links:
print(f" R:{l[0]} P:{l[1]} Risk:{l[2]} Count:{l[3]}")
print("\nChecking for duplicate risk content for same permit...")
cur.execute("""
SELECT
rpr.region_id,
rpr.permit_id,
TRIM(rk.risk_content),
TRIM(rpr.serial_number),
COUNT(*)
FROM region_permit_risks rpr
JOIN risks rk ON rk.id = rpr.risk_id
GROUP BY rpr.region_id, rpr.permit_id, TRIM(rk.risk_content), TRIM(rpr.serial_number)
HAVING COUNT(*) > 1
""")
content_dups = cur.fetchall()
print(f"Duplicates (same permit + same content + same serial): {len(content_dups)}")
for d in content_dups:
print(f" R:{d[0]} P:{d[1]} Content: {repr(d[2][:50])} Serial: {d[3]} Count:{d[4]}")
conn.close()
if __name__ == "__main__":
check()