from lawrisk.utils.env_loader import load_env load_env() import os import pg8000.dbapi as pg def get_conn(): host = os.getenv("LIC_PG_HOST", "172.24.240.1") port = int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))) user = os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")) password = os.getenv("LIC_PG_PASSWORD", "") database = os.getenv("LIC_PG_DATABASE", "licensing_risks") return pg.connect(host=host, port=port, user=user, password=password, database=database) def check(): conn = get_conn() cur = conn.cursor() print("Checking for duplicate risk links (same region, permit, risk)...") cur.execute(""" SELECT region_id, permit_id, risk_id, COUNT(*) FROM region_permit_risks GROUP BY region_id, permit_id, risk_id HAVING COUNT(*) > 1 """) links = cur.fetchall() print(f"Duplicates (exact R,P,Risk ID link): {len(links)}") for l in links: print(f" R:{l[0]} P:{l[1]} Risk:{l[2]} Count:{l[3]}") print("\nChecking for duplicate risk content for same permit...") cur.execute(""" SELECT rpr.region_id, rpr.permit_id, TRIM(rk.risk_content), TRIM(rpr.serial_number), COUNT(*) FROM region_permit_risks rpr JOIN risks rk ON rk.id = rpr.risk_id GROUP BY rpr.region_id, rpr.permit_id, TRIM(rk.risk_content), TRIM(rpr.serial_number) HAVING COUNT(*) > 1 """) content_dups = cur.fetchall() print(f"Duplicates (same permit + same content + same serial): {len(content_dups)}") for d in content_dups: print(f" R:{d[0]} P:{d[1]} Content: {repr(d[2][:50])} Serial: {d[3]} Count:{d[4]}") conn.close() if __name__ == "__main__": check()