from lawrisk.utils.env_loader import load_env load_env() from lawrisk.services.licensing_repo import _lic_pg_conn import json def inspect_risk_tables(): with _lic_pg_conn() as conn: cur = conn.cursor() # Check for duplicate combinations of (region_id, permit_id, risk_id) # This shouldn't happen if there is a primary key, but let's check. print("\n--- Check 4: Same (Region, Permit, RiskID) ---") cur.execute(""" SELECT region_id, permit_id, risk_id, COUNT(*) FROM region_permit_risks GROUP BY region_id, permit_id, risk_id HAVING COUNT(*) > 1 """) dupes4 = cur.fetchall() print(f"Duplicates (R, P, RiskID): {len(dupes4)}") # Maybe the duplication is because of multiple imports? # Let's look at all risks for a specific permit that might have issues. print("\n--- Listing first 10 permits and their risk counts ---") cur.execute(""" SELECT p.name, rpd.region_id, COUNT(rpr.risk_id) FROM region_permit_details rpd JOIN permits p ON p.id = rpd.permit_id LEFT JOIN region_permit_risks rpr ON rpr.permit_id = rpd.permit_id AND rpr.region_id = rpd.region_id GROUP BY p.name, rpd.region_id ORDER BY COUNT(rpr.risk_id) DESC LIMIT 10 """) for row in cur.fetchall(): print(f"Permit: {row[0]}, Region: {row[1]}, Risks: {row[2]}") # Let's check if there are risks with identical content in the risks table itself. print("\n--- Check 5: Identical Content in 'risks' table ---") cur.execute(""" SELECT risk_content, COUNT(*) FROM risks GROUP BY risk_content HAVING COUNT(*) > 1 LIMIT 10 """) dupes5 = cur.fetchall() print(f"Identical content in 'risks' table: {len(dupes5)}") for d in dupes5: print(f" Count: {d[1]}, Content: {repr(d[0][:100])}...") if __name__ == "__main__": inspect_risk_tables()