from lawrisk.utils.env_loader import load_env
load_env()

from lawrisk.services.licensing_repo import _lic_pg_conn
import json

def inspect_risk_tables():
    with _lic_pg_conn() as conn:
        cur = conn.cursor()
        
        # Check for duplicate combinations of (region_id, permit_id, risk_id)
        # This shouldn't happen if there is a primary key, but let's check.
        print("\n--- Check 4: Same (Region, Permit, RiskID) ---")
        cur.execute("""
            SELECT region_id, permit_id, risk_id, COUNT(*)
            FROM region_permit_risks
            GROUP BY region_id, permit_id, risk_id
            HAVING COUNT(*) > 1
        """)
        dupes4 = cur.fetchall()
        print(f"Duplicates (R, P, RiskID): {len(dupes4)}")

        # Maybe the duplication is because of multiple imports?
        # Let's look at all risks for a specific permit that might have issues.
        print("\n--- Listing first 10 permits and their risk counts ---")
        cur.execute("""
            SELECT p.name, rpd.region_id, COUNT(rpr.risk_id)
            FROM region_permit_details rpd
            JOIN permits p ON p.id = rpd.permit_id
            LEFT JOIN region_permit_risks rpr ON rpr.permit_id = rpd.permit_id AND rpr.region_id = rpd.region_id
            GROUP BY p.name, rpd.region_id
            ORDER BY COUNT(rpr.risk_id) DESC
            LIMIT 10
        """)
        for row in cur.fetchall():
            print(f"Permit: {row[0]}, Region: {row[1]}, Risks: {row[2]}")

        # Let's check if there are risks with identical content in the risks table itself.
        print("\n--- Check 5: Identical Content in 'risks' table ---")
        cur.execute("""
            SELECT risk_content, COUNT(*)
            FROM risks
            GROUP BY risk_content
            HAVING COUNT(*) > 1
            LIMIT 10
        """)
        dupes5 = cur.fetchall()
        print(f"Identical content in 'risks' table: {len(dupes5)}")
        for d in dupes5:
            print(f" Count: {d[1]}, Content: {repr(d[0][:100])}...")

if __name__ == "__main__":
    inspect_risk_tables()