from lawrisk.utils.env_loader import load_env load_env() import os import pg8000.dbapi as pg def get_conn(): host = os.getenv("LIC_PG_HOST", "172.24.240.1") port = int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432"))) user = os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres")) password = os.getenv("LIC_PG_PASSWORD", "") database = os.getenv("LIC_PG_DATABASE", "licensing_risks") return pg.connect(host=host, port=port, user=user, password=password, database=database) def detect_any_duplicates(): conn = get_conn() cur = conn.cursor() # Let's find all risks per permit and see if any have same content cur.execute(""" SELECT rpr.region_id, r.name as region_name, rpr.permit_id, p.name as permit_name, rk.risk_content, COUNT(*) FROM region_permit_risks rpr JOIN regions r ON r.id = rpr.region_id JOIN permits p ON p.id = rpr.permit_id JOIN risks rk ON rk.id = rpr.risk_id GROUP BY rpr.region_id, r.name, rpr.permit_id, p.name, rk.risk_content HAVING COUNT(*) > 1 """) rows = cur.fetchall() print(f"Total duplicate content groups for same permit: {len(rows)}") for row in rows: print(f" [{row[1]}] {row[3]}: {repr(row[4][:50])} (Count: {row[5]})") conn.close() if __name__ == "__main__": detect_any_duplicates()