53 lines
1.7 KiB
Python
53 lines
1.7 KiB
Python
from lawrisk.utils.env_loader import load_env
|
|
load_env()
|
|
|
|
import os
|
|
import pg8000.dbapi as pg
|
|
|
|
def get_conn():
|
|
host = os.getenv("LIC_PG_HOST", "172.24.240.1")
|
|
port = int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432")))
|
|
user = os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres"))
|
|
password = os.getenv("LIC_PG_PASSWORD", "")
|
|
database = os.getenv("LIC_PG_DATABASE", "licensing_risks")
|
|
return pg.connect(host=host, port=port, user=user, password=password, database=database)
|
|
|
|
def check():
|
|
conn = get_conn()
|
|
cur = conn.cursor()
|
|
|
|
print("Checking for duplicate risk links (same region, permit, risk)...")
|
|
cur.execute("""
|
|
SELECT region_id, permit_id, risk_id, COUNT(*)
|
|
FROM region_permit_risks
|
|
GROUP BY region_id, permit_id, risk_id
|
|
HAVING COUNT(*) > 1
|
|
""")
|
|
links = cur.fetchall()
|
|
print(f"Duplicates (exact R,P,Risk ID link): {len(links)}")
|
|
for l in links:
|
|
print(f" R:{l[0]} P:{l[1]} Risk:{l[2]} Count:{l[3]}")
|
|
|
|
print("\nChecking for duplicate risk content for same permit...")
|
|
cur.execute("""
|
|
SELECT
|
|
rpr.region_id,
|
|
rpr.permit_id,
|
|
TRIM(rk.risk_content),
|
|
TRIM(rpr.serial_number),
|
|
COUNT(*)
|
|
FROM region_permit_risks rpr
|
|
JOIN risks rk ON rk.id = rpr.risk_id
|
|
GROUP BY rpr.region_id, rpr.permit_id, TRIM(rk.risk_content), TRIM(rpr.serial_number)
|
|
HAVING COUNT(*) > 1
|
|
""")
|
|
content_dups = cur.fetchall()
|
|
print(f"Duplicates (same permit + same content + same serial): {len(content_dups)}")
|
|
for d in content_dups:
|
|
print(f" R:{d[0]} P:{d[1]} Content: {repr(d[2][:50])} Serial: {d[3]} Count:{d[4]}")
|
|
|
|
conn.close()
|
|
|
|
if __name__ == "__main__":
|
|
check()
|