44 lines
1.4 KiB
Python
44 lines
1.4 KiB
Python
|
|
from lawrisk.utils.env_loader import load_env
|
||
|
|
load_env()
|
||
|
|
|
||
|
|
import os
|
||
|
|
import pg8000.dbapi as pg
|
||
|
|
|
||
|
|
def get_conn():
|
||
|
|
host = os.getenv("LIC_PG_HOST", "172.24.240.1")
|
||
|
|
port = int(os.getenv("LIC_PG_PORT", os.getenv("PG_PORT", "5432")))
|
||
|
|
user = os.getenv("LIC_PG_USER", os.getenv("PG_USER", "postgres"))
|
||
|
|
password = os.getenv("LIC_PG_PASSWORD", "")
|
||
|
|
database = os.getenv("LIC_PG_DATABASE", "licensing_risks")
|
||
|
|
return pg.connect(host=host, port=port, user=user, password=password, database=database)
|
||
|
|
|
||
|
|
def detect_any_duplicates():
|
||
|
|
conn = get_conn()
|
||
|
|
cur = conn.cursor()
|
||
|
|
|
||
|
|
# Let's find all risks per permit and see if any have same content
|
||
|
|
cur.execute("""
|
||
|
|
SELECT
|
||
|
|
rpr.region_id,
|
||
|
|
r.name as region_name,
|
||
|
|
rpr.permit_id,
|
||
|
|
p.name as permit_name,
|
||
|
|
rk.risk_content,
|
||
|
|
COUNT(*)
|
||
|
|
FROM region_permit_risks rpr
|
||
|
|
JOIN regions r ON r.id = rpr.region_id
|
||
|
|
JOIN permits p ON p.id = rpr.permit_id
|
||
|
|
JOIN risks rk ON rk.id = rpr.risk_id
|
||
|
|
GROUP BY rpr.region_id, r.name, rpr.permit_id, p.name, rk.risk_content
|
||
|
|
HAVING COUNT(*) > 1
|
||
|
|
""")
|
||
|
|
rows = cur.fetchall()
|
||
|
|
print(f"Total duplicate content groups for same permit: {len(rows)}")
|
||
|
|
for row in rows:
|
||
|
|
print(f" [{row[1]}] {row[3]}: {repr(row[4][:50])} (Count: {row[5]})")
|
||
|
|
|
||
|
|
conn.close()
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
detect_any_duplicates()
|