fs-lawrisk/tools/maintain_db_cleanup.py

175 lines
6.8 KiB
Python

import os
import sys
import subprocess
import datetime
import pg8000.dbapi as pg
# -----------------------------------------------------------------------------
# 1. Environment & Configuration
# -----------------------------------------------------------------------------
def load_env():
env_vars = {}
try:
with open('.env', 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
if '=' in line:
k, v = line.split('=', 1)
env_vars[k.strip()] = v.strip()
except Exception:
pass
return env_vars
env = load_env()
HOST = env.get("LIC_PG_HOST", "172.24.240.1")
PORT = int(env.get("LIC_PG_PORT", "5432"))
USER = env.get("LIC_PG_USER", "postgres")
PASSWORD = env.get("LIC_PG_PASSWORD", "")
DATABASE = env.get("LIC_PG_DATABASE", "licensing_risks")
# -----------------------------------------------------------------------------
# 2. Database Backup
# -----------------------------------------------------------------------------
def perform_backup():
date_str = datetime.datetime.now().strftime("%Y-%m-%d")
backup_file = os.path.abspath(f"data/backup_{date_str}.sql")
# Ensure data directory exists
os.makedirs(os.path.dirname(backup_file), exist_ok=True)
print(f"Starting backup to {backup_file}...")
# Set PGPASSWORD for pg_dump
env_copy = os.environ.copy()
env_copy["PGPASSWORD"] = PASSWORD
# Construct command
# pg_dump -h host -p port -U user -f file dbname
cmd = [
"pg_dump",
"-h", HOST,
"-p", str(PORT),
"-U", USER,
"-f", backup_file,
DATABASE
]
try:
subprocess.run(cmd, env=env_copy, check=True)
print("Backup completed successfully.")
return True
except subprocess.CalledProcessError as e:
print(f"Backup failed: {e}")
return False
except FileNotFoundError:
print("pg_dump not found in PATH. Skipping backup (RISKY!).")
# In this specific task, if backup fails, we should probably stop or ask user.
# But given I am an agent, I should try to warn and maybe proceed if trivial, but deleting data is dangerous.
# I will stop if backup fails.
return False
# -----------------------------------------------------------------------------
# 3. Data Cleanup
# -----------------------------------------------------------------------------
def perform_cleanup():
print("Connecting to database...")
try:
conn = pg.connect(host=HOST, port=PORT, user=USER, password=PASSWORD, database=DATABASE)
cursor = conn.cursor()
target_theme_name = "不涉及"
# 1. Find theme ID
print(f"Finding theme '{target_theme_name}'...")
cursor.execute("SELECT id FROM themes WHERE name = %s", (target_theme_name,))
row = cursor.fetchone()
if not row:
print(f"Theme '{target_theme_name}' not found. Nothing to do.")
conn.close()
return
theme_id = row[0]
print(f"Found theme_id: {theme_id}")
# 2. Find associated permits (matters)
# Using region_theme_permits as the binding table
print("Finding associated permits...")
cursor.execute("SELECT DISTINCT permit_id FROM region_theme_permits WHERE theme_id = %s", (theme_id,))
permit_rows = cursor.fetchall()
permit_ids = [r[0] for r in permit_rows]
print(f"Found {len(permit_ids)} permits bound to '{target_theme_name}'.")
if not permit_ids:
print("No permits bound to this theme. Proceeding to delete theme only.")
# Start Transaction
try:
# 3. Clear TOPICS for these permits (Unbind ALL themes from these permits)
if permit_ids:
# Need to convert UUIDs to string for SQL IN clause or use execute with any
# pg8000 handles list/tuple for IN nicely if formatted manually, or we can loop.
# Batch delete is better.
# DELETE FROM region_theme_permits WHERE permit_id IN (...)
# Format for IN clause
permit_ids_tuple = tuple(permit_ids)
if len(permit_ids) == 1:
# distinct check ensures > 0, but tuple of 1 element needs comma
pass
# Note: pg8000 might struggle with large IN clauses in param logic depending on version.
# But let's try standard parameterized query.
# Actually, to be safe with syntax: "IN %s" with a tuple works in some drivers,
# but often 'IN (%s, %s)' is needed.
# I'll generate placeholders.
placeholders = ",".join(["%s"] * len(permit_ids))
print(f"Clearing ALL topic bindings for these {len(permit_ids)} permits...")
# Delete from region_theme_permits
cursor.execute(f"DELETE FROM region_theme_permits WHERE permit_id IN ({placeholders})", permit_ids_tuple)
print(f"Deleted {cursor.rowcount} rows from region_theme_permits.")
# Also check region_permit_theme_overrides if they exist for these permits?
# The user said "clear topics". Overrides might not be "bindings" per se, but let's check table existence.
# I'll skip overrides to avoid complexity unless requested, sticking to "bindings".
# 4. Delete the theme itself from all related tables
print(f"Deleting theme '{target_theme_name}' references...")
# Delete from region_themes
cursor.execute("DELETE FROM region_themes WHERE theme_id = %s", (theme_id,))
print(f"Deleted {cursor.rowcount} rows from region_themes.")
# Delete from permit_theme_rules
cursor.execute("DELETE FROM permit_theme_rules WHERE theme_id = %s", (theme_id,))
print(f"Deleted {cursor.rowcount} rows from permit_theme_rules.")
# Delete from themes
cursor.execute("DELETE FROM themes WHERE id = %s", (theme_id,))
print(f"Deleted {cursor.rowcount} row from themes.")
# Commit
conn.commit()
print("Transaction committed successfully.")
except Exception as e:
conn.rollback()
print(f"Error during transaction, rolled back. Error: {e}")
raise
conn.close()
except Exception as e:
print(f"Database error: {e}")
if __name__ == "__main__":
if perform_backup():
perform_cleanup()
else:
print("Aborting cleanup because backup failed.")