# fs-lawrisk/tests/verify_parsing.py
#
# Repository viewer metadata: 76 lines, 3.3 KiB, Python.

import os
import sys
# Add project root to path
sys.path.append(r"c:\Users\WIN10\Desktop\work\11th-week\法律风险提示-new\市监局-lawRisk-backend")
from lawrisk.services.licensing_repo import _parse_import_workbook
import logging
import logging
# Logging setup: write everything at DEBUG and above to verification.log,
# truncating the file at the start of each run.
_LOG_FILE = 'verification.log'
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.DEBUG, filename=_LOG_FILE, filemode='w', encoding='utf-8', format=_LOG_FORMAT)


def _already_logs_to(logger, log_path):
    """Return True if records from *logger* already reach a handler on *log_path*.

    Walks the logger and its ancestors the same way record dispatch does:
    handlers on each level are consulted, and the walk stops at the first
    logger whose ``propagate`` flag is false.
    """
    current = logger
    while current is not None:
        if any(getattr(h, "baseFilename", None) == log_path for h in current.handlers):
            return True
        if not current.propagate:
            return False
        current = current.parent
    return False


# The library logger may ship with its own handlers (and possibly with
# propagation disabled), in which case the root configuration above would
# never see its records.
lib_logger = logging.getLogger("lawrisk.services.licensing_repo")
_log_path = os.path.abspath(_LOG_FILE)
if lib_logger.handlers and not _already_logs_to(lib_logger, _log_path):
    # Reuse the root logger's handler for verification.log when there is one.
    # Opening the same file through a second FileHandler would give two
    # independent file positions that can overwrite each other's output.
    fh = next(
        (h for h in logging.getLogger().handlers
         if getattr(h, "baseFilename", None) == _log_path),
        None,
    )
    if fh is None:
        # basicConfig() does nothing when the root logger was already
        # configured, so create the file handler explicitly in that case.
        fh = logging.FileHandler(_LOG_FILE, encoding='utf-8')
        fh.setFormatter(logging.Formatter(_LOG_FORMAT))
    lib_logger.addHandler(fh)
lib_logger.setLevel(logging.DEBUG)
def verify_parsing(excel_path=None):
    """Parse the sample import workbook and print a report of what was extracted.

    The workbook is read as bytes and passed to ``_parse_import_workbook``.
    For every parsed sheet the row count and the first row are printed, and
    the first row is compared against the hard-coded values this script
    expects; each mismatch is printed as an ``ERROR:`` line.

    Args:
        excel_path: Path of the workbook to parse. When omitted, the original
            developer-machine location of ``test_import_data.xlsx`` is used,
            so existing zero-argument calls behave as before.

    Returns:
        None. All results are reported on stdout.

    Raises:
        OSError: If the workbook cannot be opened (for example
            ``FileNotFoundError`` when the path does not exist).
    """
    workbook_name = "test_import_data.xlsx"
    if excel_path is None:
        excel_path = r"c:\Users\WIN10\Desktop\work\11th-week\法律风险提示-new\市监局-lawRisk-backend\test_import_data.xlsx"
    else:
        workbook_name = os.path.basename(excel_path)

    with open(excel_path, "rb") as workbook_file:
        file_bytes = workbook_file.read()

    print(f"Parsing {excel_path}...")
    try:
        result = _parse_import_workbook(file_bytes, workbook_name)
    except Exception as e:
        # Top-level boundary of a diagnostic script: report any parser
        # failure with its traceback instead of crashing.
        print(f"Parsing FAILED: {e}")
        import traceback
        traceback.print_exc()
        return

    sheets = result.get("sheets", {})
    if not sheets:
        print("No sheets parsed!")
        return

    # (row key, label used in the error message, expected value).
    # The metadata fields are checked on the row itself because the original
    # author noted that sheet-level defaults are propagated to rows.
    expected_first_row = (
        ("permit_name", "permit_name", "Integration Test Permit"),
        ("filler_name", "filler_name", "AutoTester"),
        ("unit_name", "unit_name", "Test Unit"),
        ("risk_content", "Row 1 risk_content", "Risk: Unregistered Business"),
    )

    for sheet_name, sheet_data in sheets.items():
        print(f"--- Sheet: {sheet_name} ---")
        rows = sheet_data.get("rows", [])
        print(f"Row count: {len(rows)}")
        if not rows:
            continue

        first_row = rows[0]
        print("First row:", first_row)
        for key, label, expected in expected_first_row:
            actual = first_row.get(key)
            if actual != expected:
                print(f"ERROR: Expected {label}='{expected}', got '{actual}'")

        # NOTE(review): the permit-status column is not verified here. The
        # original author was unsure whether the test workbook's header row
        # (row 8: sequence no., risk content, legal basis, document no.,
        # summary, remarks, permit status, ...) actually contains it.
        # TODO: confirm against the blank template before adding a check.
# Run the verification only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    verify_parsing()