# fs-lawrisk/tests/verify_parsing.py


import os
import sys

# Make the backend package importable by appending the project root to the
# module search path.  NOTE: this is an absolute, machine-specific location.
_PROJECT_ROOT = r"c:\Users\WIN10\Desktop\work\11th-week\法律风险提示-new\市监局-lawRisk-backend"
sys.path.append(_PROJECT_ROOT)

from lawrisk.services.licensing_repo import _parse_import_workbook
import logging

# Configure file logging.
# Exactly ONE FileHandler owns verification.log.  Opening the same file twice
# (once truncating, once appending) lets the two handles write at independent
# offsets, which can overwrite or interleave output, and duplicates every
# record when the library logger also propagates to the root logger.
_LOG_FILE = 'verification.log'
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
file_handler = logging.FileHandler(_LOG_FILE, mode='w', encoding='utf-8')
file_handler.setFormatter(logging.Formatter(_LOG_FORMAT))
# basicConfig is a no-op when the root logger already has handlers; in that
# case file_handler is not installed on the root logger (checked below).
logging.basicConfig(level=logging.DEBUG, handlers=[file_handler])

# Ensure the library logger also writes to the file if it has its own handlers.
lib_logger = logging.getLogger("lawrisk.services.licensing_repo")
if lib_logger.handlers:
    # A logger with its own handlers may carry its own level; open it up so
    # DEBUG records are not filtered out before reaching the file.
    lib_logger.setLevel(logging.DEBUG)
    # Records already reach the file when the logger propagates AND the root
    # logger actually holds file_handler; only attach it directly otherwise.
    reaches_file_via_root = (
        lib_logger.propagate and file_handler in logging.getLogger().handlers
    )
    if not reaches_file_via_root:
        lib_logger.addHandler(file_handler)

def verify_parsing(excel_path=None):
    """Parse the sample import workbook and print mismatches against expected values.

    The workbook bytes are handed to ``_parse_import_workbook``; for every
    parsed sheet the row count and the first row are printed, and the first
    row is compared against the expected fixture values.  Mismatches are
    reported on stdout as ``ERROR: ...`` lines.

    Args:
        excel_path: Path of the workbook to parse.  When omitted, the original
            hard-coded location of ``test_import_data.xlsx`` is used.

    Returns:
        None.  All findings are printed; nothing is raised for a missing
        file or a parser failure.
    """
    if excel_path is None:
        excel_path = r"c:\Users\WIN10\Desktop\work\11th-week\法律风险提示-new\市监局-lawRisk-backend\test_import_data.xlsx"

    # Report an unreadable/missing workbook instead of crashing with a traceback.
    try:
        with open(excel_path, "rb") as f:
            file_bytes = f.read()
    except OSError as e:
        print(f"Cannot read {excel_path}: {e}")
        return

    print(f"Parsing {excel_path}...")
    try:
        # The second argument is presumably the workbook's file name; for the
        # default path this is "test_import_data.xlsx", as before.
        result = _parse_import_workbook(file_bytes, os.path.basename(excel_path))
    except Exception as e:
        print(f"Parsing FAILED: {e}")
        import traceback
        traceback.print_exc()
        return

    sheets = result.get("sheets", {})
    if not sheets:
        print("No sheets parsed!")
        return

    # (row key, expected value, label used in the error message).
    # The metadata fields are checked on the row itself because the defaults
    # are propagated to rows.
    expected_first_row = (
        ("permit_name", "Integration Test Permit", "permit_name"),
        ("filler_name", "AutoTester", "filler_name"),
        ("unit_name", "Test Unit", "unit_name"),
        # Row index 1 ("1" in col A)
        ("risk_content", "Risk: Unregistered Business", "Row 1 risk_content"),
    )

    for sheet_name, sheet_data in sheets.items():
        print(f"--- Sheet: {sheet_name} ---")
        rows = sheet_data.get("rows", [])
        print(f"Row count: {len(rows)}")

        if not rows:
            continue

        first_row = rows[0]
        print("First row:", first_row)

        for key, expected, label in expected_first_row:
            actual = first_row.get(key)
            if actual != expected:
                print(f"ERROR: Expected {label}='{expected}', got '{actual}'")

        # NOTE: permit status is intentionally not verified here; the fixture
        # workbook may not populate that column, so the parser is trusted.


# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    verify_parsing()