# fs-lawrisk/tools/re_export_jsons_v2.py


import os
import json
import logging
from lawrisk.services.licensing_repo import _parse_import_workbook

# Configure the root logger at INFO level so that INFO-and-above log records
# emitted while this script runs are displayed (presumably including records
# from the imported licensing_repo parser -- confirm it uses `logging`).
logging.basicConfig(level=logging.INFO)

def main(source_dir: str = r"市级初版-20251219\许可风险提示") -> None:
    """Re-export every Excel workbook in *source_dir* as a sibling JSON file.

    Each ``*.xlsx`` file (temporary files prefixed with ``~$`` are skipped)
    is read as raw bytes, parsed with ``_parse_import_workbook`` and written
    next to the workbook as ``<base name>.json`` (UTF-8, non-ASCII characters
    kept as-is, 2-space indent). A failure on one workbook is reported and
    counted, and the remaining workbooks are still processed. A summary is
    printed at the end.

    Args:
        source_dir: Directory to scan. Defaults to the path this script was
            originally hard-coded to; a relative path is resolved against the
            current working directory.
    """
    # isdir (not exists): a regular file at this path would make listdir raise.
    if not os.path.isdir(source_dir):
        print(f"Error: Directory {source_dir} not found.")
        return

    processed_count = 0
    error_count = 0

    # Sorted so that the processing order (and the console output) is
    # reproducible; os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(source_dir)):
        # Process only .xlsx files (any letter case) and skip temporary files.
        if filename.startswith("~$") or not filename.lower().endswith(".xlsx"):
            continue

        fpath = os.path.join(source_dir, filename)
        print(f"Processing {filename}...")

        try:
            with open(fpath, "rb") as f:
                content = f.read()

            # Parse the workbook using the logic in licensing_repo.
            parsed = _parse_import_workbook(content, filename)

            # Ensure the filename in JSON matches the Excel source.
            parsed["filename"] = filename

            # Serialize before opening the output file: if the parsed data is
            # not JSON-serializable, an existing export is left untouched
            # instead of being truncated to a partial document.
            payload = json.dumps(parsed, ensure_ascii=False, indent=2)

            # Derive output JSON filename (same base name as the workbook).
            out_name = os.path.splitext(filename)[0] + ".json"
            out_path = os.path.join(source_dir, out_name)

            with open(out_path, "w", encoding="utf-8") as out_f:
                out_f.write(payload)
        except Exception as e:
            # Per-file boundary: report the failure and keep going so one bad
            # workbook does not abort the whole batch.
            print(f"  Error processing {filename}: {e}")
            error_count += 1
        else:
            print(f"  Successfully exported to {out_name}")
            processed_count += 1

    print("\nProcessing finished.")
    print(f"Total processed: {processed_count}")
    print(f"Total errors: {error_count}")

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()