# fs-lawrisk/tools/inspect_header_logic.py

# 34 lines
# 1.2 KiB
# Python
# Raw Normal View History

import os
import logging
from lawrisk.services.licensing_repo import _resolve_import_header, _score_import_header, _clean_text
def simulate_header_detection(row_values=None):
    """Simulate import-header detection for a single header row.

    Every cell is cleaned with ``_clean_text``; non-empty cells are passed to
    ``_resolve_import_header`` and each canonical name it returns is recorded
    against the column it came from. All candidates receive the same constant
    score, so the first column that resolves to a given canonical name keeps
    it. The resulting column -> canonical mapping is printed and returned.

    Args:
        row_values: Cell texts of the header row, left to right. When
            omitted, a representative header row from the Excel is used.

    Returns:
        dict mapping the 1-based column index to the canonical header name.
    """
    if row_values is None:
        # Representative header row from the Excel
        row_values = ["", "风险提示", "风险提示内容", "法律依据", "文号", "摘要", "备注"]

    # canonical name -> (column index, cleaned cell text, score)
    row_candidate_map = {}
    for col_idx, raw_text in enumerate(row_values, start=1):
        cell_text = _clean_text(raw_text)
        if not cell_text:
            continue
        canonicals = _resolve_import_header(cell_text)
        if not canonicals:
            continue
        for canonical in canonicals:
            # Simple simulation of score logic: a constant placeholder, so a
            # later column can never out-score an earlier one.
            score = 100
            previous = row_candidate_map.get(canonical)
            if not previous or score > previous[2]:
                row_candidate_map[canonical] = (col_idx, cell_text, score)

    # Invert to column -> canonical. If one column won several canonical
    # names, the one inserted last overwrites the others (same as a plain
    # assignment loop over the candidates).
    header_map = {
        col_idx: canonical
        for canonical, (col_idx, _cell_text, _score) in row_candidate_map.items()
    }
    print(f"Header Map: {header_map}")
    return header_map
# Script entry point: run the simulation only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    simulate_header_detection()