34 lines
1.2 KiB
Python
34 lines
1.2 KiB
Python
|
|
import os
|
|
import logging
|
|
from lawrisk.services.licensing_repo import _resolve_import_header, _score_import_header, _clean_text
|
|
|
|
def simulate_header_detection():
    """Simulate header detection on one representative row and print the result.

    Each cell of a hard-coded header row is cleaned with ``_clean_text`` and
    resolved to canonical header names with ``_resolve_import_header``. For
    every canonical name the best-scoring column is remembered, and the
    result is inverted into a ``{column index: canonical name}`` mapping that
    is printed to stdout. Column indices are 1-based.

    Returns:
        None. The mapping is only printed.
    """
    # Representative header row from the Excel
    row_values = ["八", "风险提示", "风险提示内容", "法律依据", "文号", "摘要", "备注"]

    # canonical name -> (column index, cleaned cell text, score)
    row_candidate_map = {}
    for col_idx, raw_text in enumerate(row_values, start=1):
        cell_text = _clean_text(raw_text)
        if not cell_text:
            continue

        canonicals = _resolve_import_header(cell_text)
        if not canonicals:
            continue

        for canonical in canonicals:
            # Simple simulation of score logic: every candidate gets the same
            # score, so the strict ">" below never replaces an existing entry
            # and the first column seen for a canonical name is the one kept.
            score = 100  # Default
            previous = row_candidate_map.get(canonical)
            if not previous or score > previous[2]:
                row_candidate_map[canonical] = (col_idx, cell_text, score)

    # Invert to column -> canonical. If one column was kept for several
    # canonical names, the last one inserted overwrites the earlier ones.
    header_map = {
        col_idx: canonical
        for canonical, (col_idx, _, _) in row_candidate_map.items()
    }

    print(f"Header Map: {header_map}")
|
|
|
|
# Run the simulation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    simulate_header_detection()
|