report-detect/archive/tools/show_results.py

"""
Show a summary of the batch test results.

Reads the JSON report at ``test_reports_full/test_report.json`` and prints,
in order: overall statistics, CMA and institution match statistics, the first
ten per-PDF results, and the PDFs whose CMA comparison is ``no_match``.
"""
import json

# Default location of the report, relative to the working directory.
REPORT_PATH = 'test_reports_full/test_report.json'
# Width of the "=" / "-" separator rules.
RULE_WIDTH = 80


def _clip(value, limit):
    """Return *value* as text cut to *limit* characters.

    ``None`` (a JSON ``null``) is rendered as ``'N/A'`` instead of raising
    ``TypeError`` on the slice, so one incomplete record cannot abort the
    whole report.
    """
    if value is None:
        return 'N/A'
    return str(value)[:limit]


def _print_match_stats(title, stats):
    """Print the exact/partial/acceptable/no_match counts and accuracy of *stats*."""
    print(f"\n{title}:")
    print(f"  精确匹配: {stats['exact']}")
    print(f"  部分匹配: {stats['partial']}")
    print(f"  可接受: {stats['acceptable']}")
    print(f"  未匹配: {stats['no_match']}")
    # 'accuracy' is multiplied by 100 to be shown as a percentage.
    print(f"  准确率: {stats['accuracy']*100:.1f}%")


def _print_details(results):
    """Print name, CMA comparison and institution for the first ten results."""
    print("\n详细结果 (前10个):")
    print("-" * RULE_WIDTH)
    for i, r in enumerate(results[:10], 1):
        pdf_name = _clip(r['pdf_name'], 40)
        cma = r['extracted'].get('cma', 'N/A')
        expected_cma = r['expected'].get('cma', 'N/A')
        # `or 'N/A'`-style defaulting is done in _clip so that an explicit
        # null institution is handled the same way as a missing key.
        inst = _clip(r['extracted'].get('institution', 'N/A'), 30)
        cma_match = r['comparison']['cma'].get('match_type', 'unknown')

        print(f"{i}. {pdf_name}")
        print(f"   CMA: {cma} (期望: {expected_cma}) [{cma_match}]")
        print(f"   机构: {inst}...")


def _print_failures(results):
    """Print every result whose CMA match type is 'no_match', or a placeholder."""
    print("\n失败的PDF:")
    print("-" * RULE_WIDTH)
    failed = [
        r for r in results
        if r['comparison']['cma'].get('match_type') == 'no_match'
    ]
    if not failed:
        print("无")
        return
    for r in failed:
        pdf_name = _clip(r['pdf_name'], 40)
        expected_cma = r['expected'].get('cma', 'N/A')
        extracted_cma = r['extracted'].get('cma', 'N/A')
        print(f"- {pdf_name}")
        print(f"  期望: {expected_cma}, 提取: {extracted_cma}")


def main(report_path=REPORT_PATH):
    """Load the JSON report at *report_path* and print its summary to stdout.

    Raises:
        OSError: if the report file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if a required key ('summary', 'results', ...) is missing.
    """
    # Read the test results.
    with open(report_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    summary = data['summary']
    results = data['results']

    print("=" * RULE_WIDTH)
    print("批量测试结果摘要")
    print("=" * RULE_WIDTH)

    print("\n总体统计:")
    print(f"  处理PDF数量: {summary['total_processed']}")
    print(f"  平均处理时间: {summary['avg_processing_time']:.1f}秒")

    _print_match_stats("CMA提取结果", summary['cma'])
    _print_match_stats("机构提取结果", summary['institution'])

    _print_details(results)

    # Show the PDFs that failed.
    _print_failures(results)

    print("\n" + "=" * RULE_WIDTH)
    print("提示: 在浏览器中打开 test_reports_full/summary.html 查看详细的可视化报告")
    print("=" * RULE_WIDTH)


if __name__ == "__main__":
    main()
chore(project): conservative cleanup - archive temp scripts and old docs Major cleanup to improve project organization and maintainability. Changes: - Moved 34 temp/debug/test scripts to archive/temp_scripts/ - Moved 9 auxiliary tools to archive/tools/ - Moved 3 CRT test scripts to archive/crt_tests/ - Moved 4 OCR test scripts to archive/ocr_tests/ - Moved 14 old documentation files to archive/docs/ - Deleted 4 useless files (duplicates, temp files) Root directory: - Before: 67 files (cluttered) - After: 10 core files (clean and organized) Core files retained: - test_accuracy_batch_full.py (main script) - cma_extraction_template_primary.py (CMA extraction) - cma_extraction_final.py (backup CMA extraction) - CLAUDE.md (project guide) - TEST_ACCURACY_BATCH_README.md (usage guide) - TEST_ACCURACY_BATCH_DEPENDENCIES.md (dependency docs) - CLEANUP_PLAN.md (cleanup plan) - CLEANUP_SUMMARY.md (this file) - IMPLEMENTATION_SUMMARY.md (implementation summary) - requirements.txt (dependencies) Archive structure: archive/ ├── temp_scripts/ (34 files: test_, debug_, analyze_, etc.) ├── tools/ (9 files: find_, show_, visualize_, etc.) ├── crt_tests/ (3 files: CRT extraction tests) ├── ocr_tests/ (4 files: OCR timeout tests) └── docs/ (14 files: old reports and guides) Benefits: ✓ Cleaner root directory - easier navigation ✓ Better organization - clear separation of concerns ✓ Preserved history - all files archived, not deleted ✓ Improved maintainability - easier to find active files ✓ Better git history - removed 198 deleted files from tracking No functional changes - all core functionality preserved. Related: - TEST_ACCURACY_BATCH_DEPENDENCIES.md - dependency analysis - CLEANUP_PLAN.md - detailed cleanup plan Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-03-03 14:35:06 +08:00			`"""`
			`显示批量测试结果摘要`
			`"""`
			`import json`

			`# 读取测试结果`
			`with open('test_reports_full/test_report.json', 'r', encoding='utf-8') as f:`
			`data = json.load(f)`

			`summary = data['summary']`
			`results = data['results']`

			`print("=" * 80)`
			`print("批量测试结果摘要")`
			`print("=" * 80)`

			`print(f"\n总体统计:")`
			`print(f" 处理PDF数量: {summary['total_processed']}")`
			`print(f" 平均处理时间: {summary['avg_processing_time']:.1f}秒")`

			`print(f"\nCMA提取结果:")`
			`print(f" 精确匹配: {summary['cma']['exact']}")`
			`print(f" 部分匹配: {summary['cma']['partial']}")`
			`print(f" 可接受: {summary['cma']['acceptable']}")`
			`print(f" 未匹配: {summary['cma']['no_match']}")`
			`print(f" 准确率: {summary['cma']['accuracy']*100:.1f}%")`

			`print(f"\n机构提取结果:")`
			`print(f" 精确匹配: {summary['institution']['exact']}")`
			`print(f" 部分匹配: {summary['institution']['partial']}")`
			`print(f" 可接受: {summary['institution']['acceptable']}")`
			`print(f" 未匹配: {summary['institution']['no_match']}")`
			`print(f" 准确率: {summary['institution']['accuracy']*100:.1f}%")`

			`print(f"\n详细结果 (前10个):")`
			`print("-" * 80)`
			`for i, r in enumerate(results[:10], 1):`
			`pdf_name = r['pdf_name'][:40]`
			`cma = r['extracted'].get('cma', 'N/A')`
			`expected_cma = r['expected'].get('cma', 'N/A')`
			`inst = r['extracted'].get('institution', 'N/A')[:30]`
			`cma_match = r['comparison']['cma'].get('match_type', 'unknown')`

			`print(f"{i}. {pdf_name}")`
			`print(f" CMA: {cma} (期望: {expected_cma}) [{cma_match}]")`
			`print(f" 机构: {inst}...")`

			`# 显示失败的PDF`
			`print(f"\n失败的PDF:")`
			`print("-" * 80)`
			`failed = [r for r in results if r['comparison']['cma'].get('match_type') == 'no_match']`
			`if failed:`
			`for r in failed:`
			`pdf_name = r['pdf_name'][:40]`
			`expected_cma = r['expected'].get('cma', 'N/A')`
			`extracted_cma = r['extracted'].get('cma', 'N/A')`
			`print(f"- {pdf_name}")`
			`print(f" 期望: {expected_cma}, 提取: {extracted_cma}")`
			`else:`
			`print("无")`

			`print("\n" + "=" * 80)`
			`print("提示: 在浏览器中打开 test_reports_full/summary.html 查看详细的可视化报告")`
			`print("=" * 80)`