# report-detect/archive/temp_scripts/test_improved_crt_extractio...
#
# 60 lines
# 1.7 KiB
# Python

"""
Test the improved CRT extraction - verify YDQ25_002294.pdf and YDQ23_001838.pdf
"""
import sys
import os
# Add parent directory to path to import from test_accuracy_batch_full
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from test_accuracy_batch_full import extract_institution_from_crt
def test_crt_extraction():
    """Run CRT institution extraction on the sample PDFs and print a report.

    For every configured PDF this calls ``extract_institution_from_crt`` and
    prints whether the first expected institution name is contained in the
    returned value. Nothing is asserted or returned: the outcome is only
    reported on stdout, so this is a manual diagnostic rather than a
    pass/fail test.
    """
    heavy_rule = "=" * 80
    hash_rule = "#" * 80

    # PDF paths are relative, so they resolve against the current working directory.
    cases = [
        {
            'pdf': 'src/test/resources/data/pdfs/YDQ25_002294.pdf',
            'expected': ['广东产品质量监督检验研究院'],
        },
        {
            'pdf': 'src/test/resources/data/pdfs/YDQ23_001838.pdf',
            'expected': ['广东产品质量监督检验研究院'],
        },
    ]

    print(heavy_rule)
    print("TESTING IMPROVED CRT EXTRACTION")
    print(heavy_rule)

    for case in cases:
        pdf_path, expected = case['pdf'], case['expected']

        print(f"\n{hash_rule}")
        print(f"PDF: {os.path.basename(pdf_path)}")
        print(f"Expected: {expected}")
        print(f"{hash_rule}\n")

        # Extract CRT
        result = extract_institution_from_crt(pdf_path)
        print(f"\nResult: {result}")

        # An empty/falsy result means nothing was extracted at all.
        if not result:
            print("✗✗✗ FAILED! No institutions extracted")
            continue

        # Only the first expected name is compared against the result.
        wanted = expected[0]
        if wanted in result:
            print(f"✓✓✓ SUCCESS! Found expected institution: {wanted}")
        else:
            print("✗✗✗ PARTIAL SUCCESS! Found institutions but not the expected one:")
            print(f" Expected: {wanted}")
            print(f" Got: {result}")

    print("\n" + heavy_rule)
    print("TEST COMPLETE")
    print(heavy_rule)
# Run the extraction report only when this file is executed as a script.
if __name__ == "__main__":
    test_crt_extraction()