60 lines
1.7 KiB
Python
60 lines
1.7 KiB
Python
"""
Test the improved CRT extraction - verifies YDQ25_002294.pdf and YDQ23_001838.pdf
"""
|
|
import sys
|
|
import os
|
|
|
|
# Add this script's own directory to sys.path so test_accuracy_batch_full can be imported
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
from test_accuracy_batch_full import extract_institution_from_crt
|
|
|
|
def test_crt_extraction():
    """Run CRT institution extraction on the sample PDFs and print a report.

    Each case pairs a PDF path with the list of institution names expected
    for it; only the first expected name is compared against the extraction
    result. Outcomes are only printed: the function makes no assertions and
    returns None.
    """
    banner_rule = "=" * 80
    case_rule = '#' * 80

    test_cases = [
        {
            'pdf': 'src/test/resources/data/pdfs/YDQ25_002294.pdf',
            'expected': ['广东产品质量监督检验研究院'],
        },
        {
            'pdf': 'src/test/resources/data/pdfs/YDQ23_001838.pdf',
            'expected': ['广东产品质量监督检验研究院'],
        },
    ]

    print(banner_rule)
    print("TESTING IMPROVED CRT EXTRACTION")
    print(banner_rule)

    for case in test_cases:
        source_pdf = case['pdf']
        wanted = case['expected']
        pdf_name = os.path.basename(source_pdf)

        # Per-case header.
        print("\n" + case_rule)
        print(f"PDF: {pdf_name}")
        print(f"Expected: {wanted}")
        print(case_rule + "\n")

        found = extract_institution_from_crt(source_pdf)

        print(f"\nResult: {found}")

        # An empty/falsy result means nothing was extracted at all.
        if not found:
            print("✗✗✗ FAILED! No institutions extracted")
            continue

        primary = wanted[0]
        if primary in found:
            print(f"✓✓✓ SUCCESS! Found expected institution: {primary}")
            continue

        # Something was extracted, but not the institution we wanted.
        print("✗✗✗ PARTIAL SUCCESS! Found institutions but not the expected one:")
        print(f" Expected: {primary}")
        print(f" Got: {found}")

    print("\n" + banner_rule)
    print("TEST COMPLETE")
    print(banner_rule)
|
|
|
|
# Script entry point: run the extraction check when executed directly.
if __name__ == "__main__":
    test_crt_extraction()
|