""" 测试改进后的CRT提取功能 - 验证YDQ25_002294.pdf和YDQ23_001838.pdf """ import sys import os # Add parent directory to path to import from test_accuracy_batch_full sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from test_accuracy_batch_full import extract_institution_from_crt def test_crt_extraction(): """测试CRT提取""" test_cases = [ { 'pdf': 'src/test/resources/data/pdfs/YDQ25_002294.pdf', 'expected': ['广东产品质量监督检验研究院'], }, { 'pdf': 'src/test/resources/data/pdfs/YDQ23_001838.pdf', 'expected': ['广东产品质量监督检验研究院'], }, ] print("="*80) print("TESTING IMPROVED CRT EXTRACTION") print("="*80) for test_case in test_cases: pdf_path = test_case['pdf'] expected = test_case['expected'] print(f"\n{'#'*80}") print(f"PDF: {os.path.basename(pdf_path)}") print(f"Expected: {expected}") print(f"{'#'*80}\n") # Extract CRT result = extract_institution_from_crt(pdf_path) print(f"\nResult: {result}") # Check if extraction succeeded if result: if expected[0] in result: print(f"✓✓✓ SUCCESS! Found expected institution: {expected[0]}") else: print(f"✗✗✗ PARTIAL SUCCESS! Found institutions but not the expected one:") print(f" Expected: {expected[0]}") print(f" Got: {result}") else: print(f"✗✗✗ FAILED! No institutions extracted") print("\n" + "="*80) print("TEST COMPLETE") print("="*80) if __name__ == "__main__": test_crt_extraction()