# report-detect/archive/temp_scripts/test_improved_crt_extractio...

"""
测试改进后的CRT提取功能 - 验证YDQ25_002294.pdf和YDQ23_001838.pdf
"""
import sys
import os

# Add this script's own directory to sys.path so test_accuracy_batch_full can be imported
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from test_accuracy_batch_full import extract_institution_from_crt

def test_crt_extraction():
    """Run CRT institution extraction on the sample PDFs and report on stdout.

    Each sample PDF path is passed to ``extract_institution_from_crt``; the
    returned value is printed and then checked, using ``in``, against the
    first expected institution name for that sample. The function only
    prints its verdicts: it asserts nothing and returns ``None``.
    """
    banner_rule = "=" * 80
    section_rule = "#" * 80

    # (relative PDF path, expected institution names) for each sample.
    # The paths are relative and handed to the extractor unchanged.
    samples = (
        (
            'src/test/resources/data/pdfs/YDQ25_002294.pdf',
            ['广东产品质量监督检验研究院'],
        ),
        (
            'src/test/resources/data/pdfs/YDQ23_001838.pdf',
            ['广东产品质量监督检验研究院'],
        ),
    )

    print(banner_rule)
    print("TESTING IMPROVED CRT EXTRACTION")
    print(banner_rule)

    for sample_path, wanted in samples:
        # Per-sample header.
        print(f"\n{section_rule}")
        print(f"PDF: {os.path.basename(sample_path)}")
        print(f"Expected: {wanted}")
        print(f"{section_rule}\n")

        extracted = extract_institution_from_crt(sample_path)
        print(f"\nResult: {extracted}")

        # A falsy result (e.g. None or an empty collection) counts as failure.
        if not extracted:
            print("✗✗✗ FAILED! No institutions extracted")
            continue

        # Only the first expected name is compared against the result.
        primary = wanted[0]
        if primary in extracted:
            print(f"✓✓✓ SUCCESS! Found expected institution: {primary}")
            continue

        # Something was extracted, but not the institution we wanted.
        print("✗✗✗ PARTIAL SUCCESS! Found institutions but not the expected one:")
        print(f"   Expected: {primary}")
        print(f"   Got: {extracted}")

    print(f"\n{banner_rule}")
    print("TEST COMPLETE")
    print(banner_rule)

# Run the extraction check when this file is executed directly as a script.
if __name__ == "__main__":
    test_crt_extraction()