"""
Test CRT extraction for YDQ25_002294.pdf

Runs ``extract_institution_from_crt`` on a single sample PDF, prints every
institution it returns, and exits with a non-zero status when the PDF is
missing or no institution is found.
"""
import sys
import os
from pathlib import Path

# Set before the project import below.
# NOTE(review): presumably read by a dependency at import time - confirm.
os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"

# Import CRT extraction function.
# The script's own directory is put first on sys.path so the sibling module
# can be imported; abspath keeps the entry valid if the working directory
# changes later.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from test_accuracy_batch_full import extract_institution_from_crt  # noqa: E402

# Test PDF (relative path: resolved against the current working directory).
PDF_PATH = Path("src/test/resources/data/pdfs/YDQ25_002294.pdf")

SEPARATOR = "=" * 80


def main() -> int:
    """Run the CRT extraction check and print a report.

    Returns:
        0 when at least one institution was extracted; 1 when the PDF does
        not exist or the extraction returned nothing.
    """
    print(f"Testing CRT extraction for: {PDF_PATH}")
    print(SEPARATOR)

    # Check if file exists
    if not PDF_PATH.exists():
        print(f"ERROR: PDF not found: {PDF_PATH}")
        return 1

    # Extract institutions from CRT
    institutions = extract_institution_from_crt(str(PDF_PATH))

    print("\n" + SEPARATOR)
    print("RESULTS")
    print(SEPARATOR)
    print(f"Institutions found: {len(institutions)}")
    for idx, inst in enumerate(institutions, 1):
        print(f" {idx}. {inst}")

    if institutions:
        print(f"\n✓ CRT extraction SUCCESS: {institutions[0]}")
        status = 0
    else:
        print("\n✗ CRT extraction FAILED: No institutions found")
        print("\nPossible reasons:")
        print(" 1. PDF has no digital signatures (scanned PDF)")
        print(" 2. PDF signatures are not accessible (locked/encrypted)")
        print(" 3. Certificate parsing failed")
        # Report the failure to the caller (shell / CI) instead of exiting 0.
        status = 1

    print(SEPARATOR)
    return status


# Guard the entry point so importing this module does not run the extraction
# or call sys.exit().
if __name__ == "__main__":
    sys.exit(main())