"""Directly test the CRT extraction function.

Runs ``extract_institution_from_crt`` on two sample PDFs, prints the type,
length and content of each result, and finally reports whether the expected
institution name is contained in each result.
"""
import sys

from test_accuracy_batch_full import extract_institution_from_crt


class UTF8Stdout:
    """Minimal ``sys.stdout`` replacement that always emits UTF-8 bytes.

    Text is encoded as UTF-8 with ``errors='replace'`` and written straight
    to a binary stream, so characters the console's default encoding cannot
    represent do not raise ``UnicodeEncodeError``.

    Args:
        buffer: Binary stream to write to. Defaults to the ``buffer`` of
            whatever ``sys.stdout`` is at construction time.
    """

    def __init__(self, buffer=None):
        # Capture the binary stream up front: once an instance is installed
        # as ``sys.stdout``, a late ``sys.stdout.buffer`` lookup would resolve
        # to this object (which has no ``buffer``) instead of the real stream.
        self._buffer = sys.stdout.buffer if buffer is None else buffer

    def write(self, text):
        """Write *text* as UTF-8 bytes and return the number of items written."""
        if isinstance(text, str):
            data = text.encode('utf-8', errors='replace')
        else:
            # Bytes-like input is passed through unchanged.
            data = bytes(text)
        self._buffer.write(data)
        return len(text)

    def flush(self):
        """Flush the underlying binary stream."""
        self._buffer.flush()


# Redirect stdout to avoid encoding issues. This is only possible when the
# current stdout exposes a binary ``buffer``; otherwise stdout is left as is.
if getattr(sys.stdout, "buffer", None) is not None:
    # Push out anything still pending in the text layer before bypassing it.
    sys.stdout.flush()
    sys.stdout = UTF8Stdout()


def _report_extraction(path):
    """Run the CRT extraction on *path*, print a summary, and return the result."""
    extracted = extract_institution_from_crt(path)
    print(f"\nResult for {path}:")
    print(f" Type: {type(extracted)}")
    print(f" Length: {len(extracted)}")
    print(f" Content: {extracted}")
    return extracted


print("Testing CRT extraction...")

pdf_path = "src/test/resources/data/pdfs/YDQ25_002294.pdf"
result = _report_extraction(pdf_path)

# Also test YDQ23_001838.pdf
pdf_path2 = "src/test/resources/data/pdfs/YDQ23_001838.pdf"
result2 = _report_extraction(pdf_path2)

# Check if expected institution is in results
expected = "广东产品质量监督检验研究院"
print(f"\nExpected institution: {expected}")
print(f" Found in PDF1: {expected in result}")
print(f" Found in PDF2: {expected in result2}")