#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Direct test of PaddleOCRVL to verify it works correctly.

Run as a script: the process exits with status 0 when a seal block is
detected in the test image and with status 1 otherwise.
"""

import json
import sys
import time
import traceback
from pathlib import Path

# Directories searched, in order, for an existing seal image to test with.
_TEST_IMAGE_DIRS = (
    Path("test_reports_full"),
    Path("bridge_output"),
    Path("temp_paddleocr_vl"),
)


def _find_seal_image(search_dirs):
    """Return the first ``*seal*.png`` found under *search_dirs*, or None.

    Directories are tried in the given order and missing ones are skipped.
    Matches inside a directory are sorted so the selected image does not
    depend on filesystem enumeration order.
    """
    for directory in search_dirs:
        if not directory.exists():
            continue
        # Only PNG files whose name contains "seal" qualify (searched recursively).
        matches = sorted(directory.glob("**/*seal*.png"))
        if matches:
            return matches[0]
    return None


def _create_fallback_image():
    """Draw a synthetic seal-like image, save it as ``test_seal.png``, return its path.

    Raises:
        ImportError: If Pillow is not installed.
    """
    # Pillow is only needed on this fallback path, so it is imported lazily.
    from PIL import Image, ImageDraw, ImageFont

    img = Image.new('RGB', (400, 400), color='white')
    draw = ImageDraw.Draw(img)

    # Draw a red circle (seal-like)
    draw.ellipse([50, 50, 350, 350], outline='red', width=5)

    # Add text
    try:
        # Try to use a font that supports Chinese
        font = ImageFont.truetype("msyh.ttc", 30)
    except (OSError, ImportError):
        # Font file not found/readable, or FreeType support is unavailable.
        # NOTE(review): the default font may not be able to render the
        # Chinese sample text on every Pillow version - confirm if this
        # fallback matters in practice.
        font = ImageFont.load_default()

    text = "测试机构名称"
    draw.text((200, 200), text, fill='black', font=font, anchor='mm')

    image_path = Path("test_seal.png")
    img.save(image_path)
    return image_path


def _report_seal_blocks(json_file):
    """Print every parsed block in *json_file* and return True if a seal was found.

    The file is expected to hold a JSON object; blocks are read from its
    ``parsing_res_list`` key (treated as empty when the key is absent).
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    blocks = data.get('parsing_res_list', [])
    print(f"\nParsing results ({len(blocks)} blocks):")

    seal_count = 0
    for index, block in enumerate(blocks, start=1):
        label = block.get('block_label', 'unknown')
        content = block.get('block_content', '')
        print(f" Block {index}: {label}")
        if content:
            # Only mark the preview with an ellipsis when text was actually cut.
            preview = content if len(content) <= 100 else f"{content[:100]}..."
            print(f" Content: '{preview}'")
        if label == 'seal':
            seal_count += 1
            print(" *** SEAL DETECTED ***")
            print(f" Full text: '{content}'")

    # Check if seal was found
    if seal_count:
        print(f"\nOK SUCCESS: Found {seal_count} seal(s)")
        return True

    print("\nFAIL FAIL: No seal blocks detected")
    return False


def test_paddleocrvl_direct():
    """Test PaddleOCRVL directly without multiprocessing.

    Imports and initializes the PaddleOCRVL pipeline, runs it on an existing
    seal image (or a synthetic one when none is found), saves the first
    result as JSON under ``test_paddleocrvl_output`` and inspects it.

    Returns:
        bool: True when at least one block labelled ``seal`` is present in
        the saved result; False on any failure (import, initialization,
        prediction, missing output) or when no seal block is detected.
    """
    print("=" * 80)
    print("PaddleOCRVL Direct Test")
    print("=" * 80)

    try:
        from paddleocr import PaddleOCRVL
    except ImportError as e:
        print(f"FAIL Failed to import PaddleOCRVL: {e}")
        print(" Install with: pip install paddleocr[doc-parser]")
        return False
    print("OK PaddleOCRVL import successful")

    # Initialize
    print("\nInitializing PaddleOCRVL pipeline...")
    try:
        vl_pipeline = PaddleOCRVL(
            use_seal_recognition=True,
            use_ocr_for_image_block=True,
            use_layout_detection=True
        )
    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything.
        print(f"FAIL Failed to initialize pipeline: {e}")
        traceback.print_exc()
        return False
    print("OK Pipeline initialized successfully")

    # Find a test image, or synthesize one when none is available.
    test_image = _find_seal_image(_TEST_IMAGE_DIRS)
    if not test_image:
        print("\nNo test image found. Creating a simple test...")
        try:
            test_image = _create_fallback_image()
        except ImportError as e:
            print(f"FAIL Pillow is required to create a test image: {e}")
            return False
        print(f"Created test image: {test_image}")

    print(f"\nTesting with image: {test_image}")
    print(f"Image size: {test_image.stat().st_size} bytes")

    # Run prediction
    print("\nRunning prediction (this may take 10-30 seconds)...")
    start = time.time()

    try:
        output = vl_pipeline.predict(str(test_image), batch_size=1)
        elapsed = time.time() - start

        print(f"OK Prediction completed in {elapsed:.1f} seconds")
        print(f"Output length: {len(output) if output else 0}")

        if not output:
            print("\nFAIL No output from predict()")
            return False

        res = output[0]

        # Save to JSON
        temp_dir = Path("test_paddleocrvl_output")
        temp_dir.mkdir(exist_ok=True)
        res.save_to_json(save_path=str(temp_dir))

        # The result file is looked up as "<image stem>_res.json".
        json_file = temp_dir / f"{test_image.stem}_res.json"
        print(f"\nJSON saved to: {json_file}")

        if not json_file.exists():
            print("\nFAIL JSON file not created")
            return False

        return _report_seal_blocks(json_file)

    except Exception as e:
        # Any failure in prediction, saving or parsing counts as a failed run.
        elapsed = time.time() - start
        print(f"\nFAIL Prediction failed after {elapsed:.1f} seconds: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = test_paddleocrvl_direct()

    print("\n" + "=" * 80)
    if success:
        print("PaddleOCRVL is working correctly!")
        sys.exit(0)
    else:
        print("PaddleOCRVL test failed!")
        sys.exit(1)