report-detect/archive/temp_scripts/test_paddleocrvl_direct.py

158 lines
4.9 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Direct test of PaddleOCRVL to verify it works correctly.
"""
import sys
from pathlib import Path
def _find_seal_image(search_dirs):
    """Return the first ``*seal*.png`` found under *search_dirs*, or ``None``.

    Directories that do not exist are skipped. Each existing directory is
    searched recursively, in the order given, and the first match wins.
    """
    for directory in search_dirs:
        if not directory.exists():
            continue
        # Only PNGs with "seal" in the file name qualify. Take the first hit
        # lazily instead of materializing the whole recursive listing.
        match = next(directory.glob("**/*seal*.png"), None)
        if match is not None:
            return match
    return None


def _create_synthetic_seal_image(target):
    """Draw a seal-like test image, save it to *target* and return *target*.

    The image is a white 400x400 canvas with a red circle outline and a short
    Chinese caption centred inside it.
    """
    from PIL import Image, ImageDraw, ImageFont

    img = Image.new('RGB', (400, 400), color='white')
    draw = ImageDraw.Draw(img)
    # Red circle outline standing in for a seal border.
    draw.ellipse([50, 50, 350, 350], outline='red', width=5)

    try:
        # Prefer a font that supports Chinese glyphs.
        font = ImageFont.truetype("msyh.ttc", 30)
    except OSError:
        # The font file is missing or unreadable; use Pillow's built-in font.
        font = ImageFont.load_default()

    text = "测试机构名称"
    try:
        draw.text((200, 200), text, fill='black', font=font, anchor='mm')
    except (UnicodeEncodeError, ValueError) as e:
        # The built-in fallback font may be unable to render non-Latin text
        # (or to honour the anchor). Keep the circle-only image rather than
        # aborting the whole check.
        print(f"WARN Could not draw caption on test image: {e}")

    img.save(target)
    return target


def _report_seal_blocks(data):
    """Print every parsed block in *data* and return whether a seal was found.

    *data* is the dict loaded from the pipeline's JSON output; blocks are read
    from its ``parsing_res_list`` key (treated as empty when absent).
    """
    blocks = data.get('parsing_res_list', [])
    print(f"\nParsing results ({len(blocks)} blocks):")

    seal_count = 0
    for index, block in enumerate(blocks, start=1):
        label = block.get('block_label', 'unknown')
        content = block.get('block_content', '')
        print(f" Block {index}: {label}")
        if content:
            # Show the ellipsis only when the preview really is truncated.
            preview = content if len(content) <= 100 else f"{content[:100]}..."
            print(f" Content: '{preview}'")
        if label == 'seal':
            seal_count += 1
            print(" *** SEAL DETECTED ***")
            print(f" Full text: '{content}'")

    if seal_count:
        print(f"\nOK SUCCESS: Found {seal_count} seal(s)")
        return True
    print("\nFAIL FAIL: No seal blocks detected")
    return False


def test_paddleocrvl_direct() -> bool:
    """Run PaddleOCRVL once on a seal image, directly in this process.

    Steps: import ``PaddleOCRVL``, build the pipeline with seal recognition,
    OCR for image blocks and layout detection enabled, pick a ``*seal*.png``
    from the known output directories (or synthesize ``test_seal.png`` when
    none exists), run ``predict`` on it, save the first result as JSON under
    ``test_paddleocrvl_output`` and inspect the parsed blocks.

    Returns:
        True when at least one block labelled ``seal`` is present in the saved
        JSON. False when the import, pipeline initialization or prediction
        fails, when no result or JSON file is produced, or when no seal block
        is detected. Failures are reported on stdout rather than raised.
    """
    import time
    import traceback

    banner = "=" * 80
    print(banner)
    print("PaddleOCRVL Direct Test")
    print(banner)

    try:
        from paddleocr import PaddleOCRVL
    except ImportError as e:
        print(f"FAIL Failed to import PaddleOCRVL: {e}")
        print(" Install with: pip install paddleocr[doc-parser]")
        return False
    print("OK PaddleOCRVL import successful")

    # Initialize
    print("\nInitializing PaddleOCRVL pipeline...")
    try:
        vl_pipeline = PaddleOCRVL(
            use_seal_recognition=True,
            use_ocr_for_image_block=True,
            use_layout_detection=True,
        )
    except Exception as e:
        # Diagnostic boundary: report any initialization failure and bail out.
        print(f"FAIL Failed to initialize pipeline: {e}")
        traceback.print_exc()
        return False
    print("OK Pipeline initialized successfully")

    # Find a test image, falling back to a synthetic one.
    test_image = _find_seal_image([
        Path("test_reports_full"),
        Path("bridge_output"),
        Path("temp_paddleocr_vl"),
    ])
    if test_image is None:
        print("\nNo test image found. Creating a simple test...")
        test_image = _create_synthetic_seal_image(Path("test_seal.png"))
        print(f"Created test image: {test_image}")

    print(f"\nTesting with image: {test_image}")
    print(f"Image size: {test_image.stat().st_size} bytes")

    # Run prediction
    print("\nRunning prediction (this may take 10-30 seconds)...")
    start = time.perf_counter()
    try:
        output = vl_pipeline.predict(str(test_image), batch_size=1)
        # Materialize before timing/len() so a lazily produced result is
        # handled the same way as a plain list.
        results = list(output) if output is not None else []
        elapsed = time.perf_counter() - start
        print(f"OK Prediction completed in {elapsed:.1f} seconds")
        print(f"Output length: {len(results)}")

        if not results:
            print("\nFAIL No output from predict()")
            return False

        # Save the first result to JSON and read it back for inspection.
        temp_dir = Path("test_paddleocrvl_output")
        temp_dir.mkdir(exist_ok=True)
        results[0].save_to_json(save_path=str(temp_dir))
        # The saved file is expected to be named "<image stem>_res.json".
        json_file = temp_dir / f"{test_image.stem}_res.json"
        print(f"\nJSON saved to: {json_file}")

        if not json_file.exists():
            print("\nFAIL JSON file not created")
            return False

        import json
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return _report_seal_blocks(data)
    except Exception as e:
        # Diagnostic boundary: anything raised during prediction, saving or
        # parsing is reported with a traceback and counted as a failed check.
        elapsed = time.perf_counter() - start
        print(f"\nFAIL Prediction failed after {elapsed:.1f} seconds: {e}")
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Script entry point: run the direct check once and translate its boolean
    # verdict into a process exit status (0 on success, 1 on failure).
    passed = test_paddleocrvl_direct()
    print("\n" + "=" * 80)
    if not passed:
        print("PaddleOCRVL test failed!")
        sys.exit(1)
    print("PaddleOCRVL is working correctly!")
    sys.exit(0)