#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Direct test of PaddleOCRVL to verify it works correctly.

Run as a script: the process exits with status 0 when at least one seal
block is detected in the test image, and with status 1 otherwise.
"""

import json
import sys
import time
import traceback
from pathlib import Path


def _find_seal_image(search_dirs):
    """Return the first ``*seal*.png`` found under *search_dirs*, or ``None``.

    Directories are tried in the given order; missing ones are skipped.
    """
    for directory in search_dirs:
        if not directory.exists():
            continue
        # Only PNGs whose file name contains "seal" qualify. Sorting makes the
        # pick reproducible, because glob order depends on the filesystem.
        matches = sorted(directory.glob("**/*seal*.png"))
        if matches:
            return matches[0]
    return None


def _create_synthetic_seal_image(target):
    """Draw a seal-like test picture, save it to *target* and return *target*.

    The picture is a 400x400 white canvas with a red circle outline and a
    short Chinese caption centred inside it.
    """
    # Pillow is imported lazily: it is only needed when no real image exists.
    from PIL import Image, ImageDraw, ImageFont

    img = Image.new('RGB', (400, 400), color='white')
    draw = ImageDraw.Draw(img)

    # Red circle outline standing in for a stamp border.
    draw.ellipse([50, 50, 350, 350], outline='red', width=5)

    try:
        # Prefer a font that supports Chinese.
        # NOTE(review): msyh.ttc is presumably only installed on Windows.
        font = ImageFont.truetype("msyh.ttc", 30)
    except OSError:
        # truetype() raises OSError when the font file cannot be opened.
        # NOTE(review): the default font may lack CJK glyphs - confirm the
        # caption still renders usefully on the target platform.
        font = ImageFont.load_default()

    draw.text((200, 200), "测试机构名称", fill='black', font=font, anchor='mm')
    img.save(target)
    return target


def _report_blocks(blocks):
    """Print a summary line per parsed block and return the ``seal`` blocks."""
    seal_blocks = []
    for index, block in enumerate(blocks, start=1):
        label = block.get('block_label', 'unknown')
        content = block.get('block_content', '')
        print(f" Block {index}: {label}")
        if content:
            # Add the ellipsis only when the preview was actually truncated.
            suffix = "..." if len(content) > 100 else ""
            print(f" Content: '{content[:100]}{suffix}'")
        if label == 'seal':
            print(" *** SEAL DETECTED ***")
            print(f" Full text: '{content}'")
            seal_blocks.append(block)
    return seal_blocks


def test_paddleocrvl_direct():
    """Test PaddleOCRVL directly without multiprocessing.

    Steps: import ``PaddleOCRVL``, build the pipeline with seal recognition,
    image-block OCR and layout detection enabled, pick (or synthesize) a test
    image, run ``predict`` on it, save the first result as JSON and inspect
    the ``parsing_res_list`` entries of that JSON.

    Returns:
        bool: ``True`` when at least one block labelled ``seal`` is present in
        the saved JSON; ``False`` on any failure (import, initialization,
        image creation, prediction, missing JSON file, or no seal block).
        Progress and failures are reported on stdout.
    """
    banner = "=" * 80
    print(banner)
    print("PaddleOCRVL Direct Test")
    print(banner)

    try:
        from paddleocr import PaddleOCRVL
    except ImportError as e:
        print(f"FAIL Failed to import PaddleOCRVL: {e}")
        print(" Install with: pip install paddleocr[doc-parser]")
        return False
    print("OK PaddleOCRVL import successful")

    # Initialize
    print("\nInitializing PaddleOCRVL pipeline...")
    try:
        vl_pipeline = PaddleOCRVL(
            use_seal_recognition=True,
            use_ocr_for_image_block=True,
            use_layout_detection=True,
        )
    except Exception as e:
        print(f"FAIL Failed to initialize pipeline: {e}")
        traceback.print_exc()
        return False
    print("OK Pipeline initialized successfully")

    # Find a test image among the known output directories.
    test_image = _find_seal_image([
        Path("test_reports_full"),
        Path("bridge_output"),
        Path("temp_paddleocr_vl"),
    ])

    if not test_image:
        print("\nNo test image found. Creating a simple test...")
        try:
            test_image = _create_synthetic_seal_image(Path("test_seal.png"))
        except Exception as e:
            # Report like the other stages instead of crashing the script.
            print(f"FAIL Failed to create test image: {e}")
            traceback.print_exc()
            return False
        print(f"Created test image: {test_image}")

    print(f"\nTesting with image: {test_image}")
    print(f"Image size: {test_image.stat().st_size} bytes")

    # Run prediction
    print("\nRunning prediction (this may take 10-30 seconds)...")
    start = time.time()

    try:
        output = vl_pipeline.predict(str(test_image), batch_size=1)
        elapsed = time.time() - start

        print(f"OK Prediction completed in {elapsed:.1f} seconds")
        print(f"Output length: {len(output) if output else 0}")

        if not output:
            print("\nFAIL No output from predict()")
            return False

        # Save the first result to JSON.
        temp_dir = Path("test_paddleocrvl_output")
        temp_dir.mkdir(exist_ok=True)
        output[0].save_to_json(save_path=str(temp_dir))

        # NOTE(review): assumes save_to_json writes "<image stem>_res.json"
        # into save_path - confirm against the installed paddleocr version.
        json_file = temp_dir / f"{test_image.stem}_res.json"
        print(f"\nJSON saved to: {json_file}")

        if not json_file.exists():
            print("\nFAIL JSON file not created")
            return False

        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # "or []" also covers a present-but-null parsing_res_list.
        blocks = data.get('parsing_res_list') or []
        print(f"\nParsing results ({len(blocks)} blocks):")
        seal_blocks = _report_blocks(blocks)

        # Check if seal was found
        if seal_blocks:
            print(f"\nOK SUCCESS: Found {len(seal_blocks)} seal(s)")
            return True

        print("\nFAIL FAIL: No seal blocks detected")
        return False

    except Exception as e:
        # The try body also covers saving and parsing the result, so the
        # elapsed time reported here may include that post-processing.
        elapsed = time.time() - start
        print(f"\nFAIL Prediction failed after {elapsed:.1f} seconds: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Script entry point: run the direct test and turn its boolean verdict
    # into a process exit status (0 = working, 1 = failed).
    passed = test_paddleocrvl_direct()
    print("\n" + "=" * 80)
    if not passed:
        print("PaddleOCRVL test failed!")
        sys.exit(1)
    print("PaddleOCRVL is working correctly!")
    sys.exit(0)