#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
|
|
Seal text recognition using the PaddleOCR SealRecognition pipeline.

This pipeline properly handles the curved/arc text found in seals.
|
|
"""
|
|
import sys
|
|
import json
|
|
|
|
def main():
    """Run seal text recognition on one image and print a JSON report.

    Command line: ``python seal_ocr.py <image_path> [output_path]``.
    ``output_path`` defaults to ``./seal_output/`` and is created if missing.

    On success a JSON object with ``success``, ``output_path``, ``texts`` and
    ``combined_text`` is printed to stdout. On a usage error or any exception
    a JSON object with ``success: false`` and ``error`` (plus ``traceback``
    for exceptions) is printed to stdout and the process exits with status 1.
    Progress messages go to stderr so stdout stays machine-readable.
    """
    if len(sys.argv) < 2:
        usage = "Usage: python seal_ocr.py <image_path> [output_path]"
        print(json.dumps({"success": False, "error": usage}))
        sys.exit(1)

    image_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else "./seal_output/"

    try:
        # Imported lazily so that a missing paddleocr installation is
        # reported through the JSON error channel below instead of an
        # import-time crash.
        from paddleocr import SealRecognition
        import os

        # Ensure the output directory exists before the pipeline writes to it.
        os.makedirs(output_path, exist_ok=True)

        # Document orientation classification and unwarping are disabled;
        # only seal detection/recognition is requested.
        pipeline = SealRecognition(
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
        )

        print(f"Processing: {image_path}", file=sys.stderr)

        all_texts = []
        for res in pipeline.predict(image_path):
            # Persist the visualization image and the raw JSON result.
            res.save_to_img(output_path)
            res.save_to_json(output_path)
            all_texts.extend(_extract_texts(res))

        result_dict = {
            "success": True,
            "output_path": output_path,
            "texts": all_texts,
            "combined_text": " ".join(all_texts),
        }
        print(json.dumps(result_dict, ensure_ascii=False))

    except Exception as e:
        # Top-level boundary: report every failure as JSON so the caller can
        # parse stdout uniformly, then signal failure via the exit status.
        import traceback

        print(json.dumps({
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc(),
        }))
        sys.exit(1)


def _extract_texts(res):
    """Return the recognized text strings found in one pipeline result.

    Three result layouts are probed:

    * a ``rec_texts`` attribute holding a sequence of strings,
    * an ``ocr_result`` attribute holding dicts with a ``text`` key or
      ``(box, text, ...)`` sequences,
    * a mapping with ``rec_texts`` at the top level and/or inside each entry
      of ``seal_res_list`` (the layout the PaddleOCR 3.x seal recognition
      docs describe -- NOTE(review): confirm against the installed version).

    The mapping layout is only consulted when the attribute layouts yield
    nothing, so a result exposing both does not produce duplicates.
    """
    found = []

    # ``or ()`` tolerates attributes that exist but are None.
    for text in getattr(res, "rec_texts", None) or ():
        if text:
            found.append(str(text))

    for item in getattr(res, "ocr_result", None) or ():
        if isinstance(item, dict) and "text" in item:
            found.append(str(item["text"]))
        elif isinstance(item, (list, tuple)) and len(item) > 1:
            found.append(str(item[1]))

    if not found and isinstance(res, dict):
        groups = [res]
        groups.extend(res.get("seal_res_list") or ())
        for group in groups:
            if isinstance(group, dict):
                found.extend(
                    str(text) for text in group.get("rec_texts") or () if text
                )

    return found
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|