#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Seal text recognition using the PaddleOCR SealRecognition pipeline.

The SealRecognition pipeline is used because it handles the curved/arc text
found in seals.

Usage:
    python seal_ocr.py <image_path> [output_path]

Exactly one JSON object is printed to stdout:

* on success: ``{"success": true, "output_path": ..., "texts": [...],
  "combined_text": "..."}``
* on failure: ``{"error": "...", "traceback": "..."}`` (the usage error has
  no ``traceback`` key) and the process exits with status 1.

Progress messages go to stderr so that stdout stays machine-readable.
"""

import json
import os
import sys
import traceback
from collections.abc import Mapping

DEFAULT_OUTPUT_PATH = "./seal_output/"
USAGE = "Usage: python seal_ocr.py <image_path> [output_path]"


def _lookup(obj, name):
    """Return field *name* of *obj*, trying attribute then mapping access.

    Returns ``None`` when the field is absent in both forms.
    """
    value = getattr(obj, name, None)
    if value is None and isinstance(obj, Mapping):
        value = obj.get(name)
    return value


def _extract_texts(res):
    """Collect recognized text strings from one pipeline result.

    Several result layouts are probed because the exact shape depends on the
    installed PaddleOCR version:

    * ``rec_texts``: an iterable of strings (empty entries are skipped);
    * ``ocr_result``: items that are either dicts with a ``"text"`` key or
      sequences whose second element is the text;
    * ``seal_res_list``: per-seal sub-results that are searched recursively.
      NOTE(review): assumes PaddleOCR 3.x returns dict-like results with the
      texts nested under ``seal_res_list`` -- confirm against the installed
      version.
    """
    texts = []

    rec_texts = _lookup(res, "rec_texts")
    if rec_texts is not None:
        texts.extend(str(text) for text in rec_texts if text)

    ocr_result = _lookup(res, "ocr_result")
    if ocr_result is not None:
        for item in ocr_result:
            if isinstance(item, dict) and "text" in item:
                texts.append(str(item["text"]))
            elif isinstance(item, (list, tuple)) and len(item) > 1:
                texts.append(str(item[1]))

    seal_results = _lookup(res, "seal_res_list")
    if seal_results is not None:
        for seal_res in seal_results:
            texts.extend(_extract_texts(seal_res))

    return texts


def main():
    """Run seal recognition on the image named on the command line.

    Reads ``sys.argv[1]`` as the image path and the optional ``sys.argv[2]``
    as the output directory (default ``./seal_output/``). Visualizations and
    per-result JSON files are saved into the output directory, and a JSON
    summary is printed to stdout.

    Raises:
        SystemExit: with status 1 when the image path is missing or when any
            exception occurs while loading or running the pipeline.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": USAGE}))
        sys.exit(1)

    image_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUTPUT_PATH

    try:
        # Imported lazily so a missing/broken paddleocr install is reported
        # through the JSON error channel instead of a raw traceback.
        from paddleocr import SealRecognition

        os.makedirs(output_path, exist_ok=True)

        pipeline = SealRecognition(
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
        )

        print(f"Processing: {image_path}", file=sys.stderr)

        all_texts = []
        for res in pipeline.predict(image_path):
            # Persist the visualization and raw result next to each other.
            res.save_to_img(output_path)
            res.save_to_json(output_path)
            all_texts.extend(_extract_texts(res))

        result_dict = {
            "success": True,
            "output_path": output_path,
            "texts": all_texts,
            "combined_text": " ".join(all_texts),
        }
        print(json.dumps(result_dict, ensure_ascii=False))

    except Exception as e:  # top-level boundary: report everything as JSON
        error_payload = {"error": str(e), "traceback": traceback.format_exc()}
        # ensure_ascii=False matches the success path so non-ASCII paths and
        # messages are emitted unescaped.
        print(json.dumps(error_payload, ensure_ascii=False))
        sys.exit(1)


if __name__ == "__main__":
    main()