#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Curved Seal Text Extraction

1. Locate the seal region in the input image and crop it
2. Use SealTextDetection to find curved text regions inside the crop
3. Use TextRecognition on each detected region

Usage: python seal_curved_ocr.py <image_path>

A JSON result is printed to stdout, progress messages go to stderr, and an
annotated copy of the seal crop is written to ``seal_annotated.png`` in the
current working directory.
"""

import sys
import json
import os
import tempfile

from PIL import Image, ImageDraw, ImageFont
import numpy as np

# Extra pixels kept around the detected seal box when cropping.
_SEAL_PADDING = 20
# Where the annotated seal crop is written (relative to the working directory).
_ANNOTATED_OUTPUT = "seal_annotated.png"
# Outline colour (magenta) and line width used for the detected polygons.
_OUTLINE_COLOR = (255, 0, 255)
_OUTLINE_WIDTH = 2


def _find_seal_box(layout_output):
    """Return the coordinate of the first layout box labelled "seal", or None."""
    for res in layout_output:
        for box in res["boxes"]:
            if box["label"] == "seal":
                # Return immediately so a later result cannot override the
                # first seal that was found.
                return box["coordinate"]
    return None


def _padded_box(box, width, height, padding):
    """Return ``box`` as ints, grown by ``padding`` and clamped to the image."""
    x1, y1, x2, y2 = [int(c) for c in box]
    return (
        max(0, x1 - padding),
        max(0, y1 - padding),
        min(width, x2 + padding),
        min(height, y2 + padding),
    )


def _clamped_bbox(points, width, height):
    """Return the int bounding box of ``points`` clamped to the image.

    ``points`` is an (N, 2) array of x/y vertices. Returns ``None`` when there
    are no points or the clamped box has no area, so callers can skip the
    region instead of trying to save an empty crop.
    """
    if points.size == 0:
        return None
    min_x, min_y = points.min(axis=0)
    max_x, max_y = points.max(axis=0)
    left = max(0, int(min_x))
    top = max(0, int(min_y))
    right = min(width, int(max_x))
    bottom = min(height, int(max_y))
    if right <= left or bottom <= top:
        return None
    return left, top, right, bottom


def _recognize_regions(seal_crop, det_output, text_rec, work_dir):
    """Recognize the text of every detected polygon in the seal crop.

    Args:
        seal_crop: PIL image of the cropped seal; it is never modified.
        det_output: iterable of detection results exposing ``dt_polys``.
        text_rec: recognition model exposing ``predict(path, batch_size=...)``.
        work_dir: existing directory used for the temporary region crops.

    Returns:
        A ``(texts, annotated)`` tuple: ``texts`` is a list of
        ``{"text": str, "score": float}`` dicts in detection order, and
        ``annotated`` is an RGB copy of ``seal_crop`` with each recognized
        polygon outlined.
    """
    # Annotate a separate RGB copy: drawing on ``seal_crop`` itself would put
    # the outlines of earlier polygons into the crops of later ones, and an
    # RGB canvas lets the outline colour render whatever the source mode is.
    annotated = seal_crop.convert("RGB")
    draw = ImageDraw.Draw(annotated)
    texts = []
    region_index = 0

    for det_res in det_output:
        dt_polys = det_res.get("dt_polys", [])
        print(f"Found {len(dt_polys)} text regions", file=sys.stderr)

        for poly in dt_polys:
            points = np.asarray(poly).reshape(-1, 2)
            bbox = _clamped_bbox(points, seal_crop.width, seal_crop.height)
            if bbox is None:
                print(" - Skipping empty text region", file=sys.stderr)
                continue

            # The recognizer is fed a file path, so write the region to disk.
            text_crop_path = os.path.join(work_dir, f"text_{region_index}.png")
            region_index += 1
            seal_crop.crop(bbox).save(text_crop_path)

            for rec_res in text_rec.predict(text_crop_path, batch_size=1):
                text = rec_res.get("rec_text", "")
                score = rec_res.get("rec_score", 0)
                if hasattr(score, "item"):
                    # Unwrap numpy scalars so the value is JSON-serializable.
                    score = score.item()
                texts.append({"text": str(text), "score": float(score)})
                print(
                    f" - Recognized: '{text}' (score: {score:.2f})",
                    file=sys.stderr,
                )

            outline = [(int(p[0]), int(p[1])) for p in points]
            draw.polygon(outline, outline=_OUTLINE_COLOR, width=_OUTLINE_WIDTH)

    return texts, annotated


def main():
    """Run seal detection and curved-text OCR on the image named in argv[1].

    On success prints ``{"success": true, "texts": [...], "combined_text": ...}``
    to stdout and writes ``seal_annotated.png``. On any failure prints a JSON
    object with an ``"error"`` key and exits with status 1.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: python seal_curved_ocr.py <image_path>"}))
        sys.exit(1)

    image_path = sys.argv[1]

    try:
        # Imported lazily so the usage error above does not pay the import
        # cost, and so an import failure is reported as JSON like any other.
        from paddleocr import LayoutDetection, SealTextDetection, TextRecognition

        print("Step 1: Detecting seal region...", file=sys.stderr)

        # Step 1: Find seal in the image
        layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
        layout_output = layout_model.predict(image_path, batch_size=1, layout_nms=True)

        seal_box = _find_seal_box(layout_output)
        if seal_box is None:
            print(json.dumps({"error": "No seal found in image"}))
            sys.exit(1)

        print(f"Found seal at: {seal_box}", file=sys.stderr)

        # Step 2: Crop seal region (crop() copies the pixels, so the source
        # file can be closed straight away).
        with Image.open(image_path) as img:
            seal_crop = img.crop(
                _padded_box(seal_box, img.width, img.height, _SEAL_PADDING)
            )

        # Intermediate crops live in a private temporary directory so they are
        # removed even when a later step raises, and so concurrent runs in the
        # same working directory cannot clobber each other's files.
        with tempfile.TemporaryDirectory(prefix="seal_ocr_") as work_dir:
            seal_crop_path = os.path.join(work_dir, "seal_crop.png")
            seal_crop.save(seal_crop_path)

            print("Step 2: Detecting text in seal...", file=sys.stderr)

            # Step 3: Detect text in seal
            seal_det = SealTextDetection(model_name="PP-OCRv4_server_seal_det")
            det_output = seal_det.predict(seal_crop_path, batch_size=1)

            # Step 4: Recognize text
            text_rec = TextRecognition(model_name="PP-OCRv4_server_rec")
            all_texts, annotated = _recognize_regions(
                seal_crop, det_output, text_rec, work_dir
            )

        # Save annotated seal
        annotated.save(_ANNOTATED_OUTPUT)

        combined = " ".join([t["text"] for t in all_texts])

        print(json.dumps({
            "success": True,
            "texts": all_texts,
            "combined_text": combined
        }, ensure_ascii=False))

    except Exception as e:
        # Top-level boundary: report every failure as JSON for the caller.
        import traceback
        print(json.dumps({"error": str(e), "traceback": traceback.format_exc()}))
        sys.exit(1)


if __name__ == "__main__":
    main()