report-detect/scripts/seal_curved_ocr.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Curved Seal Text Extraction
1. First crop the seal region from sanity_check.png
2. Use SealTextDetection to find curved text regions
3. Use TextRecognition for each region
"""
import sys
import json
import os
from PIL import Image, ImageDraw, ImageFont
import numpy as np

def _find_seal_box(layout_output):
    """Return the coordinate of the first box labelled "seal", or None."""
    for res in layout_output:
        for box in res["boxes"]:
            if box["label"] == "seal":
                # Returning here stops both loops, so a later result can
                # never overwrite the first seal that was found.
                return box["coordinate"]
    return None


def _clamped_bbox(poly, width, height):
    """Return poly's integer bounding box clipped to the image, or None if empty."""
    if poly.ndim != 2 or poly.shape[0] == 0:
        return None
    min_x, min_y = poly.min(axis=0)
    max_x, max_y = poly.max(axis=0)
    left = max(0, int(min_x))
    top = max(0, int(min_y))
    right = min(width, int(max_x))
    bottom = min(height, int(max_y))
    if right <= left or bottom <= top:
        return None
    return left, top, right, bottom


def main() -> None:
    """Extract the text of a seal from the image given on the command line.

    Reads the image path from ``sys.argv[1]``, finds the seal with
    ``LayoutDetection``, crops it with 20 px of padding, detects text
    polygons with ``SealTextDetection`` and recognizes the bounding-box crop
    of each polygon with ``TextRecognition``.

    Output:
        stdout: one JSON object, either
            ``{"success": True, "texts": [...], "combined_text": "..."}`` or
            ``{"error": "..."}`` (with a ``"traceback"`` key on unexpected
            failures).
        stderr: progress messages.
        ``seal_annotated.png``: the seal crop with detected polygons outlined,
            written to the current working directory.

    Exits with status 1 on a usage error, when no seal is found, or on any
    unexpected exception.
    """
    import tempfile

    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: python seal_curved_ocr.py <image_path>"}))
        sys.exit(1)

    image_path = sys.argv[1]

    try:
        from paddleocr import LayoutDetection, SealTextDetection, TextRecognition

        print("Step 1: Detecting seal region...", file=sys.stderr)

        # Step 1: Find seal in the image
        layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
        layout_output = layout_model.predict(image_path, batch_size=1, layout_nms=True)

        seal_box = _find_seal_box(layout_output)
        if seal_box is None:
            print(json.dumps({"error": "No seal found in image"}))
            sys.exit(1)

        print(f"Found seal at: {seal_box}", file=sys.stderr)

        # Step 2: Crop seal region (padded, clipped to the image bounds)
        padding = 20
        with Image.open(image_path) as img:
            x1, y1, x2, y2 = [int(c) for c in seal_box]
            x1 = max(0, x1 - padding)
            y1 = max(0, y1 - padding)
            x2 = min(img.width, x2 + padding)
            y2 = min(img.height, y2 + padding)
            seal_crop = img.crop((x1, y1, x2, y2))

        # Annotate a separate copy so outlines drawn for earlier regions never
        # end up inside the crops that are recognized for later regions.
        # Modes other than RGB/RGBA are converted because the outline colour
        # below is an RGB tuple.
        if seal_crop.mode in ("RGB", "RGBA"):
            annotated = seal_crop.copy()
        else:
            annotated = seal_crop.convert("RGB")
        draw = ImageDraw.Draw(annotated)

        all_texts = []

        # A private temporary directory avoids name clashes between concurrent
        # runs and is removed even when an exception or sys.exit() occurs.
        with tempfile.TemporaryDirectory(prefix="seal_ocr_") as tmp_dir:
            seal_crop_path = os.path.join(tmp_dir, "seal_crop.png")
            text_crop_path = os.path.join(tmp_dir, "text_crop.png")
            seal_crop.save(seal_crop_path)

            print("Step 2: Detecting text in seal...", file=sys.stderr)

            # Step 3: Detect text in seal
            seal_det = SealTextDetection(model_name="PP-OCRv4_server_seal_det")
            det_output = seal_det.predict(seal_crop_path, batch_size=1)

            # Step 4: Recognize text
            text_rec = TextRecognition(model_name="PP-OCRv4_server_rec")

            for det_res in det_output:
                dt_polys = det_res.get("dt_polys", [])
                print(f"Found {len(dt_polys)} text regions", file=sys.stderr)

                for poly in dt_polys:
                    poly = np.array(poly)
                    bbox = _clamped_bbox(poly, seal_crop.width, seal_crop.height)
                    if bbox is None:
                        # Nothing to recognize in a zero-area region.
                        print("  - Skipping empty text region", file=sys.stderr)
                        continue

                    # Crop the polygon's bounding box and recognize it
                    seal_crop.crop(bbox).save(text_crop_path)
                    rec_output = text_rec.predict(text_crop_path, batch_size=1)
                    for rec_res in rec_output:
                        text = rec_res.get("rec_text", "")
                        score = rec_res.get("rec_score", 0)
                        # Unwrap numpy scalars so the value is JSON-serializable
                        if hasattr(score, 'item'):
                            score = score.item()
                        score = float(score)

                        all_texts.append({
                            "text": str(text),
                            "score": score
                        })
                        print(f"  - Recognized: '{text}' (score: {score:.2f})", file=sys.stderr)

                    # Draw polygon on the annotation copy only
                    pts = [(int(p[0]), int(p[1])) for p in poly]
                    draw.polygon(pts, outline=(255, 0, 255), width=2)

        # Save annotated seal
        annotated.save("seal_annotated.png")

        combined = " ".join(t["text"] for t in all_texts)
        print(json.dumps({
            "success": True,
            "texts": all_texts,
            "combined_text": combined
        }, ensure_ascii=False))

    except Exception as e:
        # Top-level boundary: report any failure as JSON for the caller.
        # sys.exit() raises SystemExit, which is not caught here.
        import traceback
        print(json.dumps({"error": str(e), "traceback": traceback.format_exc()}))
        sys.exit(1)

# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()