report-detect/scripts/seal_curved_ocr.py

122 lines
4.1 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
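"""
Curved Seal Text Extraction

1. Locate the seal in the input image (e.g. sanity_check.png) with
   LayoutDetection and crop that region
2. Use SealTextDetection to find curved text regions inside the crop
3. Use TextRecognition on each detected region

Usage: python seal_curved_ocr.py <image_path>
The JSON result is printed to stdout; progress messages go to stderr.
"""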
import sys
import json
import os
from PIL import Image, ImageDraw, ImageFont
import numpy as np
def _find_seal_box(layout_output):
    """Return the coordinate of the first box labelled "seal", or None."""
    for res in layout_output:
        for box in res["boxes"]:
            if box["label"] == "seal":
                # Return immediately so a later result cannot overwrite
                # the first seal that was found.
                return box["coordinate"]
    return None


def _clamped_bbox(poly, width, height):
    """Return the integer bounding box of *poly* clipped to the image.

    Args:
        poly: Sequence of (x, y) points describing one detected region.
        width: Width of the image the polygon refers to.
        height: Height of the image the polygon refers to.

    Returns:
        A ``(left, top, right, bottom)`` tuple, or None when the polygon is
        empty or its clipped bounding box has no area.
    """
    pts = np.asarray(poly)
    if pts.size == 0:
        return None
    min_x, min_y = pts.min(axis=0)
    max_x, max_y = pts.max(axis=0)
    left, top = max(0, int(min_x)), max(0, int(min_y))
    right, bottom = min(width, int(max_x)), min(height, int(max_y))
    if right <= left or bottom <= top:
        return None
    return left, top, right, bottom


def main():
    """Run seal detection, curved-text detection and recognition on one image.

    The image path is read from ``sys.argv[1]``. Progress messages are written
    to stderr; a single JSON object is written to stdout. On success the
    object has the keys ``success``, ``texts`` (list of ``{"text", "score"}``)
    and ``combined_text``, and an annotated crop is saved as
    ``seal_annotated.png`` in the current directory.

    Raises:
        SystemExit: With code 1 when the argument is missing, no seal is
            found, or any exception occurs (an ``error`` JSON object is
            printed first).
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: python seal_curved_ocr.py <image_path>"}))
        sys.exit(1)
    image_path = sys.argv[1]
    try:
        import tempfile

        from paddleocr import LayoutDetection, SealTextDetection, TextRecognition

        print("Step 1: Detecting seal region...", file=sys.stderr)
        # Step 1: Find seal in the image
        layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
        layout_output = layout_model.predict(image_path, batch_size=1, layout_nms=True)
        seal_box = _find_seal_box(layout_output)
        if seal_box is None:
            print(json.dumps({"error": "No seal found in image"}))
            sys.exit(1)
        print(f"Found seal at: {seal_box}", file=sys.stderr)

        # Step 2: Crop seal region (with a small margin, kept inside the image)
        padding = 20
        with Image.open(image_path) as img:
            x1, y1, x2, y2 = [int(c) for c in seal_box]
            crop_box = (
                max(0, x1 - padding),
                max(0, y1 - padding),
                min(img.width, x2 + padding),
                min(img.height, y2 + padding),
            )
            # Convert to RGB so the crop can always be saved as PNG and the
            # RGB outline colour below is valid for grayscale inputs too.
            seal_crop = img.crop(crop_box).convert("RGB")

        # Annotate a separate copy: drawing on seal_crop itself would paint
        # outlines into the pixels that later text regions are cropped from.
        annotated = seal_crop.copy()
        draw = ImageDraw.Draw(annotated)
        all_texts = []

        # Intermediate crops live in a private temporary directory so they are
        # removed even when a later step raises, and so concurrent runs do not
        # overwrite each other's files.
        with tempfile.TemporaryDirectory() as tmp_dir:
            seal_crop_path = os.path.join(tmp_dir, "seal_crop.png")
            text_crop_path = os.path.join(tmp_dir, "text_crop.png")
            seal_crop.save(seal_crop_path)

            print("Step 2: Detecting text in seal...", file=sys.stderr)
            # Step 3: Detect text in seal
            seal_det = SealTextDetection(model_name="PP-OCRv4_server_seal_det")
            det_output = seal_det.predict(seal_crop_path, batch_size=1)

            # Step 4: Recognize text
            text_rec = TextRecognition(model_name="PP-OCRv4_server_rec")
            for det_res in det_output:
                dt_polys = det_res.get("dt_polys", [])
                print(f"Found {len(dt_polys)} text regions", file=sys.stderr)
                for poly in dt_polys:
                    poly = np.asarray(poly)
                    bbox = _clamped_bbox(poly, seal_crop.width, seal_crop.height)
                    if bbox is None:
                        # An empty crop cannot be saved or recognized.
                        print("  - Skipped empty text region", file=sys.stderr)
                        continue

                    seal_crop.crop(bbox).save(text_crop_path)
                    rec_output = text_rec.predict(text_crop_path, batch_size=1)
                    for rec_res in rec_output:
                        text = rec_res.get("rec_text", "")
                        score = rec_res.get("rec_score", 0)
                        # NumPy scalars are not JSON serializable; unwrap them.
                        if hasattr(score, "item"):
                            score = score.item()
                        all_texts.append({
                            "text": str(text),
                            "score": float(score)
                        })
                        print(f"  - Recognized: '{text}' (score: {score:.2f})", file=sys.stderr)

                    # Draw polygon on the annotation copy only
                    pts = [(int(p[0]), int(p[1])) for p in poly]
                    draw.polygon(pts, outline=(255, 0, 255), width=2)

        # Save annotated seal
        annotated.save("seal_annotated.png")

        combined = " ".join(t["text"] for t in all_texts)
        print(json.dumps({
            "success": True,
            "texts": all_texts,
            "combined_text": combined
        }, ensure_ascii=False))
    except Exception as e:
        # Top-level boundary: report every failure as JSON so callers parsing
        # stdout always receive a well-formed object.
        import traceback
        print(json.dumps({"error": str(e), "traceback": traceback.format_exc()}))
        sys.exit(1)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()