#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Seal Text Extraction Pipeline

1. Use LayoutDetection to find seal regions
2. Crop seal region from image
3. Use SealTextDetection to find text areas in seal
4. Use TextRecognition to extract text from detected areas

The result is printed to stdout as a single JSON document; progress messages
go to stderr so they never corrupt the JSON stream.
"""
import json
import os
import sys
import tempfile

import numpy as np

# Pixels of context kept around each detected seal when cropping.
SEAL_PADDING = 10
# Outline colour (magenta) used for every annotation on the output image.
OUTLINE_COLOR = (255, 0, 255)


def _find_seal_boxes(layout_output):
    """Collect the coordinate and score of every box labelled "seal"."""
    return [
        {"coordinate": box["coordinate"], "score": box["score"]}
        for res in layout_output
        for box in res["boxes"]
        if box["label"] == "seal"
    ]


def _padded_box(coordinate, width, height, padding=SEAL_PADDING):
    """Return the integer crop box for a seal, padded and clamped to the image."""
    x1, y1, x2, y2 = coordinate
    return (
        max(0, int(x1) - padding),
        max(0, int(y1) - padding),
        min(width, int(x2) + padding),
        min(height, int(y2) + padding),
    )


def _polygon_bounds(poly, width, height):
    """Return the clamped bounding box of a polygon, or None if it is empty."""
    if poly.size == 0:
        return None
    min_x, min_y = poly.min(axis=0)
    max_x, max_y = poly.max(axis=0)
    left, top = max(0, int(min_x)), max(0, int(min_y))
    right, bottom = min(width, int(max_x)), min(height, int(max_y))
    # A zero-area crop cannot be saved or recognised; let the caller skip it.
    if right <= left or bottom <= top:
        return None
    return left, top, right, bottom


def _recognize_seal(seal_crop, idx, offset, seal_det_model, text_rec_model, draw, work_dir):
    """Detect and recognise the text regions inside one cropped seal.

    Args:
        seal_crop: PIL image of the (padded) seal region.
        idx: Index of the seal, used for log messages and temp file names.
        offset: (x, y) position of the crop inside the full image, used to
            translate polygons back to full-image coordinates for drawing.
        seal_det_model: Seal text detection model exposing ``predict``.
        text_rec_model: Text recognition model exposing ``predict``.
        draw: ImageDraw handle of the visualization image.
        work_dir: Directory in which intermediate crops are written.

    Returns:
        A list of ``{"text", "score", "poly"}`` dicts; polygons are expressed
        in seal-crop coordinates.
    """
    off_x, off_y = offset
    seal_crop_path = os.path.join(work_dir, f"temp_seal_{idx}.png")
    seal_crop.save(seal_crop_path)

    print(f"Step 2: Detecting text in seal {idx}...", file=sys.stderr)

    # Step 3: Detect text areas in seal
    seal_texts = []
    for det_res in seal_det_model.predict(seal_crop_path, batch_size=1):
        dt_polys = det_res["dt_polys"]
        print(f"Found {len(dt_polys)} text region(s) in seal", file=sys.stderr)

        # Step 4: For each detected text region, crop and recognize
        for poly_idx, raw_poly in enumerate(dt_polys):
            poly = np.asarray(raw_poly, dtype=float).reshape(-1, 2)
            bounds = _polygon_bounds(poly, seal_crop.width, seal_crop.height)
            if bounds is None:
                print(
                    f"Skipping empty text region {poly_idx} in seal {idx}",
                    file=sys.stderr,
                )
                continue

            text_crop_path = os.path.join(work_dir, f"temp_text_{idx}_{poly_idx}.png")
            seal_crop.crop(bounds).save(text_crop_path)

            for rec_res in text_rec_model.predict(text_crop_path, batch_size=1):
                score = rec_res.get("rec_score", 0)
                # Convert numpy scalars to Python native types for json.dumps.
                if hasattr(score, "item"):
                    score = score.item()
                seal_texts.append({
                    "text": str(rec_res.get("rec_text", "")),
                    "score": float(score),
                    "poly": [[float(p[0]), float(p[1])] for p in poly],
                })

            # Outline the text region on the visualization image.
            abs_poly = [(int(p[0] + off_x), int(p[1] + off_y)) for p in poly]
            draw.polygon(abs_poly, outline=OUTLINE_COLOR, width=2)

    return seal_texts


def main():
    """Run the pipeline on ``sys.argv[1]`` and print a JSON report to stdout.

    Command line: ``seal_text_extract.py <image_path> [output_path]``. The
    annotated image is written to ``output_path`` (default
    ``seal_text_output.png``).

    Exit status is 1 on a usage error or any unexpected failure, and 0 both on
    success and when no seal is found in the image.
    """
    if len(sys.argv) < 2:
        print(json.dumps({
            "success": False,
            "error": "Usage: python seal_text_extract.py <image_path> [output_path]",
        }))
        sys.exit(1)

    image_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else "seal_text_output.png"

    try:
        # Heavy / optional dependencies are imported here so that a missing
        # package is reported through the JSON error path below.
        from paddleocr import LayoutDetection, SealTextDetection, TextRecognition
        from PIL import Image, ImageDraw

        # Step 1: Detect layout and find seal regions
        print("Step 1: Detecting layout...", file=sys.stderr)
        layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
        layout_output = layout_model.predict(image_path, batch_size=1, layout_nms=True)

        seal_boxes = _find_seal_boxes(layout_output)
        if not seal_boxes:
            print(json.dumps({"success": False, "error": "No seal detected in image"}))
            sys.exit(0)

        print(f"Found {len(seal_boxes)} seal(s)", file=sys.stderr)

        # Step 2: Initialize seal text detection and text recognition
        seal_det_model = SealTextDetection(model_name="PP-OCRv4_server_seal_det")
        text_rec_model = TextRecognition(model_name="PP-OCRv4_server_rec")

        # RGB guarantees the tuple outline colour is valid and that crops can
        # be saved as PNG regardless of the source mode; the file handle is
        # released as soon as the pixels are loaded.
        with Image.open(image_path) as opened:
            source = opened.convert("RGB")
        # Draw on a copy so annotations never leak into later seal crops.
        canvas = source.copy()
        draw = ImageDraw.Draw(canvas)

        all_results = []
        # The temporary directory is removed even if a model call raises.
        with tempfile.TemporaryDirectory(prefix="seal_text_") as work_dir:
            for idx, seal in enumerate(seal_boxes):
                x1, y1, x2, y2 = (float(c) for c in seal["coordinate"])
                crop_box = _padded_box(seal["coordinate"], source.width, source.height)
                seal_texts = _recognize_seal(
                    source.crop(crop_box),
                    idx,
                    crop_box[:2],
                    seal_det_model,
                    text_rec_model,
                    draw,
                    work_dir,
                )

                all_results.append({
                    "seal_box": [x1, y1, x2, y2],
                    "seal_score": float(seal["score"]),
                    "texts": seal_texts,
                })

                # Draw seal box
                draw.rectangle([x1, y1, x2, y2], outline=OUTLINE_COLOR, width=3)

        # Save visualization
        canvas.save(output_path)

        # Combine all extracted texts
        combined_texts = [
            text_item["text"]
            for result in all_results
            for text_item in result["texts"]
        ]

        print(json.dumps({
            "success": True,
            "output_path": output_path,
            "seals": all_results,
            "combined_text": " ".join(combined_texts),
        }, ensure_ascii=False))

    except Exception as e:
        # Top-level boundary: report any failure as JSON so callers parsing
        # stdout always receive a well-formed document.
        import traceback
        print(json.dumps({
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc(),
        }))
        sys.exit(1)


if __name__ == "__main__":
    main()