# Source: report-detect/scripts/seal_text_extract.py
# File-viewer metadata: 162 lines, 6.0 KiB, Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Seal Text Extraction Pipeline
1. Use LayoutDetection to find seal regions
2. Crop seal region from image
3. Use SealTextDetection to find text areas in seal
4. Use TextRecognition to extract text from detected areas
"""
import sys
import json
import os
from PIL import Image
import numpy as np
# Pixels of surrounding context kept around each detected seal box when cropping.
_SEAL_PADDING = 10
# Colour used for every outline drawn on the visualization image.
_OUTLINE_COLOR = (255, 0, 255)


def _collect_seal_boxes(layout_output):
    """Return the coordinate and score of every layout box labelled "seal"."""
    seals = []
    for res in layout_output:
        for box in res["boxes"]:
            if box["label"] == "seal":
                seals.append({
                    "coordinate": box["coordinate"],
                    "score": box["score"],
                })
    return seals


def _clamped_bbox(points, width, height):
    """Return the integer bounding box of *points* clipped to width x height.

    Returns ``None`` when *points* is not a non-empty (N, 2) array or when the
    clipped box has no area, so callers never try to save a zero-size crop.
    """
    if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] != 2:
        return None
    lo_x, lo_y = points.min(axis=0)
    hi_x, hi_y = points.max(axis=0)
    left = max(0, int(lo_x))
    top = max(0, int(lo_y))
    right = min(width, int(hi_x))
    bottom = min(height, int(hi_y))
    if right <= left or bottom <= top:
        return None
    return left, top, right, bottom


def _read_seal_texts(seal_crop, origin, idx, workdir, seal_det_model, text_rec_model, draw):
    """Detect and recognise the text regions of one cropped seal.

    ``origin`` is the (x, y) position of ``seal_crop`` inside the full image;
    it is used to draw each detected polygon on ``draw`` in full-image
    coordinates. Intermediate crops are written under ``workdir`` because the
    models are fed file paths. Returns a list of ``{"text", "score", "poly"}``
    dicts with plain Python types so the result is JSON-serialisable.
    """
    seal_crop_path = os.path.join(workdir, f"seal_{idx}.png")
    seal_crop.save(seal_crop_path)
    print(f"Step 2: Detecting text in seal {idx}...", file=sys.stderr)

    off_x, off_y = origin
    seal_texts = []
    # Step 3: Detect text areas in seal
    for det_res in seal_det_model.predict(seal_crop_path, batch_size=1):
        dt_polys = det_res["dt_polys"]
        print(f"Found {len(dt_polys)} text region(s) in seal", file=sys.stderr)
        # Step 4: For each detected text region, crop and recognize
        for poly_idx, poly in enumerate(dt_polys):
            points = np.asarray(poly)
            bbox = _clamped_bbox(points, seal_crop.width, seal_crop.height)
            if bbox is None:
                # Nothing usable to crop for this polygon; skip it.
                continue
            text_crop_path = os.path.join(workdir, f"text_{idx}_{poly_idx}.png")
            seal_crop.crop(bbox).save(text_crop_path)
            for rec_res in text_rec_model.predict(text_crop_path, batch_size=1):
                score = rec_res.get("rec_score", 0)
                # Convert numpy scalars to Python native types for json.dumps.
                if hasattr(score, "item"):
                    score = score.item()
                seal_texts.append({
                    "text": str(rec_res.get("rec_text", "")),
                    "score": float(score),
                    "poly": [[float(px), float(py)] for px, py in points],
                })
            # Polygon coordinates are relative to the seal crop; shift them back.
            draw.polygon(
                [(int(px + off_x), int(py + off_y)) for px, py in points],
                outline=_OUTLINE_COLOR,
                width=2,
            )
    return seal_texts


def main():
    """Run the seal text extraction pipeline from the command line.

    Arguments are read from ``sys.argv``: the input image path (required) and
    the path for the annotated output image (optional, defaults to
    ``seal_text_output.png``).

    Progress messages go to stderr. Exactly one JSON document is printed to
    stdout:

    * success: ``{"success": True, "output_path", "seals", "combined_text"}``
    * no seal found: ``{"success": False, "error"}`` and exit status 0
    * usage error or any exception: ``{"success": False, "error", ...}`` and
      exit status 1 (exceptions also include ``"traceback"``)
    """
    if len(sys.argv) < 2:
        print(json.dumps({
            "success": False,
            "error": "Usage: python seal_text_extract.py <image_path> [output_path]",
        }))
        sys.exit(1)

    image_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else "seal_text_output.png"

    try:
        # Imported inside the try so a missing dependency is reported as JSON.
        import tempfile

        from paddleocr import LayoutDetection, SealTextDetection, TextRecognition
        from PIL import ImageDraw

        # Step 1: Detect layout and find seal regions
        print("Step 1: Detecting layout...", file=sys.stderr)
        layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
        layout_output = layout_model.predict(image_path, batch_size=1, layout_nms=True)
        seal_boxes = _collect_seal_boxes(layout_output)

        if not seal_boxes:
            print(json.dumps({"success": False, "error": "No seal detected in image"}))
            sys.exit(0)
        print(f"Found {len(seal_boxes)} seal(s)", file=sys.stderr)

        # Step 2: Initialize seal text detection and text recognition
        seal_det_model = SealTextDetection(model_name="PP-OCRv4_server_seal_det")
        text_rec_model = TextRecognition(model_name="PP-OCRv4_server_rec")

        # Load the image once and release the file handle. Converting to RGB
        # lets the RGB outline colour be drawn regardless of the source mode.
        with Image.open(image_path) as source:
            img = source.convert("RGB")
        # Draw on a copy so outlines of earlier seals never leak into the
        # crops handed to the models for later (possibly overlapping) seals.
        canvas = img.copy()
        draw = ImageDraw.Draw(canvas)

        all_results = []
        # Intermediate crops live in a private directory that is removed even
        # when a model call raises.
        with tempfile.TemporaryDirectory(prefix="seal_text_") as workdir:
            for idx, seal in enumerate(seal_boxes):
                x1, y1, x2, y2 = (float(c) for c in seal["coordinate"])

                # Crop seal region with padding, clipped to the image bounds.
                crop_x1 = max(0, int(x1) - _SEAL_PADDING)
                crop_y1 = max(0, int(y1) - _SEAL_PADDING)
                crop_x2 = min(img.width, int(x2) + _SEAL_PADDING)
                crop_y2 = min(img.height, int(y2) + _SEAL_PADDING)

                seal_texts = []
                if crop_x2 > crop_x1 and crop_y2 > crop_y1:
                    seal_crop = img.crop((crop_x1, crop_y1, crop_x2, crop_y2))
                    seal_texts = _read_seal_texts(
                        seal_crop,
                        (crop_x1, crop_y1),
                        idx,
                        workdir,
                        seal_det_model,
                        text_rec_model,
                        draw,
                    )

                all_results.append({
                    "seal_box": [x1, y1, x2, y2],
                    "seal_score": float(seal["score"]),
                    "texts": seal_texts,
                })
                # Draw seal box
                draw.rectangle([x1, y1, x2, y2], outline=_OUTLINE_COLOR, width=3)

        # Save visualization
        canvas.save(output_path)

        # Combine all extracted texts
        combined_text = " ".join(
            item["text"] for result in all_results for item in result["texts"]
        )
        print(json.dumps({
            "success": True,
            "output_path": output_path,
            "seals": all_results,
            "combined_text": combined_text,
        }, ensure_ascii=False))
    except Exception as e:
        import traceback
        print(json.dumps({
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc(),
        }))
        sys.exit(1)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()