#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Seal Text Extraction Pipeline

1. Use LayoutDetection to find seal regions in the image.
2. Crop each seal region (with a small padding) from the image.
3. Use SealTextDetection to find text areas inside each seal crop.
4. Use TextRecognition to extract the text from every detected area.
"""
|
|
import json
import os
import sys
import tempfile

import numpy as np
from PIL import Image
|
|
|
|
# Extra pixels kept around each detected seal box when cropping.
_SEAL_PADDING = 10
# Outline colour (RGB) used for both seal boxes and text polygons.
_ANNOTATION_COLOR = (255, 0, 255)


def _find_seal_boxes(layout_output):
    """Return the coordinate/score of every layout box labelled ``"seal"``."""
    seal_boxes = []
    for res in layout_output:
        for box in res["boxes"]:
            if box["label"] == "seal":
                seal_boxes.append({
                    "coordinate": box["coordinate"],
                    "score": box["score"],
                })
    return seal_boxes


def _recognize_seal_texts(seal_crop, seal_idx, seal_det_model, text_rec_model, work_dir):
    """Detect the text regions of one cropped seal and recognize each of them.

    Args:
        seal_crop: PIL image holding the cropped seal.
        seal_idx: Index of the seal, used in the intermediate file names.
        seal_det_model: Model whose ``predict`` yields results with ``dt_polys``.
        text_rec_model: Model whose ``predict`` yields results with
            ``rec_text`` / ``rec_score``.
        work_dir: Existing directory that receives the intermediate PNG files.

    Returns:
        A ``(texts, polys)`` tuple. ``texts`` is a list of JSON-serializable
        dicts (``text``, ``score``, ``poly``); ``polys`` is the list of
        processed polygons as numpy arrays, in seal-crop coordinates.
    """
    seal_crop_path = os.path.join(work_dir, f"seal_{seal_idx}.png")
    seal_crop.save(seal_crop_path)

    texts = []
    polys = []
    for det_res in seal_det_model.predict(seal_crop_path, batch_size=1):
        dt_polys = det_res["dt_polys"]
        print(f"Found {len(dt_polys)} text region(s) in seal", file=sys.stderr)

        for poly_idx, raw_poly in enumerate(dt_polys):
            poly = np.array(raw_poly)
            # Axis-aligned bounding box of the polygon, truncated to pixels.
            min_x, min_y = (int(v) for v in poly.min(axis=0))
            max_x, max_y = (int(v) for v in poly.max(axis=0))
            if max_x <= min_x or max_y <= min_y:
                # An empty crop cannot be recognized; skip it instead of
                # aborting the whole run.
                print(
                    f"Skipping degenerate text region {poly_idx} in seal {seal_idx}",
                    file=sys.stderr,
                )
                continue

            text_crop_path = os.path.join(work_dir, f"text_{seal_idx}_{poly_idx}.png")
            seal_crop.crop((min_x, min_y, max_x, max_y)).save(text_crop_path)

            for rec_res in text_rec_model.predict(text_crop_path, batch_size=1):
                # str()/float() also turn numpy scalars into native types so
                # the result can be passed to json.dumps.
                texts.append({
                    "text": str(rec_res.get("rec_text", "")),
                    "score": float(rec_res.get("rec_score", 0)),
                    "poly": [[float(p[0]), float(p[1])] for p in poly],
                })
            polys.append(poly)

    return texts, polys


def main():
    """Run the seal text extraction pipeline from the command line.

    Usage: ``python seal_text_extract.py <image_path> [output_path]``.

    Progress messages go to stderr. Exactly one JSON document is printed to
    stdout:

    * on success: ``success``, ``output_path``, ``seals`` and ``combined_text``;
    * when no seal is found: ``success: False`` and ``error`` (exit code 0);
    * on a usage error or any exception: ``error`` (plus ``traceback`` for
      exceptions), exit code 1.

    An annotated copy of the input image (seal boxes and text polygons
    outlined) is written to ``output_path``, which defaults to
    ``seal_text_output.png``.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: python seal_text_extract.py <image_path> [output_path]"}))
        sys.exit(1)

    image_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else "seal_text_output.png"

    try:
        # Imported here so that a missing/broken installation is reported
        # through the JSON error channel below.
        from paddleocr import LayoutDetection, SealTextDetection, TextRecognition
        from PIL import ImageDraw

        # Step 1: Detect layout and find seal regions
        print("Step 1: Detecting layout...", file=sys.stderr)
        layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
        layout_output = layout_model.predict(image_path, batch_size=1, layout_nms=True)
        seal_boxes = _find_seal_boxes(layout_output)

        if not seal_boxes:
            print(json.dumps({"success": False, "error": "No seal detected in image"}))
            # SystemExit is not an Exception subclass, so it is not caught by
            # the handler at the bottom.
            sys.exit(0)

        print(f"Found {len(seal_boxes)} seal(s)", file=sys.stderr)

        # Step 2: Initialize seal text detection and text recognition
        seal_det_model = SealTextDetection(model_name="PP-OCRv4_server_seal_det")
        text_rec_model = TextRecognition(model_name="PP-OCRv4_server_rec")

        all_results = []

        # The temporary directory guarantees the intermediate crops are
        # removed even when a later step raises.
        with Image.open(image_path) as source, \
                tempfile.TemporaryDirectory(prefix="seal_text_") as work_dir:
            # Annotate a copy: crops must come from the untouched source so
            # outlines drawn for one seal never leak into another seal's crop.
            canvas = source.copy()
            draw = ImageDraw.Draw(canvas)

            for idx, seal in enumerate(seal_boxes):
                x1, y1, x2, y2 = seal["coordinate"]

                # Crop seal region with padding, clamped to the image bounds
                crop_x1 = max(0, int(x1) - _SEAL_PADDING)
                crop_y1 = max(0, int(y1) - _SEAL_PADDING)
                crop_x2 = min(source.width, int(x2) + _SEAL_PADDING)
                crop_y2 = min(source.height, int(y2) + _SEAL_PADDING)
                seal_crop = source.crop((crop_x1, crop_y1, crop_x2, crop_y2))

                print(f"Step 2: Detecting text in seal {idx}...", file=sys.stderr)

                # Steps 3 and 4: detect text areas in the seal, then recognize them
                seal_texts, polys = _recognize_seal_texts(
                    seal_crop, idx, seal_det_model, text_rec_model, work_dir
                )

                # Draw text polygons, shifted back into full-image coordinates
                for poly in polys:
                    abs_poly = [(int(p[0] + crop_x1), int(p[1] + crop_y1)) for p in poly]
                    draw.polygon(abs_poly, outline=_ANNOTATION_COLOR, width=2)

                all_results.append({
                    "seal_box": [float(c) for c in seal["coordinate"]],
                    "seal_score": float(seal["score"]),
                    "texts": seal_texts,
                })

                # Draw seal box
                draw.rectangle(
                    [float(x1), float(y1), float(x2), float(y2)],
                    outline=_ANNOTATION_COLOR,
                    width=3,
                )

            # Save visualization
            canvas.save(output_path)

        # Combine all extracted texts
        combined_texts = [
            text_item["text"]
            for result in all_results
            for text_item in result["texts"]
        ]

        print(json.dumps({
            "success": True,
            "output_path": output_path,
            "seals": all_results,
            "combined_text": " ".join(combined_texts),
        }, ensure_ascii=False))

    except Exception as e:
        # Top-level boundary: report any failure as JSON for the caller.
        import traceback
        print(json.dumps({"error": str(e), "traceback": traceback.format_exc()}))
        sys.exit(1)
|
|
|
|
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
|