# report-detect/scripts/find_optimal_unwarp.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Find Optimal Unwarping Parameters for Seal Text
Sweeps warp factors, runs OCR, and filters for best match to target text.
"""
import sys
import cv2
import numpy as np
import json
import os
import difflib

# Reference string that every OCR result is compared against via similarity().
TARGET_TEXT = "威凯检测技术有限公司"
# Inclusive lower bound on similarity(); results scoring below it are discarded
# and their temporary image is deleted.
MIN_SIMILARITY = 0.70

def similarity(s1, s2):
    """Return the difflib ``SequenceMatcher`` ratio between *s1* and *s2*.

    No junk heuristic function is supplied; the result is a float where
    1.0 means the sequences are identical and 0.0 means nothing in common.
    """
    matcher = difflib.SequenceMatcher(a=s1, b=s2)
    return matcher.ratio()

def _unwarp_strip(rotated_src, center, max_radius, factor):
    """Polar-unwarp *rotated_src* for one warp factor and return the OCR strip.

    The strip is the bottom 30% of rows of the unwarped image, flipped
    vertically.
    """
    # Unwarp logic (same as unwarp_seal.py Scheme 2 fix). The literal 3.14 is
    # kept as in the original so output widths stay identical between runs.
    out_w = int(max_radius * 2 * 3.14 * factor)
    out_h = int(max_radius)

    # NOTE(review): OpenCV documents warpPolar's destination as radius along x
    # and angle along y. With dsize=(out_w, out_h) the row crop below would
    # then select an angular range rather than the outer ring -- confirm
    # against unwarp_seal.py before relying on the "outer ring" wording.
    unwarped = cv2.warpPolar(
        rotated_src,
        (out_w, out_h),
        center,
        max_radius,
        cv2.WARP_FILL_OUTLIERS + cv2.WARP_POLAR_LINEAR,
    )

    # Keep the bottom 30% of rows (intended: outer ring of the seal).
    crop_start_y = int(out_h * 0.70)
    strip = unwarped[crop_start_y:out_h, :]

    # Flip vertically (feet-to-center correction).
    return cv2.flip(strip, 0)


def _recognize_text(text_rec, image_path):
    """Run the recognizer on *image_path* and return all non-empty texts joined."""
    rec_output = text_rec.predict(image_path, batch_size=1)
    fragments = (res.get("rec_text", "") for res in rec_output)
    return "".join(txt for txt in fragments if txt)


def _build_summary_image(results):
    """Stack labelled 60px-high thumbnails of the result images vertically.

    Returns the combined BGR image, or None when no result image could be
    loaded.
    """
    disp_h = 60
    tiles = []
    for res in results:
        im = cv2.imread(res["image_path"])
        if im is None or im.shape[0] == 0:
            print(f"Warning: could not reload {res['image_path']}", file=sys.stderr)
            continue

        # Scale to a fixed height, preserving aspect ratio (at least 1px wide).
        disp_w = max(1, int(im.shape[1] * (disp_h / im.shape[0])))
        tile = cv2.resize(im, (disp_w, disp_h))

        # White 40px band on top that carries the label.
        tile = cv2.copyMakeBorder(tile, 40, 0, 0, 0, cv2.BORDER_CONSTANT, value=(255, 255, 255))
        # NOTE(review): per the OpenCV putText docs, symbols the Hershey font
        # cannot render are replaced by question marks, so non-ASCII
        # recognized text will not be legible in this label.
        label = f"F:{res['factor']:.1f} Sim:{res['similarity']:.2f} T:{res['text']}"
        cv2.putText(tile, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
        tiles.append(tile)

    if not tiles:
        return None

    total_h = sum(t.shape[0] for t in tiles)
    max_w = max(t.shape[1] for t in tiles)
    combined = np.full((total_h, max_w, 3), 255, dtype=np.uint8)

    y = 0
    for tile in tiles:
        tile_h, tile_w = tile.shape[:2]
        combined[y:y + tile_h, 0:tile_w] = tile
        y += tile_h
    return combined


def unwarp_and_recognize(image_path, output_dir):
    """Sweep warp factors, OCR each unwarped strip, and report the best matches.

    For every factor in 1.5, 2.0, ..., 6.0 the image is polar-unwarped, a strip
    is written to ``output_dir`` as ``temp_unwarp_<factor>.png`` and passed to
    PaddleOCR's ``TextRecognition``. Results whose similarity to
    ``TARGET_TEXT`` is at least ``MIN_SIMILARITY`` are kept (and their strip
    image retained); the others have their strip image removed.

    Args:
        image_path: Path of the seal image to load with ``cv2.imread``.
        output_dir: Directory for the strip images and the summary image;
            created if missing.

    Side effects:
        Progress and warnings go to stderr. A single JSON document is printed
        to stdout: ``{"success": true, "results": [...], "summary_path": ...}``
        with results sorted by similarity descending, or
        ``{"success": false, "error": ...}`` when nothing matched.
        ``summary_path`` is ``null`` if the summary image could not be written.

    Exits with status 1 when paddleocr is not installed, the image cannot be
    loaded, or the image is too small to unwarp.
    """
    try:
        from paddleocr import TextRecognition
    except ImportError:
        print(json.dumps({"error": "paddleocr not installed"}), file=sys.stderr)
        sys.exit(1)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    print(f"Loading {image_path}...", file=sys.stderr)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not load {image_path}", file=sys.stderr)
        sys.exit(1)

    # The rotation does not depend on the factor, so do it once. The polar
    # center must be taken from the rotated image: rotating by 90 degrees swaps
    # width and height, so the original (w // 2, h // 2) is off-center for
    # non-square inputs.
    rotated_src = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
    rot_h, rot_w = rotated_src.shape[:2]
    center = (rot_w // 2, rot_h // 2)
    max_radius = min(center)
    if max_radius < 1:
        print(f"Error: {image_path} is too small to unwarp", file=sys.stderr)
        sys.exit(1)

    # Initialize recognizer
    text_rec = TextRecognition(model_name="PP-OCRv4_server_rec")

    results = []

    # Sweep warp factors
    for factor in np.arange(1.5, 6.5, 0.5):
        factor = float(factor)  # plain float so it is JSON-serializable

        strip = _unwarp_strip(rotated_src, center, max_radius, factor)

        # The recognizer is fed a file path, so persist the strip first.
        temp_path = os.path.join(output_dir, f"temp_unwarp_{factor:.1f}.png")
        if not cv2.imwrite(temp_path, strip):
            print(f"Warning: could not write {temp_path}; skipping factor {factor:.1f}", file=sys.stderr)
            continue

        recognized_text = _recognize_text(text_rec, temp_path)
        sim = similarity(recognized_text, TARGET_TEXT)

        print(f"Factor {factor:.1f}: '{recognized_text}' (Sim: {sim:.2f})", file=sys.stderr)

        if sim >= MIN_SIMILARITY:
            results.append({
                "factor": factor,
                "text": recognized_text,
                "similarity": sim,
                "image_path": temp_path
            })
        else:
            # Failed candidates are removed to save space; cleanup is
            # best-effort, so a failure is reported but not fatal.
            try:
                os.remove(temp_path)
            except OSError as exc:
                print(f"Warning: could not remove {temp_path}: {exc}", file=sys.stderr)

    if not results:
        print(json.dumps({
            "success": False,
            "error": "No parameters found matching target text."
        }))
        return

    # Sort results by similarity descending
    results.sort(key=lambda x: x["similarity"], reverse=True)

    # Generate summary image of successes
    summary_path = os.path.join(output_dir, "optimal_unwarp_summary.png")
    combined = _build_summary_image(results)
    if combined is None or not cv2.imwrite(summary_path, combined):
        print(f"Warning: could not write summary image {summary_path}", file=sys.stderr)
        summary_path = None

    print(json.dumps({
        "success": True,
        "results": results,
        "summary_path": summary_path
    }, ensure_ascii=False))

if __name__ == "__main__":
    # Command line: <image_path> <output_dir>.
    if len(sys.argv) < 3:
        # Usage goes to stderr like the script's other diagnostics; stdout
        # carries only the JSON result printed by unwarp_and_recognize().
        print("Usage: python find_optimal_unwarp.py <image_path> <output_dir>", file=sys.stderr)
        sys.exit(1)
    unwarp_and_recognize(sys.argv[1], sys.argv[2])