#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Verify Unwarp Variations

Runs OCR on a folder of images and finds best match.

Every ``*.png`` file in the folder given on the command line is passed to
PaddleOCR text recognition. The recognized text is compared against
``TARGET_TEXT``; files whose similarity reaches ``MIN_SIMILARITY`` are
reported, best match first, as a JSON document on the last line of stdout.
"""

import sys
import os
import json
import difflib

# Target text
TARGET_TEXT = "威凯检测技术有限公司"
MIN_SIMILARITY = 0.50  # Lower threshold to catch partials


def similarity(s1, s2):
    """Return the ``difflib.SequenceMatcher`` ratio of *s1* and *s2*.

    The ratio is a float in ``[0.0, 1.0]``; 1.0 means the sequences are
    identical (two empty strings also yield 1.0).
    """
    return difflib.SequenceMatcher(None, s1, s2).ratio()


def _parse_factor(filename):
    """Extract the numeric factor from a name shaped like ``factor_X.X.png``.

    Returns 0.0 when the name has no ``_`` separated second field or when
    that field is not a valid float.
    """
    try:
        return float(filename.split("_")[1].replace(".png", ""))
    except (IndexError, ValueError):
        # Only parse failures fall back to 0.0; anything else (for example
        # KeyboardInterrupt) must propagate.
        return 0.0


def _join_recognized_text(rec_output):
    """Concatenate the non-empty ``rec_text`` fields of the OCR results.

    *rec_output* is iterated once; each item must offer a dict-style
    ``.get`` method.
    """
    candidates = (res.get("rec_text", "") for res in rec_output)
    return "".join(txt for txt in candidates if txt)


def main():
    """Run OCR over the folder named in ``sys.argv[1]`` and print the matches.

    Per-file progress and errors go to stderr. For every processed file a
    human-readable summary line goes to stdout, followed at the end by one
    JSON object ``{"success": true, "results": [...]}`` whose entries are
    sorted by descending similarity.

    Exits with status 1 when the folder argument is missing, is not a
    directory, or when PaddleOCR cannot be loaded.
    """
    if len(sys.argv) < 2:
        print("Usage: python verify_unwarps.py FOLDER")
        sys.exit(1)

    folder = sys.argv[1]
    # Fail fast, before the model load, instead of crashing later in
    # os.listdir with a raw traceback.
    if not os.path.isdir(folder):
        print(f"Error: not a directory: {folder}", file=sys.stderr)
        sys.exit(1)

    print("Initializing OCR (CPU mode)...", file=sys.stderr)
    try:
        # Set before paddleocr is imported.
        # NOTE(review): presumably read at import time - confirm.
        os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"
        from paddleocr import TextRecognition

        # No device argument is passed here; if CPU must be forced, do it
        # through the environment.
        text_rec = TextRecognition(model_name="PP-OCRv4_server_rec")
    except Exception as e:
        print(f"Error loading PaddleOCR: {e}", file=sys.stderr)
        sys.exit(1)

    results = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order, and the order of equal-similarity results,
    # reproducible across runs.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".png"):
            continue

        path = os.path.join(folder, filename)
        factor = _parse_factor(filename)

        print(f"Processing {filename}...", file=sys.stderr)
        # Best effort per file: one bad image must not abort the whole run.
        try:
            rec_output = text_rec.predict(path, batch_size=1)
            recognized_text = _join_recognized_text(rec_output)
            sim = similarity(recognized_text, TARGET_TEXT)

            # Print to stdout for visibility
            print(
                f"File: {filename} | Text: '{recognized_text}' | Sim: {sim:.2f}",
                flush=True,
            )

            if sim >= MIN_SIMILARITY:
                results.append({
                    "factor": factor,
                    "text": recognized_text,
                    "similarity": sim,
                    "image_path": path,
                })
        except Exception as e:
            print(f" Error processing {filename}: {e}", file=sys.stderr)

    # Best match first.
    results.sort(key=lambda x: x["similarity"], reverse=True)

    print(json.dumps({
        "success": True,
        "results": results,
    }, ensure_ascii=False))


if __name__ == "__main__":
    main()