report-detect/scripts/verify_unwarps.py

95 lines
2.7 KiB
Python
Raw Normal View History

2026-02-05 13:57:22 +08:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
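Verify unwarp variations.

Runs OCR text recognition on every ``.png`` image in a folder, compares the
recognized text with the target text, and prints the matches as JSON, best
match first.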
"""
import sys
import os
import json
import difflib
# Reference string that each image's recognized text is compared against.
TARGET_TEXT = "威凯检测技术有限公司"
# Minimum similarity ratio a result needs to be included in the JSON output.
MIN_SIMILARITY = 0.50 # Deliberately low so partial recognitions are still reported
def similarity(s1: str, s2: str) -> float:
    """Return the difflib similarity ratio of two strings.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        ``difflib.SequenceMatcher(None, s1, s2).ratio()``: a float in
        ``[0.0, 1.0]`` where 1.0 means the strings are identical. Two empty
        strings also yield 1.0.
    """
    return difflib.SequenceMatcher(None, s1, s2).ratio()
def _parse_factor(filename):
    """Return the factor encoded in a ``factor_X.X.png`` style filename.

    The second underscore-separated field, with ``.png`` removed, is parsed
    as a float. Returns 0.0 when that field is missing or is not a number.
    """
    try:
        return float(filename.split("_")[1].replace(".png", ""))
    except (IndexError, ValueError):
        return 0.0


def _recognize_text(text_rec, path):
    """Run ``text_rec.predict`` on one image and join the non-empty texts."""
    texts = []
    for res in text_rec.predict(path, batch_size=1):
        txt = res.get("rec_text", "")
        if txt:
            texts.append(txt)
    return "".join(texts)


def main():
    """Run OCR over every ``.png`` in a folder and report the best matches.

    The folder is taken from ``sys.argv[1]``. For each image, the recognized
    text is compared with ``TARGET_TEXT``; a per-file line is printed to
    stdout, and every result whose similarity is at least ``MIN_SIMILARITY``
    is collected. Finally a JSON object ``{"success": true, "results": [...]}``
    is printed to stdout with the results sorted by similarity, highest first.

    Progress and error messages go to stderr.

    Raises:
        SystemExit: with code 1 when the folder argument is missing, the
            folder is not a directory, or the OCR model cannot be loaded.
    """
    if len(sys.argv) < 2:
        print("Usage: python verify_unwarps.py <folder>")
        sys.exit(1)

    folder = sys.argv[1]
    # Fail fast with a clear message instead of loading the OCR model first
    # and then crashing with a traceback in os.listdir().
    if not os.path.isdir(folder):
        print(f"Error: '{folder}' is not a directory", file=sys.stderr)
        sys.exit(1)

    print("Initializing OCR (CPU mode)...", file=sys.stderr)
    try:
        # Set before importing paddleocr so the library sees it at import time.
        os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"
        from paddleocr import TextRecognition

        # No device argument is passed here; NOTE(review): device selection
        # is presumably left to PaddleOCR defaults / environment -- confirm.
        text_rec = TextRecognition(model_name="PP-OCRv4_server_rec")
    except Exception as e:
        print(f"Error loading PaddleOCR: {e}", file=sys.stderr)
        sys.exit(1)

    results = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # per-file log and the ordering of equal-similarity results deterministic.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".png"):
            continue

        path = os.path.join(folder, filename)
        factor = _parse_factor(filename)

        print(f"Processing {filename}...", file=sys.stderr)
        # Best effort: a failure on one image is logged and the run continues.
        try:
            recognized_text = _recognize_text(text_rec, path)
            sim = similarity(recognized_text, TARGET_TEXT)

            # Print to stdout for visibility, flushed so progress is seen live.
            print(
                f"File: {filename} | Text: '{recognized_text}' | Sim: {sim:.2f}",
                flush=True,
            )

            if sim >= MIN_SIMILARITY:
                results.append({
                    "factor": factor,
                    "text": recognized_text,
                    "similarity": sim,
                    "image_path": path
                })
        except Exception as e:
            print(f" Error processing {filename}: {e}", file=sys.stderr)

    # Best match first; list.sort is stable, so ties keep filename order.
    results.sort(key=lambda x: x["similarity"], reverse=True)

    # ensure_ascii=False keeps the recognized Chinese text readable in output.
    print(json.dumps({
        "success": True,
        "results": results
    }, ensure_ascii=False))
# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    main()