95 lines
2.7 KiB
Python
95 lines
2.7 KiB
Python
|
|
#!/usr/bin/env python
|
||
|
|
# -*- coding: utf-8 -*-
|
||
|
|
"""
|
||
|
|
Verify Unwarp Variations
|
||
|
|
Runs OCR on a folder of images and finds best match.
|
||
|
|
"""
|
||
|
|
import sys
|
||
|
|
import os
|
||
|
|
import json
|
||
|
|
import difflib
|
||
|
|
|
||
|
|
# Target text
|
||
|
|
TARGET_TEXT = "威凯检测技术有限公司"
|
||
|
|
MIN_SIMILARITY = 0.50 # Lower threshold to catch partials
|
||
|
|
|
||
|
|
def similarity(s1, s2):
|
||
|
|
return difflib.SequenceMatcher(None, s1, s2).ratio()
|
||
|
|
|
||
|
|
def _parse_factor(filename):
    """Parse the numeric factor out of a name shaped like ``factor_X.X.png``.

    Returns 0.0 when the name has no second ``_``-separated field or when that
    field (with ``.png`` removed) is not a valid float.
    """
    try:
        return float(filename.split("_")[1].replace(".png", ""))
    except (IndexError, ValueError):
        return 0.0


def _recognize_text(text_rec, path):
    """Run the recognizer on one image and return its non-empty texts joined."""
    rec_output = text_rec.predict(path, batch_size=1)
    return "".join(
        txt for txt in (res.get("rec_text", "") for res in rec_output) if txt
    )


def main():
    """Run OCR over every ``.png`` in a folder and report the best matches.

    The folder is taken from ``sys.argv[1]``. Each image's recognized text is
    compared with ``TARGET_TEXT``; entries whose similarity reaches
    ``MIN_SIMILARITY`` are collected, sorted by similarity (highest first) and
    printed to stdout as a single JSON object
    ``{"success": true, "results": [...]}``. A per-file summary line is also
    printed to stdout; progress and error messages go to stderr.

    Exits with status 1 when the folder argument is missing, the folder cannot
    be listed, or the OCR model fails to load. A failure on an individual
    image is reported on stderr and that image is skipped.
    """
    if len(sys.argv) < 2:
        print("Usage: python verify_unwarps.py <folder>")
        sys.exit(1)

    folder = sys.argv[1]

    # List the folder before loading the OCR model so that a bad path fails
    # fast with a clear message. Sorting makes the processing order, and the
    # order of equal-similarity results, deterministic.
    try:
        filenames = sorted(
            name for name in os.listdir(folder) if name.endswith(".png")
        )
    except OSError as e:
        print(f"Error reading folder {folder}: {e}", file=sys.stderr)
        sys.exit(1)

    print("Initializing OCR (CPU mode)...", file=sys.stderr)
    try:
        # Set before the import below; presumably read by paddleocr while it
        # loads -- TODO confirm.
        os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"
        from paddleocr import TextRecognition
        # No device arguments are passed here; force CPU through the
        # environment if that is required.
        text_rec = TextRecognition(model_name="PP-OCRv4_server_rec")
    except Exception as e:
        print(f"Error loading PaddleOCR: {e}", file=sys.stderr)
        sys.exit(1)

    results = []

    for filename in filenames:
        path = os.path.join(folder, filename)
        factor = _parse_factor(filename)

        print(f"Processing {filename}...", file=sys.stderr)

        try:
            recognized_text = _recognize_text(text_rec, path)
            sim = similarity(recognized_text, TARGET_TEXT)

            # Print to stdout for visibility; flush so the line appears
            # immediately.
            print(
                f"File: {filename} | Text: '{recognized_text}' | Sim: {sim:.2f}",
                flush=True,
            )

            if sim >= MIN_SIMILARITY:
                results.append({
                    "factor": factor,
                    "text": recognized_text,
                    "similarity": sim,
                    "image_path": path,
                })
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole run.
            print(f"  Error processing {filename}: {e}", file=sys.stderr)

    # Best match first.
    results.sort(key=lambda entry: entry["similarity"], reverse=True)

    print(json.dumps({
        "success": True,
        "results": results,
    }, ensure_ascii=False))
|
||
|
|
|
||
|
|
# Run the OCR verification only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|