40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
|
|
|
||
|
|
import cv2
|
||
|
|
import os
|
||
|
|
import difflib
|
||
|
|
from paddleocr import PaddleOCR
|
||
|
|
|
||
|
|
def similarity(s1: str, s2: str) -> float:
    """Return how similar two strings are as a ratio in the range [0.0, 1.0].

    The score is ``difflib.SequenceMatcher.ratio()``: twice the number of
    matching characters divided by the combined length of both strings, so
    1.0 means identical and 0.0 means nothing in common.

    Args:
        s1: First string to compare.
        s2: Second string to compare.

    Returns:
        The similarity ratio as a float.
    """
    # autojunk=False: the default heuristic drops "popular" characters from
    # the match index once the second sequence reaches 200 items, which can
    # make long, nearly identical strings score close to zero.
    matcher = difflib.SequenceMatcher(None, s1, s2, autojunk=False)
    return matcher.ratio()
|
||
|
|
|
||
|
|
def _best_match(pages, target):
    """Return ``(text, score)`` for the recognized line closest to *target*.

    Spaces are removed from each recognized string before it is scored with
    ``similarity``. Returns ``("", 0.0)`` when nothing scores above zero.
    """
    best_text = ""
    best_sim = 0.0
    for page in pages or ():
        # A page may be None or lack 'rec_texts' (presumably when nothing
        # was detected or the result layout differs - confirm against the
        # installed PaddleOCR version); such pages contribute no candidates.
        if page is None or 'rec_texts' not in page:
            continue
        for raw_text in page['rec_texts']:
            candidate = raw_text.replace(" ", "")
            score = similarity(target, candidate)
            if score > best_sim:
                best_sim = score
                best_text = candidate
    return best_text, best_sim


def test_fixed_ocr():
    """Run OCR on the strip image, normal and mirrored, and print the best match.

    For each orientation the image is padded, passed to PaddleOCR, and the
    recognized line most similar to the expected company name is printed
    together with its similarity score. The check is skipped, with a
    message, when the image file is missing or cannot be decoded.
    """
    target = "威凯检测技术有限公司"
    img_path = "fixed_polar_strip.png"
    if not os.path.exists(img_path):
        print(f"Skipping OCR check: {img_path} not found")
        return

    img = cv2.imread(img_path)
    # cv2.imread signals an unreadable/corrupt file by returning None
    # rather than raising, so check before handing the image to cv2.flip.
    if img is None:
        print(f"Skipping OCR check: {img_path} could not be read as an image")
        return

    ocr = PaddleOCR(use_angle_cls=True, lang='ch')

    # Try the image as-is and mirrored horizontally.
    for name, image in [("normal", img), ("hflip", cv2.flip(img, 1))]:
        # Add a 30-pixel constant-colour border above and below the strip.
        padded = cv2.copyMakeBorder(
            image, 30, 30, 0, 0, cv2.BORDER_CONSTANT, value=[255, 255, 255]
        )
        res = ocr.ocr(padded)

        best_text, best_sim = _best_match(res, target)
        print(f"Mode: {name} | Sim: {best_sim:.4f} | Text: {best_text}")
|
||
|
|
|
||
|
|
# Script entry point: run the OCR comparison only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    test_fixed_ocr()
|