# report-detect/archive/tools/visualize_matches.py
#
# 103 lines
# 3.2 KiB
# Python

"""
Visualize all template matches on the page to understand what's happening
"""
import cv2
import numpy as np
from pathlib import Path
# Input/output locations (relative to the current working directory).
PAGE_IMG_PATH = "test_reports_full/YDQ23_001838.pdf/doc_page.png"
TEMPLATE_PATH = "template/CMA_Logo.png"
OUTPUT_PATH = "test_reports_full/YDQ23_001838.pdf/all_matches_visualization.png"

# Thresholds for the summary table, and the cut-off used for the detailed list.
SWEEP_THRESHOLDS = [0.3, 0.5, 0.7, 0.8, 0.9]
LIST_THRESHOLD = 0.3
PRINT_TOP_N = 20
DRAW_TOP_N = 5

# OpenCV images are BGR, so red is (0, 0, 255) and (255, 0, 0) is blue.
GREEN_BGR = (0, 255, 0)
RED_BGR = (0, 0, 255)


def count_matches(result, thresholds):
    """Count score-map entries at or above each threshold.

    Args:
        result: 2-D array of match scores.
        thresholds: Iterable of score cut-offs.

    Returns:
        A list of ``(threshold, count)`` tuples in the order given.
    """
    return [(t, int(np.count_nonzero(result >= t))) for t in thresholds]


def collect_matches(result, threshold, template_w, template_h,
                    page_w, page_h, limit=None):
    """Return matches scoring at least ``threshold``, best first.

    Ranking is done in NumPy and only the entries that are kept are turned
    into Python objects, so a low threshold on a large page does not build
    one dict per pixel.

    Args:
        result: 2-D score map indexed as ``result[y, x]``.
        threshold: Minimum score for a location to count as a match.
        template_w: Template width in pixels.
        template_h: Template height in pixels.
        page_w: Page width in pixels (for relative positions).
        page_h: Page height in pixels (for relative positions).
        limit: Keep at most this many matches; ``None`` keeps all of them.

    Returns:
        A list of dicts with keys ``'pos'`` (top-left ``(x, y)``), ``'conf'``,
        ``'center'`` and ``'rel'`` (center as a percentage of the page size).
        All values are plain Python ints/floats. Ties keep row-major order.
    """
    ys, xs = np.where(result >= threshold)
    scores = result[ys, xs]
    # A stable sort on the negated scores keeps equal scores in scan order,
    # the same ordering as list.sort(key=conf, reverse=True).
    order = np.argsort(-scores, kind="stable")
    if limit is not None:
        order = order[:limit]

    matches = []
    for idx in order:
        x, y = int(xs[idx]), int(ys[idx])
        center_x = x + template_w // 2
        center_y = y + template_h // 2
        matches.append({
            'pos': (x, y),
            'conf': float(scores[idx]),
            'center': (center_x, center_y),
            'rel': (center_x / page_w * 100, center_y / page_h * 100),
        })
    return matches


def main():
    """Run template matching on the page, print a report, save an overlay.

    Returns:
        Process exit code: 0 on success, 1 if an image could not be loaded,
        the template does not fit in the page, or the output was not written.
    """
    # Load page image
    page_img = cv2.imread(PAGE_IMG_PATH)
    if page_img is None:
        print("ERROR: Could not load page image")
        return 1
    h, w = page_img.shape[:2]
    print(f"Page size: {w}x{h}")

    # Load template
    template = cv2.imread(TEMPLATE_PATH, cv2.IMREAD_GRAYSCALE)
    if template is None:
        print("ERROR: Could not load template")
        return 1
    template_h, template_w = template.shape
    print(f"Template size: {template_w}x{template_h}")

    # matchTemplate requires the template to fit inside the searched image.
    if template_h > h or template_w > w:
        print("ERROR: Template is larger than the page image")
        return 1

    # Convert page to grayscale and run template matching.
    # NOTE(review): TM_CCORR_NORMED can score bright, uniform regions highly;
    # TM_CCOEFF_NORMED may discriminate better - confirm before switching.
    page_gray = cv2.cvtColor(page_img, cv2.COLOR_BGR2GRAY)
    result = cv2.matchTemplate(page_gray, template, cv2.TM_CCORR_NORMED)

    # Count matches at several thresholds
    print("\nFinding matches at different thresholds:")
    for threshold, num_matches in count_matches(result, SWEEP_THRESHOLDS):
        print(f" Threshold {threshold}: {num_matches} matches")

    # Report the single best match
    _, max_val, _, max_loc = cv2.minMaxLoc(result)
    best_cx = max_loc[0] + template_w // 2
    best_cy = max_loc[1] + template_h // 2
    print("\nBest match:")
    print(f" Confidence: {max_val:.3f}")
    print(f" Location: {max_loc}")
    print(f" Center: ({best_cx}, {best_cy})")
    print(f" Relative position: ({best_cx / w * 100:.1f}%, {best_cy / h * 100:.1f}%)")

    # List the strongest matches above the listing threshold
    print(f"\nAll matches above {LIST_THRESHOLD}:")
    matches = collect_matches(
        result, LIST_THRESHOLD, template_w, template_h, w, h,
        limit=max(PRINT_TOP_N, DRAW_TOP_N),
    )
    for i, m in enumerate(matches[:PRINT_TOP_N]):
        print(f" Match #{i+1}:")
        print(f" Position: {m['pos']}")
        print(f" Center: {m['center']}")
        print(f" Relative: ({m['rel'][0]:.1f}%, {m['rel'][1]:.1f}%)")
        print(f" Confidence: {m['conf']:.3f}")
        print()

    # Visualize the top matches
    viz = page_img.copy()
    for i, m in enumerate(matches[:DRAW_TOP_N]):
        x, y = m['pos']
        cv2.rectangle(viz, (x, y), (x + template_w, y + template_h), GREEN_BGR, 2)
        # Keep the label on the canvas when the match touches the top edge.
        label_y = max(y - 10, 12)
        cv2.putText(viz, f"#{i+1}:{m['conf']:.2f}", (x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, GREEN_BGR, 1)

    # Draw 60% threshold line (in red, as the final message says)
    threshold_y = int(h * 0.6)
    cv2.line(viz, (0, threshold_y), (w, threshold_y), RED_BGR, 2)
    cv2.putText(viz, "60% threshold", (10, threshold_y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, RED_BGR, 1)

    # imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(OUTPUT_PATH, viz):
        print(f"ERROR: Could not write visualization to: {OUTPUT_PATH}")
        return 1
    print(f"\nVisualization saved to: {OUTPUT_PATH}")
    print(f"Top {DRAW_TOP_N} matches marked with green boxes")
    print("Red line shows 60% threshold (matches below are filtered)")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())