"""
Test the fixed ROI calculation
"""
import subprocess
import sys

# Clear stale bytecode caches under the current directory so the module under
# test is recompiled from source.
# The cleanup runs in a child interpreter. sys.executable is used instead of a
# bare "python" so the child is the interpreter running this script, and so the
# call does not fail on systems where no "python" command is on PATH.
print("Clearing Python cache...")
subprocess.run([sys.executable, "-c", """
import os, shutil
for root, dirs, files in os.walk('.'):
    if '__pycache__' in dirs:
        # Prune the entry so os.walk does not try to descend into the removed tree.
        dirs.remove('__pycache__')
        # Best effort: a cache directory that cannot be removed is skipped.
        shutil.rmtree(os.path.join(root, '__pycache__'), ignore_errors=True)
"""], capture_output=True)

# Now run the test with fresh Python
import os
# Set before paddleocr is imported further down.
# NOTE(review): presumably this makes the Paddle stack skip its model-source
# check at import/initialisation time — confirm against the paddleocr docs.
os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"

import fitz
import numpy as np
import cv2
import re
from paddleocr import PaddleOCR

# Fresh import
import importlib
import cma_extraction_template_primary
# Re-execute the module body immediately after the initial import.
importlib.reload(cma_extraction_template_primary)

# Helpers under test: multi-scale template matcher and the image loader.
from cma_extraction_template_primary import locate_template_multi_scale, imread_unicode

# Fixture locations (relative paths, resolved against the working directory).
pdf_path = "src/test/resources/data/pdfs/YDQ23_001838.pdf"
template_path = "template/CMA_Logo.png"

# Section banner: the title framed by two 80-character rules.
print("=" * 80, "TESTING FIXED ROI CALCULATION", "=" * 80, sep="\n")

# Render the first page of the PDF into an OpenCV image.
# The context manager closes the document even if rendering raises.
with fitz.open(pdf_path) as doc:
    page = doc[0]
    # Scale factor 300 / 72 on both axes (PDF units are 72 per inch -> 300 DPI).
    mat = fitz.Matrix(300 / 72, 300 / 72)
    pix = page.get_pixmap(matrix=mat)
    # Serialise to PNG bytes so OpenCV can decode them into its own array layout.
    img_data = pix.tobytes("png")

img_array = np.frombuffer(img_data, dtype=np.uint8)
page_img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
if page_img is None:
    # cv2.imdecode returns None instead of raising when the buffer cannot be decoded.
    print(f"ERROR: Could not decode rendered page from {pdf_path}")
    sys.exit(1)

print(f"\nPage size: {page_img.shape}")
# shape is (rows, cols, channels): height first, then width.
h, w = page_img.shape[:2]

# Load the logo template and locate it on the rendered page.
template = imread_unicode(template_path, cv2.IMREAD_COLOR)
# NOTE(review): assumes imread_unicode returns None for an unreadable file,
# like cv2.imread does — confirm against its definition.
if template is None:
    print(f"ERROR: Could not load template image: {template_path}")
    sys.exit(1)

print("\nRunning template matching...")
match_res = locate_template_multi_scale(page_img, template)

# The matcher reports its outcome in a dict; stop here when it did not succeed.
if not match_res.get('success'):
    print(f"ERROR: Template matching failed: {match_res.get('reason')}")
    sys.exit(1)

print(f"Match succeeded: confidence={match_res['max_val']:.3f}")

# Derive the OCR region of interest (ROI) from the matched logo position.
x, y = match_res['match_center']
template_h, template_w = match_res['template_h'], match_res['template_w']

print("\nCalculating ROI with NEW formula...")
print(f"  Logo center: ({x}, {y})")
print(f"  Template size: {template_w}x{template_h}")

# NEW ROI: starts at the logo centre horizontally and half a template height
# above it vertically; reaches at most 600 px to the right and four template
# heights below the centre. Every edge is clamped to the page bounds.
roi_x1, roi_y1 = (
    int(max(0, x)),
    int(max(0, y - template_h // 2)),
)
roi_x2, roi_y2 = (
    int(min(w, x + min(600, w - x))),
    int(min(h, y + template_h * 4)),  # NEW: extend down by 4x
)

print("\nNEW ROI coordinates:")
print(f"  ROI: ({roi_x1}, {roi_y1}) -> ({roi_x2}, {roi_y2})")
print(f"  ROI size: {roi_x2 - roi_x1}x{roi_y2 - roi_y1}")

# The same corners expressed as percentages of the page width / height.
rel_x1, rel_x2 = (edge / w * 100 for edge in (roi_x1, roi_x2))
rel_y1, rel_y2 = (edge / h * 100 for edge in (roi_y1, roi_y2))
print(f"  Relative: ({rel_x1:.1f}%, {rel_y1:.1f}%) -> ({rel_x2:.1f}%, {rel_y2:.1f}%)")

# Crop the ROI out of the page image (array indexing is rows first: [y1:y2, x1:x2]).
roi_img = page_img[roi_y1:roi_y2, roi_x1:roi_x2]
print(f"\nActual ROI size: {roi_img.shape}")
if roi_img.size == 0:
    # An empty crop cannot be saved or recognised; fail here with a clear message.
    print("ERROR: ROI is empty; check the match position and ROI bounds")
    sys.exit(1)

# Save the crop for visual inspection.
os.makedirs("test_debug_new", exist_ok=True)
# cv2.imwrite returns False when the image could not be written, so only
# report success when it actually succeeded. The file is a debug artifact,
# so a failed write is reported but does not abort the run.
if cv2.imwrite("test_debug_new/roi_debug.png", roi_img):
    print("ROI saved to: test_debug_new/roi_debug.png")
else:
    print("WARNING: Failed to write test_debug_new/roi_debug.png")

# Run OCR on the cropped ROI and check which known code appears in the text.
print("\nRunning OCR on NEW ROI...")
ocr = PaddleOCR(lang='ch')
ocr_result = ocr.predict(roi_img)

# Candidate codes are runs of 11-12 digits; compile once instead of per line.
number_pattern = re.compile(r'\d{11,12}')
# Initialised up front so the final exit-status check works on every path.
found_4400 = False
found_2100 = False

if ocr_result:
    # Only the first result entry is inspected (a single image was passed in).
    res = ocr_result[0]
    texts = res.get('rec_texts', [])
    scores = res.get('rec_scores', [])

    print(f"\nOCR found {len(texts)} text lines:")
    for i, (text, score) in enumerate(zip(texts, scores)):
        # Spaces are removed first so digit groups split by spaces still match.
        numbers = number_pattern.findall(text.replace(" ", ""))
        # Show every line that carries a candidate number, plus any other
        # line recognised with a score above 0.5.
        if numbers or score > 0.5:
            print(f"  [{i}] '{text}' (score: {score:.2f})")
            if numbers:
                print(f"      Numbers: {numbers}")
                if "440023010130" in numbers:
                    print("      ^ Found 440023010130 (report number)")
                    found_4400 = True
                if "210020349096" in numbers:
                    print("      ^ Found 210020349096 (CORRECT CMA CODE!)")
                    found_2100 = True

    print("\n" + "=" * 80)
    print("RESULT")
    print("=" * 80)
    if found_2100:
        print("SUCCESS: Found correct CMA code 210020349096!")
    elif found_4400:
        print("FAILED: Still finding 440023010130 instead of 210020349096")
    else:
        print("FAILED: No CMA codes found")
else:
    print("ERROR: OCR returned no results")

print("=" * 80)

# Consistent with the template-matching failure path: exit non-zero unless the
# expected code was found, so callers can detect a failed run.
if not found_2100:
    sys.exit(1)