""" Extract and save first page of PDF for visual inspection. """ import os import sys import cv2 import numpy as np import fitz # PyMuPDF pdf_dir = "src/test/resources/data/pdfs" test_files = [ ("YDQ25_002294.pdf", "YDQ25_002294_page1.png"), ("财政部关于请协助提供相关材料的函_pages10-15.pdf", "财政部_pages10-15_page1.png"), ("财政部关于请协助提供相关材料的函_pages4-9.pdf", "财政部_pages4-9_page1.png") ] output_dir = "debug_images" os.makedirs(output_dir, exist_ok=True) for pdf_name, output_name in test_files: pdf_path = os.path.join(pdf_dir, pdf_name) print(f"Processing: {pdf_name}") try: doc = fitz.open(pdf_path) page = doc[0] pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n) # Convert to BGR if pix.n == 4: img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR) elif pix.n == 3: img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) elif pix.n == 1: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) doc.close() output_path = os.path.join(output_dir, output_name) cv2.imwrite(output_path, img) print(f" Saved: {output_path}") print(f" Size: {img.shape[1]}x{img.shape[0]}") except Exception as e: print(f" ERROR: {e}") print(f"\nAll images saved to: {output_dir}/") print("Please manually inspect these images to see if CMA logo is present.")