import json
import os

import cv2
import fitz
import numpy as np
from fitz import open as pdf_open
from paddleocr import PaddleOCR, LayoutDetection
from PIL import Image

# Zoom factor used when rasterising the PDF page (2x helps detection).
RENDER_ZOOM = 2
# Layout labels that are treated as possible seals.
SEAL_LABELS = ("seal", "image")
# Fractions of the radius delimiting the text ring that is kept.
RING_INNER_FRAC = 0.6
RING_OUTER_FRAC = 0.98
# Horizontal stretch applied to the circumference for better glyph spacing.
CIRCUMFERENCE_STRETCH = 1.2


def _render_first_page(pdf_path):
    """Rasterise the first page of *pdf_path* and return it as a BGR array.

    Returns ``None`` when the document has no pages. The document is closed
    before returning; the returned array is an independent copy.
    """
    with pdf_open(pdf_path) as doc:
        if len(doc) == 0:
            return None
        pix = doc[0].get_pixmap(
            matrix=fitz.Matrix(RENDER_ZOOM, RENDER_ZOOM), alpha=False
        )
        rgb = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, 3)
        # cvtColor allocates a new array, so it does not alias pix.samples.
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


def _collect_seal_boxes(layout_res):
    """Return the ``coordinate`` of every layout box labelled as a seal/image."""
    return [
        box["coordinate"]
        for res in layout_res
        for box in res["boxes"]
        if box["label"] in SEAL_LABELS
    ]


def _extract_text_points(det_res):
    """Flatten detected text polygons into a list of ``[x, y]`` corner points.

    Two result layouts are handled:

    * legacy list layout: ``[[ [box, (text, score)], ... ]]`` where ``box``
      holds the four corners;
    * dict-like layout exposing the polygons under ``"dt_polys"``
      (NOTE(review): this is presumably what the PaddleOCR 3.x pipeline
      returns -- confirm against the installed version).
    """
    points = []
    if not det_res:
        return points
    first = det_res[0]
    if first is None:
        return points

    if hasattr(first, "get"):
        polys = first.get("dt_polys")
        if polys is None:
            return points
        for poly in polys:
            points.extend(np.asarray(poly, dtype=float).reshape(-1, 2).tolist())
        return points

    for line in first:
        if isinstance(line, list) and len(line) > 0:
            points.extend(line[0])  # The 4 corners
    return points


def _anchor_angle(points, cx, cy, radius):
    """Return the polar angle of the point closest to the 6 o'clock position.

    The 6 o'clock position is ``(cx, cy + radius)`` in image coordinates.
    Falls back to ``pi / 2`` (straight down) when *points* is empty.
    """
    if not points:
        return np.pi / 2
    pts = np.asarray(points, dtype=float).reshape(-1, 2)
    dx = pts[:, 0] - cx
    dy = pts[:, 1] - cy
    # Squared distance is enough for argmin; ties resolve to the first point.
    nearest = int(np.argmin(dx ** 2 + (dy - radius) ** 2))
    return float(np.arctan2(dy[nearest], dx[nearest]))


def _unwarp_seal(crop, points):
    """Unroll the circular text ring of *crop* into a horizontal strip.

    Replicates the Java ``polarUnwarpSmart`` idea: the crop is rotated so the
    anchor angle lands at 180 degrees (the middle of the strip), then warped
    to polar coordinates. Returns ``None`` when the crop is too small to
    yield a non-empty ring.
    """
    h, w = crop.shape[:2]
    cx, cy = w // 2, h // 2
    radius = min(cx, cy)
    ring_top = int(radius * RING_INNER_FRAC)
    ring_bottom = int(radius * RING_OUTER_FRAC)
    if ring_bottom <= ring_top:
        return None

    best_theta = _anchor_angle(points, cx, cy, radius)
    angle_deg = (best_theta * 180 / np.pi) - 180
    M = cv2.getRotationMatrix2D((cx, cy), angle_deg, 1.0)
    rotated = cv2.warpAffine(crop, M, (w, h), borderValue=(255, 255, 255))

    out_w = int(radius * 2 * np.pi * CIRCUMFERENCE_STRETCH)
    # warpPolar maps the radius onto dsize.width and the angle onto
    # dsize.height, so request (radius, out_w) and transpose afterwards to get
    # rows = radius (centre at the top) and columns = angle.
    polar = cv2.warpPolar(
        rotated,
        (radius, out_w),
        (cx, cy),
        radius,
        cv2.INTER_LINEAR | cv2.WARP_POLAR_LINEAR,
    )
    unwarped = cv2.transpose(polar)

    # Keep outer 40% (the text ring)
    strip = unwarped[ring_top:ring_bottom, :]
    # CRITICAL: Vertical flip to make characters upright
    # (feet point to center in polar)
    return cv2.flip(strip, 0)


def _save_image(path, image):
    """Write *image* to *path*, warning instead of failing silently."""
    if not cv2.imwrite(path, image):
        print(f"  Warning: could not write {path}")


def reproduce_java_flow():
    """Run layout detection + seal unwarping on the first pages of a few PDFs.

    For up to five PDFs in the input directory, the first page is rasterised,
    seal candidates are located with the layout model, text polygons are
    detected inside each candidate crop, and the text ring is unrolled into a
    strip. The crop and the unrolled strip are written as PNG files to the
    output directory. Returns ``None``.
    """
    # 1. Configuration
    pdf_dir = "refer/认监-扫描件识别/input_pdfs"
    output_dir = "repro_results"
    os.makedirs(output_dir, exist_ok=True)

    # Load models
    layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
    ocr_det = PaddleOCR(
        use_angle_cls=False, lang="ch", det_db_thresh=0.3, det_db_box_thresh=0.5
    )

    # Process first 5 PDFs to present results. Sorting makes the selection
    # deterministic, since os.listdir returns entries in arbitrary order.
    pdfs = sorted(f for f in os.listdir(pdf_dir) if f.lower().endswith(".pdf"))[:5]

    for pdf_name in pdfs:
        pdf_path = os.path.join(pdf_dir, pdf_name)
        print(f"Processing {pdf_name}...")

        img_bgr = _render_first_page(pdf_path)
        if img_bgr is None:
            print("  Skipped: document has no pages.")
            continue
        img_h, img_w = img_bgr.shape[:2]

        # 2. Layout Detection
        seal_candidates = _collect_seal_boxes(layout_model.predict(img_bgr))
        print(f"  Found {len(seal_candidates)} seal candidates.")

        for i, coord in enumerate(seal_candidates):
            x1, y1, x2, y2 = map(int, coord)
            # Clamp to the page: negative indices would wrap around in NumPy.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, img_w), min(y2, img_h)

            # Crop seal
            crop = img_bgr[y1:y2, x1:x2]
            if crop.size == 0:
                continue

            # 3. Text Detection on crop (to find unwrap points)
            points = _extract_text_points(ocr_det.ocr(crop))

            # 4. Smart Unwarp calculation (replicate Java polarUnwarpSmart)
            final_strip = _unwarp_seal(crop, points)
            if final_strip is None:
                print(f"  Seal {i}: crop too small to unwarp, skipped.")
                continue

            # Save visual results
            out_name = f"{pdf_name}_seal_{i}_unwarped.png"
            _save_image(os.path.join(output_dir, out_name), final_strip)
            # Also save the original crop for comparison
            _save_image(
                os.path.join(output_dir, f"{pdf_name}_seal_{i}_crop.png"), crop
            )

    print(f"Batch Repro Complete. Results in {output_dir}/")


if __name__ == "__main__":
    reproduce_java_flow()