# report-detect/scripts/reproduce_java_flow.py
#
# Listing metadata carried over from the code viewer:
#   103 lines, 4.2 KiB, Python
#   2026-02-05 13:57:22 +08:00
import cv2
import numpy as np
import os
import json
from paddleocr import PaddleOCR, LayoutDetection
from fitz import open as pdf_open
from PIL import Image
# Layout labels that are treated as possible seal regions.
_SEAL_LABELS = ("seal", "image")


def _render_first_page(pdf_path, zoom=2):
    """Rasterize the first page of *pdf_path* and return it as a BGR array.

    Returns ``None`` when the document contains no pages.
    """
    import fitz  # the file-level import only binds ``fitz.open`` (as ``pdf_open``)

    # The context manager closes the document even if rendering raises.
    with pdf_open(pdf_path) as doc:
        if len(doc) == 0:
            return None
        # alpha=False avoids a 4th channel so the 3-channel reshape below holds
        # (assumes PyMuPDF's default RGB colorspace).
        pix = doc[0].get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        rgb = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, 3)
        # cvtColor allocates a new array, so the result does not alias the pixmap.
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


def _find_seal_candidates(layout_res):
    """Return the ``coordinate`` of every layout box whose label is seal-like."""
    return [
        box["coordinate"]
        for res in layout_res
        for box in res["boxes"]
        if box["label"] in _SEAL_LABELS
    ]


def _crop_region(image, coord):
    """Crop ``(x1, y1, x2, y2)`` from *image*, clamped to the image bounds.

    Clamping matters because a negative index would wrap around in NumPy
    slicing and silently crop the wrong region.
    """
    img_h, img_w = image.shape[:2]
    x1, y1, x2, y2 = map(int, coord)
    x1 = min(max(x1, 0), img_w)
    y1 = min(max(y1, 0), img_h)
    x2 = min(max(x2, x1), img_w)
    y2 = min(max(y2, y1), img_h)
    return image[y1:y2, x1:x2]


def _collect_text_points(det_res):
    """Flatten OCR detection output into a list of ``(x, y)`` polygon corners.

    Two result shapes are accepted:

    * legacy: ``det_res[0]`` is a list of ``[box, ...]`` entries where ``box``
      holds the corner points;
    * dict-like: ``det_res[0]`` is a mapping with the polygons stored under
      ``"dt_polys"`` (or ``"rec_polys"``).
      NOTE(review): this is how PaddleOCR 3.x results appear to be laid out;
      confirm the key names against the installed version.
    """
    points = []
    if not det_res:
        return points
    first = det_res[0]
    if first is None:
        return points

    if isinstance(first, dict):
        polys = first.get("dt_polys")
        if polys is None:
            polys = first.get("rec_polys")
        if polys is None:
            return points
        for poly in polys:
            points.extend((float(pt[0]), float(pt[1])) for pt in poly)
        return points

    for line in first:
        if isinstance(line, list) and len(line) > 0:
            # line[0] holds the corners of the detected text box.
            points.extend((float(pt[0]), float(pt[1])) for pt in line[0])
    return points


def _pick_start_angle(points, cx, cy, radius):
    """Return the polar angle of the point nearest to the 6 o'clock position.

    The 6 o'clock target is ``(cx, cy + radius)``. Without any points the
    angle of that target itself (``pi / 2`` in image coordinates) is returned.
    """
    if not points:
        return np.pi / 2
    pts = np.asarray(points, dtype=np.float64)
    dx = pts[:, 0] - cx
    dy = pts[:, 1] - cy
    # argmin keeps the first of equally distant points.
    nearest = int(np.argmin(np.hypot(dx, dy - radius)))
    return float(np.arctan2(dy[nearest], dx[nearest]))


def _unwarp_seal(crop, points):
    """Unwrap the outer ring of a seal crop into a horizontal strip.

    Returns ``None`` when the crop is too small to contain a ring.
    """
    h, w = crop.shape[:2]
    cx, cy = w // 2, h // 2
    radius = min(cx, cy)

    # Keep the band between 60% and 98% of the radius (the text ring).
    ring_start, ring_end = int(radius * 0.6), int(radius * 0.98)
    if ring_end <= ring_start:
        return None

    best_theta = _pick_start_angle(points, cx, cy, radius)

    # Rotate so that best_theta ends up at 180 degrees, i.e. the middle of
    # the unwrapped strip (warpPolar starts at 0 degrees, pointing right).
    angle_deg = (best_theta * 180 / np.pi) - 180
    rotation = cv2.getRotationMatrix2D((cx, cy), angle_deg, 1.0)
    rotated = cv2.warpAffine(crop, rotation, (w, h), borderValue=(255, 255, 255))

    out_w = int(radius * 2 * np.pi * 1.2)  # 1.2 factor for better spacing

    # warpPolar maps radius to the output WIDTH and angle to the output
    # HEIGHT, so request (radius, out_w) and swap the axes afterwards to get
    # rows = radius and columns = angle. Slicing rows then really selects a
    # radial band instead of an angular sector.
    polar = cv2.warpPolar(
        rotated, (radius, out_w), (cx, cy), radius, cv2.WARP_POLAR_LINEAR
    )
    unwarped = np.ascontiguousarray(np.swapaxes(polar, 0, 1))

    strip = unwarped[ring_start:ring_end, :]
    # Vertical flip so the outer edge of the ring is at the top; characters
    # whose feet point to the seal center then stand upright.
    return cv2.flip(strip, 0)


def _save_image(path, image):
    """Write *image* to *path* and report a failed write instead of ignoring it."""
    if not cv2.imwrite(path, image):
        print(f"  Warning: could not write {path}")


def reproduce_java_flow():
    """Reproduce the seal-unwrapping flow on the first pages of up to 5 PDFs.

    For every PDF the first page is rendered, seal-like regions are located
    with the layout model, text polygons are detected inside each region, and
    the region is polar-unwrapped into a strip. The strip and the raw crop
    are written to ``repro_results/``.
    """
    # 1. Configuration
    pdf_dir = "refer/认监-扫描件识别/input_pdfs"
    output_dir = "repro_results"
    os.makedirs(output_dir, exist_ok=True)

    # Load models
    layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
    ocr_det = PaddleOCR(
        use_angle_cls=False, lang="ch", det_db_thresh=0.3, det_db_box_thresh=0.5
    )

    # Process the first 5 PDFs; sorted so the selection is reproducible
    # (os.listdir returns entries in arbitrary order).
    pdfs = sorted(f for f in os.listdir(pdf_dir) if f.endswith(".pdf"))[:5]

    for pdf_name in pdfs:
        pdf_path = os.path.join(pdf_dir, pdf_name)
        print(f"Processing {pdf_name}...")

        # 2x zoom for better detection
        img_bgr = _render_first_page(pdf_path, zoom=2)
        if img_bgr is None:
            print("  Skipped: document has no pages.")
            continue

        # 2. Layout Detection
        seal_candidates = _find_seal_candidates(layout_model.predict(img_bgr))
        print(f"  Found {len(seal_candidates)} seal candidates.")

        for i, coord in enumerate(seal_candidates):
            crop = _crop_region(img_bgr, coord)
            if crop.size == 0:
                continue

            # 3. Text Detection on crop (to find unwrap points)
            points = _collect_text_points(ocr_det.ocr(crop))

            # 4. Smart Unwarp calculation (replicate Java polarUnwarpSmart)
            final_strip = _unwarp_seal(crop, points)

            # Save visual results
            if final_strip is None:
                print(f"  Seal {i}: crop too small to unwarp.")
            else:
                out_name = f"{pdf_name}_seal_{i}_unwarped.png"
                _save_image(os.path.join(output_dir, out_name), final_strip)
            # Also save the original crop for comparison
            _save_image(
                os.path.join(output_dir, f"{pdf_name}_seal_{i}_crop.png"), crop
            )

    print(f"Batch Repro Complete. Results in {output_dir}/")
# Script entry point: run the reproduction only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    reproduce_java_flow()