import os

# NOTE(review): presumably read by paddlex/paddleocr when they are imported,
# so this assignment is kept ahead of those imports - confirm before moving.
os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"

import cv2
import numpy as np
import math
import fitz  # PyMuPDF
import paddlex as px  # Using Paddlex for Layout
from paddleocr import SealTextDetection


# Tool: Standard Polar Unwarp
def polar_unwarp(img, center, radius, start_theta, angular_extent):
    """Unroll an annular arc of ``img`` into a flat rectangular strip.

    Column ``x`` of the strip samples the angle
    ``start_theta + angular_extent * x / strip_w`` and row ``y`` samples the
    radius ``radius - y``, so the top row lies on the outer circle.  The strip
    is ``int(angular_extent * radius)`` pixels wide and ``int(radius * 0.6)``
    pixels tall.

    Args:
        img: Source image as a NumPy array (assumed uint8 BGR, as produced by
            the rest of this file; a single-channel image is replicated to
            three channels).
        center: ``(x, y)`` centre of the circle, in pixel coordinates.
        radius: Outer radius of the ring, in pixels.
        start_theta: Angle (radians) mapped to the left edge of the strip.
        angular_extent: Angular width (radians) covered by the strip.

    Returns:
        The unwarped strip as a uint8 array, or ``None`` when
        ``angular_extent`` is not positive or the strip would have no area.
        Samples falling outside the source image are filled with white.
    """
    if angular_extent <= 0:
        return None

    strip_w = int(angular_extent * radius)
    strip_h = int(radius * 0.6)
    if strip_w <= 0 or strip_h <= 0:
        return None

    # Build the whole sampling grid at once instead of calling into OpenCV
    # once per output pixel from a Python double loop.
    thetas = start_theta + angular_extent * (np.arange(strip_w, dtype=np.float64) / strip_w)
    radii = radius - np.arange(strip_h, dtype=np.float64)
    src_x = center[0] + radii[:, None] * np.cos(thetas)[None, :]
    src_y = center[1] + radii[:, None] * np.sin(thetas)[None, :]

    # A sample is usable only if its bilinear neighbourhood is fully inside
    # the image; everything else becomes white.
    img_h, img_w = img.shape[:2]
    inside = (src_x >= 0) & (src_x < img_w - 1) & (src_y >= 0) & (src_y < img_h - 1)

    strip = cv2.remap(
        img,
        src_x.astype(np.float32),
        src_y.astype(np.float32),
        interpolation=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )
    if strip.ndim == 2:
        # Keep the historical three-channel output for grayscale input.
        strip = np.repeat(strip[:, :, None], 3, axis=2)
    strip = strip.astype(np.uint8, copy=False)
    strip[~inside] = 255
    return strip


def _polygon_angle_runs(poly, center, gap_thresh):
    """Return ``[start, end]`` angular runs covered by one polygon's vertices.

    The vertex angles (relative to ``center``) are unwrapped so that the
    sequence starts right after the largest circular gap, then split wherever
    two consecutive angles are more than ``gap_thresh`` radians apart.  Ends
    may therefore exceed ``pi``.
    """
    two_pi = 2 * math.pi
    thetas = sorted(math.atan2(p[1] - center[1], p[0] - center[0]) for p in poly)
    if not thetas:
        return []

    # Locate the largest gap on the circle (the last entry wraps to the first).
    last = len(thetas) - 1
    max_gap = 0
    gap_idx = -1
    for i, theta in enumerate(thetas):
        gap = (thetas[0] + two_pi - theta) if i == last else (thetas[i + 1] - theta)
        if gap > max_gap:
            max_gap = gap
            gap_idx = i

    # Rotate the sequence so it is monotonically increasing across +/-pi.
    if gap_idx == last:
        t_arc = thetas
    else:
        t_arc = thetas[gap_idx + 1:] + [t + two_pi for t in thetas[:gap_idx + 1]]

    runs = []
    run_start = prev = t_arc[0]
    for theta in t_arc[1:]:
        if theta - prev > gap_thresh:
            runs.append([run_start, prev])
            run_start = theta
        prev = theta
    runs.append([run_start, prev])
    return runs


def calculate_precise_arc(polygons, center):
    """Pick the dominant text arc described by a set of polygons.

    Each polygon's vertices are converted to angles around ``center`` and
    grouped into runs (split at gaps larger than 15 degrees).  Runs from all
    polygons are merged when less than 45 degrees apart - including across
    the +/-pi seam, so an arc that the detector split into several polygons
    is still recovered as one.  Each merged arc is scored by its extent times
    a Gaussian weight on the circular distance of its midpoint from
    ``-pi/2`` (the top of the image when y grows downward).

    Args:
        polygons: Iterable of polygons, each an iterable of ``(x, y)`` points.
        center: ``(x, y)`` reference point for the angles.

    Returns:
        ``(start_theta, extent)`` in radians.  ``start_theta`` is normalised
        into ``[-pi, pi)`` and ``extent`` is capped at ``2 * pi``.  Returns
        ``(0.0, 0.0)`` when no polygon contributes any point.
    """
    two_pi = 2 * math.pi
    gap_thresh = math.radians(15)
    merge_thresh = math.radians(45)

    arcs = []
    for poly in polygons:
        for start, end in _polygon_angle_runs(poly, center, gap_thresh):
            # Put every run on the same 2*pi branch so runs coming from
            # different polygons are directly comparable.
            shift = math.floor((start + math.pi) / two_pi) * two_pi
            arcs.append([start - shift, end - shift])

    if not arcs:
        return 0.0, 0.0

    # Linear sweep: merge neighbours that are closer than merge_thresh.
    arcs.sort(key=lambda arc: arc[0])
    merged = [arcs[0]]
    for nxt in arcs[1:]:
        curr = merged[-1]
        if nxt[0] - curr[1] < merge_thresh:
            curr[1] = max(curr[1], nxt[1])
        else:
            merged.append(nxt)

    # Circular step: the last arc may reach past +pi and touch the first one.
    while len(merged) > 1:
        first, tail = merged[0], merged[-1]
        if first[0] + two_pi - tail[1] >= merge_thresh:
            break
        tail[1] = max(tail[1], first[1] + two_pi)
        del merged[0]

    def arc_score(arc):
        start, end = arc
        mid = (start + end) / 2
        dist_to_top = abs(((mid + math.pi / 2 + math.pi) % two_pi) - math.pi)
        weight = math.exp(-0.5 * (dist_to_top / (math.pi / 2)) ** 2)
        return (end - start) * weight

    best = max(merged, key=arc_score)
    # Overlapping runs can add up to more than a full turn; never report that.
    return best[0], min(best[1] - best[0], two_pi)


def extract_pdf_page(pdf_path, page_num=0):
    """Render one PDF page to a BGR image at 2x zoom.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.

    Returns:
        The rendered page as a NumPy array in BGR channel order.
    """
    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_num)
        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
        img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
        if pix.n == 4:
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)


def run_layout_detection(image_path):
    """Run the PP-DocLayout-L layout model on an image and collect its boxes.

    Every detected box is printed and returned, without any score filtering.

    Args:
        image_path: Path of the image passed to the model's ``predict``.

    Returns:
        A list of dicts with keys ``'label'``, ``'score'`` and ``'box'``
        (``'box'`` is whatever the model reports under ``'coordinate'`` and
        is ``None`` when that key is missing).
    """
    print("Initializing Paddlex PP-DocLayout-L...")
    model = px.create_model("PP-DocLayout-L")
    output = model.predict(image_path, batch_size=1)

    all_regions = []
    for res in output:
        # Paddlex 3.0 result structure: res is a dict with 'boxes' key
        for box in res.get('boxes', []):
            # box structure: label_name, label, score, coordinate
            label_name = box.get('label_name', box.get('label', 'unknown'))
            score = box.get('score', 0.0)
            coords = box.get('coordinate')
            print(f"Detected: {label_name} (Score: {score:.2f}) at {coords}")
            all_regions.append({
                'label': label_name,
                'score': score,
                'box': coords
            })
    return all_regions


def process_full_workflow(pdf_path, output_dir="report_viz"):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print(f"Rendering PDF {pdf_path} Page 1...")
    page_img = extract_pdf_page(pdf_path)
    doc_path = os.path.join(output_dir, "doc_page.png")
    cv2.imwrite(doc_path, page_img)

    print("Running Layout Detection via Paddlex...")
    all_regions = run_layout_detection(doc_path)

    page_viz = page_img.copy()
seal_boxes = [] for reg in all_regions: box = reg.get('box') label = reg.get('label') score = reg.get('score', 0.0) # In Paddlex 3.0 DocLayout, 'seal' is index 16 or name 'seal' # Let's match by name. is_seal = (label == 'seal') if score > 0.2: # Low threshold for debugging x1, y1, x2, y2 = [int(v) for v in box] color = (0, 0, 255) if is_seal else (0, 255, 0) cv2.rectangle(page_viz, (x1, y1), (x2, y2), color, 2) cv2.putText(page_viz, f"{label} {score:.2f}", (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1) if is_seal: seal_boxes.append(box) cv2.imwrite(os.path.join(output_dir, "doc_layout_viz.png"), page_viz) seal_results = [] print(f"Processing {len(seal_boxes)} detected seals...") det_model = SealTextDetection(model_name="PP-OCRv4_server_seal_det") for i, box in enumerate(seal_boxes): x1, y1, x2, y2 = [int(v) for v in box] pad = 40 y1_p, y2_p = max(0, y1-pad), min(page_img.shape[0], y2+pad) x1_p, x2_p = max(0, x1-pad), min(page_img.shape[1], x2+pad) seal_crop = page_img[y1_p:y2_p, x1_p:x2_p] crop_path = os.path.join(output_dir, f"seal_crop_{i}.png") cv2.imwrite(crop_path, seal_crop) print(f"Refining Seal #{i}...") output = det_model.predict(crop_path, batch_size=1) all_polygons = [] for res in output: # SealTextDetection returns dt_polys directly in the result polys = res.get('dt_polys') if isinstance(res, dict) else None if polys: all_polygons.extend(polys) print(f" Found {len(polys)} text polygons in seal #{i}") ch, cw = seal_crop.shape[:2] center = [cw // 2, ch // 2] radius = min(cw, ch) // 2 - 10 start_theta, extent = calculate_precise_arc(all_polygons, center) marked = seal_crop.copy() for p in all_polygons: cv2.polylines(marked, [np.array(p, dtype=np.int32)], True, (0, 255, 0), 2) unwarp_name = f"seal_unwarp_{i}.png" unwarp_path = os.path.join(output_dir, unwarp_name) unwarp = None if extent > 0: unwarp = polar_unwarp(seal_crop, center, radius, start_theta, extent) if unwarp is not None: cv2.imwrite(unwarp_path, unwarp) def draw_line(m, theta, color): x 
= center[0] + radius * math.cos(theta) y = center[1] + radius * math.sin(theta) cv2.line(m, (int(center[0]), int(center[1])), (int(x), int(y)), color, 2) draw_line(marked, start_theta, (255, 0, 0)) draw_line(marked, start_theta + extent, (0, 0, 255)) marked_name = f"seal_marked_{i}.png" cv2.imwrite(os.path.join(output_dir, marked_name), marked) seal_results.append({'index': i, 'crop': f"seal_crop_{i}.png", 'marked': marked_name, 'unwarp': unwarp_name if unwarp is not None else None}) # Integrated HTML Template html = f"""
File: WTS2025-21283.pdf | Detected Regions: {len(all_regions)}
Detection Overlay
Unwarped Organization Name
{f'No text arc found in this crop.
'}No seals detected for unwarping.
'}