diff --git a/pom.xml b/pom.xml
index ae56d33..b2ec804 100644
--- a/pom.xml
+++ b/pom.xml
@@ -242,6 +242,68 @@
+
+
+ package-python-static
+ process-resources
+
+ copy-resources
+
+
+ ${project.build.directory}/classes/python-runtime
+
+
+ packaging/python/python-3.9-linux-static
+
+ **/*.pyc
+ **/__pycache__/**
+ **/test/**
+ **/tests/**
+
+
+
+
+
+
+
+ package-python-venv
+ process-resources
+
+ copy-resources
+
+
+ ${project.build.directory}/classes/python-runtime/venv-offline
+
+
+ packaging/python/venv-linux-offline
+
+ **/*.pyc
+ **/__pycache__/**
+ **/tests/**
+ **/test/**
+ **/*.md
+ **/*.dist-info/**
+
+
+
+
+
+
+
+ package-ocr-models
+ process-resources
+
+ copy-resources
+
+
+ ${project.build.directory}/classes/models
+
+
+ packaging/python/models
+
+
+
+
diff --git a/src/main/java/com/chinaweal/youfool/reportdetect/modules/ocr/service/OcrService.java b/src/main/java/com/chinaweal/youfool/reportdetect/modules/ocr/service/OcrService.java
index 130c84a..71c2ab8 100644
--- a/src/main/java/com/chinaweal/youfool/reportdetect/modules/ocr/service/OcrService.java
+++ b/src/main/java/com/chinaweal/youfool/reportdetect/modules/ocr/service/OcrService.java
@@ -1,116 +1,78 @@
package com.chinaweal.youfool.reportdetect.modules.ocr.service;
-import ai.djl.inference.Predictor;
-import ai.djl.modality.cv.Image;
-import ai.djl.modality.cv.ImageFactory;
-import ai.djl.modality.cv.output.DetectedObjects;
-import ai.djl.modality.cv.output.Rectangle;
-import ai.djl.repository.zoo.Criteria;
-import ai.djl.repository.zoo.ZooModel;
-import ai.djl.translate.TranslateException;
import com.chinaweal.youfool.reportdetect.common.utils.CertUtils;
-import com.chinaweal.youfool.reportdetect.common.utils.PdfUtils;
import com.chinaweal.youfool.reportdetect.modules.task.entity.OCRResult;
-import com.chinaweal.youfool.reportdetect.modules.ocr.utils.CmaTemplateExtractor;
+import com.chinaweal.youfool.reportdetect.modules.ocr.client.FlaskOCRClient;
+import com.chinaweal.youfool.reportdetect.modules.ocr.dto.FlaskOCRResponse;
import com.chinaweal.youfool.reportdetect.modules.ocr.utils.InstitutionNameCleaner;
import com.chinaweal.youfool.reportdetect.modules.ocr.utils.InstitutionNameSearcher;
-import com.chinaweal.youfool.reportdetect.modules.ocr.utils.SealExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
-import javax.annotation.PostConstruct;
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.util.*;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-import java.awt.image.BufferedImage;
-import javax.imageio.ImageIO;
+/**
+ * OCR Service - Python-First Architecture
+ *
+ * This service delegates all OCR processing to the Python Flask API.
+ * The Java OCR implementation has been removed in favor of the
+ * Python-based OCR engine, which provides better accuracy and is
+ * easier to maintain.
+ *
+ * Architecture:
+ * - Java Backend (Spring Boot) → Flask API (Python) → PaddleOCR
+ * - All OCR processing is done by the Python Flask API server
+ * - Java only handles business logic and database operations
+ *
+ * @author Claude Code
+ * @version 2.0 - Python-First Architecture
+ */
@Service
public class OcrService {
private static final Logger log = LoggerFactory.getLogger(OcrService.class);
@Autowired
- private LayoutDetectionService layoutService;
+ private FlaskOCRClient flaskOCRClient;
- @Autowired
- private PaddleOCRVLService paddleOCRVLService;
-
- @Autowired
+ @Autowired(required = false)
private com.chinaweal.youfool.reportdetect.modules.ocr.engine.PythonOcrEngine pythonOcrEngine;
- public void setLayoutService(LayoutDetectionService layoutService) {
- this.layoutService = layoutService;
- }
+ @Value("${app.ocr.engine:python}")
+ private String ocrEngineType; // python (recommended) or fallback
- public void setPaddleOCRVLService(PaddleOCRVLService paddleOCRVLService) {
- this.paddleOCRVLService = paddleOCRVLService;
- }
-
- @Value("${app.ocr.mock:false}")
- private boolean mockMode;
-
- @Value("${app.ocr.engine:java}")
- private String ocrEngineType; // java or python
-
- private String vizPath;
-
- public void setVizPath(String vizPath) {
- this.vizPath = vizPath;
- }
-
- private static final Pattern CMA_PATTERN_1 = Pattern.compile("\\d{11}");
- private static final Pattern CMA_PATTERN_2 = Pattern.compile("\\d{12}");
-
- private List recKeys = new ArrayList<>();
- private CmaTemplateExtractor cmaExtractor;
-
- private static final int MIN_POLYGONS_FOR_UNWARP = 3;
-
- @PostConstruct
- public void init() {
- try {
- Path keyPath = Paths.get("src/main/resources/ppocr_keys_v1.txt");
- if (Files.exists(keyPath)) {
- this.recKeys = Files.readAllLines(keyPath, StandardCharsets.UTF_8);
- log.info("Loaded {} keys for OCR Recognition", recKeys.size());
- }
- } catch (Exception e) {
- log.warn("Failed to load OCR keys: {}", e.getMessage());
- }
-
- // Initialize CMA template extractor
- this.cmaExtractor = new CmaTemplateExtractor();
- log.info("CMA Template Extractor initialized");
- }
-
- public static class OcrExecutionResult {
- public String text = "";
- public List