Package embedded Python archives and enforce embedded runtime

This commit is contained in:
黄仁欢 2026-03-19 14:18:14 +08:00
parent fc9cbcf1da
commit 9064d3ea10
4 changed files with 910 additions and 36 deletions

45
pom.xml
View File

@ -171,6 +171,11 @@
<artifactId>jsoup</artifactId>
<version>1.17.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.26.1</version>
</dependency>
</dependencies>
<build>
@ -247,9 +252,9 @@
</resources>
</configuration>
</execution>
<!-- Package Python runtime for offline deployment -->
<!-- Package Python runtime + venv archives for offline deployment (Windows-safe) -->
<execution>
<id>package-python-static</id>
<id>package-python-archives</id>
<phase>process-resources</phase>
<goals>
<goal>copy-resources</goal>
@ -258,37 +263,11 @@
<outputDirectory>${project.build.directory}/classes/python-runtime</outputDirectory>
<resources>
<resource>
<directory>packaging/python/python-3.9-linux-static</directory>
<excludes>
<exclude>**/*.pyc</exclude>
<exclude>**/__pycache__/**</exclude>
<exclude>**/test/**</exclude>
<exclude>**/tests/**</exclude>
</excludes>
</resource>
</resources>
</configuration>
</execution>
<!-- Package Python virtual environment for offline deployment -->
<execution>
<id>package-python-venv</id>
<phase>process-resources</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/classes/python-runtime/venv-offline</outputDirectory>
<resources>
<resource>
<directory>packaging/python/venv-linux-offline</directory>
<excludes>
<exclude>**/*.pyc</exclude>
<exclude>**/__pycache__/**</exclude>
<exclude>**/tests/**</exclude>
<exclude>**/test/**</exclude>
<exclude>**/*.md</exclude>
<exclude>**/*.dist-info/**</exclude>
</excludes>
<directory>packaging/python</directory>
<includes>
<include>python-runtime.tar.gz</include>
<include>venv-offline.tar.gz</include>
</includes>
</resource>
</resources>
</configuration>

View File

@ -1,11 +1,11 @@
# Offline Python Requirements - Fixed Versions for Reproducibility
# Generated: 2026-03-04
# Target: Linux x86_64, Python 3.9
# Target: Linux x86_64, Python 3.10
# Core OCR Dependencies
paddleocr==2.7.5
paddlepaddle==2.5.2
opencv-python==4.8.0.76
paddlepaddle==2.6.2
opencv-python==4.6.0.66
pymupdf==1.23.0
pikepdf==8.0.0

View File

@ -0,0 +1,471 @@
package com.chinaweal.youfool.reportdetect.common.utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
import org.springframework.core.io.support.ResourcePatternResolver;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
/**
* Extracts Python resources and model files from JAR to working directory
*/
@Component
public class ResourceExtractor {
// Class-wide SLF4J logger.
private static final Logger log = LoggerFactory.getLogger(ResourceExtractor.class);
// Root directory that scripts and the python-runtime tree are extracted into
// (property app.ocr.resource-dir, default ./ocr-resources).
@Value("${app.ocr.resource-dir:./ocr-resources}")
private String resourceDir;
// Directory holding OCR model files (property app.ocr.models-dir, default ./models).
@Value("${app.ocr.models-dir:./models}")
private String modelsDir;
// When true, init() runs extractIfMissing() after bean construction
// (property app.ocr.extract-on-startup, default true).
@Value("${app.ocr.extract-on-startup:true}")
private boolean extractOnStartup;
/**
 * Post-construction hook: when extraction on startup is enabled, extracts
 * missing scripts/models. An I/O failure is logged and does not abort startup.
 */
@PostConstruct
public void init() {
    if (!extractOnStartup) {
        return;
    }
    try {
        extractIfMissing();
    } catch (IOException extractionFailure) {
        log.error("Failed to extract resources on startup", extractionFailure);
    }
}
/**
 * Lightweight extraction pass: Python scripts first, then the model
 * directory check. Does not touch the embedded runtime or venv.
 *
 * @throws IOException if either step fails
 */
public void extractIfMissing() throws IOException {
    extractPythonCode();
    extractModels();
}
/**
 * Full offline extraction: Python runtime, virtual environment, OCR models
 * and Python scripts, in that order, with the elapsed time logged at the end.
 *
 * @throws IOException if any extraction step fails
 */
public void extractAllResources() throws IOException {
    final String banner = "========================================";
    log.info(banner);
    log.info("开始提取离线资源");
    log.info(banner);

    final long startedAt = System.currentTimeMillis();

    extractPythonRuntime();       // step 1: interpreter
    extractVirtualEnvironment();  // step 2: venv
    extractOcrModels();           // step 3: models
    extractPythonCode();          // step 4: scripts

    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    log.info(banner);
    log.info("✓ 离线资源提取完成 (耗时: {}秒)", elapsedMillis / 1000.0);
    log.info(banner);
}
/**
 * Extract the Python scripts from the classpath/JAR into {@code resourceDir}.
 *
 * <p>Presence is checked per target file. A "directory is non-empty" test is
 * not usable here: {@code extractAllResources()} unpacks the Python runtime
 * into {@code resourceDir/python-runtime} before calling this method, so the
 * directory is never empty at that point and the scripts would be skipped.
 * Existing files are left untouched.
 *
 * @throws IOException if the target directory cannot be created or a copy fails
 */
private void extractPythonCode() throws IOException {
    Path targetDir = Paths.get(resourceDir);

    // classpath location -> file name under targetDir
    Map<String, String> pythonResources = new HashMap<>();
    pythonResources.put("python_api/ocr_api_server.py", "ocr_api_server.py");
    pythonResources.put("python_api/ocr_task_consumer.py", "ocr_task_consumer.py");
    pythonResources.put("src/main/python/pdf_processor.py", "pdf_processor.py");
    pythonResources.put("test_accuracy_batch_full.py", "test_accuracy_batch_full.py");

    Files.createDirectories(targetDir);

    int attempted = 0;
    for (Map.Entry<String, String> entry : pythonResources.entrySet()) {
        Path target = targetDir.resolve(entry.getValue());
        if (Files.exists(target)) {
            continue;
        }
        if (attempted == 0) {
            log.info("Extracting Python resources to {}", targetDir);
        }
        // extractResource only warns when the source is absent from the classpath.
        extractResource(entry.getKey(), target);
        attempted++;
    }

    if (attempted == 0) {
        log.info("Python resources already exist at {}", targetDir);
        return;
    }
    log.info("Python resources extracted successfully");
}
/**
 * Verify that the two essential PP-OCRv5 ONNX files exist under
 * {@code modelsDir}; if not, warn and make sure the directory exists.
 * Nothing is copied here: this method only checks and creates the directory.
 *
 * <p>Options for actually providing large models (not implemented here):
 * download from a CDN, extract from the JAR if bundled, or mount a volume.
 *
 * @throws IOException if the models directory cannot be created
 */
private void extractModels() throws IOException {
    final Path modelsRoot = Paths.get(modelsDir);
    final Path detectionModel =
            modelsRoot.resolve("pp-ocrv5/PP-OCRv5_server_det_onnx/inference.onnx");
    final Path recognitionModel =
            modelsRoot.resolve("pp-ocrv5/PP-OCRv5_server_rec_onnx/inference.onnx");

    final boolean complete = Files.exists(modelsRoot)
            && Files.exists(detectionModel)
            && Files.exists(recognitionModel);
    if (complete) {
        log.info("Models directory already exists with essential models at {}", modelsRoot);
        return;
    }

    log.warn("Models directory missing or incomplete at {}", modelsRoot);
    log.info("Models will be auto-downloaded by PaddleOCR on first use");

    // Only the directory skeleton is created; see the Javadoc for provisioning options.
    Files.createDirectories(modelsRoot);
}
/**
 * Copy a single classpath resource to {@code targetPath}, replacing any
 * existing file. If the class loader cannot find the resource, a warning is
 * logged and, when running from a JAR, a direct JAR scan is attempted.
 *
 * @param resourcePath classpath-relative resource name
 * @param targetPath   destination file
 * @throws IOException if the copy fails
 */
private void extractResource(String resourcePath, Path targetPath) throws IOException {
    // try-with-resources closes the stream even when Files.copy throws.
    try (InputStream is = getClass().getClassLoader().getResourceAsStream(resourcePath)) {
        if (is == null) {
            log.warn("Resource not found in classpath: {}", resourcePath);
            // Try extracting from JAR directly
            if (isRunningInJar()) {
                extractFromJar(resourcePath, targetPath);
            }
            return;
        }
        Path parent = targetPath.getParent();
        if (parent != null) {
            Files.createDirectories(parent);
        }
        Files.copy(is, targetPath, StandardCopyOption.REPLACE_EXISTING);
    }
    log.debug("Extracted: {} -> {}", resourcePath, targetPath);
}
/**
 * Fallback extraction: scan the JAR this class was loaded from for an entry
 * equal to {@code resourcePath} or ending with {@code "/" + resourcePath},
 * and copy the first match to {@code targetPath}.
 *
 * <p>The code-source location is reduced to the outer JAR file. Nested
 * locations of the form {@code file:/app.jar!/BOOT-INF/classes!/} are cut at
 * the first {@code "!/"}, a {@code file:} prefix is removed and
 * percent-encoding is decoded. Returns silently if the location is not a JAR.
 *
 * @param resourcePath resource name to look for
 * @param targetPath   destination file
 * @throws IOException if the JAR cannot be read or the copy fails
 */
private void extractFromJar(String resourcePath, Path targetPath) throws IOException {
    java.security.CodeSource codeSource = getClass().getProtectionDomain().getCodeSource();
    if (codeSource == null || codeSource.getLocation() == null) {
        return;
    }
    String jarPath = codeSource.getLocation().getPath();
    if (jarPath == null) {
        return;
    }
    // Nested-jar URLs carry the inner path after "!/"; only the outer file is openable.
    int nestedSeparator = jarPath.indexOf("!/");
    if (nestedSeparator >= 0) {
        jarPath = jarPath.substring(0, nestedSeparator);
    }
    if (jarPath.startsWith("file:")) {
        jarPath = jarPath.substring("file:".length());
    }
    if (!jarPath.endsWith(".jar")) {
        return;
    }
    // URL paths are percent-encoded (e.g. %20); convert through a URI to decode.
    try {
        jarPath = Paths.get(java.net.URI.create("file:" + jarPath)).toString();
    } catch (IllegalArgumentException | FileSystemNotFoundException e) {
        log.debug("Using raw JAR path {} ({})", jarPath, e.getMessage());
    }

    try (JarFile jarFile = new JarFile(jarPath)) {
        Enumeration<JarEntry> entries = jarFile.entries();
        while (entries.hasMoreElements()) {
            JarEntry entry = entries.nextElement();
            String entryName = entry.getName();
            if (entryName.equals(resourcePath) || entryName.endsWith("/" + resourcePath)) {
                try (InputStream is = jarFile.getInputStream(entry)) {
                    Path parent = targetPath.getParent();
                    if (parent != null) {
                        Files.createDirectories(parent);
                    }
                    Files.copy(is, targetPath, StandardCopyOption.REPLACE_EXISTING);
                    log.debug("Extracted from JAR: {} -> {}", entryName, targetPath);
                    return;
                }
            }
        }
    }
    log.warn("Resource not found in JAR: {}", resourcePath);
}
/**
 * Check whether this class was loaded from a JAR.
 *
 * <p>True when the code-source URL uses the {@code jar} protocol, or when it
 * is any other URL (typically {@code file:}) whose path ends in {@code .jar},
 * which is how a plain, non-nested JAR on the classpath is reported.
 *
 * @return {@code true} if running from a JAR; {@code false} otherwise or if
 *         the code source is unavailable
 */
private boolean isRunningInJar() {
    java.security.CodeSource codeSource = getClass().getProtectionDomain().getCodeSource();
    if (codeSource == null || codeSource.getLocation() == null) {
        return false;
    }
    java.net.URL location = codeSource.getLocation();
    if ("jar".equals(location.getProtocol())) {
        return true;
    }
    String path = location.getPath();
    return path != null && path.endsWith(".jar");
}
/**
 * Recursively delete a directory and everything beneath it. Does nothing if
 * the path does not exist.
 *
 * @param directory root of the tree to delete
 * @throws IOException if a file or directory cannot be deleted, or if
 *                     iterating a directory failed
 */
public void deleteDirectory(Path directory) throws IOException {
    if (!Files.exists(directory)) {
        return;
    }
    Files.walkFileTree(directory, new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
            Files.delete(file);
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
            // A non-null exc means listing this directory failed; surface that
            // root cause instead of a secondary "directory not empty" error.
            if (exc != null) {
                throw exc;
            }
            Files.delete(dir);
            return FileVisitResult.CONTINUE;
        }
    });
}
/**
 * @return the configured resource extraction directory (app.ocr.resource-dir)
 */
public String getResourceDir() {
return resourceDir;
}
/**
 * @return the configured models directory (app.ocr.models-dir)
 */
public String getModelsDir() {
return modelsDir;
}
/**
 * @return whether init() extracts resources after construction (app.ocr.extract-on-startup)
 */
public boolean isExtractOnStartup() {
return extractOnStartup;
}
// ========== Offline Deployment Methods ==========
/**
 * Step 1/4: unpack the embedded Python interpreter into
 * {@code resourceDir/python-runtime}. Skipped when an interpreter binary is
 * already there. The tar.gz archive is tried first; if it is not on the
 * classpath, the loose {@code python-runtime} resource tree is copied instead.
 *
 * @throws IOException if extraction fails
 */
private void extractPythonRuntime() throws IOException {
    final Path runtimeRoot = Paths.get(resourceDir, "python-runtime");
    final Path versionedBinary = runtimeRoot.resolve("python/bin/python3.10");
    final Path genericBinary = runtimeRoot.resolve("python/bin/python3");

    if (Files.exists(versionedBinary) || Files.exists(genericBinary)) {
        log.info("[1/4] Python runtime already exists, skipping");
        return;
    }

    log.info("[1/4] Extracting Python runtime...");
    Files.createDirectories(runtimeRoot);

    final boolean unpackedFromArchive =
            extractTarGzResource("python-runtime/python-runtime.tar.gz", runtimeRoot);
    if (!unpackedFromArchive) {
        copyDirectoryFromClasspath("python-runtime", runtimeRoot);
    }

    // makeExecutable ignores paths that do not exist.
    makeExecutable(versionedBinary);
    makeExecutable(genericBinary);
    log.info(" Done");
}
/**
 * Step 2/4: unpack the offline virtual environment into
 * {@code resourceDir/python-runtime/venv-offline}. Skipped when a venv
 * interpreter already exists. The archive is unpacked into the parent
 * directory; presumably it contains a top-level {@code venv-offline/} folder
 * (verify against the packaging script). Falls back to copying the loose
 * resource tree when the archive is not on the classpath.
 *
 * @throws IOException if extraction fails
 */
private void extractVirtualEnvironment() throws IOException {
    final Path venvDir = Paths.get(resourceDir, "python-runtime", "venv-offline");
    final Path versionedBinary = venvDir.resolve("bin/python3.10");
    final Path genericBinary = venvDir.resolve("bin/python3");

    if (Files.exists(versionedBinary) || Files.exists(genericBinary)) {
        log.info("[2/4] Python venv already exists, skipping");
        return;
    }

    log.info("[2/4] Extracting Python venv...");
    final Path archiveTarget = venvDir.getParent();
    Files.createDirectories(archiveTarget);

    final boolean unpackedFromArchive =
            extractTarGzResource("python-runtime/venv-offline.tar.gz", venvDir.getParent());
    if (!unpackedFromArchive) {
        Files.createDirectories(venvDir);
        copyDirectoryFromClasspath("python-runtime/venv-offline", venvDir);
    }

    makeExecutable(versionedBinary);
    makeExecutable(genericBinary);
    log.info(" Done");
}
/**
 * Step 3/4: copy the bundled {@code models} resource tree into
 * {@code modelsDir} unless the detection model marker file is already present.
 *
 * <p>NOTE(review): the marker checked here
 * ({@code pp-ocrv5/det_model/inference.onnx}) differs from the paths checked
 * by {@code extractModels()}; confirm which layout the packaged models use.
 *
 * @throws IOException if the directory cannot be created or resources cannot be listed
 */
private void extractOcrModels() throws IOException {
    final Path modelsRoot = Paths.get(modelsDir);
    final Path detectionMarker = modelsRoot.resolve("pp-ocrv5/det_model/inference.onnx");

    if (!Files.exists(detectionMarker)) {
        log.info("[3/4] 提取OCR模型...");
        Files.createDirectories(modelsRoot);
        copyDirectoryFromClasspath("models", modelsRoot);
        log.info(" ✓ 完成");
    } else {
        log.info("[3/4] OCR模型已存在跳过");
    }
}
/**
 * Copy every readable file under the classpath folder {@code resourcePath}
 * into {@code targetDir}, preserving the sub-path below {@code resourcePath}.
 * Copying is best-effort: a file that fails is logged at debug level with its
 * cause and skipped.
 *
 * <p>The sub-path is made relative (leading '/' stripped) before resolving.
 * {@code Path.resolve} returns its argument unchanged when that argument is
 * absolute, so an un-stripped "/sub/file" would be written at the filesystem
 * root rather than under {@code targetDir}.
 *
 * @param resourcePath classpath folder, e.g. {@code "models"}
 * @param targetDir    destination directory
 * @throws IOException if the resource listing fails
 */
private void copyDirectoryFromClasspath(String resourcePath, Path targetDir) throws IOException {
    log.info(" 复制资源: {}", resourcePath);
    ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
    Resource[] resources = resolver.getResources("classpath*:" + resourcePath + "/**");
    Path root = targetDir.toAbsolutePath().normalize();
    int fileCount = 0;
    for (Resource resource : resources) {
        if (!resource.isReadable() || resource.getURL().toString().endsWith("/")) {
            continue;
        }
        try {
            String path = resource.getURL().getPath();
            // NOTE(review): first occurrence is used; a parent directory whose
            // name contains resourcePath would shift the cut point.
            int start = path.indexOf(resourcePath);
            if (start < 0) {
                log.debug("跳过文件: {}", resource.getFilename());
                continue;
            }
            String relative = path.substring(start + resourcePath.length());
            int firstNonSlash = 0;
            while (firstNonSlash < relative.length() && relative.charAt(firstNonSlash) == '/') {
                firstNonSlash++;
            }
            relative = relative.substring(firstNonSlash);
            if (relative.isEmpty()) {
                continue;
            }
            Path targetPath = root.resolve(relative).normalize();
            if (!targetPath.startsWith(root)) {
                // Refuse anything that would land outside the target directory.
                log.debug("跳过文件: {}", resource.getFilename());
                continue;
            }
            Files.createDirectories(targetPath.getParent());
            try (InputStream in = resource.getInputStream()) {
                Files.copy(in, targetPath, StandardCopyOption.REPLACE_EXISTING);
            }
            fileCount++;
        } catch (Exception e) {
            // Trailing Throwable argument makes SLF4J log the cause.
            log.debug("跳过文件: {}", resource.getFilename(), e);
        }
    }
    log.info(" 复制了 {} 个文件", fileCount);
}
/**
 * Extract a .tar.gz resource from classpath into target directory.
 *
 * <p>The archive is first copied to a temp file (avoids nested-jar stream
 * issues). On Linux the system {@code tar} is tried first; if it is not used
 * or fails, the archive is unpacked in-process with commons-compress. The
 * temp file is removed on every exit path, including exceptions.
 *
 * @param resourceName classpath name of the archive
 * @param targetDir    directory to unpack into (created if missing)
 * @return {@code true} if extraction was performed (even if the in-process
 *         path wrote zero files); {@code false} if the resource is absent or empty
 * @throws IOException if copying or in-process unpacking fails
 */
private boolean extractTarGzResource(String resourceName, Path targetDir) throws IOException {
    ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
    Resource resource = resolver.getResource("classpath:" + resourceName);
    if (!resource.exists()) {
        return false;
    }
    log.info(" Extracting archive: {}", resourceName);
    Files.createDirectories(targetDir);

    Path tmp = Files.createTempFile("ocr-archive-", ".tar.gz");
    try {
        try (InputStream is = resource.getInputStream()) {
            Files.copy(is, tmp, StandardCopyOption.REPLACE_EXISTING);
        }
        long tmpSize = Files.size(tmp);
        if (tmpSize <= 0) {
            log.warn(" Archive size is 0 bytes: {}", resourceName);
            return false;
        }
        log.info(" Archive temp file: {} ({} bytes)", tmp.toAbsolutePath(), tmpSize);

        // Prefer system tar on Linux for reliability (keeps symlinks and modes).
        if (isLinux() && runSystemTar(tmp, targetDir, resourceName)) {
            return true;
        }

        int extracted = unpackTarGzInProcess(tmp, targetDir);
        if (extracted == 0) {
            log.warn(" Archive extracted 0 files: {}", resourceName);
        } else {
            log.info(" Extracted {} files from {}", extracted, resourceName);
        }
        return true;
    } finally {
        try {
            Files.deleteIfExists(tmp);
        } catch (IOException ignored) {
            // Best effort: a leftover temp file must not fail the extraction.
        }
    }
}

/** Runs {@code tar -xzf archive -C targetDir}; returns true only on exit code 0. */
private boolean runSystemTar(Path archive, Path targetDir, String resourceName) {
    try {
        ProcessBuilder pb = new ProcessBuilder(
                "tar", "-xzf", archive.toAbsolutePath().toString(),
                "-C", targetDir.toAbsolutePath().toString());
        pb.redirectErrorStream(true);
        Process p = pb.start();
        StringBuilder out = new StringBuilder();
        // Drain output before waitFor so a full pipe cannot block tar.
        try (java.io.BufferedReader br = new java.io.BufferedReader(
                new java.io.InputStreamReader(p.getInputStream()))) {
            String line;
            while ((line = br.readLine()) != null) {
                out.append(line).append('\n');
            }
        }
        int code = p.waitFor();
        if (code == 0) {
            log.info(" System tar extraction succeeded: {}", resourceName);
            return true;
        }
        log.warn(" System tar extraction failed (code {}): {}", code, resourceName);
        if (out.length() > 0) {
            log.warn(" System tar output: {}", out.toString().trim());
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt for callers; the in-process fallback still runs.
        Thread.currentThread().interrupt();
        log.warn(" System tar extraction error: {}", e.getMessage());
    } catch (Exception e) {
        log.warn(" System tar extraction error: {}", e.getMessage());
    }
    return false;
}

/** Unpacks regular files and directories with commons-compress; returns the number of files written. */
private int unpackTarGzInProcess(Path archive, Path targetDir) throws IOException {
    // Compare against an absolute, normalized root. A configured path such as
    // "./ocr-resources" keeps its "." element, and a normalized entry path
    // never startsWith() it, so every entry would be rejected.
    Path root = targetDir.toAbsolutePath().normalize();
    int extracted = 0;
    try (InputStream fis = Files.newInputStream(archive);
         GZIPInputStream gis = new GZIPInputStream(fis);
         TarArchiveInputStream tis = new TarArchiveInputStream(gis)) {
        TarArchiveEntry entry;
        while ((entry = tis.getNextTarEntry()) != null) {
            String entryName = entry.getName();
            if (entryName == null || entryName.isEmpty()) {
                continue;
            }
            Path outPath = root.resolve(entryName).normalize();
            if (!outPath.startsWith(root)) {
                // Path traversal ("../") or absolute entry name: never write outside root.
                continue;
            }
            if (entry.isDirectory()) {
                Files.createDirectories(outPath);
                continue;
            }
            if (entry.isSymbolicLink() || entry.isLink()) {
                // Skip symlinks to keep Windows compatibility
                continue;
            }
            Files.createDirectories(outPath.getParent());
            Files.copy(tis, outPath, StandardCopyOption.REPLACE_EXISTING);
            extracted++;
        }
    }
    return extracted;
}
/**
 * @return {@code true} when the {@code os.name} system property contains
 *         "linux" (case-insensitive); {@code false} otherwise or when unset
 */
private boolean isLinux() {
    // Locale.ROOT: under a Turkish default locale "LINUX".toLowerCase()
    // yields a dotless i and would never match "linux".
    String os = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT);
    return os.contains("linux");
}
/**
 * Best-effort: mark {@code file} executable for all users if it exists.
 * Failures are logged as warnings and never thrown.
 *
 * <p>Uses {@code File.setExecutable(true, false)} rather than spawning an
 * external {@code chmod +x}: the spawned process was never waited on or
 * closed, so its handles leaked and its failure went unnoticed.
 *
 * @param file path to the binary; ignored when it does not exist
 */
private void makeExecutable(Path file) {
    if (!Files.exists(file)) {
        return;
    }
    try {
        // ownerOnly=false -> executable for owner, group and others.
        boolean changed = file.toFile().setExecutable(true, false);
        if (!changed) {
            log.warn("无法设置可执行权限: {}", file);
        }
    } catch (Exception e) {
        log.warn("无法设置可执行权限: {}", file, e);
    }
}
}

View File

@ -0,0 +1,424 @@
package com.chinaweal.youfool.reportdetect.modules.ocr.engine;
import com.chinaweal.youfool.reportdetect.common.utils.ResourceExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.stereotype.Component;
import javax.annotation.PreDestroy;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* Manages Flask OCR API process lifecycle
* - Auto-starts Flask on application startup
* - Monitors Flask health
* - Gracefully shuts down on application exit
*/
@Component
public class FlaskProcessManager implements ApplicationListener<ContextRefreshedEvent> {
// Class-wide SLF4J logger.
private static final Logger log = LoggerFactory.getLogger(FlaskProcessManager.class);
// Master switch for managing the Flask process (app.ocr.flask.enabled, default true).
@Value("${app.ocr.flask.enabled:true}")
private boolean flaskEnabled;
// Host passed to Flask via HOST and used for health checks (default 127.0.0.1).
@Value("${app.ocr.flask.host:127.0.0.1}")
private String flaskHost;
// Port passed to Flask via PORT and used for health checks (default 8081).
@Value("${app.ocr.flask.port:8081}")
private int flaskPort;
// Maximum seconds waitForFlaskReady() polls the health endpoint (default 60).
@Value("${app.ocr.flask.startup-timeout:60}")
private int startupTimeoutSeconds;
// When true, PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True is exported to the child process.
@Value("${app.ocr.flask.disable-model-source-check:true}")
private boolean disableModelSourceCheck;
// NOTE(review): not referenced by any method visible in this class; the
// embedded interpreter is always used. Confirm whether it is still needed.
@Value("${app.ocr.python-command:python}")
private String pythonCommand;
// Root directory of extracted resources, including python-runtime (default ./ocr-resources).
@Value("${app.ocr.resource-dir:./ocr-resources}")
private String resourceDir;
// Directory of OCR models, exported as PADDLEOCR_HOME and HUB_HOME (default ./models).
@Value("${app.ocr.models-dir:./models}")
private String modelsDir;
// Extracts the runtime, venv, models and scripts before Flask is launched.
@Autowired
private ResourceExtractor resourceExtractor;
// Handle of the running Flask child process, or null before the first start.
// volatile: written under the instance lock in startFlaskProcess() but read
// without it by isFlaskReady(), stopFlaskProcess() and the log-reader thread.
private volatile Process flaskProcess;
// True once the health endpoint has answered 200 for the current process.
// volatile for the same cross-thread visibility reason.
private volatile boolean flaskReady = false;
/**
 * Context-refresh hook: launches Flask when management is enabled,
 * otherwise only logs that it is disabled.
 *
 * @param event the refresh event (unused)
 */
@Override
public void onApplicationEvent(ContextRefreshedEvent event) {
    if (!flaskEnabled) {
        log.info("Flask process management is disabled");
        return;
    }
    startFlaskProcess();
}
/**
 * Start Flask API server with embedded Python (offline mode).
 *
 * <p>Steps: extract all offline resources, resolve the embedded interpreter
 * (system Python is never used), locate the API script, build the child
 * environment, launch the process, attach the log reader and wait for the
 * health check. Failures are logged and never thrown.
 *
 * <p>{@code flaskReady} is cleared before launching. A {@code true} left over
 * from a previous process that crashed would otherwise make a failed start
 * look successful to this method and to {@code ensureFlaskRunning()}.
 */
public synchronized void startFlaskProcess() {
    if (!flaskEnabled) {
        log.info("Flask process management is disabled");
        return;
    }
    if (flaskProcess != null && flaskProcess.isAlive()) {
        log.info("Flask OCR API server is already running");
        return;
    }
    log.info("Starting Flask OCR API server...");
    flaskReady = false;
    try {
        // Extract all resources for offline mode
        resourceExtractor.extractAllResources();

        // Require embedded Python (Linux + Windows layouts)
        Path embeddedPython = resolveEmbeddedPython();
        if (embeddedPython == null) {
            log.error("Embedded Python not found. Refusing to start Flask with system Python.");
            log.error("Expected embedded runtime under: {}/python-runtime/python", resourceDir);
            return;
        }
        // Absolute path: independent of the child's working directory.
        String pythonExecutable = embeddedPython.toAbsolutePath().normalize().toString();
        log.info("Using embedded Python: {}", pythonExecutable);

        Path apiScriptPath = locateFlaskApiScript();
        if (apiScriptPath == null) {
            return;
        }

        // Build command
        List<String> command = new ArrayList<>();
        command.add(pythonExecutable);
        command.add(apiScriptPath.toAbsolutePath().toString());

        ProcessBuilder pb = new ProcessBuilder(command);
        // Set working directory to project root (where python_api is located)
        pb.directory(new File("."));
        applyFlaskEnvironment(pb.environment());
        pb.redirectErrorStream(true);

        log.info("Starting Flask with embedded Python: {}", pythonExecutable);
        log.info("Flask will listen on: http://{}:{}", flaskHost, flaskPort);

        flaskProcess = pb.start();
        startFlaskLogReader();
        waitForFlaskReady();

        if (flaskReady) {
            log.info("✓ Flask OCR API server started successfully (offline mode)");
        } else if (!flaskProcess.isAlive()) {
            log.error("Flask OCR API server exited during startup");
        } else {
            log.warn("Flask OCR API server not ready yet; keeping process running for late readiness");
        }
    } catch (Exception e) {
        log.error("Failed to start Flask process (offline mode)", e);
    }
}

/**
 * Finds the Flask API script, in priority order: project python_api directory
 * (development), resourceDir/python-api, then the root of resourceDir.
 * Logs the searched locations and returns null when none exists.
 */
private Path locateFlaskApiScript() {
    Path[] candidates = {
        Paths.get("./python_api/ocr_api_server.py"),
        Paths.get(resourceDir, "python-api/ocr_api_server.py"),
        Paths.get(resourceDir, "ocr_api_server.py")
    };
    for (Path candidate : candidates) {
        if (Files.exists(candidate)) {
            log.info("Found Flask API script at: {}", candidate);
            return candidate;
        }
    }
    log.error("Flask API script not found at any of the following locations:");
    log.error(" 1. ./python_api/ocr_api_server.py");
    log.error(" 2. {}/python-api/ocr_api_server.py", resourceDir);
    log.error(" 3. {}/ocr_api_server.py", resourceDir);
    return null;
}

/** Populates the child-process environment: encoding, bundled libraries, models, host and port. */
private void applyFlaskEnvironment(Map<String, String> env) {
    env.put("PYTHONIOENCODING", "utf-8");
    env.put("PYTHONUNBUFFERED", "1");
    // Note: Don't set PYTHONNOUSERSITE=1 as it blocks loading system-installed Flask

    // Use bundled libraries if available (Linux/Windows). Absolute paths keep
    // working even if the Python process changes its working directory.
    Path libPath = resolveEmbeddedSitePackages();
    if (libPath != null) {
        env.put("PYTHONPATH", libPath.toAbsolutePath().normalize().toString());
    }
    // Prefer embedded Python home when using standalone runtime
    Path pythonHome = resolveEmbeddedPythonHome();
    if (pythonHome != null) {
        env.put("PYTHONHOME", pythonHome.toAbsolutePath().normalize().toString());
    }

    // Use bundled models
    Path modelsPath = Paths.get(modelsDir).toAbsolutePath();
    env.put("PADDLEOCR_HOME", modelsPath.toString());
    env.put("HUB_HOME", modelsPath.toString());
    if (disableModelSourceCheck) {
        env.put("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True");
    }

    // Flask configuration
    env.put("PORT", String.valueOf(flaskPort));
    env.put("HOST", flaskHost);
}
/**
 * Start a daemon thread that forwards the Flask process output, line by
 * line, to this class's logger with a {@code [Flask]} prefix.
 *
 * <p>The process handle is captured once, so a reader left over from an
 * earlier process does not consult a newer process after a restart. Output
 * is decoded as UTF-8 to match {@code PYTHONIOENCODING=utf-8} set on the
 * child. The platform default charset would garble non-ASCII log lines on
 * systems whose default is not UTF-8.
 */
private void startFlaskLogReader() {
    final Process process = flaskProcess;
    if (process == null) {
        return;
    }
    Thread logReader = new Thread(() -> {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(),
                        java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                log.info("[Flask] {}", line);
            }
        } catch (IOException e) {
            // A read error after the process has exited is expected; stay quiet.
            if (process.isAlive()) {
                log.warn("Error reading Flask logs", e);
            }
        }
    });
    logReader.setDaemon(true);
    logReader.setName("Flask-Log-Reader");
    logReader.start();
}
/**
 * Wait for Flask to be ready by polling {@code GET /health} every two
 * seconds until it answers 200, the process dies, the thread is interrupted,
 * or {@code startupTimeoutSeconds} elapses. Sets {@code flaskReady} to true
 * on success and leaves it untouched otherwise.
 */
private void waitForFlaskReady() {
    log.info("Waiting for Flask to be ready (timeout: {}s)", startupTimeoutSeconds);
    final Process process = flaskProcess;
    if (process == null) {
        log.error("Flask process terminated unexpectedly");
        return;
    }
    // Locale.ROOT keeps the port in ASCII digits whatever the default locale is.
    String healthUrl = String.format(java.util.Locale.ROOT,
            "http://%s:%d/health", flaskHost, flaskPort);
    long startTime = System.currentTimeMillis();
    long timeoutMillis = TimeUnit.SECONDS.toMillis(startupTimeoutSeconds);
    while (System.currentTimeMillis() - startTime < timeoutMillis) {
        if (!process.isAlive()) {
            log.error("Flask process terminated unexpectedly");
            return;
        }
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(healthUrl).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(2000);
            conn.setReadTimeout(2000);
            int responseCode = conn.getResponseCode();
            if (responseCode == 200) {
                log.info("Flask health check passed");
                flaskReady = true;
                return;
            }
        } catch (IOException e) {
            // Not ready yet, continue waiting
            log.debug("Flask not ready yet: {}", e.getMessage());
        } finally {
            // Release the connection after every poll instead of leaving it to GC.
            if (conn != null) {
                conn.disconnect();
            }
        }
        try {
            TimeUnit.SECONDS.sleep(2);
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            return;
        }
    }
    log.warn("Flask health check timeout after {}s", startupTimeoutSeconds);
}
/**
 * Stop Flask process gracefully: request termination, wait up to 10 seconds,
 * then force-kill and wait up to 5 more seconds.
 *
 * <p>{@code flaskReady} is always cleared, including when the process has
 * already died on its own, so a stale "ready" flag cannot outlive the process.
 */
@PreDestroy
public void stopFlaskProcess() {
    final Process process = flaskProcess;
    flaskReady = false;
    if (process == null || !process.isAlive()) {
        return;
    }
    log.info("Stopping Flask OCR API server...");
    // Try graceful shutdown first
    process.destroy();
    try {
        // Wait up to 10 seconds for process to terminate
        if (!process.waitFor(10, TimeUnit.SECONDS)) {
            log.warn("Flask did not stop gracefully, forcing termination");
            process.destroyForcibly();
            // Only report "stopped" once the forced kill has taken effect.
            if (!process.waitFor(5, TimeUnit.SECONDS)) {
                log.error("Flask process is still alive after forced termination");
                return;
            }
        }
        log.info("Flask OCR API server stopped");
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        log.error("Interrupted while waiting for Flask to stop", e);
    }
}
/**
 * Report whether Flask passed its health check and its process is still alive.
 *
 * @return {@code true} only when the ready flag is set, a process handle
 *         exists and that process is alive
 */
public boolean isFlaskReady() {
    if (!flaskReady) {
        return false;
    }
    if (flaskProcess == null) {
        return false;
    }
    return flaskProcess.isAlive();
}
/**
 * Get Flask base URL.
 *
 * @return {@code http://<flaskHost>:<flaskPort>} with no trailing slash
 */
public String getFlaskBaseUrl() {
    // Locale.ROOT: %d otherwise renders the port with the default locale's
    // digits, which is not a valid URL in locales with non-ASCII numerals.
    return String.format(java.util.Locale.ROOT, "http://%s:%d", flaskHost, flaskPort);
}
/**
 * Stop the Flask process, pause two seconds, then start it again. If the
 * pause is interrupted the interrupt flag is restored and the start still runs.
 */
public void restartFlask() {
    log.info("Restarting Flask OCR API server...");
    stopFlaskProcess();

    final long pauseMillis = TimeUnit.SECONDS.toMillis(2);
    try {
        Thread.sleep(pauseMillis);
    } catch (InterruptedException interrupted) {
        Thread.currentThread().interrupt();
    }

    startFlaskProcess();
}
/**
 * Make sure Flask is up: returns immediately when it is alive and ready,
 * waits for readiness when it is alive but not yet ready, and starts it
 * otherwise.
 *
 * @return the ready flag after the attempt; {@code false} when management is disabled
 */
public synchronized boolean ensureFlaskRunning() {
    if (!flaskEnabled) {
        log.info("Flask process management is disabled");
        return false;
    }

    final boolean processAlive = flaskProcess != null && flaskProcess.isAlive();
    if (processAlive) {
        if (!flaskReady) {
            waitForFlaskReady();
        }
        return flaskReady;
    }

    startFlaskProcess();
    return flaskReady;
}
/**
 * Locate the embedded Python interpreter under {@code resourceDir}.
 * Candidates are probed in a fixed priority order: the standalone runtime
 * first, then the bundled venv (Linux layouts), then Windows layouts.
 *
 * @return the first candidate that exists, or {@code null} if none does
 */
private Path resolveEmbeddedPython() {
    final String[] candidates = {
        // Standalone runtime
        "python-runtime/python/bin/python3.10",
        "python-runtime/python/bin/python3",
        "python-runtime/python/bin/python",
        // Venv interpreter (still embedded)
        "python-runtime/venv-offline/bin/python3.10",
        "python-runtime/venv-offline/bin/python3.11",
        "python-runtime/venv-offline/bin/python3.9",
        "python-runtime/venv-offline/bin/python3.8",
        "python-runtime/venv-offline/bin/python3",
        "python-runtime/venv-offline/bin/python",
        // Windows layouts
        "python-runtime/venv-offline/Scripts/python.exe",
        "python-runtime/python.exe"
    };
    for (String relative : candidates) {
        Path interpreter = Paths.get(resourceDir, relative);
        if (Files.exists(interpreter)) {
            return interpreter;
        }
    }
    return null;
}
/**
 * @return {@code resourceDir/python-runtime/python} when it is a directory
 *         (the value exported as PYTHONHOME), otherwise {@code null}
 */
private Path resolveEmbeddedPythonHome() {
    final Path candidate = Paths.get(resourceDir, "python-runtime/python");
    return Files.isDirectory(candidate) ? candidate : null;
}
/**
 * Locate the bundled venv's {@code site-packages} directory.
 * Linux layout: the first {@code lib/python*} child of the venv that has a
 * {@code site-packages} entry. Windows layout: {@code Lib/site-packages}.
 * A scan failure is logged and the Windows layout is still tried.
 *
 * @return the site-packages path, or {@code null} when neither layout exists
 */
private Path resolveEmbeddedSitePackages() {
    final Path linuxLibRoot = Paths.get(resourceDir, "python-runtime/venv-offline/lib");
    if (Files.isDirectory(linuxLibRoot)) {
        try (java.nio.file.DirectoryStream<Path> children = Files.newDirectoryStream(linuxLibRoot)) {
            for (Path child : children) {
                boolean pythonVersionDir = Files.isDirectory(child)
                        && child.getFileName().toString().startsWith("python");
                if (!pythonVersionDir) {
                    continue;
                }
                Path sitePackages = child.resolve("site-packages");
                if (Files.exists(sitePackages)) {
                    return sitePackages;
                }
            }
        } catch (IOException e) {
            log.warn("Failed to scan embedded site-packages: {}", e.getMessage());
        }
    }

    final Path windowsSitePackages =
            Paths.get(resourceDir, "python-runtime/venv-offline/Lib/site-packages");
    return Files.exists(windowsSitePackages) ? windowsSitePackages : null;
}
}