Gracefully handle missing PaddleOCRVL

This commit is contained in:
黄仁欢 2026-03-19 15:02:01 +08:00
parent b5baaa38c3
commit 9ef41799c9
2 changed files with 74 additions and 54 deletions

View File

@@ -16,7 +16,11 @@ import logging
import traceback
from pathlib import Path
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR, PaddleOCRVL
from paddleocr import PaddleOCR
try:
from paddleocr import PaddleOCRVL # type: ignore
except Exception:
PaddleOCRVL = None
# 配置日志
logging.basicConfig(
@@ -93,6 +97,10 @@ def init_models():
logger.info("=" * 60)
logger.info("正在初始化 PaddleOCRVL...")
logger.info("=" * 60)
if PaddleOCRVL is None:
logger.warning("PaddleOCRVL not available in installed paddleocr. Skipping VL pipeline.")
vl_pipeline = None
else:
try:
vl_kwargs = {
"use_seal_recognition": True,
@@ -220,10 +228,11 @@ def ocr_pdf():
# 处理 PDF(传递 verbose 参数)
try:
ocr_model = 'paddleocr_vl' if vl_pipeline else 'ppocr_v5'
result = process_single_pdf_standalone(
Path(pdf_path),
Path(output_dir),
ocr_model='paddleocr_vl',
ocr_model=ocr_model,
vl_pipeline=vl_pipeline,
verbose=verbose # 新增传递verbose参数
)
@@ -298,12 +307,13 @@ def ocr_image():
return jsonify({'success': False, 'error': f'Image not found: {image_path}'}), 404
logger.info(f"处理图像: {image_path}")
# 使用 PaddleOCRVL 识别
if not vl_pipeline:
return jsonify({'success': False, 'error': 'PaddleOCRVL not initialized'}), 500
# Use PaddleOCRVL if available, otherwise fallback to PaddleOCR
if vl_pipeline:
result = vl_pipeline.ocr(image_path)
else:
if not ocr_pipeline:
return jsonify({'success': False, 'error': 'OCR pipeline not initialized'}), 500
result = ocr_pipeline.ocr(image_path)
# 提取文本
texts = []

View File

@@ -15,7 +15,11 @@ import json
import logging
from pathlib import Path
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR, PaddleOCRVL
from paddleocr import PaddleOCR
try:
from paddleocr import PaddleOCRVL # type: ignore
except Exception:
PaddleOCRVL = None
# 配置日志
logging.basicConfig(
@@ -92,6 +96,10 @@ def init_models():
logger.info("=" * 60)
logger.info("正在初始化 PaddleOCRVL...")
logger.info("=" * 60)
if PaddleOCRVL is None:
logger.warning("PaddleOCRVL not available in installed paddleocr. Skipping VL pipeline.")
vl_pipeline = None
else:
try:
vl_kwargs = {
"use_seal_recognition": True,
@@ -219,10 +227,11 @@ def ocr_pdf():
# 处理 PDF(传递 verbose 参数)
try:
ocr_model = 'paddleocr_vl' if vl_pipeline else 'ppocr_v5'
result = process_single_pdf_standalone(
Path(pdf_path),
Path(output_dir),
ocr_model='paddleocr_vl',
ocr_model=ocr_model,
vl_pipeline=vl_pipeline,
verbose=verbose # 新增传递verbose参数
)
@@ -280,12 +289,13 @@ def ocr_image():
return jsonify({'success': False, 'error': f'Image not found: {image_path}'}), 404
logger.info(f"处理图像: {image_path}")
# 使用 PaddleOCRVL 识别
if not vl_pipeline:
return jsonify({'success': False, 'error': 'PaddleOCRVL not initialized'}), 500
# Use PaddleOCRVL if available, otherwise fallback to PaddleOCR
if vl_pipeline:
result = vl_pipeline.ocr(image_path)
else:
if not ocr_pipeline:
return jsonify({'success': False, 'error': 'OCR pipeline not initialized'}), 500
result = ocr_pipeline.ocr(image_path)
# 提取文本
texts = []