Gracefully handle missing PaddleOCRVL
This commit is contained in:
parent
b5baaa38c3
commit
9ef41799c9
|
|
@ -16,7 +16,11 @@ import logging
|
||||||
import traceback
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from flask import Flask, request, jsonify
|
from flask import Flask, request, jsonify
|
||||||
from paddleocr import PaddleOCR, PaddleOCRVL
|
from paddleocr import PaddleOCR
|
||||||
|
try:
|
||||||
|
from paddleocr import PaddleOCRVL # type: ignore
|
||||||
|
except Exception:
|
||||||
|
PaddleOCRVL = None
|
||||||
|
|
||||||
# 配置日志
|
# 配置日志
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
|
|
@ -93,6 +97,10 @@ def init_models():
|
||||||
logger.info("=" * 60)
|
logger.info("=" * 60)
|
||||||
logger.info("正在初始化 PaddleOCRVL...")
|
logger.info("正在初始化 PaddleOCRVL...")
|
||||||
logger.info("=" * 60)
|
logger.info("=" * 60)
|
||||||
|
if PaddleOCRVL is None:
|
||||||
|
logger.warning("PaddleOCRVL not available in installed paddleocr. Skipping VL pipeline.")
|
||||||
|
vl_pipeline = None
|
||||||
|
else:
|
||||||
try:
|
try:
|
||||||
vl_kwargs = {
|
vl_kwargs = {
|
||||||
"use_seal_recognition": True,
|
"use_seal_recognition": True,
|
||||||
|
|
@ -220,10 +228,11 @@ def ocr_pdf():
|
||||||
|
|
||||||
# 处理 PDF(传递 verbose 参数)
|
# 处理 PDF(传递 verbose 参数)
|
||||||
try:
|
try:
|
||||||
|
ocr_model = 'paddleocr_vl' if vl_pipeline else 'ppocr_v5'
|
||||||
result = process_single_pdf_standalone(
|
result = process_single_pdf_standalone(
|
||||||
Path(pdf_path),
|
Path(pdf_path),
|
||||||
Path(output_dir),
|
Path(output_dir),
|
||||||
ocr_model='paddleocr_vl',
|
ocr_model=ocr_model,
|
||||||
vl_pipeline=vl_pipeline,
|
vl_pipeline=vl_pipeline,
|
||||||
verbose=verbose # 新增:传递verbose参数
|
verbose=verbose # 新增:传递verbose参数
|
||||||
)
|
)
|
||||||
|
|
@ -298,12 +307,13 @@ def ocr_image():
|
||||||
return jsonify({'success': False, 'error': f'Image not found: {image_path}'}), 404
|
return jsonify({'success': False, 'error': f'Image not found: {image_path}'}), 404
|
||||||
|
|
||||||
logger.info(f"处理图像: {image_path}")
|
logger.info(f"处理图像: {image_path}")
|
||||||
|
# Use PaddleOCRVL if available, otherwise fall back to PaddleOCR
|
||||||
# 使用 PaddleOCRVL 识别
|
if vl_pipeline:
|
||||||
if not vl_pipeline:
|
|
||||||
return jsonify({'success': False, 'error': 'PaddleOCRVL not initialized'}), 500
|
|
||||||
|
|
||||||
result = vl_pipeline.ocr(image_path)
|
result = vl_pipeline.ocr(image_path)
|
||||||
|
else:
|
||||||
|
if not ocr_pipeline:
|
||||||
|
return jsonify({'success': False, 'error': 'OCR pipeline not initialized'}), 500
|
||||||
|
result = ocr_pipeline.ocr(image_path)
|
||||||
|
|
||||||
# 提取文本
|
# 提取文本
|
||||||
texts = []
|
texts = []
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,11 @@ import json
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from flask import Flask, request, jsonify
|
from flask import Flask, request, jsonify
|
||||||
from paddleocr import PaddleOCR, PaddleOCRVL
|
from paddleocr import PaddleOCR
|
||||||
|
try:
|
||||||
|
from paddleocr import PaddleOCRVL # type: ignore
|
||||||
|
except Exception:
|
||||||
|
PaddleOCRVL = None
|
||||||
|
|
||||||
# 配置日志
|
# 配置日志
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
|
|
@ -92,6 +96,10 @@ def init_models():
|
||||||
logger.info("=" * 60)
|
logger.info("=" * 60)
|
||||||
logger.info("正在初始化 PaddleOCRVL...")
|
logger.info("正在初始化 PaddleOCRVL...")
|
||||||
logger.info("=" * 60)
|
logger.info("=" * 60)
|
||||||
|
if PaddleOCRVL is None:
|
||||||
|
logger.warning("PaddleOCRVL not available in installed paddleocr. Skipping VL pipeline.")
|
||||||
|
vl_pipeline = None
|
||||||
|
else:
|
||||||
try:
|
try:
|
||||||
vl_kwargs = {
|
vl_kwargs = {
|
||||||
"use_seal_recognition": True,
|
"use_seal_recognition": True,
|
||||||
|
|
@ -219,10 +227,11 @@ def ocr_pdf():
|
||||||
|
|
||||||
# 处理 PDF(传递 verbose 参数)
|
# 处理 PDF(传递 verbose 参数)
|
||||||
try:
|
try:
|
||||||
|
ocr_model = 'paddleocr_vl' if vl_pipeline else 'ppocr_v5'
|
||||||
result = process_single_pdf_standalone(
|
result = process_single_pdf_standalone(
|
||||||
Path(pdf_path),
|
Path(pdf_path),
|
||||||
Path(output_dir),
|
Path(output_dir),
|
||||||
ocr_model='paddleocr_vl',
|
ocr_model=ocr_model,
|
||||||
vl_pipeline=vl_pipeline,
|
vl_pipeline=vl_pipeline,
|
||||||
verbose=verbose # 新增:传递verbose参数
|
verbose=verbose # 新增:传递verbose参数
|
||||||
)
|
)
|
||||||
|
|
@ -280,12 +289,13 @@ def ocr_image():
|
||||||
return jsonify({'success': False, 'error': f'Image not found: {image_path}'}), 404
|
return jsonify({'success': False, 'error': f'Image not found: {image_path}'}), 404
|
||||||
|
|
||||||
logger.info(f"处理图像: {image_path}")
|
logger.info(f"处理图像: {image_path}")
|
||||||
|
# Use PaddleOCRVL if available, otherwise fall back to PaddleOCR
|
||||||
# 使用 PaddleOCRVL 识别
|
if vl_pipeline:
|
||||||
if not vl_pipeline:
|
|
||||||
return jsonify({'success': False, 'error': 'PaddleOCRVL not initialized'}), 500
|
|
||||||
|
|
||||||
result = vl_pipeline.ocr(image_path)
|
result = vl_pipeline.ocr(image_path)
|
||||||
|
else:
|
||||||
|
if not ocr_pipeline:
|
||||||
|
return jsonify({'success': False, 'error': 'OCR pipeline not initialized'}), 500
|
||||||
|
result = ocr_pipeline.ocr(image_path)
|
||||||
|
|
||||||
# 提取文本
|
# 提取文本
|
||||||
texts = []
|
texts = []
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue