report-detect/python_api/ocr_api_server.py

371 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
PaddleOCR REST API Server
用于 Java 后端调用的 OCR 服务
功能:
- 封装 PaddleOCRVL 和 PP-OCRv5
- 提供 PDF 处理接口
- 提供图像识别接口
- 健康检查接口
"""
import os
import sys
import json
import logging
import traceback
from pathlib import Path
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
try:
from paddleocr import PaddleOCRVL # type: ignore
except Exception:
PaddleOCRVL = None
# Configure logging: INFO level, with timestamp, logger name and level on every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
app = Flask(__name__)
# Global model instances; both stay None until init_models() assigns them.
vl_pipeline = None
ocr_pipeline = None
# Add the project root (the parent of this file's directory) to the Python path.
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
logger.info(f"项目根目录: {PROJECT_ROOT}")
# Force local model usage to avoid user cache / auto-download
MODEL_ROOT = Path(__file__).parent / "models"
def _find_model_dir(base: Path, *parts: str, contains: str, requires_inference: bool = False, recursive: bool = False):
    """Find the first model directory whose name contains a given fragment.

    Args:
        base: Directory the search starts from.
        *parts: Optional sub-path components appended to ``base`` before searching.
        contains: Name fragment to look for; matched case-insensitively against
            the directory name only (not the full path).
        requires_inference: When True, a directory only qualifies if it holds
            an ``inference.yml`` file.
        recursive: When True, search the whole tree below the root; otherwise
            only its direct children.

    Returns:
        The matching directory as a ``Path``, or ``None`` when the search root
        does not exist, nothing matches, or the filesystem lookup fails.
    """
    root = base.joinpath(*parts)
    needle = contains.lower()
    try:
        if not root.exists():
            return None
        # Filesystem iteration order is arbitrary; sorting makes the chosen
        # directory deterministic when several candidates match.
        candidates = sorted(root.rglob("*") if recursive else root.iterdir())
        for candidate in candidates:
            if not candidate.is_dir() or needle not in candidate.name.lower():
                continue
            if requires_inference and not (candidate / "inference.yml").exists():
                continue
            return candidate
    except OSError as exc:
        # Best-effort lookup: report the problem instead of hiding it, and let
        # the caller carry on without a local model directory.
        logging.getLogger(__name__).warning("Model lookup under %s failed: %s", root, exc)
    return None
def init_models():
    """Create the global OCR pipelines, preferring model files found under MODEL_ROOT.

    Assigns the module globals ``vl_pipeline`` (PaddleOCRVL) and
    ``ocr_pipeline`` (PaddleOCR). A pipeline that cannot be constructed is
    left as ``None`` and the failure is logged; nothing is raised to the caller.
    """
    global vl_pipeline, ocr_pipeline

    separator = "=" * 60

    def locate(fragment):
        # Recursive lookup that only accepts directories holding inference.yml.
        return _find_model_dir(
            MODEL_ROOT, contains=fragment, requires_inference=True, recursive=True
        )

    # Resolve local model paths (each is None when not present).
    det_dir = locate("pp-ocrv5_server_det")
    rec_dir = locate("pp-ocrv5_server_rec")
    # The cls directory is resolved and logged, but not passed to either pipeline below.
    cls_dir = _find_model_dir(MODEL_ROOT, "cls", contains="cls_infer")
    layout_dir = locate("pp-doclayoutv3")
    vl_rec_dir = locate("paddleocr-vl-1.5")

    resolved = (
        ("det", det_dir),
        ("rec", rec_dir),
        ("cls", cls_dir),
        ("layout", layout_dir),
        ("VL", vl_rec_dir),
    )
    for label, model_dir in resolved:
        if model_dir:
            logger.info(f"Using local {label} model: {model_dir}")

    # --- PaddleOCRVL ---
    logger.info(separator)
    logger.info("正在初始化 PaddleOCRVL...")
    logger.info(separator)
    if PaddleOCRVL is not None:
        try:
            vl_kwargs = dict(
                use_seal_recognition=True,
                use_ocr_for_image_block=True,
                use_layout_detection=True,
                use_doc_orientation_classify=False,
                use_doc_unwarping=False,
            )
            if layout_dir:
                vl_kwargs.update(
                    layout_detection_model_dir=str(layout_dir),
                    layout_detection_model_name="PP-DocLayoutV3",
                )
            if vl_rec_dir:
                vl_kwargs.update(
                    vl_rec_model_dir=str(vl_rec_dir),
                    vl_rec_model_name="PaddleOCR-VL-1.5-0.9B",
                )
            vl_pipeline = PaddleOCRVL(**vl_kwargs)
            logger.info("✅ PaddleOCRVL 初始化成功")
        except Exception as e:
            logger.error(f"❌ PaddleOCRVL 初始化失败: {e}", exc_info=True)
            vl_pipeline = None
    else:
        # The optional import at the top of the file failed.
        logger.warning("PaddleOCRVL not available in installed paddleocr. Skipping VL pipeline.")
        vl_pipeline = None

    # --- PP-OCRv5 ---
    logger.info(separator)
    logger.info("正在初始化 PP-OCRv5...")
    logger.info(separator)
    try:
        ocr_kwargs = dict(
            use_textline_orientation=False,
            lang="ch",
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
        )
        if det_dir:
            ocr_kwargs["text_detection_model_dir"] = str(det_dir)
        if rec_dir:
            ocr_kwargs["text_recognition_model_dir"] = str(rec_dir)
        ocr_pipeline = PaddleOCR(**ocr_kwargs)
        logger.info("PP-OCRv5 初始化成功")
    except Exception as e:
        logger.error(f"PP-OCRv5 初始化失败: {e}", exc_info=True)
        ocr_pipeline = None

    # Final availability summary.
    vl_state = '✅ 可用' if vl_pipeline else '❌ 不可用'
    ocr_state = '✅ 可用' if ocr_pipeline else '❌ 不可用'
    logger.info(separator)
    logger.info("模型初始化完成")
    logger.info(f"PaddleOCRVL: {vl_state}")
    logger.info(f"PP-OCRv5: {ocr_state}")
    logger.info(separator)
@app.route('/health', methods=['GET'])
def health():
    """Health-check endpoint: report which pipelines are loaded and the project root."""
    payload = {
        'status': 'ok',
        'vl_model': vl_pipeline is not None,
        'ocr_model': ocr_pipeline is not None,
        'project_root': str(PROJECT_ROOT),
    }
    return jsonify(payload)
def _normalize_pdf_result(result):
    """Add the flat fields used for Java client compatibility to ``result`` in place.

    Copies ``cma.code`` / ``cma.confidence`` to ``cma_code`` / ``confidence``
    and the first entry of ``institutions`` to ``institution_name``, without
    overwriting keys that are already present. Returns the same dict.
    """
    if "cma_code" not in result:
        cma_obj = result.get("cma") or {}
        if isinstance(cma_obj, dict):
            result["cma_code"] = cma_obj.get("code")
            if "confidence" not in result and cma_obj.get("confidence") is not None:
                result["confidence"] = cma_obj.get("confidence")
    if "institution_name" not in result:
        insts = result.get("institutions") or []
        if isinstance(insts, list) and len(insts) > 0:
            result["institution_name"] = insts[0]
    return result


def _log_pdf_result(result):
    """Log the CMA code and the institution summary of a processed-PDF result dict."""
    cma = result.get('cma')
    if cma and isinstance(cma, dict):
        logger.info(f" CMA: {cma.get('code', 'N/A')}")
    # `or []` also covers an explicit None value, which len() would reject.
    institutions = result.get('institutions') or []
    if not isinstance(institutions, (list, tuple)):
        institutions = []
    logger.info(f" 机构数: {len(institutions)}")
    if institutions:
        logger.info(f" 机构: {institutions[0]}")


@app.route('/api/ocr/pdf', methods=['POST'])
def ocr_pdf():
    """
    Handle an OCR request for a PDF file.

    Request JSON:
    {
        "pdf_path": "/path/to/file.pdf",
        "output_dir": "/path/to/output",
        "verbose": false  // optional, enables detailed output
    }

    Response JSON:
    {
        "success": true,
        "cma": {
            "code": "2023000001",
            "confidence": 0.95,
            "method": "template_matching"
        },
        "institutions": ["威凯检测技术有限公司"],
        "error": null,
        // only included when verbose=true:
        "steps": { ... },
        "performance": { ... }
    }

    Status codes: 400 for a missing/invalid JSON body or missing ``pdf_path``,
    404 when the PDF does not exist, 500 when the processing module cannot be
    imported or processing fails.
    """
    try:
        # silent=True makes malformed or non-JSON bodies come back as None, so
        # they are answered with 400 here instead of raising an HTTP exception
        # that the outer handler would turn into a 500.
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'Invalid JSON'}), 400
        pdf_path = data.get('pdf_path')
        output_dir = data.get('output_dir', 'output')
        verbose = data.get('verbose', False)
        if not pdf_path or not isinstance(pdf_path, str):
            return jsonify({'success': False, 'error': 'pdf_path is required'}), 400
        # NOTE(review): pdf_path / output_dir come straight from the request
        # and are used as filesystem paths; confirm this API is only reachable
        # by the trusted backend.
        if not os.path.exists(pdf_path):
            return jsonify({
                'success': False,
                'error': f'PDF file not found: {pdf_path}'
            }), 404
        logger.info("=" * 60)
        logger.info(f"处理 PDF: {pdf_path}")
        logger.info(f"输出目录: {output_dir}")
        logger.info(f"Verbose模式: {'启用' if verbose else '禁用'}")
        logger.info("=" * 60)
        # Create the output directory.
        os.makedirs(output_dir, exist_ok=True)
        # Import the processing logic (from test_accuracy_batch_full.py).
        try:
            from test_accuracy_batch_full import process_single_pdf_standalone
        except ImportError as e:
            logger.error(f"无法导入 test_accuracy_batch_full: {e}")
            return jsonify({
                'success': False,
                'error': f'Cannot import test_accuracy_batch_full: {e}'
            }), 500
        # Process the PDF (passing the verbose flag through).
        try:
            ocr_model = 'paddleocr_vl' if vl_pipeline else 'ppocr_v5'
            result = process_single_pdf_standalone(
                Path(pdf_path),
                Path(output_dir),
                ocr_model=ocr_model,
                vl_pipeline=vl_pipeline,
                verbose=verbose
            )
            # Normalize response fields for Java client compatibility
            if isinstance(result, dict):
                _normalize_pdf_result(result)
            logger.info("✅ 处理成功")
            # Summary logging is guarded so that an unexpected result shape
            # cannot turn a successful run into a 500.
            if isinstance(result, dict):
                _log_pdf_result(result)
            logger.info("=" * 60)
            # Return the full result (includes verbose details when enabled).
            return jsonify(result)
        except Exception as e:
            tb = traceback.format_exc()
            logger.error(f"PDF 处理失败: {e}\n{tb}")
            # NOTE(review): the traceback is kept in the response for backward
            # compatibility; it exposes server internals to the caller.
            return jsonify({
                'success': False,
                'error': f'PDF processing failed: {str(e)}',
                'traceback': tb
            }), 500
    except Exception as e:
        tb = traceback.format_exc()
        logger.error(f"❌ 请求处理失败: {e}\n{tb}")
        return jsonify({
            'success': False,
            'error': str(e),
            'traceback': tb
        }), 500
def _extract_ocr_texts(result):
    """Collect recognized text strings from an OCR pipeline result.

    Handles the result layouts this server may receive:
    - dict-style page results exposing a ``rec_texts`` list;
    - dict-style page results exposing ``parsing_res_list`` blocks
      (NOTE(review): assumed PaddleOCRVL layout, with the text under a
      ``block_content`` key or a ``content`` attribute - TODO confirm);
    - legacy nested lists where each line is ``[box, (text, score)]`` or,
      for recognition-only runs, ``(text, score)``.

    Returns a list of strings; anything unrecognized is skipped.
    """
    texts = []
    for page in result or []:
        if not page:
            continue
        if isinstance(page, dict):
            rec_texts = page.get("rec_texts")
            if rec_texts:
                texts.extend(str(t) for t in rec_texts if t)
                continue
            for block in page.get("parsing_res_list") or []:
                if isinstance(block, dict):
                    content = block.get("block_content")
                else:
                    content = getattr(block, "content", None)
                if content:
                    texts.append(str(content))
            continue
        if not isinstance(page, (list, tuple)):
            continue
        for line in page:
            if not line or not isinstance(line, (list, tuple)):
                continue
            if isinstance(line[0], str):
                # Recognition-only layout: (text, score).
                texts.append(line[0])
            elif len(line) > 1 and line[1] and isinstance(line[1][0], str):
                # Detection + recognition layout: line[0] is the box, the text
                # lives in line[1][0].
                texts.append(line[1][0])
    return texts


@app.route('/api/ocr/image', methods=['POST'])
def ocr_image():
    """
    Handle an OCR request for a single image (used for seal recognition).

    Request JSON:
    {
        "image_path": "/path/to/image.png"
    }

    Response JSON:
    {
        "success": true,
        "text": "recognized text content"
    }

    Status codes: 400 for a missing/invalid JSON body or missing
    ``image_path``, 404 when the image does not exist, 500 when no pipeline is
    initialized or recognition fails.
    """
    try:
        # silent=True: a malformed or non-JSON body yields None and is answered
        # with 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'image_path is required'}), 400
        image_path = data.get('image_path')
        if not image_path or not isinstance(image_path, str):
            return jsonify({'success': False, 'error': 'image_path is required'}), 400
        if not os.path.exists(image_path):
            return jsonify({'success': False, 'error': f'Image not found: {image_path}'}), 404
        logger.info(f"处理图像: {image_path}")
        # Use PaddleOCRVL if available, otherwise fall back to PaddleOCR.
        pipeline = vl_pipeline or ocr_pipeline
        if not pipeline:
            return jsonify({'success': False, 'error': 'OCR pipeline not initialized'}), 500
        # Prefer predict() and fall back to ocr() for objects that lack it.
        # NOTE(review): PaddleOCRVL is believed to expose predict() only -
        # confirm against the installed paddleocr version.
        run_ocr = getattr(pipeline, "predict", None) or pipeline.ocr
        result = run_ocr(image_path)
        # Extract the recognized text.
        text = ' '.join(_extract_ocr_texts(result))
        logger.info(f"识别文本: {text}")
        return jsonify({
            'success': True,
            'text': text
        })
    except Exception as e:
        tb = traceback.format_exc()
        logger.error(f"❌ 图像识别失败: {e}\n{tb}")
        # NOTE(review): the traceback is kept in the response for backward
        # compatibility; it exposes server internals to the caller.
        return jsonify({'success': False, 'error': str(e), 'traceback': tb}), 500
@app.errorhandler(404)
def not_found(error):
    """Answer 404 errors with a JSON body instead of the default error page."""
    payload = {'success': False, 'error': 'Endpoint not found'}
    return jsonify(payload), 404
@app.errorhandler(500)
def internal_error(error):
    """Answer 500 errors with a JSON body instead of the default error page."""
    payload = {'success': False, 'error': 'Internal server error'}
    return jsonify(payload), 500
if __name__ == '__main__':
    # Load the models before the server starts accepting requests.
    init_models()
    # Bind address and port come from the environment, with defaults.
    host = os.getenv('HOST', '0.0.0.0')
    port = int(os.getenv('PORT', 8081))
    for message in (
        "=" * 60,
        "Flask OCR API 服务器启动",
        f"地址: http://{host}:{port}",
        "=" * 60,
    ):
        logger.info(message)
    # Start the service.
    app.run(host=host, port=port, threaded=True)