51 lines
1.4 KiB
Python
51 lines
1.4 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
OCR bridge script - cross-platform version.

Intended to be invoked from Java via ProcessBuilder.
"""
|
||
|
|
import sys
|
||
|
|
import os
|
||
|
|
import json
|
||
|
|
|
||
|
|
# Put the project root and its python_api/ sub-directory at the front of the
# module search path. python_api/ ends up first and the project root second,
# ahead of every pre-existing entry.
project_root = os.path.dirname(os.path.abspath(__file__))
sys.path[:0] = [os.path.join(project_root, 'python_api'), project_root]

# Must come after the sys.path changes above; presumably pdf_processor lives
# in one of those two directories -- confirm against the project layout.
from pdf_processor import process_pdf_standalone
|
||
|
|
|
||
|
|
def _emit(payload):
    """Print *payload* to stdout as one JSON document.

    ``ensure_ascii=False`` leaves non-ASCII characters unescaped in the
    output instead of turning them into ``\\uXXXX`` sequences.
    """
    # NOTE(review): because the text is not ASCII-escaped, whoever reads this
    # process's stdout must decode it with the encoding Python used for
    # stdout -- confirm on the calling side.
    print(json.dumps(payload, ensure_ascii=False))


def main():
    """Process one PDF and report the outcome as JSON on stdout.

    Command line::

        ocr_bridge_cross_platform.py <pdf_path> <output_dir>

    Output written by this function:

    * success -- ``{"success": true, "cma_code": ..., "institution_name": ...,
      "confidence": ...}``; fields missing from the processor result default
      to ``''``, ``''`` and ``0.0`` respectively.
    * failure -- ``{"success": false, "error": ...}``.

    Exit status: the process exits with status 1 on a usage error, when the
    processor reports failure, or when any ``Exception`` is raised while
    processing or serialising the result. On success the function simply
    returns.
    """
    # Both positional arguments are mandatory, as the usage text states. The
    # previous "output" fallback for a missing <output_dir> sat behind this
    # same guard and could never be reached, so it has been removed.
    if len(sys.argv) < 3:
        _emit({
            "success": False,
            "error": "Usage: ocr_bridge_cross_platform.py <pdf_path> <output_dir>",
        })
        sys.exit(1)

    pdf_path, output_dir = sys.argv[1], sys.argv[2]

    try:
        result = process_pdf_standalone(pdf_path, output_dir, ocr_model='paddleocr_vl')

        if not result.get('success'):
            _emit({
                "success": False,
                "error": result.get('error', 'Unknown error'),
            })
            # SystemExit is not an Exception subclass, so this is not
            # intercepted by the handler below.
            sys.exit(1)

        _emit({
            "success": True,
            "cma_code": result.get('cma_code', ''),
            "institution_name": result.get('institution_name', ''),
            "confidence": result.get('confidence', 0.0),
        })
    except Exception as exc:
        # Top-level boundary: any failure (including a result that is not a
        # mapping or is not JSON-serialisable) is turned into an error payload
        # so the caller still receives JSON rather than a traceback.
        _emit({
            "success": False,
            "error": str(exc),
        })
        sys.exit(1)
|
||
|
|
|
||
|
|
# Run the bridge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|