# Review notes for this patch (leading text ahead of the first "diff --git" header).
#
# Scope: the same three changes are made to two copies of ocr_api_server.py,
# python_api/ and src/main/resources/python-api/.
#
# 1. Module level: MODEL_ROOT is taken from the PADDLEOCR_HOME environment
#    variable, then HUB_HOME, and only falls back to "<script dir>/models" when
#    neither is set to a non-empty value. The module then calls
#    os.environ.setdefault for PADDLEOCR_HOME, HUB_HOME and four switches
#    (DISABLE_MODEL_SOURCE_CHECK, PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK,
#    HUB_DISABLE_MODEL_SOURCE_CHECK, PADDLEHUB_NO_FETCH_LATEST), so values that
#    are already present in the environment are left untouched.
# 2. init_models(): logs an error and raises RuntimeError when det_dir or
#    rec_dir is falsy. In the hunk this check sits ahead of the PaddleOCRVL
#    initialisation section.
# 3. init_models(), PP-OCRv5 section: use_onnx becomes True when any one of
#    det_dir / rec_dir / cls_dir contains "inference.onnx", and is passed to
#    PaddleOCR(**ocr_kwargs). det_model_dir / rec_model_dir are added next to
#    the existing text_detection_model_dir / text_recognition_model_dir keys,
#    and cls_model_dir is added when cls_dir is set.
#
# NOTE(review): ocr_kwargs now carries both det_model_dir and
#   text_detection_model_dir (likewise for rec). PaddleOCR 3.x appears to treat
#   the short names as deprecated aliases and may reject the pair as mutually
#   exclusive - confirm against the installed PaddleOCR version.
# NOTE(review): confirm that the PaddleOCR constructor in use accepts a
#   "use_onnx" keyword; an unrecognised keyword would surface in the
#   surrounding "except Exception" handler rather than at import time.
# NOTE(review): use_onnx is an any-of test, so it is True even when only one
#   of the model directories holds an ONNX export - confirm that is intended.
# NOTE(review): cls_model_dir is supplied while "use_textline_orientation" is
#   False in the same dict - confirm the classifier directory is actually used.
# NOTE(review): because of where check 2 is placed, missing det/rec models stop
#   init_models() before the PaddleOCRVL section is reached - confirm intended.
# NOTE(review): the patch text below looks as if its line breaks were lost
#   (each line holds many diff lines); it is reproduced unchanged.
#
diff --git a/python_api/ocr_api_server.py b/python_api/ocr_api_server.py index 9ae032b..4ad9d02 100644 --- a/python_api/ocr_api_server.py +++ b/python_api/ocr_api_server.py @@ -42,7 +42,14 @@ sys.path.insert(0, str(PROJECT_ROOT)) logger.info(f"项目根目录: {PROJECT_ROOT}") # Force local model usage to avoid user cache / auto-download -MODEL_ROOT = Path(__file__).parent / "models" +_env_model_root = os.environ.get("PADDLEOCR_HOME") or os.environ.get("HUB_HOME") +MODEL_ROOT = Path(_env_model_root) if _env_model_root else (Path(__file__).parent / "models") +os.environ.setdefault("PADDLEOCR_HOME", str(MODEL_ROOT)) +os.environ.setdefault("HUB_HOME", str(MODEL_ROOT)) +os.environ.setdefault("DISABLE_MODEL_SOURCE_CHECK", "True") +os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True") +os.environ.setdefault("HUB_DISABLE_MODEL_SOURCE_CHECK", "True") +os.environ.setdefault("PADDLEHUB_NO_FETCH_LATEST", "True") def _find_model_dir(base: Path, *parts: str, contains: str, requires_inference: bool = False, recursive: bool = False): @@ -93,6 +100,10 @@ def init_models(): if vl_rec_dir: logger.info(f"Using local VL model: {vl_rec_dir}") + if not det_dir or not rec_dir: + logger.error(f"Local OCR models not found under {MODEL_ROOT}. 
Offline mode requires local models.") + raise RuntimeError("Local OCR models not found for offline mode") + # 初始化 PaddleOCRVL logger.info("=" * 60) logger.info("正在初始化 PaddleOCRVL...") @@ -127,16 +138,29 @@ def init_models(): logger.info("正在初始化 PP-OCRv5...") logger.info("=" * 60) try: + use_onnx = False + if det_dir and (Path(det_dir) / "inference.onnx").exists(): + use_onnx = True + if rec_dir and (Path(rec_dir) / "inference.onnx").exists(): + use_onnx = True + if cls_dir and (Path(cls_dir) / "inference.onnx").exists(): + use_onnx = True + ocr_kwargs = { "use_textline_orientation": False, "lang": "ch", "use_doc_orientation_classify": False, "use_doc_unwarping": False, + "use_onnx": use_onnx, } if det_dir: + ocr_kwargs["det_model_dir"] = str(det_dir) ocr_kwargs["text_detection_model_dir"] = str(det_dir) if rec_dir: + ocr_kwargs["rec_model_dir"] = str(rec_dir) ocr_kwargs["text_recognition_model_dir"] = str(rec_dir) + if cls_dir: + ocr_kwargs["cls_model_dir"] = str(cls_dir) ocr_pipeline = PaddleOCR(**ocr_kwargs) logger.info("PP-OCRv5 初始化成功") except Exception as e: diff --git a/src/main/resources/python-api/ocr_api_server.py b/src/main/resources/python-api/ocr_api_server.py index 226cb6d..3384235 100644 --- a/src/main/resources/python-api/ocr_api_server.py +++ b/src/main/resources/python-api/ocr_api_server.py @@ -41,7 +41,14 @@ sys.path.insert(0, str(PROJECT_ROOT)) logger.info(f"项目根目录: {PROJECT_ROOT}") # Force local model usage to avoid user cache / auto-download -MODEL_ROOT = Path(__file__).parent / "models" +_env_model_root = os.environ.get("PADDLEOCR_HOME") or os.environ.get("HUB_HOME") +MODEL_ROOT = Path(_env_model_root) if _env_model_root else (Path(__file__).parent / "models") +os.environ.setdefault("PADDLEOCR_HOME", str(MODEL_ROOT)) +os.environ.setdefault("HUB_HOME", str(MODEL_ROOT)) +os.environ.setdefault("DISABLE_MODEL_SOURCE_CHECK", "True") +os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True") 
+os.environ.setdefault("HUB_DISABLE_MODEL_SOURCE_CHECK", "True") +os.environ.setdefault("PADDLEHUB_NO_FETCH_LATEST", "True") def _find_model_dir(base: Path, *parts: str, contains: str, requires_inference: bool = False, recursive: bool = False): @@ -92,6 +99,10 @@ def init_models(): if vl_rec_dir: logger.info(f"Using local VL model: {vl_rec_dir}") + if not det_dir or not rec_dir: + logger.error(f"Local OCR models not found under {MODEL_ROOT}. Offline mode requires local models.") + raise RuntimeError("Local OCR models not found for offline mode") + # 初始化 PaddleOCRVL logger.info("=" * 60) logger.info("正在初始化 PaddleOCRVL...") @@ -126,16 +137,29 @@ def init_models(): logger.info("正在初始化 PP-OCRv5...") logger.info("=" * 60) try: + use_onnx = False + if det_dir and (Path(det_dir) / "inference.onnx").exists(): + use_onnx = True + if rec_dir and (Path(rec_dir) / "inference.onnx").exists(): + use_onnx = True + if cls_dir and (Path(cls_dir) / "inference.onnx").exists(): + use_onnx = True + ocr_kwargs = { "use_textline_orientation": False, "lang": "ch", "use_doc_orientation_classify": False, "use_doc_unwarping": False, + "use_onnx": use_onnx, } if det_dir: + ocr_kwargs["det_model_dir"] = str(det_dir) ocr_kwargs["text_detection_model_dir"] = str(det_dir) if rec_dir: + ocr_kwargs["rec_model_dir"] = str(rec_dir) ocr_kwargs["text_recognition_model_dir"] = str(rec_dir) + if cls_dir: + ocr_kwargs["cls_model_dir"] = str(cls_dir) ocr_pipeline = PaddleOCR(**ocr_kwargs) logger.info("PP-OCRv5 初始化成功") except Exception as e: