Force local PaddleOCR models for offline mode

This commit is contained in:
黄仁欢 2026-03-19 15:05:20 +08:00
parent 9ef41799c9
commit 926fa62798
2 changed files with 50 additions and 2 deletions

View File

@ -42,7 +42,14 @@ sys.path.insert(0, str(PROJECT_ROOT))
logger.info(f"项目根目录: {PROJECT_ROOT}")
# Force local model usage to avoid user cache / auto-download
# NOTE(review): MODEL_ROOT is assigned twice in this view and the second
# assignment is the one that takes effect. This first line looks like the
# pre-change version shown by the diff -- confirm against the real file.
MODEL_ROOT = Path(__file__).parent / "models"
# An explicitly exported location wins: PADDLEOCR_HOME first, then HUB_HOME.
# Because of the `or`, an empty PADDLEOCR_HOME falls through to HUB_HOME.
_env_model_root = os.environ.get("PADDLEOCR_HOME") or os.environ.get("HUB_HOME")
# With neither variable set (or both empty), use the "models" directory that
# sits next to this file.
MODEL_ROOT = Path(_env_model_root) if _env_model_root else (Path(__file__).parent / "models")
# setdefault only writes a key that is absent, so values exported by the
# caller are never overwritten (a variable that is set but empty stays empty).
os.environ.setdefault("PADDLEOCR_HOME", str(MODEL_ROOT))
os.environ.setdefault("HUB_HOME", str(MODEL_ROOT))
# Presumably read by PaddleOCR / PaddleX / PaddleHub to skip online
# model-source checks and update fetches -- TODO confirm each name against the
# installed library versions. The string "True" is the value stored.
os.environ.setdefault("DISABLE_MODEL_SOURCE_CHECK", "True")
os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")
os.environ.setdefault("HUB_DISABLE_MODEL_SOURCE_CHECK", "True")
os.environ.setdefault("PADDLEHUB_NO_FETCH_LATEST", "True")
def _find_model_dir(base: Path, *parts: str, contains: str, requires_inference: bool = False, recursive: bool = False):
@ -93,6 +100,10 @@ def init_models():
if vl_rec_dir:
logger.info(f"Using local VL model: {vl_rec_dir}")
if not det_dir or not rec_dir:
logger.error(f"Local OCR models not found under {MODEL_ROOT}. Offline mode requires local models.")
raise RuntimeError("Local OCR models not found for offline mode")
# 初始化 PaddleOCRVL
logger.info("=" * 60)
logger.info("正在初始化 PaddleOCRVL...")
@ -127,16 +138,29 @@ def init_models():
logger.info("正在初始化 PP-OCRv5...")
logger.info("=" * 60)
try:
use_onnx = False
if det_dir and (Path(det_dir) / "inference.onnx").exists():
use_onnx = True
if rec_dir and (Path(rec_dir) / "inference.onnx").exists():
use_onnx = True
if cls_dir and (Path(cls_dir) / "inference.onnx").exists():
use_onnx = True
ocr_kwargs = {
"use_textline_orientation": False,
"lang": "ch",
"use_doc_orientation_classify": False,
"use_doc_unwarping": False,
"use_onnx": use_onnx,
}
if det_dir:
ocr_kwargs["det_model_dir"] = str(det_dir)
ocr_kwargs["text_detection_model_dir"] = str(det_dir)
if rec_dir:
ocr_kwargs["rec_model_dir"] = str(rec_dir)
ocr_kwargs["text_recognition_model_dir"] = str(rec_dir)
if cls_dir:
ocr_kwargs["cls_model_dir"] = str(cls_dir)
ocr_pipeline = PaddleOCR(**ocr_kwargs)
logger.info("PP-OCRv5 初始化成功")
except Exception as e:

View File

@ -41,7 +41,14 @@ sys.path.insert(0, str(PROJECT_ROOT))
logger.info(f"项目根目录: {PROJECT_ROOT}")
# Force local model usage to avoid user cache / auto-download
# NOTE(review): MODEL_ROOT is assigned twice in this view and the second
# assignment is the one that takes effect. This first line looks like the
# pre-change version shown by the diff -- confirm against the real file.
MODEL_ROOT = Path(__file__).parent / "models"
# An explicitly exported location wins: PADDLEOCR_HOME first, then HUB_HOME.
# Because of the `or`, an empty PADDLEOCR_HOME falls through to HUB_HOME.
_env_model_root = os.environ.get("PADDLEOCR_HOME") or os.environ.get("HUB_HOME")
# With neither variable set (or both empty), use the "models" directory that
# sits next to this file.
MODEL_ROOT = Path(_env_model_root) if _env_model_root else (Path(__file__).parent / "models")
# setdefault only writes a key that is absent, so values exported by the
# caller are never overwritten (a variable that is set but empty stays empty).
os.environ.setdefault("PADDLEOCR_HOME", str(MODEL_ROOT))
os.environ.setdefault("HUB_HOME", str(MODEL_ROOT))
# Presumably read by PaddleOCR / PaddleX / PaddleHub to skip online
# model-source checks and update fetches -- TODO confirm each name against the
# installed library versions. The string "True" is the value stored.
os.environ.setdefault("DISABLE_MODEL_SOURCE_CHECK", "True")
os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")
os.environ.setdefault("HUB_DISABLE_MODEL_SOURCE_CHECK", "True")
os.environ.setdefault("PADDLEHUB_NO_FETCH_LATEST", "True")
def _find_model_dir(base: Path, *parts: str, contains: str, requires_inference: bool = False, recursive: bool = False):
@ -92,6 +99,10 @@ def init_models():
if vl_rec_dir:
logger.info(f"Using local VL model: {vl_rec_dir}")
if not det_dir or not rec_dir:
logger.error(f"Local OCR models not found under {MODEL_ROOT}. Offline mode requires local models.")
raise RuntimeError("Local OCR models not found for offline mode")
# 初始化 PaddleOCRVL
logger.info("=" * 60)
logger.info("正在初始化 PaddleOCRVL...")
@ -126,16 +137,29 @@ def init_models():
logger.info("正在初始化 PP-OCRv5...")
logger.info("=" * 60)
try:
use_onnx = False
if det_dir and (Path(det_dir) / "inference.onnx").exists():
use_onnx = True
if rec_dir and (Path(rec_dir) / "inference.onnx").exists():
use_onnx = True
if cls_dir and (Path(cls_dir) / "inference.onnx").exists():
use_onnx = True
ocr_kwargs = {
"use_textline_orientation": False,
"lang": "ch",
"use_doc_orientation_classify": False,
"use_doc_unwarping": False,
"use_onnx": use_onnx,
}
if det_dir:
ocr_kwargs["det_model_dir"] = str(det_dir)
ocr_kwargs["text_detection_model_dir"] = str(det_dir)
if rec_dir:
ocr_kwargs["rec_model_dir"] = str(rec_dir)
ocr_kwargs["text_recognition_model_dir"] = str(rec_dir)
if cls_dir:
ocr_kwargs["cls_model_dir"] = str(cls_dir)
ocr_pipeline = PaddleOCR(**ocr_kwargs)
logger.info("PP-OCRv5 初始化成功")
except Exception as e: