Force local PaddleOCR models for offline mode
This commit is contained in:
parent
9ef41799c9
commit
926fa62798
|
|
@ -42,7 +42,14 @@ sys.path.insert(0, str(PROJECT_ROOT))
|
|||
logger.info(f"项目根目录: {PROJECT_ROOT}")
|
||||
|
||||
# Force use of the local model directory so models are not taken from the user cache or auto-downloaded
|
||||
MODEL_ROOT = Path(__file__).parent / "models"
|
||||
_env_model_root = os.environ.get("PADDLEOCR_HOME") or os.environ.get("HUB_HOME")
|
||||
MODEL_ROOT = Path(_env_model_root) if _env_model_root else (Path(__file__).parent / "models")
|
||||
os.environ.setdefault("PADDLEOCR_HOME", str(MODEL_ROOT))
|
||||
os.environ.setdefault("HUB_HOME", str(MODEL_ROOT))
|
||||
os.environ.setdefault("DISABLE_MODEL_SOURCE_CHECK", "True")
|
||||
os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")
|
||||
os.environ.setdefault("HUB_DISABLE_MODEL_SOURCE_CHECK", "True")
|
||||
os.environ.setdefault("PADDLEHUB_NO_FETCH_LATEST", "True")
|
||||
|
||||
|
||||
def _find_model_dir(base: Path, *parts: str, contains: str, requires_inference: bool = False, recursive: bool = False):
|
||||
|
|
@ -93,6 +100,10 @@ def init_models():
|
|||
if vl_rec_dir:
|
||||
logger.info(f"Using local VL model: {vl_rec_dir}")
|
||||
|
||||
if not det_dir or not rec_dir:
|
||||
logger.error(f"Local OCR models not found under {MODEL_ROOT}. Offline mode requires local models.")
|
||||
raise RuntimeError("Local OCR models not found for offline mode")
|
||||
|
||||
# Initialize PaddleOCRVL
|
||||
logger.info("=" * 60)
|
||||
logger.info("正在初始化 PaddleOCRVL...")
|
||||
|
|
@ -127,16 +138,29 @@ def init_models():
|
|||
logger.info("正在初始化 PP-OCRv5...")
|
||||
logger.info("=" * 60)
|
||||
try:
|
||||
use_onnx = False
|
||||
if det_dir and (Path(det_dir) / "inference.onnx").exists():
|
||||
use_onnx = True
|
||||
if rec_dir and (Path(rec_dir) / "inference.onnx").exists():
|
||||
use_onnx = True
|
||||
if cls_dir and (Path(cls_dir) / "inference.onnx").exists():
|
||||
use_onnx = True
|
||||
|
||||
ocr_kwargs = {
|
||||
"use_textline_orientation": False,
|
||||
"lang": "ch",
|
||||
"use_doc_orientation_classify": False,
|
||||
"use_doc_unwarping": False,
|
||||
"use_onnx": use_onnx,
|
||||
}
|
||||
if det_dir:
|
||||
ocr_kwargs["det_model_dir"] = str(det_dir)
|
||||
ocr_kwargs["text_detection_model_dir"] = str(det_dir)
|
||||
if rec_dir:
|
||||
ocr_kwargs["rec_model_dir"] = str(rec_dir)
|
||||
ocr_kwargs["text_recognition_model_dir"] = str(rec_dir)
|
||||
if cls_dir:
|
||||
ocr_kwargs["cls_model_dir"] = str(cls_dir)
|
||||
ocr_pipeline = PaddleOCR(**ocr_kwargs)
|
||||
logger.info("PP-OCRv5 初始化成功")
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -41,7 +41,14 @@ sys.path.insert(0, str(PROJECT_ROOT))
|
|||
logger.info(f"项目根目录: {PROJECT_ROOT}")
|
||||
|
||||
# Force use of the local model directory so models are not taken from the user cache or auto-downloaded
|
||||
MODEL_ROOT = Path(__file__).parent / "models"
|
||||
_env_model_root = os.environ.get("PADDLEOCR_HOME") or os.environ.get("HUB_HOME")
|
||||
MODEL_ROOT = Path(_env_model_root) if _env_model_root else (Path(__file__).parent / "models")
|
||||
os.environ.setdefault("PADDLEOCR_HOME", str(MODEL_ROOT))
|
||||
os.environ.setdefault("HUB_HOME", str(MODEL_ROOT))
|
||||
os.environ.setdefault("DISABLE_MODEL_SOURCE_CHECK", "True")
|
||||
os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")
|
||||
os.environ.setdefault("HUB_DISABLE_MODEL_SOURCE_CHECK", "True")
|
||||
os.environ.setdefault("PADDLEHUB_NO_FETCH_LATEST", "True")
|
||||
|
||||
|
||||
def _find_model_dir(base: Path, *parts: str, contains: str, requires_inference: bool = False, recursive: bool = False):
|
||||
|
|
@ -92,6 +99,10 @@ def init_models():
|
|||
if vl_rec_dir:
|
||||
logger.info(f"Using local VL model: {vl_rec_dir}")
|
||||
|
||||
if not det_dir or not rec_dir:
|
||||
logger.error(f"Local OCR models not found under {MODEL_ROOT}. Offline mode requires local models.")
|
||||
raise RuntimeError("Local OCR models not found for offline mode")
|
||||
|
||||
# Initialize PaddleOCRVL
|
||||
logger.info("=" * 60)
|
||||
logger.info("正在初始化 PaddleOCRVL...")
|
||||
|
|
@ -126,16 +137,29 @@ def init_models():
|
|||
logger.info("正在初始化 PP-OCRv5...")
|
||||
logger.info("=" * 60)
|
||||
try:
|
||||
use_onnx = False
|
||||
if det_dir and (Path(det_dir) / "inference.onnx").exists():
|
||||
use_onnx = True
|
||||
if rec_dir and (Path(rec_dir) / "inference.onnx").exists():
|
||||
use_onnx = True
|
||||
if cls_dir and (Path(cls_dir) / "inference.onnx").exists():
|
||||
use_onnx = True
|
||||
|
||||
ocr_kwargs = {
|
||||
"use_textline_orientation": False,
|
||||
"lang": "ch",
|
||||
"use_doc_orientation_classify": False,
|
||||
"use_doc_unwarping": False,
|
||||
"use_onnx": use_onnx,
|
||||
}
|
||||
if det_dir:
|
||||
ocr_kwargs["det_model_dir"] = str(det_dir)
|
||||
ocr_kwargs["text_detection_model_dir"] = str(det_dir)
|
||||
if rec_dir:
|
||||
ocr_kwargs["rec_model_dir"] = str(rec_dir)
|
||||
ocr_kwargs["text_recognition_model_dir"] = str(rec_dir)
|
||||
if cls_dir:
|
||||
ocr_kwargs["cls_model_dir"] = str(cls_dir)
|
||||
ocr_pipeline = PaddleOCR(**ocr_kwargs)
|
||||
logger.info("PP-OCRv5 初始化成功")
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Reference in New Issue