From 39fe636ff2deb5f10e0ab2f2a227b606f7151092 Mon Sep 17 00:00:00 2001 From: lroyia Date: Wed, 25 Feb 2026 10:33:28 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/example/glmocr/model/GlmOcrModel.java | 15 ++++++++------- .../example/glmocr/model/GlmOcrTranslator.java | 2 +- .../glmocr/tokenizer/TokenizerService.java | 8 ++++---- src/main/resources/application.yml | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/example/glmocr/model/GlmOcrModel.java b/src/main/java/com/example/glmocr/model/GlmOcrModel.java index 28abe3c..abd47e6 100644 --- a/src/main/java/com/example/glmocr/model/GlmOcrModel.java +++ b/src/main/java/com/example/glmocr/model/GlmOcrModel.java @@ -11,6 +11,7 @@ import ai.djl.pytorch.engine.PtModel; import ai.djl.pytorch.engine.PtNDArray; import ai.djl.repository.zoo.Criteria; import ai.djl.repository.zoo.ZooModel; +import ai.djl.training.util.ProgressBar; import ai.djl.translate.TranslateException; import ai.djl.translate.Translator; import ai.djl.translate.TranslatorContext; @@ -76,7 +77,7 @@ public class GlmOcrModel { .optModelPath(modelPath) .optEngine("PyTorch") .optTranslator(new GlmOcrTranslator(config)) - .optProgress(new ProgressListener()) + .optProgress(new ProgressBar()) .build(); model = criteria.loadModel(); @@ -100,10 +101,10 @@ public class GlmOcrModel { init(); } - private static class ProgressListener implements ai.djl.training.listener.ProgressListener { - @Override - public void progressUpdate(int progress, String message) { - log.debug("Model loading progress: {}% - {}", progress, message); - } - } +// private static class ProgressListener implements ai.djl.training.listener.ProgressListener { +// @Override +// public void progressUpdate(int progress, String message) { +// log.debug("Model loading progress: {}% - {}", progress, message); +// } +// } } diff --git a/src/main/java/com/example/glmocr/model/GlmOcrTranslator.java b/src/main/java/com/example/glmocr/model/GlmOcrTranslator.java index 72a2012..5150760 100644 --- a/src/main/java/com/example/glmocr/model/GlmOcrTranslator.java +++ b/src/main/java/com/example/glmocr/model/GlmOcrTranslator.java @@ -49,7 +49,7 @@ public class GlmOcrTranslator implements Translator { int width = processedImage.getWidth(); int height = processedImage.getHeight(); - float[] pixels = new float[3 * height * width]; + double[] pixels = new double[3 * height * width]; int idx = 0; for (int c = 0; c < 3; c++) { diff --git a/src/main/java/com/example/glmocr/tokenizer/TokenizerService.java b/src/main/java/com/example/glmocr/tokenizer/TokenizerService.java index c068631..214041a 100644 --- a/src/main/java/com/example/glmocr/tokenizer/TokenizerService.java +++ b/src/main/java/com/example/glmocr/tokenizer/TokenizerService.java @@ -39,7 +39,7 @@ public class TokenizerService { return new long[0]; } Encoding encoding = tokenizer.encode(text); - return Arrays.stream(encoding.getIds()).asLongStream().toArray(); + return Arrays.stream(encoding.getIds()).toArray(); } public String decode(long[] ids) { @@ -51,8 +51,7 @@ public class TokenizerService { .filter(id -> id > 0 && id < 151936) // GLM token范围 .toArray(); - String[] tokens = tokenizer.decode(filteredIds); - return String.join("", tokens); + return tokenizer.decode(filteredIds); } public String decodeWithPrompt(long[] ids, String prompt) { @@ -65,7 +64,8 @@ public class TokenizerService { } public int getVocabSize() { - return tokenizer != null ? tokenizer.getVocabularySize() : 0; +// return tokenizer != null ? tokenizer.getVocabularySize() : 0; + return tokenizer != null ? tokenizer.getMaxLength() : 0; } public boolean isAvailable() { diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index ccaf1b9..5a63a30 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -12,7 +12,7 @@ spring: # GLM-OCR配置(纯Java本地部署) glm-ocr: # 模型本地路径(支持相对路径或绝对路径) - model-path: ./models/GLM-OCR + model-path: 'D:/development/community/GLM-OCR' # 推理设备: cpu, gpu(0), gpu(1) device: cpu # 精度: fp32, fp16, bf16, int8