From 50922641a9a30ad6d40bdf0a4af8ef15d88122f4 Mon Sep 17 00:00:00 2001 From: liuyuanchuang Date: Thu, 11 Dec 2025 19:51:51 +0800 Subject: [PATCH] feat: update ocr model --- internal/service/recognition_service.go | 23 +++-------------------- pkg/utils/model.go | 4 ++-- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/internal/service/recognition_service.go b/internal/service/recognition_service.go index b0c6f90..8283f19 100644 --- a/internal/service/recognition_service.go +++ b/internal/service/recognition_service.go @@ -200,7 +200,7 @@ func (s *RecognitionService) processVLFormula(ctx context.Context, taskID int64) log.Info(ctx, "func", "processVLFormulaQueue", "msg", "获取任务成功", "task_id", taskID) // 处理具体任务 - if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen32BInstruct); err != nil { + if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen3VL32BInstruct); err != nil { log.Error(ctx, "func", "processVLFormulaQueue", "msg", "处理任务失败", "error", err) return } @@ -349,24 +349,7 @@ func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID in log.Error(ctx, "func", "processVLFormulaTask", "msg", "读取图片数据失败", "error", err) return err } - prompt := ` -Please perform OCR on the image and output only LaTeX code. -Important instructions: - - * "The image contains mathematical formulas, no plain text." - - * "Preserve all layout, symbols, subscripts, summations, parentheses, etc., exactly as shown." - - * "Use \[ ... \] or align environments to represent multiline math expressions." - - * "Use adaptive symbols such as \left and \right where applicable." - - * "Do not include any extra commentary, template answers, or unrelated equations." - - * "Only output valid LaTeX code based on the actual content of the image, and not change the original mathematical expression." - - * "The output result must be can render by better-react-mathjax." - ` + prompt := `Please perform OCR on the image and output only LaTeX code.` base64Image := base64.StdEncoding.EncodeToString(imageData) requestBody := formula.VLFormulaRequest{ @@ -518,7 +501,7 @@ func (s *RecognitionService) processOneTask(ctx context.Context) { log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID) // 处理具体任务 - if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLDeepSeekOCR); err != nil { + if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen3VL32BInstruct); err != nil { log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err) return } diff --git a/pkg/utils/model.go b/pkg/utils/model.go index ed1eacb..87a0e7c 100644 --- a/pkg/utils/model.go +++ b/pkg/utils/model.go @@ -1,6 +1,6 @@ package utils const ( - ModelVLQwen32BInstruct = "Qwen/Qwen2.5-VL-32B-Instruct" - ModelVLDeepSeekOCR = "deepseek-ai/DeepSeek-OCR" + ModelVLDeepSeekOCR = "deepseek-ai/DeepSeek-OCR" + ModelVLQwen3VL32BInstruct = "Qwen/Qwen3-VL-32B-Instruct" )