feat: update OCR model

This commit is contained in:
liuyuanchuang
2025-12-11 19:51:51 +08:00
parent 904ea3d146
commit 50922641a9
2 changed files with 5 additions and 22 deletions

View File

@@ -200,7 +200,7 @@ func (s *RecognitionService) processVLFormula(ctx context.Context, taskID int64)
log.Info(ctx, "func", "processVLFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
// 处理具体任务
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen32BInstruct); err != nil {
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen3VL32BInstruct); err != nil {
log.Error(ctx, "func", "processVLFormulaQueue", "msg", "处理任务失败", "error", err)
return
}
@@ -349,24 +349,7 @@ func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID in
log.Error(ctx, "func", "processVLFormulaTask", "msg", "读取图片数据失败", "error", err)
return err
}
prompt := `
Please perform OCR on the image and output only LaTeX code.
Important instructions:
* "The image contains mathematical formulas, no plain text."
* "Preserve all layout, symbols, subscripts, summations, parentheses, etc., exactly as shown."
* "Use \[ ... \] or align environments to represent multiline math expressions."
* "Use adaptive symbols such as \left and \right where applicable."
* "Do not include any extra commentary, template answers, or unrelated equations."
* "Only output valid LaTeX code based on the actual content of the image, and not change the original mathematical expression."
* "The output result must be can render by better-react-mathjax."
`
prompt := `Please perform OCR on the image and output only LaTeX code.`
base64Image := base64.StdEncoding.EncodeToString(imageData)
requestBody := formula.VLFormulaRequest{
@@ -518,7 +501,7 @@ func (s *RecognitionService) processOneTask(ctx context.Context) {
log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
// 处理具体任务
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLDeepSeekOCR); err != nil {
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen3VL32BInstruct); err != nil {
log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err)
return
}

View File

@@ -1,6 +1,6 @@
package utils
// Identifiers of the vision-language models used by the formula recognition
// service. One of these is passed to processVLFormulaTask by its callers; each
// value has the form "<organization>/<model-name>".
//
// NOTE(review): this listing comes from a diff view, so removed and added
// lines are shown together. ModelVLDeepSeekOCR therefore appears twice
// (presumably a whitespace-only realignment), and ModelVLQwen32BInstruct looks
// like the entry this commit replaces with ModelVLQwen3VL32BInstruct — confirm
// against the actual file before relying on either.
const (
// ModelVLQwen32BInstruct names the Qwen2.5-VL 32B Instruct model.
ModelVLQwen32BInstruct = "Qwen/Qwen2.5-VL-32B-Instruct"
// ModelVLDeepSeekOCR names the DeepSeek-OCR model.
ModelVLDeepSeekOCR = "deepseek-ai/DeepSeek-OCR"
ModelVLDeepSeekOCR = "deepseek-ai/DeepSeek-OCR"
// ModelVLQwen3VL32BInstruct names the Qwen3-VL 32B Instruct model.
ModelVLQwen3VL32BInstruct = "Qwen/Qwen3-VL-32B-Instruct"
)