feat: update ocr model
This commit is contained in:
@@ -200,7 +200,7 @@ func (s *RecognitionService) processVLFormula(ctx context.Context, taskID int64)
|
||||
log.Info(ctx, "func", "processVLFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
|
||||
|
||||
// 处理具体任务
|
||||
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen32BInstruct); err != nil {
|
||||
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen3VL32BInstruct); err != nil {
|
||||
log.Error(ctx, "func", "processVLFormulaQueue", "msg", "处理任务失败", "error", err)
|
||||
return
|
||||
}
|
||||
@@ -349,24 +349,7 @@ func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID in
|
||||
log.Error(ctx, "func", "processVLFormulaTask", "msg", "读取图片数据失败", "error", err)
|
||||
return err
|
||||
}
|
||||
prompt := `
|
||||
Please perform OCR on the image and output only LaTeX code.
|
||||
Important instructions:
|
||||
|
||||
* "The image contains mathematical formulas, no plain text."
|
||||
|
||||
* "Preserve all layout, symbols, subscripts, summations, parentheses, etc., exactly as shown."
|
||||
|
||||
* "Use \[ ... \] or align environments to represent multiline math expressions."
|
||||
|
||||
* "Use adaptive symbols such as \left and \right where applicable."
|
||||
|
||||
* "Do not include any extra commentary, template answers, or unrelated equations."
|
||||
|
||||
* "Only output valid LaTeX code based on the actual content of the image, and not change the original mathematical expression."
|
||||
|
||||
* "The output result must be can render by better-react-mathjax."
|
||||
`
|
||||
prompt := `Please perform OCR on the image and output only LaTeX code.`
|
||||
base64Image := base64.StdEncoding.EncodeToString(imageData)
|
||||
|
||||
requestBody := formula.VLFormulaRequest{
|
||||
@@ -518,7 +501,7 @@ func (s *RecognitionService) processOneTask(ctx context.Context) {
|
||||
log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
|
||||
|
||||
// 处理具体任务
|
||||
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLDeepSeekOCR); err != nil {
|
||||
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen3VL32BInstruct); err != nil {
|
||||
log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
package utils
|
||||
|
||||
const (
|
||||
ModelVLQwen32BInstruct = "Qwen/Qwen2.5-VL-32B-Instruct"
|
||||
ModelVLDeepSeekOCR = "deepseek-ai/DeepSeek-OCR"
|
||||
ModelVLDeepSeekOCR = "deepseek-ai/DeepSeek-OCR"
|
||||
ModelVLQwen3VL32BInstruct = "Qwen/Qwen3-VL-32B-Instruct"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user