Compare commits
2 Commits
b9124451d2
...
97c3617731
| Author | SHA1 | Date | |
|---|---|---|---|
| 97c3617731 | |||
| ece026bea2 |
@@ -22,3 +22,10 @@ type GetFormulaTaskResponse struct {
|
|||||||
type FormulaRecognitionResponse struct {
|
type FormulaRecognitionResponse struct {
|
||||||
Result string `json:"result"`
|
Result string `json:"result"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ImageOCRResponse is the response returned by the image OCR endpoint.
|
||||||
|
type ImageOCRResponse struct {
|
||||||
|
Latex string `json:"latex"` // Content in LaTeX format.
|
||||||
|
Markdown string `json:"markdown"` // Content in Markdown format.
|
||||||
|
MathML string `json:"mathml"` // Content in MathML format (empty when there is no formula).
|
||||||
|
}
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import (
|
|||||||
"gitea.com/bitwsd/document_ai/pkg/constant"
|
"gitea.com/bitwsd/document_ai/pkg/constant"
|
||||||
"gitea.com/bitwsd/document_ai/pkg/httpclient"
|
"gitea.com/bitwsd/document_ai/pkg/httpclient"
|
||||||
"gitea.com/bitwsd/document_ai/pkg/oss"
|
"gitea.com/bitwsd/document_ai/pkg/oss"
|
||||||
|
"gitea.com/bitwsd/document_ai/pkg/requestid"
|
||||||
"gitea.com/bitwsd/document_ai/pkg/utils"
|
"gitea.com/bitwsd/document_ai/pkg/utils"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
)
|
)
|
||||||
@@ -511,8 +512,8 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
|||||||
// 设置Content-Type头为application/json
|
// 设置Content-Type头为application/json
|
||||||
headers := map[string]string{"Content-Type": "application/json", utils.RequestIDHeaderKey: utils.GetRequestIDFromContext(ctx)}
|
headers := map[string]string{"Content-Type": "application/json", utils.RequestIDHeaderKey: utils.GetRequestIDFromContext(ctx)}
|
||||||
|
|
||||||
// 发送请求时会使用带超时的context
|
// 发送请求到新的 OCR 接口
|
||||||
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/vlm/formula/predict", bytes.NewReader(jsonData), headers)
|
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/doc_process/v1/image/ocr", bytes.NewReader(jsonData), headers)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if ctx.Err() == context.DeadlineExceeded {
|
if ctx.Err() == context.DeadlineExceeded {
|
||||||
log.Error(ctx, "func", "processFormulaTask", "msg", "请求超时")
|
log.Error(ctx, "func", "processFormulaTask", "msg", "请求超时")
|
||||||
@@ -532,12 +533,18 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
|||||||
log.Info(ctx, "func", "processFormulaTask", "msg", "响应内容", "body", body.String())
|
log.Info(ctx, "func", "processFormulaTask", "msg", "响应内容", "body", body.String())
|
||||||
|
|
||||||
// 解析 JSON 响应
|
// 解析 JSON 响应
|
||||||
var formulaResp formula.FormulaRecognitionResponse
|
var ocrResp formula.ImageOCRResponse
|
||||||
if err := json.Unmarshal(body.Bytes(), &formulaResp); err != nil {
|
if err := json.Unmarshal(body.Bytes(), &ocrResp); err != nil {
|
||||||
log.Error(ctx, "func", "processFormulaTask", "msg", "解析响应JSON失败", "error", err)
|
log.Error(ctx, "func", "processFormulaTask", "msg", "解析响应JSON失败", "error", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = resultDao.Create(tx, dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Latex: formulaResp.Result})
|
err = resultDao.Create(tx, dao.RecognitionResult{
|
||||||
|
TaskID: taskID,
|
||||||
|
TaskType: dao.TaskTypeFormula,
|
||||||
|
Latex: ocrResp.Latex,
|
||||||
|
Markdown: ocrResp.Markdown,
|
||||||
|
MathML: ocrResp.MathML,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processFormulaTask", "msg", "保存任务结果失败", "error", err)
|
log.Error(ctx, "func", "processFormulaTask", "msg", "保存任务结果失败", "error", err)
|
||||||
return err
|
return err
|
||||||
@@ -705,15 +712,19 @@ func (s *RecognitionService) processOneTask(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ctx = context.WithValue(ctx, utils.RequestIDKey, task.TaskUUID)
|
ctx = context.WithValue(ctx, utils.RequestIDKey, task.TaskUUID)
|
||||||
log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
|
|
||||||
|
|
||||||
err = s.processBaiduOCRTask(ctx, taskID, task.FileURL)
|
// 使用 gls 设置 request_id,确保在整个任务处理过程中可用
|
||||||
if err != nil {
|
requestid.SetRequestID(task.TaskUUID, func() {
|
||||||
log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err)
|
log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Info(ctx, "func", "processFormulaQueue", "msg", "处理任务成功", "task_id", taskID)
|
err = s.processFormulaTask(ctx, taskID, task.FileURL)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info(ctx, "func", "processFormulaQueue", "msg", "处理任务成功", "task_id", taskID)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// processMathpixTask 使用 Mathpix API 处理公式识别任务(用于增强识别)
|
// processMathpixTask 使用 Mathpix API 处理公式识别任务(用于增强识别)
|
||||||
|
|||||||
@@ -3,10 +3,10 @@ package service
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"mime/multipart"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -151,19 +151,29 @@ func (svc *TaskService) ExportTask(ctx context.Context, req *task.ExportTaskRequ
|
|||||||
return nil, "", errors.New("markdown not found")
|
return nil, "", errors.New("markdown not found")
|
||||||
}
|
}
|
||||||
|
|
||||||
// call http://localhost:8055/export
|
// 获取文件名(去掉扩展名)
|
||||||
body := &bytes.Buffer{}
|
filename := strings.TrimSuffix(recognitionTask.FileName, "."+strings.ToLower(strings.Split(recognitionTask.FileName, ".")[len(strings.Split(recognitionTask.FileName, "."))-1]))
|
||||||
writer := multipart.NewWriter(body)
|
if filename == "" {
|
||||||
_ = writer.WriteField("markdown_input", markdown)
|
filename = "texpixel"
|
||||||
_ = writer.WriteField("type", req.Type)
|
}
|
||||||
writer.Close()
|
|
||||||
|
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/doc_converter/v1/export", body)
|
// 构建 JSON 请求体
|
||||||
|
requestBody := map[string]string{
|
||||||
|
"markdown": markdown,
|
||||||
|
"filename": filename,
|
||||||
|
}
|
||||||
|
jsonData, err := json.Marshal(requestBody)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "ExportTask", "msg", "json marshal failed", "error", err)
|
||||||
|
return nil, "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/doc_process/v1/convert/file", bytes.NewReader(jsonData))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "ExportTask", "msg", "create http request failed", "error", err)
|
log.Error(ctx, "func", "ExportTask", "msg", "create http request failed", "error", err)
|
||||||
return nil, "", err
|
return nil, "", err
|
||||||
}
|
}
|
||||||
httpReq.Header.Set("Content-Type", writer.FormDataContentType())
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
resp, err := client.Do(httpReq)
|
resp, err := client.Do(httpReq)
|
||||||
@@ -184,16 +194,8 @@ func (svc *TaskService) ExportTask(ctx context.Context, req *task.ExportTaskRequ
|
|||||||
return nil, "", err
|
return nil, "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
// determine content type based on export type
|
// 新接口只返回 DOCX 格式
|
||||||
var contentType string
|
contentType := "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
switch req.Type {
|
|
||||||
case "pdf":
|
|
||||||
contentType = "application/pdf"
|
|
||||||
case "docx":
|
|
||||||
contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
||||||
default:
|
|
||||||
contentType = "application/octet-stream"
|
|
||||||
}
|
|
||||||
|
|
||||||
return fileData, contentType, nil
|
return fileData, contentType, nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user