feat: convert markdown to mml

This commit is contained in:
2025-12-27 22:06:48 +08:00
parent 7c4dfaba54
commit ff6795b469

View File

@@ -7,6 +7,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"mime/multipart"
"net/http" "net/http"
"strings" "strings"
"time" "time"
@@ -996,6 +997,12 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
} }
markdownResult := strings.Join(markdownTexts, "\n\n---\n\n") markdownResult := strings.Join(markdownTexts, "\n\n---\n\n")
latex, mml, err := s.HandleConvert(ctx, markdownResult)
if err != nil {
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "转换失败", "error", err)
return err
}
// 更新或创建识别结果 // 更新或创建识别结果
resultDao := dao.NewRecognitionResultDao() resultDao := dao.NewRecognitionResultDao()
result, err := resultDao.GetByTaskID(dao.DB.WithContext(ctx), taskID) result, err := resultDao.GetByTaskID(dao.DB.WithContext(ctx), taskID)
@@ -1012,6 +1019,8 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
TaskID: taskID, TaskID: taskID,
TaskType: dao.TaskTypeFormula, TaskType: dao.TaskTypeFormula,
Markdown: markdownResult, Markdown: markdownResult,
Latex: latex,
MathML: mml,
}) })
if err != nil { if err != nil {
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "创建任务结果失败", "error", err) log.Error(ctx, "func", "processBaiduOCRTask", "msg", "创建任务结果失败", "error", err)
@@ -1021,6 +1030,8 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
// 更新现有结果 // 更新现有结果
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{ err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{
"markdown": markdownResult, "markdown": markdownResult,
"latex": latex,
"mathml": mml,
}) })
if err != nil { if err != nil {
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "更新任务结果失败", "error", err) log.Error(ctx, "func", "processBaiduOCRTask", "msg", "更新任务结果失败", "error", err)
@@ -1044,3 +1055,55 @@ func (s *RecognitionService) TestProcessMathpixTask(ctx context.Context, taskID
} }
return s.processMathpixTask(ctx, taskID, task.FileURL) return s.processMathpixTask(ctx, taskID, task.FileURL)
} }
// ConvertResponse is the JSON payload returned by the Python document
// converter endpoint.
type ConvertResponse struct {
	// Latex is decoded from the "latex" key of the response.
	Latex string `json:"latex"`
	// MathML is decoded from the "mathml" key of the response.
	MathML string `json:"mathml"`
	// Error is decoded from the "error" key; it is omitted when empty on
	// encoding.
	Error string `json:"error,omitempty"`
}
// HandleConvert posts markdown to the document converter HTTP endpoint and
// returns the LaTeX and MathML strings from its JSON reply.
//
// The markdown is sent as the "markdown_input" field of a multipart form.
// A non-nil error is returned when the form cannot be built, the request
// fails, the response body cannot be read, the status is not 200 OK, the body
// is not valid JSON, or the reply carries a non-empty "error" field. On any
// error both returned strings are empty.
func (s *RecognitionService) HandleConvert(ctx context.Context, markdown string) (latex string, mml string, err error) {
	const endpoint = "https://cloud.texpixel.com:10443/doc_converter/v1/convert"

	// Build the multipart form.
	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)
	if err = writer.WriteField("markdown_input", markdown); err != nil {
		return "", "", fmt.Errorf("write markdown_input field: %w", err)
	}
	// Close writes the terminating boundary; without it the form is
	// incomplete, so its error must not be dropped.
	if err = writer.Close(); err != nil {
		return "", "", fmt.Errorf("close multipart writer: %w", err)
	}

	// The Content-Type must carry the boundary chosen by the writer.
	headers := map[string]string{
		"Content-Type": writer.FormDataContentType(),
	}

	// NOTE(review): body is a one-shot buffer; confirm RequestWithRetry
	// re-reads it safely when it retries.
	resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, endpoint, body, headers)
	if err != nil {
		return "", "", fmt.Errorf("convert request: %w", err)
	}
	defer resp.Body.Close()

	// Read the whole body up front so it can be quoted in error messages.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", "", fmt.Errorf("read convert response: %w", err)
	}

	// Reject any non-200 status.
	if resp.StatusCode != http.StatusOK {
		return "", "", fmt.Errorf("convert failed: status %d, body: %s", resp.StatusCode, string(respBody))
	}

	// Decode the JSON payload.
	var convertResp ConvertResponse
	if err = json.Unmarshal(respBody, &convertResp); err != nil {
		return "", "", fmt.Errorf("unmarshal response failed: %w, body: %s", err, string(respBody))
	}

	// A 200 reply can still report an application-level failure.
	if convertResp.Error != "" {
		return "", "", fmt.Errorf("convert error: %s", convertResp.Error)
	}

	return convertResp.Latex, convertResp.MathML, nil
}