feat: convert markdown to mml
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -996,6 +997,12 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
|
||||
}
|
||||
markdownResult := strings.Join(markdownTexts, "\n\n---\n\n")
|
||||
|
||||
latex, mml, err := s.HandleConvert(ctx, markdownResult)
|
||||
if err != nil {
|
||||
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "转换失败", "error", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// 更新或创建识别结果
|
||||
resultDao := dao.NewRecognitionResultDao()
|
||||
result, err := resultDao.GetByTaskID(dao.DB.WithContext(ctx), taskID)
|
||||
@@ -1012,6 +1019,8 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
|
||||
TaskID: taskID,
|
||||
TaskType: dao.TaskTypeFormula,
|
||||
Markdown: markdownResult,
|
||||
Latex: latex,
|
||||
MathML: mml,
|
||||
})
|
||||
if err != nil {
|
||||
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "创建任务结果失败", "error", err)
|
||||
@@ -1021,6 +1030,8 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
|
||||
// 更新现有结果
|
||||
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{
|
||||
"markdown": markdownResult,
|
||||
"latex": latex,
|
||||
"mathml": mml,
|
||||
})
|
||||
if err != nil {
|
||||
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "更新任务结果失败", "error", err)
|
||||
@@ -1044,3 +1055,55 @@ func (s *RecognitionService) TestProcessMathpixTask(ctx context.Context, taskID
|
||||
}
|
||||
return s.processMathpixTask(ctx, taskID, task.FileURL)
|
||||
}
|
||||
|
||||
// ConvertResponse is the JSON structure returned by the Python conversion
// endpoint.
type ConvertResponse struct {
	// Latex is decoded from the "latex" key.
	Latex string `json:"latex"`
	// MathML is decoded from the "mathml" key.
	MathML string `json:"mathml"`
	// Error is decoded from the "error" key; it is omitted when empty on
	// marshal.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
func (s *RecognitionService) HandleConvert(ctx context.Context, markdown string) (latex string, mml string, err error) {
|
||||
url := "https://cloud.texpixel.com:10443/doc_converter/v1/convert"
|
||||
|
||||
// 构建 multipart form
|
||||
body := &bytes.Buffer{}
|
||||
writer := multipart.NewWriter(body)
|
||||
_ = writer.WriteField("markdown_input", markdown)
|
||||
writer.Close()
|
||||
|
||||
// 使用正确的 Content-Type(包含 boundary)
|
||||
headers := map[string]string{
|
||||
"Content-Type": writer.FormDataContentType(),
|
||||
}
|
||||
|
||||
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, url, body, headers)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// 读取响应体
|
||||
respBody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
// 检查 HTTP 状态码
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", "", fmt.Errorf("convert failed: status %d, body: %s", resp.StatusCode, string(respBody))
|
||||
}
|
||||
|
||||
// 解析 JSON 响应
|
||||
var convertResp ConvertResponse
|
||||
if err := json.Unmarshal(respBody, &convertResp); err != nil {
|
||||
return "", "", fmt.Errorf("unmarshal response failed: %v, body: %s", err, string(respBody))
|
||||
}
|
||||
|
||||
// 检查业务错误
|
||||
if convertResp.Error != "" {
|
||||
return "", "", fmt.Errorf("convert error: %s", convertResp.Error)
|
||||
}
|
||||
|
||||
return convertResp.Latex, convertResp.MathML, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user