diff --git a/config/config.go b/config/config.go index d7c23f3..e3fb295 100644 --- a/config/config.go +++ b/config/config.go @@ -13,6 +13,12 @@ type Config struct { UploadDir string `mapstructure:"upload_dir"` Limit LimitConfig `mapstructure:"limit"` Aliyun AliyunConfig `mapstructure:"aliyun"` + Mathpix MathpixConfig `mapstructure:"mathpix"` +} + +type MathpixConfig struct { + AppID string `mapstructure:"app_id"` + AppKey string `mapstructure:"app_key"` } type LimitConfig struct { diff --git a/config/config_dev.yaml b/config/config_dev.yaml index 2732f25..b3bd074 100644 --- a/config/config_dev.yaml +++ b/config/config_dev.yaml @@ -44,3 +44,7 @@ aliyun: access_key_id: LTAI5t8qXhow6NCdYDtu1saF access_key_secret: qZ2SwYsNCEBckCVSOszH31yYwXU44A bucket_name: texpixel-doc + +mathpix: + app_id: "576157025515297792" + app_key: "683f7133391a1039383161653531396234343536393263346632613437343332" diff --git a/config/config_prod.yaml b/config/config_prod.yaml index eebb1fc..d43816d 100644 --- a/config/config_prod.yaml +++ b/config/config_prod.yaml @@ -43,3 +43,8 @@ aliyun: access_key_id: LTAI5t8qXhow6NCdYDtu1saF access_key_secret: qZ2SwYsNCEBckCVSOszH31yYwXU44A bucket_name: texpixel-doc + + +mathpix: + app_id: "576157025515297792" + app_key: "fb72d251e33ac85c929bfd4eec40d78368d08d82fb2ee1cffb04a8bb967d1db5" \ No newline at end of file diff --git a/internal/service/recognition_service.go b/internal/service/recognition_service.go index 7e9eee4..54abe52 100644 --- a/internal/service/recognition_service.go +++ b/internal/service/recognition_service.go @@ -166,9 +166,20 @@ func (s *RecognitionService) GetFormualTask(ctx context.Context, taskNo string) log.Error(ctx, "func", "GetFormualTask", "msg", "查询任务结果失败", "error", err, "task_no", taskNo) return nil, common.NewError(common.CodeDBError, "查询任务结果失败", err) } - latex := taskRet.NewContentCodec().GetContent().(string) - markdown := fmt.Sprintf("$$%s$$", latex) - return &formula.GetFormulaTaskResponse{TaskNo: taskNo, Latex: latex, Markdown: markdown, Status: int(task.Status)}, nil + + // 构建 Markdown 格式 + markdown := taskRet.Markdown + if markdown == "" { + markdown = fmt.Sprintf("$$%s$$", taskRet.Latex) + } + + return &formula.GetFormulaTaskResponse{ + TaskNo: taskNo, + Latex: taskRet.Latex, + Markdown: markdown, + MathML: taskRet.MathML, + Status: int(task.Status), + }, nil } func (s *RecognitionService) handleFormulaRecognition(ctx context.Context, taskID int64) error { @@ -209,6 +220,185 @@ func (s *RecognitionService) processVLFormula(ctx context.Context, taskID int64) log.Info(ctx, "func", "processVLFormulaQueue", "msg", "处理任务成功", "task_id", taskID) } + +// MathpixRequest Mathpix API /v3/text 完整请求结构 +type MathpixRequest struct { + // 图片源:URL 或 base64 编码 + Src string `json:"src"` + // 元数据键值对 + Metadata map[string]interface{} `json:"metadata"` + // 标签列表,用于标识结果 + Tags []string `json:"tags"` + // 异步请求标志 + Async bool `json:"async"` + // 回调配置 + Callback *MathpixCallback `json:"callback"` + // 输出格式列表:text, data, html, latex_styled + Formats []string `json:"formats"` + // 数据选项 + DataOptions *MathpixDataOptions `json:"data_options"` + // 返回检测到的字母表 + IncludeDetectedAlphabets bool `json:"include_detected_alphabets"` + // 允许的字母表 + AlphabetsAllowed *MathpixAlphabetsAllowed `json:"alphabets_allowed"` + // 指定图片区域 + Region *MathpixRegion `json:"region"` + // 蓝色HSV过滤模式 + EnableBlueHsvFilter bool `json:"enable_blue_hsv_filter"` + // 置信度阈值 + ConfidenceThreshold float64 `json:"confidence_threshold"` + // 符号级别置信度阈值,默认0.75 + ConfidenceRateThreshold float64 `json:"confidence_rate_threshold"` + // 包含公式标签 + IncludeEquationTags bool `json:"include_equation_tags"` + // 返回逐行信息 + IncludeLineData bool `json:"include_line_data"` + // 返回逐词信息 + IncludeWordData bool `json:"include_word_data"` + // 化学结构OCR + IncludeSmiles bool `json:"include_smiles"` + // InChI数据 + IncludeInchi bool `json:"include_inchi"` + // 几何图形数据 + IncludeGeometryData bool `json:"include_geometry_data"` + // 图表文本提取 + IncludeDiagramText bool `json:"include_diagram_text"` + // 页面信息,默认true + IncludePageInfo *bool `json:"include_page_info"` + // 自动旋转置信度阈值,默认0.99 + AutoRotateConfidenceThreshold float64 `json:"auto_rotate_confidence_threshold"` + // 移除多余空格,默认true + RmSpaces *bool `json:"rm_spaces"` + // 移除字体命令,默认false + RmFonts bool `json:"rm_fonts"` + // 使用aligned/gathered/cases代替array,默认false + IdiomaticEqnArrays bool `json:"idiomatic_eqn_arrays"` + // 移除不必要的大括号,默认false + IdiomaticBraces bool `json:"idiomatic_braces"` + // 数字始终为数学模式,默认false + NumbersDefaultToMath bool `json:"numbers_default_to_math"` + // 数学字体始终为数学模式,默认false + MathFontsDefaultToMath bool `json:"math_fonts_default_to_math"` + // 行内数学分隔符,默认 ["\\(", "\\)"] + MathInlineDelimiters []string `json:"math_inline_delimiters"` + // 行间数学分隔符,默认 ["\\[", "\\]"] + MathDisplayDelimiters []string `json:"math_display_delimiters"` + // 高级表格处理,默认false + EnableTablesFallback bool `json:"enable_tables_fallback"` + // 全角标点,null表示自动判断 + FullwidthPunctuation *bool `json:"fullwidth_punctuation"` +} + +// MathpixCallback 回调配置 +type MathpixCallback struct { + URL string `json:"url"` + Headers map[string]string `json:"headers"` +} + +// MathpixDataOptions 数据选项 +type MathpixDataOptions struct { + IncludeAsciimath bool `json:"include_asciimath"` + IncludeMathml bool `json:"include_mathml"` + IncludeLatex bool `json:"include_latex"` + IncludeTsv bool `json:"include_tsv"` +} + +// MathpixAlphabetsAllowed 允许的字母表 +type MathpixAlphabetsAllowed struct { + En bool `json:"en"` + Hi bool `json:"hi"` + Zh bool `json:"zh"` + Ja bool `json:"ja"` + Ko bool `json:"ko"` + Ru bool `json:"ru"` + Th bool `json:"th"` + Vi bool `json:"vi"` +} + +// MathpixRegion 图片区域 +type MathpixRegion struct { + TopLeftX int `json:"top_left_x"` + TopLeftY int `json:"top_left_y"` + Width int `json:"width"` + Height int `json:"height"` +} + +// MathpixResponse Mathpix API /v3/text 完整响应结构 +type MathpixResponse struct { + // 请求ID,用于调试 + RequestID string `json:"request_id"` + // Mathpix Markdown 格式文本 + Text string `json:"text"` + // 带样式的LaTeX(仅单个公式图片时返回) + LatexStyled string `json:"latex_styled"` + // 置信度 [0,1] + Confidence float64 `json:"confidence"` + // 置信度比率 [0,1] + ConfidenceRate float64 `json:"confidence_rate"` + // 行数据 + LineData []map[string]interface{} `json:"line_data"` + // 词数据 + WordData []map[string]interface{} `json:"word_data"` + // 数据对象列表 + Data []MathpixDataItem `json:"data"` + // HTML输出 + HTML string `json:"html"` + // 检测到的字母表 + DetectedAlphabets []map[string]interface{} `json:"detected_alphabets"` + // 是否打印内容 + IsPrinted bool `json:"is_printed"` + // 是否手写内容 + IsHandwritten bool `json:"is_handwritten"` + // 自动旋转置信度 + AutoRotateConfidence float64 `json:"auto_rotate_confidence"` + // 几何数据 + GeometryData []map[string]interface{} `json:"geometry_data"` + // 自动旋转角度 {0, 90, -90, 180} + AutoRotateDegrees int `json:"auto_rotate_degrees"` + // 图片宽度 + ImageWidth int `json:"image_width"` + // 图片高度 + ImageHeight int `json:"image_height"` + // 错误信息 + Error string `json:"error"` + // 错误详情 + ErrorInfo *MathpixErrorInfo `json:"error_info"` + // API版本 + Version string `json:"version"` +} + +// MathpixDataItem 数据项 +type MathpixDataItem struct { + Type string `json:"type"` + Value string `json:"value"` +} + +// MathpixErrorInfo 错误详情 +type MathpixErrorInfo struct { + ID string `json:"id"` + Message string `json:"message"` +} + +// GetMathML 从响应中获取MathML +func (r *MathpixResponse) GetMathML() string { + for _, item := range r.Data { + if item.Type == "mathml" { + return item.Value + } + } + return "" +} + +// GetAsciiMath 从响应中获取AsciiMath +func (r *MathpixResponse) GetAsciiMath() string { + for _, item := range r.Data { + if item.Type == "asciimath" { + return item.Value + } + } + return "" +} + func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int64, fileURL string) (err error) { // 为整个任务处理添加超时控制 ctx, cancel := context.WithTimeout(ctx, 45*time.Second) @@ -308,20 +498,12 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6 log.Error(ctx, "func", "processFormulaTask", "msg", "解析响应JSON失败", "error", err) return err } - // katex := utils.ToKatex(formulaResp.Result) - katex := formulaResp.Result - content := &dao.FormulaRecognitionContent{Latex: katex} - b, _ := json.Marshal(content) - // Save recognition result - result := &dao.RecognitionResult{ - TaskID: taskID, - TaskType: dao.TaskTypeFormula, - Content: b, - } - if err := resultDao.Create(tx, *result); err != nil { + err = resultDao.Create(tx, dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Latex: formulaResp.Result}) + if err != nil { log.Error(ctx, "func", "processFormulaTask", "msg", "保存任务结果失败", "error", err) return err } + isSuccess = true return nil } @@ -426,39 +608,22 @@ func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID in } resultDao := dao.NewRecognitionResultDao() - var formulaRes *dao.FormulaRecognitionContent + var formulaRes *dao.RecognitionResult result, err := resultDao.GetByTaskID(dao.DB.WithContext(ctx), taskID) if err != nil { log.Error(ctx, "func", "processVLFormulaTask", "msg", "获取任务结果失败", "error", err) return err } if result == nil { - formulaRes = &dao.FormulaRecognitionContent{EnhanceLatex: latex} - b, err := formulaRes.Encode() - if err != nil { - log.Error(ctx, "func", "processVLFormulaTask", "msg", "编码任务结果失败", "error", err) - return err - } - err = resultDao.Create(dao.DB.WithContext(ctx), dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Content: b}) + formulaRes = &dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Latex: latex} + err = resultDao.Create(dao.DB.WithContext(ctx), *formulaRes) if err != nil { log.Error(ctx, "func", "processVLFormulaTask", "msg", "创建任务结果失败", "error", err) return err } } else { - formulaRes = result.NewContentCodec().(*dao.FormulaRecognitionContent) - err = formulaRes.Decode() - if err != nil { - log.Error(ctx, "func", "processVLFormulaTask", "msg", "解码任务结果失败", "error", err) - return err - } - formulaRes.EnhanceLatex = latex - b, err := formulaRes.Encode() - if err != nil { - log.Error(ctx, "func", "processVLFormulaTask", "msg", "编码任务结果失败", "error", err) - return err - } - - err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{"content": b}) + formulaRes.Latex = latex + err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{"latex": latex}) if err != nil { log.Error(ctx, "func", "processVLFormulaTask", "msg", "更新任务结果失败", "error", err) return err @@ -504,8 +669,7 @@ func (s *RecognitionService) processOneTask(ctx context.Context) { ctx = context.WithValue(ctx, utils.RequestIDKey, task.TaskUUID) log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID) - // 处理任务 - err = s.processFormulaTask(ctx, taskID, task.FileURL) + err = s.processMathpixTask(ctx, taskID, task.FileURL) if err != nil { log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err) return @@ -513,3 +677,142 @@ func (s *RecognitionService) processOneTask(ctx context.Context) { log.Info(ctx, "func", "processFormulaQueue", "msg", "处理任务成功", "task_id", taskID) } + +// processMathpixTask 使用 Mathpix API 处理公式识别任务(用于增强识别) +func (s *RecognitionService) processMathpixTask(ctx context.Context, taskID int64, fileURL string) error { + isSuccess := false + logDao := dao.NewRecognitionLogDao() + + defer func() { + if !isSuccess { + err := dao.NewRecognitionTaskDao().Update(dao.DB.WithContext(ctx), map[string]interface{}{"id": taskID}, map[string]interface{}{"status": dao.TaskStatusFailed}) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "更新任务状态失败", "error", err) + } + return + } + err := dao.NewRecognitionTaskDao().Update(dao.DB.WithContext(ctx), map[string]interface{}{"id": taskID}, map[string]interface{}{"status": dao.TaskStatusCompleted}) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "更新任务状态失败", "error", err) + } + }() + + // 下载图片 + imageUrl, err := oss.GetDownloadURL(ctx, fileURL) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "获取图片URL失败", "error", err) + return err + } + + // 创建 Mathpix API 请求 + mathpixReq := MathpixRequest{ + Src: imageUrl, + Formats: []string{ + "text", + "latex_styled", + "data", + "html", + }, + DataOptions: &MathpixDataOptions{ + IncludeMathml: true, + IncludeAsciimath: true, + IncludeLatex: true, + IncludeTsv: true, + }, + } + + jsonData, err := json.Marshal(mathpixReq) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "JSON编码失败", "error", err) + return err + } + + headers := map[string]string{ + "Content-Type": "application/json", + "app_id": config.GlobalConfig.Mathpix.AppID, + "app_key": config.GlobalConfig.Mathpix.AppKey, + } + + endpoint := "https://api.mathpix.com/v3/text" + + resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, endpoint, bytes.NewReader(jsonData), headers) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "Mathpix API 请求失败", "error", err) + return err + } + defer resp.Body.Close() + + body := &bytes.Buffer{} + if _, err = body.ReadFrom(resp.Body); err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "读取响应体失败", "error", err) + return err + } + + // 创建日志记录 + recognitionLog := &dao.RecognitionLog{ + TaskID: taskID, + Provider: dao.ProviderMathpix, + RequestBody: string(jsonData), + ResponseBody: body.String(), + } + + // 解析响应 + var mathpixResp MathpixResponse + if err := json.Unmarshal(body.Bytes(), &mathpixResp); err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "解析响应失败", "error", err) + return err + } + + // 检查错误 + if mathpixResp.Error != "" { + errMsg := mathpixResp.Error + if mathpixResp.ErrorInfo != nil { + errMsg = fmt.Sprintf("%s: %s", mathpixResp.ErrorInfo.ID, mathpixResp.ErrorInfo.Message) + } + log.Error(ctx, "func", "processMathpixTask", "msg", "Mathpix API 返回错误", "error", errMsg) + return fmt.Errorf("mathpix error: %s", errMsg) + } + + // 保存日志 + err = logDao.Create(dao.DB.WithContext(ctx), recognitionLog) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "保存日志失败", "error", err) + } + + // 更新或创建识别结果 + resultDao := dao.NewRecognitionResultDao() + result, err := resultDao.GetByTaskID(dao.DB.WithContext(ctx), taskID) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "获取任务结果失败", "error", err) + return err + } + + if result == nil { + // 创建新结果 + err = resultDao.Create(dao.DB.WithContext(ctx), dao.RecognitionResult{ + TaskID: taskID, + TaskType: dao.TaskTypeFormula, + Latex: mathpixResp.LatexStyled, + Markdown: mathpixResp.Text, + MathML: mathpixResp.GetMathML(), + }) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "创建任务结果失败", "error", err) + return err + } + } else { + // 更新现有结果 + err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{ + "latex": mathpixResp.LatexStyled, + "markdown": mathpixResp.Text, + "mathml": mathpixResp.GetMathML(), + }) + if err != nil { + log.Error(ctx, "func", "processMathpixTask", "msg", "更新任务结果失败", "error", err) + return err + } + } + + isSuccess = true + return nil +} diff --git a/internal/service/task.go b/internal/service/task.go index e272ed8..b42bddc 100644 --- a/internal/service/task.go +++ b/internal/service/task.go @@ -89,8 +89,11 @@ func (svc *TaskService) GetTaskList(ctx context.Context, req *task.TaskListReque var markdown string recognitionResult := recognitionResultMap[item.ID] if recognitionResult != nil { - latex = recognitionResult.NewContentCodec().GetContent().(string) - markdown = fmt.Sprintf("$$%s$$", latex) + latex = recognitionResult.Latex + markdown = recognitionResult.Markdown + if markdown == "" { + markdown = fmt.Sprintf("$$%s$$", latex) + } } originURL, err := oss.GetDownloadURL(ctx, item.FileURL) if err != nil { diff --git a/internal/storage/dao/recognition_log.go b/internal/storage/dao/recognition_log.go new file mode 100644 index 0000000..77cc2ce --- /dev/null +++ b/internal/storage/dao/recognition_log.go @@ -0,0 +1,52 @@ +package dao + +import ( + "gorm.io/gorm" +) + +// RecognitionLogProvider 第三方服务提供商 +type RecognitionLogProvider string + +const ( + ProviderMathpix RecognitionLogProvider = "mathpix" + ProviderSiliconflow RecognitionLogProvider = "siliconflow" + ProviderTexpixel RecognitionLogProvider = "texpixel" +) + +// RecognitionLog 识别调用日志表,记录第三方API调用请求和响应 +type RecognitionLog struct { + BaseModel + TaskID int64 `gorm:"column:task_id;bigint;not null;default:0;index;comment:关联任务ID" json:"task_id"` + Provider RecognitionLogProvider `gorm:"column:provider;varchar(32);not null;comment:服务提供商" json:"provider"` + RequestBody string `gorm:"column:request_body;type:longtext;comment:请求体" json:"request_body"` + ResponseBody string `gorm:"column:response_body;type:longtext;comment:响应体" json:"response_body"` +} + +func (RecognitionLog) TableName() string { + return "recognition_log" +} + +type RecognitionLogDao struct{} + +func NewRecognitionLogDao() *RecognitionLogDao { + return &RecognitionLogDao{} +} + +// Create 创建日志记录 +func (d *RecognitionLogDao) Create(tx *gorm.DB, log *RecognitionLog) error { + return tx.Create(log).Error +} + +// GetByTaskID 根据任务ID获取日志 +func (d *RecognitionLogDao) GetByTaskID(tx *gorm.DB, taskID int64) ([]*RecognitionLog, error) { + var logs []*RecognitionLog + err := tx.Where("task_id = ?", taskID).Order("created_at DESC").Find(&logs).Error + return logs, err +} + +// GetByProvider 根据提供商获取日志 +func (d *RecognitionLogDao) GetByProvider(tx *gorm.DB, provider RecognitionLogProvider, limit int) ([]*RecognitionLog, error) { + var logs []*RecognitionLog + err := tx.Where("provider = ?", provider).Order("created_at DESC").Limit(limit).Find(&logs).Error + return logs, err +} diff --git a/internal/storage/dao/result.go b/internal/storage/dao/result.go index ac30810..5ea838d 100644 --- a/internal/storage/dao/result.go +++ b/internal/storage/dao/result.go @@ -1,66 +1,16 @@ package dao import ( - "encoding/json" - "gorm.io/gorm" ) -type JSON []byte - -// ContentCodec 定义内容编解码接口 -type ContentCodec interface { - Encode() (JSON, error) - Decode() error - GetContent() interface{} // 更明确的方法名 -} - -type FormulaRecognitionContent struct { - content JSON - Latex string `json:"latex"` - AdjustLatex string `json:"adjust_latex"` - EnhanceLatex string `json:"enhance_latex"` -} - -func (c *FormulaRecognitionContent) Encode() (JSON, error) { - b, err := json.Marshal(c) - if err != nil { - return nil, err - } - return b, nil -} - -func (c *FormulaRecognitionContent) Decode() error { - return json.Unmarshal(c.content, c) -} - -// GetPreferredContent 按优先级返回公式内容 -func (c *FormulaRecognitionContent) GetContent() interface{} { - c.Decode() - if c.EnhanceLatex != "" { - return c.EnhanceLatex - } else if c.AdjustLatex != "" { - return c.AdjustLatex - } else { - return c.Latex - } -} - type RecognitionResult struct { BaseModel TaskID int64 `gorm:"column:task_id;bigint;not null;default:0;comment:任务ID" json:"task_id"` TaskType TaskType `gorm:"column:task_type;varchar(16);not null;comment:任务类型;default:''" json:"task_type"` - Content JSON `gorm:"column:content;type:json;not null;comment:识别内容" json:"content"` -} - -// NewContentCodec 创建对应任务类型的内容编解码器 -func (r *RecognitionResult) NewContentCodec() ContentCodec { - switch r.TaskType { - case TaskTypeFormula: - return &FormulaRecognitionContent{content: r.Content} - default: - return nil - } + Latex string `json:"latex"` + Markdown string `json:"markdown"` // Mathpix Markdown 格式 + MathML string `json:"mathml"` // MathML 格式 } type RecognitionResultDao struct {