diff --git a/api/v1/formula/handler.go b/api/v1/formula/handler.go index dde9334..be4a26c 100644 --- a/api/v1/formula/handler.go +++ b/api/v1/formula/handler.go @@ -9,6 +9,7 @@ import ( "gitea.com/bitwsd/document_ai/internal/service" "gitea.com/bitwsd/document_ai/internal/storage/dao" "gitea.com/bitwsd/document_ai/pkg/common" + "gitea.com/bitwsd/document_ai/pkg/constant" "gitea.com/bitwsd/document_ai/pkg/utils" "github.com/gin-gonic/gin" @@ -37,11 +38,14 @@ func NewFormulaEndpoint() *FormulaEndpoint { // @Router /v1/formula/recognition [post] func (endpoint *FormulaEndpoint) CreateTask(ctx *gin.Context) { var req formula.CreateFormulaRecognitionRequest + uid := ctx.GetInt64(constant.ContextUserID) if err := ctx.BindJSON(&req); err != nil { ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeParamError, "Invalid parameters")) return } + req.UserID = uid + if !utils.InArray(req.TaskType, []string{string(dao.TaskTypeFormula), string(dao.TaskTypeFormula)}) { ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeParamError, "Invalid task type")) return diff --git a/api/v1/formula/router.go b/api/v1/formula/router.go index 3cb6493..2a8f2b4 100644 --- a/api/v1/formula/router.go +++ b/api/v1/formula/router.go @@ -1,12 +1,16 @@ package formula import ( + "gitea.com/bitwsd/document_ai/pkg/common" "github.com/gin-gonic/gin" ) func SetupRouter(engine *gin.RouterGroup) { endpoint := NewFormulaEndpoint() - engine.POST("/formula/recognition", endpoint.CreateTask) - engine.POST("/formula/ai_enhance", endpoint.AIEnhanceRecognition) - engine.GET("/formula/recognition/:task_no", endpoint.GetTaskStatus) + formulaRouter := engine.Group("/formula", common.GetAuthMiddleware()) + { + formulaRouter.POST("/recognition", endpoint.CreateTask) + formulaRouter.POST("/ai_enhance", endpoint.AIEnhanceRecognition) + formulaRouter.GET("/recognition/:task_no", endpoint.GetTaskStatus) + } } diff --git a/api/v1/oss/handler.go b/api/v1/oss/handler.go index 3ab6145..f1d7b35 100644 --- a/api/v1/oss/handler.go +++ b/api/v1/oss/handler.go @@ -11,7 +11,6 @@ import ( "gitea.com/bitwsd/document_ai/config" "gitea.com/bitwsd/document_ai/internal/storage/dao" "gitea.com/bitwsd/document_ai/pkg/common" - "gitea.com/bitwsd/document_ai/pkg/constant" "gitea.com/bitwsd/document_ai/pkg/oss" "gitea.com/bitwsd/document_ai/pkg/utils" "github.com/gin-gonic/gin" @@ -38,7 +37,6 @@ func GetPostObjectSignature(ctx *gin.Context) { // @Failure 200 {object} common.Response "Error response" // @Router /signature_url [get] func GetSignatureURL(ctx *gin.Context) { - userID := ctx.GetInt64(constant.ContextUserID) type Req struct { FileHash string `json:"file_hash" binding:"required"` FileName string `json:"file_name" binding:"required"` @@ -51,7 +49,7 @@ func GetSignatureURL(ctx *gin.Context) { } taskDao := dao.NewRecognitionTaskDao() sess := dao.DB.WithContext(ctx) - task, err := taskDao.GetTaskByFileURL(sess, userID, req.FileHash) + task, err := taskDao.GetTaskByFileURL(sess, req.FileHash) if err != nil && err != gorm.ErrRecordNotFound { ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeDBError, "failed to get task")) return diff --git a/api/v1/task/handler.go b/api/v1/task/handler.go index a3a2f89..74ee88d 100644 --- a/api/v1/task/handler.go +++ b/api/v1/task/handler.go @@ -3,10 +3,10 @@ package task import ( "net/http" - "gitea.com/bitwsd/document_ai/pkg/log" "gitea.com/bitwsd/document_ai/internal/model/task" "gitea.com/bitwsd/document_ai/internal/service" "gitea.com/bitwsd/document_ai/pkg/common" + "gitea.com/bitwsd/document_ai/pkg/log" "github.com/gin-gonic/gin" ) @@ -43,6 +43,8 @@ func (h *TaskEndpoint) GetTaskList(c *gin.Context) { return } + req.UserID = common.GetUserIDFromContext(c) + if req.Page <= 0 { req.Page = 1 } diff --git a/api/v1/task/router.go b/api/v1/task/router.go index f817116..50067c3 100644 --- a/api/v1/task/router.go +++ b/api/v1/task/router.go @@ -1,11 +1,12 @@ package task import ( + "gitea.com/bitwsd/document_ai/pkg/common" "github.com/gin-gonic/gin" ) func SetupRouter(engine *gin.RouterGroup) { endpoint := NewTaskEndpoint() engine.POST("/task/evaluate", endpoint.EvaluateTask) - engine.GET("/task/list", endpoint.GetTaskList) + engine.GET("/task/list", common.MustAuthMiddleware(), endpoint.GetTaskList) } diff --git a/api/v1/user/router.go b/api/v1/user/router.go index b5987ef..ab186f3 100644 --- a/api/v1/user/router.go +++ b/api/v1/user/router.go @@ -9,10 +9,9 @@ func SetupRouter(router *gin.RouterGroup) { userEndpoint := NewUserEndpoint() userRouter := router.Group("/user") { - userRouter.POST("/get/sms", userEndpoint.SendVerificationCode) - userRouter.POST("/login/phone", userEndpoint.LoginByPhoneCode) - userRouter.POST("/register/email", userEndpoint.RegisterByEmail) - userRouter.POST("/login/email", userEndpoint.LoginByEmail) - userRouter.GET("/info", common.GetAuthMiddleware(), userEndpoint.GetUserInfo) + userRouter.POST("/sms", userEndpoint.SendVerificationCode) + userRouter.POST("/register", userEndpoint.RegisterByEmail) + userRouter.POST("/login", userEndpoint.LoginByEmail) + userRouter.GET("/info", common.MustAuthMiddleware(), userEndpoint.GetUserInfo) } } diff --git a/config/config_dev.yaml b/config/config_dev.yaml index f8523c8..a4adb16 100644 --- a/config/config_dev.yaml +++ b/config/config_dev.yaml @@ -5,7 +5,7 @@ server: database: driver: mysql host: mysql - port: 3306 # 容器内部端口,不是宿主机映射的 3006 + port: 3306 username: root password: texpixel#pwd123! dbname: doc_ai @@ -13,7 +13,7 @@ database: max_open: 100 redis: - addr: redis:6379 # 容器内部端口,不是宿主机映射的 6079 + addr: redis:6379 password: yoge@123321! db: 0 @@ -22,7 +22,7 @@ limit: log: appName: document_ai - level: info # debug, info, warn, error + level: info format: console # json, console outputPath: ./logs/app.log # 日志文件路径 maxSize: 2 # 单个日志文件最大尺寸,单位MB @@ -41,6 +41,6 @@ aliyun: oss: endpoint: oss-cn-beijing.aliyuncs.com inner_endpoint: oss-cn-beijing-internal.aliyuncs.com - access_key_id: LTAI5tKogxeiBb4gJGWEePWN - access_key_secret: l4oCxtt5iLSQ1DAs40guTzKUfrxXwq - bucket_name: bitwsd-doc-ai + access_key_id: LTAI5t8qXhow6NCdYDtu1saF + access_key_secret: qZ2SwYsNCEBckCVSOszH31yYwXU44A + bucket_name: texpixel-doc diff --git a/internal/model/formula/request.go b/internal/model/formula/request.go index 350cb7f..0cb582c 100644 --- a/internal/model/formula/request.go +++ b/internal/model/formula/request.go @@ -5,6 +5,7 @@ type CreateFormulaRecognitionRequest struct { FileHash string `json:"file_hash" binding:"required"` // file hash FileName string `json:"file_name" binding:"required"` // file name TaskType string `json:"task_type" binding:"required,oneof=FORMULA"` // task type + UserID int64 `json:"user_id"` // user id } type GetRecognitionStatusRequest struct { diff --git a/internal/model/task/request.go b/internal/model/task/request.go index 21e7f38..0c1a4d9 100644 --- a/internal/model/task/request.go +++ b/internal/model/task/request.go @@ -11,26 +11,26 @@ type TaskListRequest struct { TaskType string `json:"task_type" form:"task_type" binding:"required"` Page int `json:"page" form:"page"` PageSize int `json:"page_size" form:"page_size"` -} - -type PdfInfo struct { - PageCount int `json:"page_count"` - PageWidth int `json:"page_width"` - PageHeight int `json:"page_height"` + UserID int64 `json:"-"` } type TaskListDTO struct { - TaskID string `json:"task_id"` - FileName string `json:"file_name"` - Status string `json:"status"` - Path string `json:"path"` - TaskType string `json:"task_type"` - CreatedAt string `json:"created_at"` - PdfInfo PdfInfo `json:"pdf_info"` + TaskID string `json:"task_id"` + FileName string `json:"file_name"` + Status string `json:"status"` + Path string `json:"path"` + TaskType string `json:"task_type"` + CreatedAt string `json:"created_at"` + Latex string `json:"latex"` + Markdown string `json:"markdown"` + MathML string `json:"mathml"` + MathMLMW string `json:"mathml_mw"` + ImageBlob string `json:"image_blob"` + DocxURL string `json:"docx_url"` + PDFURL string `json:"pdf_url"` } type TaskListResponse struct { TaskList []*TaskListDTO `json:"task_list"` - HasMore bool `json:"has_more"` - NextPage int `json:"next_page"` + Total int64 `json:"total"` } diff --git a/internal/service/recognition_service.go b/internal/service/recognition_service.go index 76ac751..024acac 100644 --- a/internal/service/recognition_service.go +++ b/internal/service/recognition_service.go @@ -105,6 +105,7 @@ func (s *RecognitionService) CreateRecognitionTask(ctx context.Context, req *for sess := dao.DB.WithContext(ctx) taskDao := dao.NewRecognitionTaskDao() task := &dao.RecognitionTask{ + UserID: req.UserID, TaskUUID: utils.NewUUID(), TaskType: dao.TaskType(req.TaskType), Status: dao.TaskStatusPending, @@ -166,7 +167,8 @@ func (s *RecognitionService) GetFormualTask(ctx context.Context, taskNo string) return nil, common.NewError(common.CodeDBError, "查询任务结果失败", err) } latex := taskRet.NewContentCodec().GetContent().(string) - return &formula.GetFormulaTaskResponse{TaskNo: taskNo, Latex: latex, Status: int(task.Status)}, nil + markdown := fmt.Sprintf("$$%s$$", latex) + return &formula.GetFormulaTaskResponse{TaskNo: taskNo, Latex: latex, Markdown: markdown, Status: int(task.Status)}, nil } func (s *RecognitionService) handleFormulaRecognition(ctx context.Context, taskID int64) error { @@ -281,7 +283,7 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6 headers := map[string]string{"Content-Type": "application/json", utils.RequestIDHeaderKey: utils.GetRequestIDFromContext(ctx)} // 发送请求时会使用带超时的context - resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, "http://cloud.texpixel.com:1080/formula/predict", bytes.NewReader(jsonData), headers) + resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/formula/predict", bytes.NewReader(jsonData), headers) if err != nil { if ctx.Err() == context.DeadlineExceeded { log.Error(ctx, "func", "processFormulaTask", "msg", "请求超时") diff --git a/internal/service/task.go b/internal/service/task.go index e861736..eea12d1 100644 --- a/internal/service/task.go +++ b/internal/service/task.go @@ -3,25 +3,30 @@ package service import ( "context" "errors" + "fmt" "strings" - "gitea.com/bitwsd/document_ai/pkg/log" "gitea.com/bitwsd/document_ai/internal/model/task" "gitea.com/bitwsd/document_ai/internal/storage/dao" - "gorm.io/gorm" + "gitea.com/bitwsd/document_ai/pkg/log" ) type TaskService struct { - db *gorm.DB + recognitionTaskDao *dao.RecognitionTaskDao + evaluateTaskDao *dao.EvaluateTaskDao + recognitionResultDao *dao.RecognitionResultDao } func NewTaskService() *TaskService { - return &TaskService{dao.DB} + return &TaskService{ + recognitionTaskDao: dao.NewRecognitionTaskDao(), + evaluateTaskDao: dao.NewEvaluateTaskDao(), + recognitionResultDao: dao.NewRecognitionResultDao(), + } } func (svc *TaskService) EvaluateTask(ctx context.Context, req *task.EvaluateTaskRequest) error { - taskDao := dao.NewRecognitionTaskDao() - task, err := taskDao.GetByTaskNo(svc.db.WithContext(ctx), req.TaskNo) + task, err := svc.recognitionTaskDao.GetByTaskNo(dao.DB.WithContext(ctx), req.TaskNo) if err != nil { log.Error(ctx, "func", "EvaluateTask", "msg", "get task by task no failed", "error", err) return err @@ -36,14 +41,13 @@ func (svc *TaskService) EvaluateTask(ctx context.Context, req *task.EvaluateTask return errors.New("task not finished") } - evaluateTaskDao := dao.NewEvaluateTaskDao() evaluateTask := &dao.EvaluateTask{ TaskID: task.ID, Satisfied: req.Satisfied, Feedback: req.Feedback, Comment: strings.Join(req.Suggestion, ","), } - err = evaluateTaskDao.Create(svc.db.WithContext(ctx), evaluateTask) + err = svc.evaluateTaskDao.Create(dao.DB.WithContext(ctx), evaluateTask) if err != nil { log.Error(ctx, "func", "EvaluateTask", "msg", "create evaluate task failed", "error", err) return err @@ -53,19 +57,43 @@ func (svc *TaskService) EvaluateTask(ctx context.Context, req *task.EvaluateTask } func (svc *TaskService) GetTaskList(ctx context.Context, req *task.TaskListRequest) (*task.TaskListResponse, error) { - taskDao := dao.NewRecognitionTaskDao() - tasks, err := taskDao.GetTaskList(svc.db.WithContext(ctx), dao.TaskType(req.TaskType), req.Page, req.PageSize) + tasks, total, err := svc.recognitionTaskDao.GetTaskList(dao.DB.WithContext(ctx), req.UserID, dao.TaskType(req.TaskType), req.Page, req.PageSize) if err != nil { log.Error(ctx, "func", "GetTaskList", "msg", "get task list failed", "error", err) return nil, err } + + taskIDs := make([]int64, 0, len(tasks)) + for _, item := range tasks { + taskIDs = append(taskIDs, item.ID) + } + + recognitionResults, err := svc.recognitionResultDao.GetByTaskIDs(dao.DB.WithContext(ctx), taskIDs) + if err != nil { + log.Error(ctx, "func", "GetTaskList", "msg", "get recognition results failed", "error", err) + return nil, err + } + + recognitionResultMap := make(map[int64]*dao.RecognitionResult) + for _, item := range recognitionResults { + recognitionResultMap[item.TaskID] = item + } + resp := &task.TaskListResponse{ TaskList: make([]*task.TaskListDTO, 0, len(tasks)), - HasMore: false, - NextPage: 0, + Total: total, } for _, item := range tasks { + var latex string + var markdown string + recognitionResult := recognitionResultMap[item.ID] + if recognitionResult != nil { + latex = recognitionResult.NewContentCodec().GetContent().(string) + markdown = fmt.Sprintf("$$%s$$", latex) + } resp.TaskList = append(resp.TaskList, &task.TaskListDTO{ + Latex: latex, + Markdown: markdown, TaskID: item.TaskUUID, FileName: item.FileName, Status: item.Status.String(), diff --git a/internal/storage/dao/result.go b/internal/storage/dao/result.go index 568a2bb..ac30810 100644 --- a/internal/storage/dao/result.go +++ b/internal/storage/dao/result.go @@ -84,6 +84,11 @@ func (dao *RecognitionResultDao) GetByTaskID(tx *gorm.DB, taskID int64) (result return } +func (dao *RecognitionResultDao) GetByTaskIDs(tx *gorm.DB, taskIDs []int64) (results []*RecognitionResult, err error) { + err = tx.Where("task_id IN (?)", taskIDs).Find(&results).Error + return +} + func (dao *RecognitionResultDao) Update(tx *gorm.DB, id int64, updates map[string]interface{}) error { return tx.Model(&RecognitionResult{}).Where("id = ?", id).Updates(updates).Error } diff --git a/internal/storage/dao/task.go b/internal/storage/dao/task.go index 0a70ed6..1f148ba 100644 --- a/internal/storage/dao/task.go +++ b/internal/storage/dao/task.go @@ -69,9 +69,9 @@ func (dao *RecognitionTaskDao) GetByTaskNo(tx *gorm.DB, taskUUID string) (task * return } -func (dao *RecognitionTaskDao) GetTaskByFileURL(tx *gorm.DB, userID int64, fileHash string) (task *RecognitionTask, err error) { +func (dao *RecognitionTaskDao) GetTaskByFileURL(tx *gorm.DB, fileHash string) (task *RecognitionTask, err error) { task = &RecognitionTask{} - err = tx.Model(RecognitionTask{}).Where("user_id = ? AND file_hash = ?", userID, fileHash).First(task).Error + err = tx.Model(RecognitionTask{}).Where("file_hash = ?", fileHash).Last(task).Error return } @@ -87,8 +87,13 @@ func (dao *RecognitionTaskDao) GetTaskByID(tx *gorm.DB, id int64) (task *Recogni return task, nil } -func (dao *RecognitionTaskDao) GetTaskList(tx *gorm.DB, taskType TaskType, page int, pageSize int) (tasks []*RecognitionTask, err error) { +func (dao *RecognitionTaskDao) GetTaskList(tx *gorm.DB, userID int64, taskType TaskType, page int, pageSize int) (tasks []*RecognitionTask, total int64, err error) { offset := (page - 1) * pageSize - err = tx.Model(RecognitionTask{}).Where("task_type = ?", taskType).Offset(offset).Limit(pageSize).Order(clause.OrderByColumn{Column: clause.Column{Name: "id"}, Desc: true}).Find(&tasks).Error - return + query := tx.Model(RecognitionTask{}).Where("user_id = ? AND task_type = ?", userID, taskType) + err = query.Count(&total).Error + if err != nil { + return nil, 0, err + } + err = query.Offset(offset).Limit(pageSize).Order(clause.OrderByColumn{Column: clause.Column{Name: "id"}, Desc: true}).Find(&tasks).Error + return tasks, total, err } diff --git a/pkg/common/middleware.go b/pkg/common/middleware.go index ae4a0f1..2536fe1 100644 --- a/pkg/common/middleware.go +++ b/pkg/common/middleware.go @@ -45,6 +45,19 @@ func AuthMiddleware(ctx *gin.Context) { ctx.Set(constant.ContextUserID, claims.UserId) } +func MustAuthMiddleware() gin.HandlerFunc { + return func(ctx *gin.Context) { + token := ctx.GetHeader("Authorization") + if token != "" { + token = strings.TrimPrefix(token, "Bearer ") + claims, err := jwt.ParseToken(token) + if err == nil { + ctx.Set(constant.ContextUserID, claims.UserId) + } + } + } +} + func GetAuthMiddleware() gin.HandlerFunc { return func(ctx *gin.Context) { token := ctx.GetHeader("Authorization") diff --git a/pkg/cors/cors.go b/pkg/cors/cors.go index faaafd2..7ec6a2d 100644 --- a/pkg/cors/cors.go +++ b/pkg/cors/cors.go @@ -19,9 +19,9 @@ type Config struct { func DefaultConfig() Config { return Config{ AllowOrigins: []string{"*"}, - AllowMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"}, - AllowHeaders: []string{"Origin", "Content-Type", "Accept"}, - ExposeHeaders: []string{"Content-Length"}, + AllowMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"}, + AllowHeaders: []string{"Origin", "Content-Type", "Accept", "Authorization", "X-Requested-With"}, + ExposeHeaders: []string{"Content-Length", "Content-Type"}, AllowCredentials: true, MaxAge: 86400, // 24 hours } @@ -30,16 +30,30 @@ func DefaultConfig() Config { func Cors(config Config) gin.HandlerFunc { return func(c *gin.Context) { origin := c.Request.Header.Get("Origin") + if origin == "" { + c.Next() + return + } // 检查是否允许该来源 - allowOrigin := "*" + allowOrigin := "" for _, o := range config.AllowOrigins { + if o == "*" { + // 通配符时,回显实际 origin(兼容 credentials) + allowOrigin = origin + break + } if o == origin { allowOrigin = origin break } } + if allowOrigin == "" { + c.Next() + return + } + c.Header("Access-Control-Allow-Origin", allowOrigin) c.Header("Access-Control-Allow-Methods", strings.Join(config.AllowMethods, ",")) c.Header("Access-Control-Allow-Headers", strings.Join(config.AllowHeaders, ","))