feat: add PDF document recognition with 10-page pre-hook
- Migrate recognition_results table to JSON schema (meta_data + content),
replacing flat latex/markdown/mathml/mml columns
- Add TaskTypePDF constant and update all formula read/write paths
- Add PDFRecognitionService using pdftoppm (Poppler) for CGO-free page
rendering; limits processing to first 10 pages (pre-hook)
- Reuse existing downstream OCR endpoint (cloud.texpixel.com) for each
page image; stores results as [{page_number, markdown}] JSON array
- Add Redis queue + distributed lock for PDF worker goroutine
- Add REST endpoints: POST /v1/pdf/recognition, GET /v1/pdf/recognition/:task_no
- Add .pdf to OSS upload file type whitelist
- Add migrations/pdf_recognition.sql for safe data migration
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
|||||||
"gitea.com/texpixel/document_ai/api/v1/analytics"
|
"gitea.com/texpixel/document_ai/api/v1/analytics"
|
||||||
"gitea.com/texpixel/document_ai/api/v1/formula"
|
"gitea.com/texpixel/document_ai/api/v1/formula"
|
||||||
"gitea.com/texpixel/document_ai/api/v1/oss"
|
"gitea.com/texpixel/document_ai/api/v1/oss"
|
||||||
|
"gitea.com/texpixel/document_ai/api/v1/pdf"
|
||||||
"gitea.com/texpixel/document_ai/api/v1/task"
|
"gitea.com/texpixel/document_ai/api/v1/task"
|
||||||
"gitea.com/texpixel/document_ai/api/v1/user"
|
"gitea.com/texpixel/document_ai/api/v1/user"
|
||||||
"gitea.com/texpixel/document_ai/pkg/common"
|
"gitea.com/texpixel/document_ai/pkg/common"
|
||||||
@@ -55,6 +56,13 @@ func SetupRouter(engine *gin.RouterGroup) {
|
|||||||
userAuthRouter.GET("/info", common.MustAuthMiddleware(), userEndpoint.GetUserInfo)
|
userAuthRouter.GET("/info", common.MustAuthMiddleware(), userEndpoint.GetUserInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pdfRouter := v1.Group("/pdf", common.GetAuthMiddleware())
|
||||||
|
{
|
||||||
|
endpoint := pdf.NewPDFEndpoint()
|
||||||
|
pdfRouter.POST("/recognition", endpoint.CreateTask)
|
||||||
|
pdfRouter.GET("/recognition/:task_no", endpoint.GetTaskStatus)
|
||||||
|
}
|
||||||
|
|
||||||
// 数据埋点路由
|
// 数据埋点路由
|
||||||
analyticsRouter := v1.Group("/analytics", common.GetAuthMiddleware())
|
analyticsRouter := v1.Group("/analytics", common.GetAuthMiddleware())
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ func (h *OSSEndpoint) GetSignatureURL(ctx *gin.Context) {
|
|||||||
ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeParamError, "invalid file name"))
|
ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeParamError, "invalid file name"))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if !utils.InArray(extend, []string{".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}) {
|
if !utils.InArray(extend, []string{".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp", ".pdf"}) {
|
||||||
ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeParamError, "invalid file type"))
|
ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeParamError, "invalid file type"))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
95
api/v1/pdf/handler.go
Normal file
95
api/v1/pdf/handler.go
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
package pdf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
pdfmodel "gitea.com/texpixel/document_ai/internal/model/pdf"
|
||||||
|
"gitea.com/texpixel/document_ai/internal/service"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/common"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/constant"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PDFEndpoint struct {
|
||||||
|
pdfService *service.PDFRecognitionService
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPDFEndpoint() *PDFEndpoint {
|
||||||
|
return &PDFEndpoint{
|
||||||
|
pdfService: service.NewPDFRecognitionService(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateTask godoc
|
||||||
|
// @Summary Create a PDF recognition task
|
||||||
|
// @Description Create a new PDF recognition task (max 10 pages processed)
|
||||||
|
// @Tags PDF
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param request body pdfmodel.CreatePDFRecognitionRequest true "Create PDF task request"
|
||||||
|
// @Success 200 {object} common.Response{data=pdfmodel.CreatePDFTaskResponse}
|
||||||
|
// @Failure 400 {object} common.Response
|
||||||
|
// @Failure 500 {object} common.Response
|
||||||
|
// @Router /v1/pdf/recognition [post]
|
||||||
|
func (e *PDFEndpoint) CreateTask(c *gin.Context) {
|
||||||
|
var req pdfmodel.CreatePDFRecognitionRequest
|
||||||
|
if err := c.BindJSON(&req); err != nil {
|
||||||
|
c.JSON(http.StatusOK, common.ErrorResponse(c, common.CodeParamError, "参数错误"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
req.UserID = c.GetInt64(constant.ContextUserID)
|
||||||
|
|
||||||
|
if strings.ToLower(filepath.Ext(req.FileName)) != ".pdf" {
|
||||||
|
c.JSON(http.StatusOK, common.ErrorResponse(c, common.CodeParamError, "仅支持PDF文件"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
task, err := e.pdfService.CreatePDFTask(c, &req)
|
||||||
|
if err != nil {
|
||||||
|
if bizErr, ok := err.(*common.BusinessError); ok {
|
||||||
|
c.JSON(http.StatusOK, common.ErrorResponse(c, int(bizErr.Code), bizErr.Message))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.JSON(http.StatusOK, common.ErrorResponse(c, common.CodeSystemError, "创建任务失败"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(http.StatusOK, common.SuccessResponse(c, &pdfmodel.CreatePDFTaskResponse{
|
||||||
|
TaskNo: task.TaskUUID,
|
||||||
|
Status: int(task.Status),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetTaskStatus godoc
|
||||||
|
// @Summary Get PDF recognition task status and results
|
||||||
|
// @Description Poll task status; pages field populated when status=2 (completed)
|
||||||
|
// @Tags PDF
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param task_no path string true "Task No"
|
||||||
|
// @Success 200 {object} common.Response{data=pdfmodel.GetPDFTaskResponse}
|
||||||
|
// @Failure 404 {object} common.Response
|
||||||
|
// @Failure 500 {object} common.Response
|
||||||
|
// @Router /v1/pdf/recognition/{task_no} [get]
|
||||||
|
func (e *PDFEndpoint) GetTaskStatus(c *gin.Context) {
|
||||||
|
var req pdfmodel.GetPDFTaskRequest
|
||||||
|
if err := c.ShouldBindUri(&req); err != nil {
|
||||||
|
c.JSON(http.StatusOK, common.ErrorResponse(c, common.CodeParamError, "参数错误"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := e.pdfService.GetPDFTask(c, req.TaskNo)
|
||||||
|
if err != nil {
|
||||||
|
if bizErr, ok := err.(*common.BusinessError); ok {
|
||||||
|
c.JSON(http.StatusOK, common.ErrorResponse(c, int(bizErr.Code), bizErr.Message))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.JSON(http.StatusOK, common.ErrorResponse(c, common.CodeSystemError, "查询任务失败"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(http.StatusOK, common.SuccessResponse(c, resp))
|
||||||
|
}
|
||||||
@@ -195,12 +195,27 @@ func migrateData(testDB, prodDB *gorm.DB) error {
|
|||||||
mathml = *item.MathML
|
mathml = *item.MathML
|
||||||
}
|
}
|
||||||
|
|
||||||
newResult := dao.RecognitionResult{
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{
|
||||||
TaskID: newTask.ID, // 使用新任务的ID
|
|
||||||
TaskType: dao.TaskType(item.TaskType),
|
|
||||||
Latex: latex,
|
Latex: latex,
|
||||||
Markdown: markdown,
|
Markdown: markdown,
|
||||||
MathML: mathml,
|
MathML: mathml,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("[%d/%d] 序列化公式内容失败: task_id=%d, error=%v", i+1, len(tasksWithResults), newTask.ID, err)
|
||||||
|
tx.Rollback()
|
||||||
|
errorCount++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
newResult := dao.RecognitionResult{
|
||||||
|
TaskID: newTask.ID, // 使用新任务的ID
|
||||||
|
TaskType: dao.TaskType(item.TaskType),
|
||||||
|
Content: contentJSON,
|
||||||
|
}
|
||||||
|
if err := newResult.SetMetaData(dao.ResultMetaData{TotalNum: 1}); err != nil {
|
||||||
|
log.Printf("[%d/%d] 序列化MetaData失败: task_id=%d, error=%v", i+1, len(tasksWithResults), newTask.ID, err)
|
||||||
|
tx.Rollback()
|
||||||
|
errorCount++
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
// 保留原始时间戳
|
// 保留原始时间戳
|
||||||
if item.ResultCreatedAt != nil {
|
if item.ResultCreatedAt != nil {
|
||||||
|
|||||||
4
go.mod
4
go.mod
@@ -1,6 +1,6 @@
|
|||||||
module gitea.com/texpixel/document_ai
|
module gitea.com/texpixel/document_ai
|
||||||
|
|
||||||
go 1.20
|
go 1.23.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/alibabacloud-go/darabonba-openapi v0.2.1
|
github.com/alibabacloud-go/darabonba-openapi v0.2.1
|
||||||
@@ -75,7 +75,7 @@ require (
|
|||||||
golang.org/x/arch v0.8.0 // indirect
|
golang.org/x/arch v0.8.0 // indirect
|
||||||
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
|
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
|
||||||
golang.org/x/net v0.25.0 // indirect
|
golang.org/x/net v0.25.0 // indirect
|
||||||
golang.org/x/sys v0.20.0 // indirect
|
golang.org/x/sys v0.33.0 // indirect
|
||||||
golang.org/x/text v0.20.0 // indirect
|
golang.org/x/text v0.20.0 // indirect
|
||||||
golang.org/x/time v0.5.0 // indirect
|
golang.org/x/time v0.5.0 // indirect
|
||||||
google.golang.org/protobuf v1.34.1 // indirect
|
google.golang.org/protobuf v1.34.1 // indirect
|
||||||
|
|||||||
26
go.sum
26
go.sum
@@ -33,7 +33,9 @@ github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9
|
|||||||
github.com/aliyun/credentials-go v1.1.2 h1:qU1vwGIBb3UJ8BwunHDRFtAhS6jnQLnde/yk0+Ih2GY=
|
github.com/aliyun/credentials-go v1.1.2 h1:qU1vwGIBb3UJ8BwunHDRFtAhS6jnQLnde/yk0+Ih2GY=
|
||||||
github.com/aliyun/credentials-go v1.1.2/go.mod h1:ozcZaMR5kLM7pwtCMEpVmQ242suV6qTJya2bDq4X1Tw=
|
github.com/aliyun/credentials-go v1.1.2/go.mod h1:ozcZaMR5kLM7pwtCMEpVmQ242suV6qTJya2bDq4X1Tw=
|
||||||
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
|
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
|
||||||
|
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
|
||||||
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
|
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
|
||||||
|
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
|
||||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||||
@@ -51,11 +53,13 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
|
|||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||||
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
|
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
|
||||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
|
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
|
||||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
||||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||||
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
||||||
|
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
||||||
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||||
@@ -65,6 +69,7 @@ github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm
|
|||||||
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
||||||
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||||
|
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||||
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
|
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
|
||||||
@@ -78,8 +83,11 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
|||||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||||
github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA=
|
github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA=
|
||||||
|
github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
|
||||||
github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A=
|
github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A=
|
||||||
|
github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI=
|
||||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||||
|
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
@@ -89,9 +97,13 @@ github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00/go.mod h1:wJfORR
|
|||||||
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
|
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
|
||||||
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
|
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
|
||||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||||
|
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
|
||||||
github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA=
|
github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA=
|
||||||
|
github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
|
||||||
github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
|
github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
|
||||||
|
github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
|
||||||
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
|
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
|
||||||
|
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||||
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
||||||
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
|
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
|
||||||
@@ -106,9 +118,11 @@ github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuV
|
|||||||
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||||
|
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||||
|
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||||
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
|
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
|
||||||
@@ -120,7 +134,9 @@ github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D
|
|||||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
|
github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
|
||||||
|
github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
|
||||||
github.com/microsoft/go-mssqldb v1.7.2 h1:CHkFJiObW7ItKTJfHo1QX7QBBD1iV+mn1eOyRP3b/PA=
|
github.com/microsoft/go-mssqldb v1.7.2 h1:CHkFJiObW7ItKTJfHo1QX7QBBD1iV+mn1eOyRP3b/PA=
|
||||||
|
github.com/microsoft/go-mssqldb v1.7.2/go.mod h1:kOvZKUdrhhFQmxLZqbwUV0rHkNkZpthMITIb2Ko1IoA=
|
||||||
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
||||||
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
@@ -137,9 +153,11 @@ github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h
|
|||||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||||
|
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/redis/go-redis/v9 v9.7.0 h1:HhLSs+B6O021gwzl+locl0zEDnyNkxMtf/Z3NNBMa9E=
|
github.com/redis/go-redis/v9 v9.7.0 h1:HhLSs+B6O021gwzl+locl0zEDnyNkxMtf/Z3NNBMa9E=
|
||||||
github.com/redis/go-redis/v9 v9.7.0/go.mod h1:f6zhXITC7JUJIlPEiBOTXxJgPLdZcA93GewI7inzyWw=
|
github.com/redis/go-redis/v9 v9.7.0/go.mod h1:f6zhXITC7JUJIlPEiBOTXxJgPLdZcA93GewI7inzyWw=
|
||||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||||
|
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||||
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
||||||
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||||
@@ -211,6 +229,7 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
|
|||||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
||||||
|
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
@@ -219,8 +238,8 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc
|
|||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
|
||||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
||||||
@@ -254,8 +273,11 @@ gorm.io/datatypes v1.2.7/go.mod h1:M2iO+6S3hhi4nAyYe444Pcb0dcIiOMJ7QHaUXxyiNZY=
|
|||||||
gorm.io/driver/mysql v1.5.7 h1:MndhOPYOfEp2rHKgkZIhJ16eVUIRf2HmzgoPmh7FCWo=
|
gorm.io/driver/mysql v1.5.7 h1:MndhOPYOfEp2rHKgkZIhJ16eVUIRf2HmzgoPmh7FCWo=
|
||||||
gorm.io/driver/mysql v1.5.7/go.mod h1:sEtPWMiqiN1N1cMXoXmBbd8C6/l+TESwriotuRRpkDM=
|
gorm.io/driver/mysql v1.5.7/go.mod h1:sEtPWMiqiN1N1cMXoXmBbd8C6/l+TESwriotuRRpkDM=
|
||||||
gorm.io/driver/postgres v1.5.0 h1:u2FXTy14l45qc3UeCJ7QaAXZmZfDDv0YrthvmRq1l0U=
|
gorm.io/driver/postgres v1.5.0 h1:u2FXTy14l45qc3UeCJ7QaAXZmZfDDv0YrthvmRq1l0U=
|
||||||
|
gorm.io/driver/postgres v1.5.0/go.mod h1:FUZXzO+5Uqg5zzwzv4KK49R8lvGIyscBOqYrtI1Ce9A=
|
||||||
gorm.io/driver/sqlite v1.4.3 h1:HBBcZSDnWi5BW3B3rwvVTc510KGkBkexlOg0QrmLUuU=
|
gorm.io/driver/sqlite v1.4.3 h1:HBBcZSDnWi5BW3B3rwvVTc510KGkBkexlOg0QrmLUuU=
|
||||||
|
gorm.io/driver/sqlite v1.4.3/go.mod h1:0Aq3iPO+v9ZKbcdiz8gLWRw5VOPcBOPUQJFLq5e2ecI=
|
||||||
gorm.io/driver/sqlserver v1.6.0 h1:VZOBQVsVhkHU/NzNhRJKoANt5pZGQAS1Bwc6m6dgfnc=
|
gorm.io/driver/sqlserver v1.6.0 h1:VZOBQVsVhkHU/NzNhRJKoANt5pZGQAS1Bwc6m6dgfnc=
|
||||||
|
gorm.io/driver/sqlserver v1.6.0/go.mod h1:WQzt4IJo/WHKnckU9jXBLMJIVNMVeTu25dnOzehntWw=
|
||||||
gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
|
gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
|
||||||
gorm.io/gorm v1.30.0 h1:qbT5aPv1UH8gI99OsRlvDToLxW5zR7FzS9acZDOZcgs=
|
gorm.io/gorm v1.30.0 h1:qbT5aPv1UH8gI99OsRlvDToLxW5zR7FzS9acZDOZcgs=
|
||||||
gorm.io/gorm v1.30.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE=
|
gorm.io/gorm v1.30.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE=
|
||||||
|
|||||||
34
internal/model/pdf/request.go
Normal file
34
internal/model/pdf/request.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package pdf
|
||||||
|
|
||||||
|
// CreatePDFRecognitionRequest is the JSON body for creating a PDF recognition
// task. FileURL, FileHash and FileName are required by the binding tags.
type CreatePDFRecognitionRequest struct {
	// FileURL is the location of the PDF to recognize.
	FileURL string `json:"file_url" binding:"required"`
	// FileHash is the client-supplied hash of the file.
	FileHash string `json:"file_hash" binding:"required"`
	// FileName is the original file name.
	FileName string `json:"file_name" binding:"required"`
	// UserID identifies the task owner; it is optional in the JSON body.
	UserID int64 `json:"user_id"`
}
|
||||||
|
|
||||||
|
// GetPDFTaskRequest holds the URI parameters of the task-status endpoint.
type GetPDFTaskRequest struct {
	// TaskNo is bound from the required "task_no" path segment.
	TaskNo string `uri:"task_no" binding:"required"`
}
|
||||||
|
|
||||||
|
// CreatePDFTaskResponse is the payload returned after a PDF task is created.
type CreatePDFTaskResponse struct {
	// TaskNo is the identifier the client uses to poll the task.
	TaskNo string `json:"task_no"`
	// Status is the task status as an integer code.
	Status int `json:"status"`
}
|
||||||
|
|
||||||
|
// PDFPageResult is the recognition result for a single PDF page.
type PDFPageResult struct {
	// PageNumber is the page this result belongs to.
	PageNumber int `json:"page_number"`
	// Markdown is the recognized page content in Markdown.
	Markdown string `json:"markdown"`
}
|
||||||
|
|
||||||
|
// GetPDFTaskResponse 查询任务状态和结果
|
||||||
|
type GetPDFTaskResponse struct {
|
||||||
|
TaskNo string `json:"task_no"`
|
||||||
|
Status int `json:"status"`
|
||||||
|
TotalPages int `json:"total_pages"`
|
||||||
|
Pages []PDFPageResult `json:"pages"`
|
||||||
|
}
|
||||||
343
internal/service/pdf_recognition_service.go
Normal file
343
internal/service/pdf_recognition_service.go
Normal file
@@ -0,0 +1,343 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
pdfmodel "gitea.com/texpixel/document_ai/internal/model/pdf"
|
||||||
|
"gitea.com/texpixel/document_ai/internal/storage/cache"
|
||||||
|
"gitea.com/texpixel/document_ai/internal/storage/dao"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/common"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/httpclient"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/log"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/oss"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/requestid"
|
||||||
|
"gitea.com/texpixel/document_ai/pkg/utils"
|
||||||
|
"gorm.io/gorm"
|
||||||
|
|
||||||
|
"gitea.com/texpixel/document_ai/internal/model/formula"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Limits and endpoints used by the PDF recognition worker.
const (
|
||||||
|
pdfMaxPages = 10 // maximum number of leading pages rendered and sent to OCR per PDF
|
||||||
|
pdfOCREndpoint = "https://cloud.texpixel.com:10443/doc_process/v1/image/ocr" // downstream OCR endpoint that receives each rendered page image
|
||||||
|
)
|
||||||
|
|
||||||
|
// PDFRecognitionService handles PDF recognition tasks: it creates and queries
// tasks and runs the background worker that consumes the PDF queue.
|
||||||
|
type PDFRecognitionService struct {
|
||||||
|
db *gorm.DB // database handle, set from dao.DB by the constructor
|
||||||
|
queueLimit chan struct{} // buffered channel used as a semaphore around each queue pop/processing step
|
||||||
|
stopChan chan struct{} // closed by Stop to make the queue loop exit
|
||||||
|
httpClient *httpclient.Client // client used for the downstream OCR requests
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPDFRecognitionService() *PDFRecognitionService {
|
||||||
|
s := &PDFRecognitionService{
|
||||||
|
db: dao.DB,
|
||||||
|
queueLimit: make(chan struct{}, 3),
|
||||||
|
stopChan: make(chan struct{}),
|
||||||
|
httpClient: httpclient.NewClient(nil),
|
||||||
|
}
|
||||||
|
|
||||||
|
utils.SafeGo(func() {
|
||||||
|
lock, err := cache.GetPDFDistributedLock(context.Background())
|
||||||
|
if err != nil || !lock {
|
||||||
|
log.Error(context.Background(), "func", "NewPDFRecognitionService", "msg", "获取PDF分布式锁失败")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.processPDFQueue(context.Background())
|
||||||
|
})
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreatePDFTask 创建识别任务并入队
|
||||||
|
func (s *PDFRecognitionService) CreatePDFTask(ctx context.Context, req *pdfmodel.CreatePDFRecognitionRequest) (*dao.RecognitionTask, error) {
|
||||||
|
task := &dao.RecognitionTask{
|
||||||
|
UserID: req.UserID,
|
||||||
|
TaskUUID: utils.NewUUID(),
|
||||||
|
TaskType: dao.TaskTypePDF,
|
||||||
|
Status: dao.TaskStatusPending,
|
||||||
|
FileURL: req.FileURL,
|
||||||
|
FileName: req.FileName,
|
||||||
|
FileHash: req.FileHash,
|
||||||
|
IP: common.GetIPFromContext(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := dao.NewRecognitionTaskDao().Create(dao.DB.WithContext(ctx), task); err != nil {
|
||||||
|
log.Error(ctx, "func", "CreatePDFTask", "msg", "创建任务失败", "error", err)
|
||||||
|
return nil, common.NewError(common.CodeDBError, "创建任务失败", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := cache.PushPDFTask(ctx, task.ID); err != nil {
|
||||||
|
log.Error(ctx, "func", "CreatePDFTask", "msg", "推入队列失败", "error", err)
|
||||||
|
return nil, common.NewError(common.CodeSystemError, "推入队列失败", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return task, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetPDFTask 查询任务状态和结果
|
||||||
|
func (s *PDFRecognitionService) GetPDFTask(ctx context.Context, taskNo string) (*pdfmodel.GetPDFTaskResponse, error) {
|
||||||
|
sess := dao.DB.WithContext(ctx)
|
||||||
|
task, err := dao.NewRecognitionTaskDao().GetByTaskNo(sess, taskNo)
|
||||||
|
if err != nil {
|
||||||
|
if err == gorm.ErrRecordNotFound {
|
||||||
|
return nil, common.NewError(common.CodeNotFound, "任务不存在", err)
|
||||||
|
}
|
||||||
|
return nil, common.NewError(common.CodeDBError, "查询任务失败", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 类型校验:防止公式任务被当成 PDF 解析
|
||||||
|
if task.TaskType != dao.TaskTypePDF {
|
||||||
|
return nil, common.NewError(common.CodeNotFound, "任务不存在", nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := &pdfmodel.GetPDFTaskResponse{
|
||||||
|
TaskNo: taskNo,
|
||||||
|
Status: int(task.Status),
|
||||||
|
}
|
||||||
|
|
||||||
|
if task.Status != dao.TaskStatusCompleted {
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := dao.NewRecognitionResultDao().GetByTaskID(sess, task.ID)
|
||||||
|
if err != nil || result == nil {
|
||||||
|
return nil, common.NewError(common.CodeDBError, "查询识别结果失败", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pages, err := result.GetPDFContent()
|
||||||
|
if err != nil {
|
||||||
|
return nil, common.NewError(common.CodeSystemError, "解析识别结果失败", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.TotalPages = len(pages)
|
||||||
|
for _, p := range pages {
|
||||||
|
resp.Pages = append(resp.Pages, pdfmodel.PDFPageResult{
|
||||||
|
PageNumber: p.PageNumber,
|
||||||
|
Markdown: p.Markdown,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// processPDFQueue 持续消费队列
|
||||||
|
func (s *PDFRecognitionService) processPDFQueue(ctx context.Context) {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-s.stopChan:
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
s.processOnePDFTask(ctx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PDFRecognitionService) processOnePDFTask(ctx context.Context) {
|
||||||
|
s.queueLimit <- struct{}{}
|
||||||
|
defer func() { <-s.queueLimit }()
|
||||||
|
|
||||||
|
taskID, err := cache.PopPDFTask(ctx)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processOnePDFTask", "msg", "获取任务失败", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
task, err := dao.NewRecognitionTaskDao().GetTaskByID(dao.DB.WithContext(ctx), taskID)
|
||||||
|
if err != nil || task == nil {
|
||||||
|
log.Error(ctx, "func", "processOnePDFTask", "msg", "任务不存在", "task_id", taskID)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx = context.WithValue(ctx, utils.RequestIDKey, task.TaskUUID)
|
||||||
|
requestid.SetRequestID(task.TaskUUID, func() {
|
||||||
|
if err := s.processPDFTask(ctx, taskID, task.FileURL); err != nil {
|
||||||
|
log.Error(ctx, "func", "processOnePDFTask", "msg", "处理PDF任务失败", "error", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// processPDFTask 核心处理:下载 → pre-hook → 逐页OCR → 写入DB
|
||||||
|
func (s *PDFRecognitionService) processPDFTask(ctx context.Context, taskID int64, fileURL string) error {
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
taskDao := dao.NewRecognitionTaskDao()
|
||||||
|
resultDao := dao.NewRecognitionResultDao()
|
||||||
|
|
||||||
|
isSuccess := false
|
||||||
|
defer func() {
|
||||||
|
status, remark := dao.TaskStatusFailed, "任务处理失败"
|
||||||
|
if isSuccess {
|
||||||
|
status, remark = dao.TaskStatusCompleted, ""
|
||||||
|
}
|
||||||
|
_ = taskDao.Update(dao.DB.WithContext(context.Background()),
|
||||||
|
map[string]interface{}{"id": taskID},
|
||||||
|
map[string]interface{}{"status": status, "completed_at": time.Now(), "remark": remark},
|
||||||
|
)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// 更新为处理中
|
||||||
|
if err := taskDao.Update(dao.DB.WithContext(ctx),
|
||||||
|
map[string]interface{}{"id": taskID},
|
||||||
|
map[string]interface{}{"status": dao.TaskStatusProcessing},
|
||||||
|
); err != nil {
|
||||||
|
return fmt.Errorf("更新任务状态失败: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 下载 PDF
|
||||||
|
reader, err := oss.DownloadFile(ctx, fileURL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("下载PDF失败: %w", err)
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
pdfBytes, err := io.ReadAll(reader)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("读取PDF数据失败: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// pre-hook: 用 pdftoppm 渲染前 pdfMaxPages 页为 PNG
|
||||||
|
pageImages, err := renderPDFPages(ctx, pdfBytes, pdfMaxPages)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("渲染PDF页面失败: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
processPages := len(pageImages)
|
||||||
|
log.Info(ctx, "func", "processPDFTask", "msg", "开始处理PDF",
|
||||||
|
"task_id", taskID, "process_pages", processPages)
|
||||||
|
|
||||||
|
// 逐页 OCR,结果收集
|
||||||
|
var pages []dao.PDFPageContent
|
||||||
|
for i, imgBytes := range pageImages {
|
||||||
|
ocrResult, err := s.callOCR(ctx, imgBytes)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("OCR第%d页失败: %w", i+1, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pages = append(pages, dao.PDFPageContent{
|
||||||
|
PageNumber: i + 1,
|
||||||
|
Markdown: ocrResult.Markdown,
|
||||||
|
})
|
||||||
|
log.Info(ctx, "func", "processPDFTask", "msg", "页面OCR完成",
|
||||||
|
"page", i+1, "total", processPages)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 序列化并写入 DB(单行)
|
||||||
|
contentJSON, err := dao.MarshalPDFContent(pages)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("序列化PDF内容失败: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dbResult := dao.RecognitionResult{
|
||||||
|
TaskID: taskID,
|
||||||
|
TaskType: dao.TaskTypePDF,
|
||||||
|
Content: contentJSON,
|
||||||
|
}
|
||||||
|
if err := dbResult.SetMetaData(dao.ResultMetaData{TotalNum: processPages}); err != nil {
|
||||||
|
return fmt.Errorf("序列化MetaData失败: %w", err)
|
||||||
|
}
|
||||||
|
if err := resultDao.Create(dao.DB.WithContext(ctx), dbResult); err != nil {
|
||||||
|
return fmt.Errorf("保存PDF结果失败: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
isSuccess = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// renderPDFPages renders the leading pages of a PDF to PNG with pdftoppm and
// returns the image bytes in page order, at most maxPages of them.
//
// The PDF is written to a temporary directory that is removed before
// returning. maxPages must be positive: it is passed to pdftoppm as the last
// page to render, and Poppler's pdftoppm is understood to treat a last page
// below 1 as "until the end of the document", which would defeat the limit.
//
// It returns an error when maxPages is not positive, when pdftoppm fails
// (the combined output is included), or when no page image was produced.
func renderPDFPages(ctx context.Context, pdfBytes []byte, maxPages int) ([][]byte, error) {
	if maxPages <= 0 {
		return nil, fmt.Errorf("maxPages必须大于0: %d", maxPages)
	}

	tmpDir, err := os.MkdirTemp("", "pdf-ocr-*")
	if err != nil {
		return nil, fmt.Errorf("创建临时目录失败: %w", err)
	}
	defer os.RemoveAll(tmpDir)

	pdfPath := filepath.Join(tmpDir, "input.pdf")
	if err := os.WriteFile(pdfPath, pdfBytes, 0600); err != nil {
		return nil, fmt.Errorf("写入临时PDF失败: %w", err)
	}

	outPrefix := filepath.Join(tmpDir, "page")
	cmd := exec.CommandContext(ctx, "pdftoppm",
		"-r", "150",
		"-png",
		"-l", fmt.Sprintf("%d", maxPages),
		pdfPath,
		outPrefix,
	)
	if out, err := cmd.CombinedOutput(); err != nil {
		return nil, fmt.Errorf("pdftoppm失败: %w, output: %s", err, string(out))
	}

	files, err := filepath.Glob(filepath.Join(tmpDir, "page-*.png"))
	if err != nil {
		return nil, fmt.Errorf("查找渲染输出文件失败: %w", err)
	}
	if len(files) == 0 {
		return nil, fmt.Errorf("pdftoppm未输出任何页面")
	}
	// Lexicographic order equals page order as long as pdftoppm zero-pads the
	// page numbers to a common width (NOTE(review): confirm for the deployed version).
	sort.Strings(files)

	// Defensive cap: never hand back more pages than the caller asked for.
	if len(files) > maxPages {
		files = files[:maxPages]
	}

	pages := make([][]byte, 0, len(files))
	for _, f := range files {
		data, err := os.ReadFile(f)
		if err != nil {
			return nil, fmt.Errorf("读取页面图片失败: %w", err)
		}
		pages = append(pages, data)
	}

	return pages, nil
}
|
||||||
|
|
||||||
|
// callOCR 调用与公式识别相同的下游 OCR 接口
|
||||||
|
func (s *PDFRecognitionService) callOCR(ctx context.Context, imgBytes []byte) (*formula.ImageOCRResponse, error) {
|
||||||
|
reqBody := map[string]string{
|
||||||
|
"image_base64": base64.StdEncoding.EncodeToString(imgBytes),
|
||||||
|
}
|
||||||
|
jsonData, err := json.Marshal(reqBody)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
headers := map[string]string{
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
utils.RequestIDHeaderKey: utils.GetRequestIDFromContext(ctx),
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, pdfOCREndpoint, bytes.NewReader(jsonData), headers)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("请求OCR接口失败: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// 下游非 2xx 视为失败,避免把错误响应 body 当成识别结果存库
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("OCR接口返回非200状态: %d, body: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var ocrResp formula.ImageOCRResponse
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&ocrResp); err != nil {
|
||||||
|
return nil, fmt.Errorf("解析OCR响应失败: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ocrResp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PDFRecognitionService) Stop() {
|
||||||
|
close(s.stopChan)
|
||||||
|
}
|
||||||
@@ -169,18 +169,21 @@ func (s *RecognitionService) GetFormualTask(ctx context.Context, taskNo string)
|
|||||||
return nil, common.NewError(common.CodeDBError, "查询任务结果失败", err)
|
return nil, common.NewError(common.CodeDBError, "查询任务结果失败", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 构建 Markdown 格式
|
formulaContent, err := taskRet.GetFormulaContent()
|
||||||
markdown := taskRet.Markdown
|
if err != nil {
|
||||||
if markdown == "" {
|
log.Error(ctx, "func", "GetFormualTask", "msg", "解析公式内容失败", "error", err)
|
||||||
markdown = fmt.Sprintf("$$%s$$", taskRet.Latex)
|
return nil, common.NewError(common.CodeSystemError, "解析识别结果失败", err)
|
||||||
|
}
|
||||||
|
markdown := formulaContent.Markdown
|
||||||
|
if markdown == "" {
|
||||||
|
markdown = fmt.Sprintf("$$%s$$", formulaContent.Latex)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &formula.GetFormulaTaskResponse{
|
return &formula.GetFormulaTaskResponse{
|
||||||
TaskNo: taskNo,
|
TaskNo: taskNo,
|
||||||
Latex: taskRet.Latex,
|
Latex: formulaContent.Latex,
|
||||||
Markdown: markdown,
|
Markdown: markdown,
|
||||||
MathML: taskRet.MathML,
|
MathML: formulaContent.MathML,
|
||||||
MML: taskRet.MML,
|
MML: formulaContent.MML,
|
||||||
Status: int(task.Status),
|
Status: int(task.Status),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
@@ -539,14 +542,26 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
|||||||
log.Error(ctx, "func", "processFormulaTask", "msg", "解析响应JSON失败", "error", err)
|
log.Error(ctx, "func", "processFormulaTask", "msg", "解析响应JSON失败", "error", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = resultDao.Create(tx, dao.RecognitionResult{
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{
|
||||||
TaskID: taskID,
|
|
||||||
TaskType: dao.TaskTypeFormula,
|
|
||||||
Latex: ocrResp.Latex,
|
Latex: ocrResp.Latex,
|
||||||
Markdown: ocrResp.Markdown,
|
Markdown: ocrResp.Markdown,
|
||||||
MathML: ocrResp.MathML,
|
MathML: ocrResp.MathML,
|
||||||
MML: ocrResp.MML,
|
MML: ocrResp.MML,
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processFormulaTask", "msg", "序列化公式内容失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
result := dao.RecognitionResult{
|
||||||
|
TaskID: taskID,
|
||||||
|
TaskType: dao.TaskTypeFormula,
|
||||||
|
Content: contentJSON,
|
||||||
|
}
|
||||||
|
if err = result.SetMetaData(dao.ResultMetaData{TotalNum: 1}); err != nil {
|
||||||
|
log.Error(ctx, "func", "processFormulaTask", "msg", "序列化MetaData失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = resultDao.Create(tx, result)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processFormulaTask", "msg", "保存任务结果失败", "error", err)
|
log.Error(ctx, "func", "processFormulaTask", "msg", "保存任务结果失败", "error", err)
|
||||||
return err
|
return err
|
||||||
@@ -662,15 +677,25 @@ func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID in
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if result == nil {
|
if result == nil {
|
||||||
formulaRes := &dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Latex: latex}
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{Latex: latex})
|
||||||
err = resultDao.Create(dao.DB.WithContext(ctx), *formulaRes)
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processVLFormulaTask", "msg", "序列化公式内容失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newResult := dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Content: contentJSON}
|
||||||
|
_ = newResult.SetMetaData(dao.ResultMetaData{TotalNum: 1})
|
||||||
|
err = resultDao.Create(dao.DB.WithContext(ctx), newResult)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processVLFormulaTask", "msg", "创建任务结果失败", "error", err)
|
log.Error(ctx, "func", "processVLFormulaTask", "msg", "创建任务结果失败", "error", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
result.Latex = latex
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{Latex: latex})
|
||||||
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{"latex": latex})
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processVLFormulaTask", "msg", "序列化公式内容失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{"content": contentJSON})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processVLFormulaTask", "msg", "更新任务结果失败", "error", err)
|
log.Error(ctx, "func", "processVLFormulaTask", "msg", "更新任务结果失败", "error", err)
|
||||||
return err
|
return err
|
||||||
@@ -851,23 +876,35 @@ func (s *RecognitionService) processMathpixTask(ctx context.Context, taskID int6
|
|||||||
|
|
||||||
if result == nil {
|
if result == nil {
|
||||||
// 创建新结果
|
// 创建新结果
|
||||||
err = resultDao.Create(dao.DB.WithContext(ctx), dao.RecognitionResult{
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{
|
||||||
TaskID: taskID,
|
|
||||||
TaskType: dao.TaskTypeFormula,
|
|
||||||
Latex: mathpixResp.LatexStyled,
|
Latex: mathpixResp.LatexStyled,
|
||||||
Markdown: mathpixResp.Text,
|
Markdown: mathpixResp.Text,
|
||||||
MathML: mathpixResp.GetMathML(),
|
MathML: mathpixResp.GetMathML(),
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processMathpixTask", "msg", "序列化公式内容失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newResult := dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Content: contentJSON}
|
||||||
|
_ = newResult.SetMetaData(dao.ResultMetaData{TotalNum: 1})
|
||||||
|
err = resultDao.Create(dao.DB.WithContext(ctx), newResult)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processMathpixTask", "msg", "创建任务结果失败", "error", err)
|
log.Error(ctx, "func", "processMathpixTask", "msg", "创建任务结果失败", "error", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 更新现有结果
|
// 更新现有结果
|
||||||
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{
|
||||||
|
Latex: mathpixResp.LatexStyled,
|
||||||
|
Markdown: mathpixResp.Text,
|
||||||
|
MathML: mathpixResp.GetMathML(),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processMathpixTask", "msg", "序列化公式内容失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{
|
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{
|
||||||
"latex": mathpixResp.LatexStyled,
|
"content": contentJSON,
|
||||||
"markdown": mathpixResp.Text,
|
|
||||||
"mathml": mathpixResp.GetMathML(),
|
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processMathpixTask", "msg", "更新任务结果失败", "error", err)
|
log.Error(ctx, "func", "processMathpixTask", "msg", "更新任务结果失败", "error", err)
|
||||||
@@ -1027,23 +1064,35 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
|
|||||||
|
|
||||||
if result == nil {
|
if result == nil {
|
||||||
// 创建新结果
|
// 创建新结果
|
||||||
err = resultDao.Create(dao.DB.WithContext(ctx), dao.RecognitionResult{
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{
|
||||||
TaskID: taskID,
|
|
||||||
TaskType: dao.TaskTypeFormula,
|
|
||||||
Markdown: markdownResult,
|
Markdown: markdownResult,
|
||||||
Latex: latex,
|
Latex: latex,
|
||||||
MathML: mml,
|
MathML: mml,
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "序列化公式内容失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newResult := dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Content: contentJSON}
|
||||||
|
_ = newResult.SetMetaData(dao.ResultMetaData{TotalNum: 1})
|
||||||
|
err = resultDao.Create(dao.DB.WithContext(ctx), newResult)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "创建任务结果失败", "error", err)
|
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "创建任务结果失败", "error", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 更新现有结果
|
// 更新现有结果
|
||||||
|
contentJSON, err := dao.MarshalFormulaContent(dao.FormulaContent{
|
||||||
|
Markdown: markdownResult,
|
||||||
|
Latex: latex,
|
||||||
|
MathML: mml,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "序列化公式内容失败", "error", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{
|
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{
|
||||||
"markdown": markdownResult,
|
"content": contentJSON,
|
||||||
"latex": latex,
|
|
||||||
"mathml": mml,
|
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "更新任务结果失败", "error", err)
|
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "更新任务结果失败", "error", err)
|
||||||
|
|||||||
@@ -89,17 +89,17 @@ func (svc *TaskService) GetTaskList(ctx context.Context, req *task.TaskListReque
|
|||||||
Total: total,
|
Total: total,
|
||||||
}
|
}
|
||||||
for _, item := range tasks {
|
for _, item := range tasks {
|
||||||
var latex string
|
var latex, markdown, mathML, mml string
|
||||||
var markdown string
|
|
||||||
var mathML string
|
|
||||||
var mml string
|
|
||||||
recognitionResult := recognitionResultMap[item.ID]
|
recognitionResult := recognitionResultMap[item.ID]
|
||||||
if recognitionResult != nil {
|
if recognitionResult != nil && recognitionResult.TaskType == dao.TaskTypeFormula {
|
||||||
latex = recognitionResult.Latex
|
if fc, err := recognitionResult.GetFormulaContent(); err == nil {
|
||||||
markdown = recognitionResult.Markdown
|
latex = fc.Latex
|
||||||
mathML = recognitionResult.MathML
|
markdown = fc.Markdown
|
||||||
mml = recognitionResult.MML
|
mathML = fc.MathML
|
||||||
|
mml = fc.MML
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// PDF 类型的 TaskListDTO 暂不展开 content(列表页只显示状态)
|
||||||
originURL, err := oss.GetDownloadURL(ctx, item.FileURL)
|
originURL, err := oss.GetDownloadURL(ctx, item.FileURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "func", "GetTaskList", "msg", "get origin url failed", "error", err)
|
log.Error(ctx, "func", "GetTaskList", "msg", "get origin url failed", "error", err)
|
||||||
@@ -148,10 +148,18 @@ func (svc *TaskService) ExportTask(ctx context.Context, req *task.ExportTaskRequ
|
|||||||
return nil, "", errors.New("recognition result not found")
|
return nil, "", errors.New("recognition result not found")
|
||||||
}
|
}
|
||||||
|
|
||||||
markdown := recognitionResult.Markdown
|
var markdown string
|
||||||
if markdown == "" {
|
switch recognitionResult.TaskType {
|
||||||
log.Error(ctx, "func", "ExportTask", "msg", "markdown not found")
|
case dao.TaskTypeFormula:
|
||||||
return nil, "", errors.New("markdown not found")
|
fc, err := recognitionResult.GetFormulaContent()
|
||||||
|
if err != nil || fc.Markdown == "" {
|
||||||
|
log.Error(ctx, "func", "ExportTask", "msg", "公式结果解析失败或markdown为空", "error", err)
|
||||||
|
return nil, "", errors.New("markdown not found")
|
||||||
|
}
|
||||||
|
markdown = fc.Markdown
|
||||||
|
default:
|
||||||
|
log.Error(ctx, "func", "ExportTask", "msg", "不支持的导出任务类型", "task_type", recognitionResult.TaskType)
|
||||||
|
return nil, "", errors.New("unsupported task type for export")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 获取文件名(去掉扩展名)
|
// 获取文件名(去掉扩展名)
|
||||||
|
|||||||
27
internal/storage/cache/pdf.go
vendored
Normal file
27
internal/storage/cache/pdf.go
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Redis keys used by the PDF recognition worker.
const (
|
||||||
|
PDFRecognitionTaskQueue = "pdf_recognition_queue" // list key holding queued PDF task IDs
|
||||||
|
PDFRecognitionDistLock = "pdf_recognition_dist_lock" // key set with SetNX to act as the PDF worker lock
|
||||||
|
)
|
||||||
|
|
||||||
|
func PushPDFTask(ctx context.Context, taskID int64) (int64, error) {
|
||||||
|
return RedisClient.LPush(ctx, PDFRecognitionTaskQueue, taskID).Result()
|
||||||
|
}
|
||||||
|
|
||||||
|
func PopPDFTask(ctx context.Context) (int64, error) {
|
||||||
|
result, err := RedisClient.BRPop(ctx, 0, PDFRecognitionTaskQueue).Result()
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return strconv.ParseInt(result[1], 10, 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetPDFDistributedLock(ctx context.Context) (bool, error) {
|
||||||
|
return RedisClient.SetNX(ctx, PDFRecognitionDistLock, "locked", DefaultLockTimeout).Result()
|
||||||
|
}
|
||||||
@@ -1,45 +1,104 @@
|
|||||||
package dao
|
package dao
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type RecognitionResult struct {
|
// FormulaContent 公式识别的 content 字段结构
|
||||||
BaseModel
|
type FormulaContent struct {
|
||||||
TaskID int64 `gorm:"column:task_id;bigint;not null;default:0;comment:任务ID" json:"task_id"`
|
Latex string `json:"latex"`
|
||||||
TaskType TaskType `gorm:"column:task_type;varchar(16);not null;comment:任务类型;default:''" json:"task_type"`
|
Markdown string `json:"markdown"`
|
||||||
Latex string `json:"latex" gorm:"column:latex;type:text;not null;default:''"`
|
MathML string `json:"mathml"`
|
||||||
Markdown string `json:"markdown" gorm:"column:markdown;type:text;not null;default:''"` // Markdown 格式
|
MML string `json:"mml"`
|
||||||
MathML string `json:"mathml" gorm:"column:mathml;type:text;not null;default:''"` // MathML 格式
|
|
||||||
MML string `json:"mml" gorm:"column:mml;type:text;not null;default:''"` // MML 格式
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type RecognitionResultDao struct {
|
// PDFPageContent PDF 单页识别结果
|
||||||
|
type PDFPageContent struct {
|
||||||
|
PageNumber int `json:"page_number"`
|
||||||
|
Markdown string `json:"markdown"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ResultMetaData is the structure stored in recognition_results.meta_data.
|
||||||
|
type ResultMetaData struct {
|
||||||
|
TotalNum int `json:"total_num"` // item count for the result: 1 for formula results, the processed page count for PDF results
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecognitionResult is the model of the recognition_results table.
|
||||||
|
type RecognitionResult struct {
|
||||||
|
BaseModel
|
||||||
|
TaskID int64 `gorm:"column:task_id;bigint;not null;default:0;index;comment:任务ID" json:"task_id"` // ID of the task this result belongs to
|
||||||
|
TaskType TaskType `gorm:"column:task_type;varchar(16);not null;comment:任务类型;default:''" json:"task_type"` // task type; decides how Content is to be decoded
|
||||||
|
MetaData string `gorm:"column:meta_data;type:json;comment:元数据" json:"meta_data"` // JSON-encoded ResultMetaData
|
||||||
|
Content string `gorm:"column:content;type:json;comment:识别内容JSON" json:"content"` // JSON-encoded recognition content
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetMetaData 序列化并写入 MetaData 字段
|
||||||
|
func (r *RecognitionResult) SetMetaData(meta ResultMetaData) error {
|
||||||
|
b, err := json.Marshal(meta)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
r.MetaData = string(b)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetFormulaContent 从 Content 字段反序列化公式结果
|
||||||
|
func (r *RecognitionResult) GetFormulaContent() (*FormulaContent, error) {
|
||||||
|
var c FormulaContent
|
||||||
|
if err := json.Unmarshal([]byte(r.Content), &c); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &c, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetPDFContent 从 Content 字段反序列化 PDF 分页结果
|
||||||
|
func (r *RecognitionResult) GetPDFContent() ([]PDFPageContent, error) {
|
||||||
|
var pages []PDFPageContent
|
||||||
|
if err := json.Unmarshal([]byte(r.Content), &pages); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return pages, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MarshalFormulaContent 将公式结果序列化为 JSON 字符串(供写入 Content)
|
||||||
|
func MarshalFormulaContent(c FormulaContent) (string, error) {
|
||||||
|
b, err := json.Marshal(c)
|
||||||
|
return string(b), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// MarshalPDFContent 将 PDF 分页结果序列化为 JSON 字符串(供写入 Content)
|
||||||
|
func MarshalPDFContent(pages []PDFPageContent) (string, error) {
|
||||||
|
b, err := json.Marshal(pages)
|
||||||
|
return string(b), err
|
||||||
|
}
|
||||||
|
|
||||||
|
type RecognitionResultDao struct{}
|
||||||
|
|
||||||
func NewRecognitionResultDao() *RecognitionResultDao {
|
func NewRecognitionResultDao() *RecognitionResultDao {
|
||||||
return &RecognitionResultDao{}
|
return &RecognitionResultDao{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 模型方法
|
|
||||||
func (dao *RecognitionResultDao) Create(tx *gorm.DB, data RecognitionResult) error {
|
func (dao *RecognitionResultDao) Create(tx *gorm.DB, data RecognitionResult) error {
|
||||||
return tx.Create(&data).Error
|
return tx.Create(&data).Error
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dao *RecognitionResultDao) GetByTaskID(tx *gorm.DB, taskID int64) (result *RecognitionResult, err error) {
|
func (dao *RecognitionResultDao) GetByTaskID(tx *gorm.DB, taskID int64) (*RecognitionResult, error) {
|
||||||
result = &RecognitionResult{}
|
result := &RecognitionResult{}
|
||||||
err = tx.Where("task_id = ?", taskID).First(result).Error
|
err := tx.Where("task_id = ?", taskID).First(result).Error
|
||||||
if err != nil && err == gorm.ErrRecordNotFound {
|
if err != nil && err == gorm.ErrRecordNotFound {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
return
|
return result, err
|
||||||
}
|
|
||||||
|
|
||||||
func (dao *RecognitionResultDao) GetByTaskIDs(tx *gorm.DB, taskIDs []int64) (results []*RecognitionResult, err error) {
|
|
||||||
err = tx.Where("task_id IN (?)", taskIDs).Find(&results).Error
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dao *RecognitionResultDao) Update(tx *gorm.DB, id int64, updates map[string]interface{}) error {
|
func (dao *RecognitionResultDao) Update(tx *gorm.DB, id int64, updates map[string]interface{}) error {
|
||||||
return tx.Model(&RecognitionResult{}).Where("id = ?", id).Updates(updates).Error
|
return tx.Model(&RecognitionResult{}).Where("id = ?", id).Updates(updates).Error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (dao *RecognitionResultDao) GetByTaskIDs(tx *gorm.DB, taskIDs []int64) ([]*RecognitionResult, error) {
|
||||||
|
var results []*RecognitionResult
|
||||||
|
err := tx.Where("task_id IN (?)", taskIDs).Find(&results).Error
|
||||||
|
return results, err
|
||||||
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ const (
|
|||||||
TaskTypeText TaskType = "TEXT"
|
TaskTypeText TaskType = "TEXT"
|
||||||
TaskTypeTable TaskType = "TABLE"
|
TaskTypeTable TaskType = "TABLE"
|
||||||
TaskTypeLayout TaskType = "LAYOUT"
|
TaskTypeLayout TaskType = "LAYOUT"
|
||||||
|
TaskTypePDF TaskType = "PDF"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (t TaskType) String() string {
|
func (t TaskType) String() string {
|
||||||
|
|||||||
32
migrations/pdf_recognition.sql
Normal file
32
migrations/pdf_recognition.sql
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
-- migrations/pdf_recognition.sql
|
||||||
|
-- Restructures the recognition_results table into a JSON content schema.
|
||||||
|
-- Order of execution: add the new columns -> backfill existing rows -> drop the old columns.
|
||||||
|
|
||||||
|
-- Step 1: add the JSON columns (the old columns are kept until the data has been migrated).
|
||||||
|
ALTER TABLE `recognition_results`
|
||||||
|
ADD COLUMN `meta_data` JSON DEFAULT NULL COMMENT '元数据 {"total_num":1}' AFTER `task_type`,
|
||||||
|
ADD COLUMN `content` JSON DEFAULT NULL COMMENT '识别内容 JSON' AFTER `meta_data`;
|
||||||
|
|
||||||
|
-- Step 2: backfill the new JSON columns from the old columns.
|
||||||
|
-- All existing rows are assumed to be FORMULA results (single item), so meta_data.total_num = 1.
|
||||||
|
-- content structure: {"latex":"...","markdown":"...","mathml":"...","mml":"..."}
|
||||||
|
UPDATE `recognition_results`
|
||||||
|
SET
|
||||||
|
`meta_data` = JSON_OBJECT('total_num', 1),
|
||||||
|
`content` = JSON_OBJECT(
|
||||||
|
'latex', IFNULL(`latex`, ''),
|
||||||
|
'markdown', IFNULL(`markdown`, ''),
|
||||||
|
'mathml', IFNULL(`mathml`, ''),
|
||||||
|
'mml', IFNULL(`mml`, '')
|
||||||
|
)
|
||||||
|
WHERE `content` IS NULL;
|
||||||
|
|
||||||
|
-- Step 3: verify that the backfill is complete (the query below should return 0) before running step 4.
|
||||||
|
-- SELECT COUNT(*) FROM `recognition_results` WHERE `content` IS NULL;
|
||||||
|
|
||||||
|
-- Step 4: drop the old columns. This is destructive: the flat latex/markdown/mathml/mml data is removed.
|
||||||
|
ALTER TABLE `recognition_results`
|
||||||
|
DROP COLUMN `latex`,
|
||||||
|
DROP COLUMN `markdown`,
|
||||||
|
DROP COLUMN `mathml`,
|
||||||
|
DROP COLUMN `mml`;
|
||||||
Reference in New Issue
Block a user