Compare commits
9 Commits
4bd8cef372
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f86898ae5f | ||
|
|
a9db8576eb | ||
| 9ceb5fe92a | |||
|
|
50922641a9 | ||
|
|
904ea3d146 | ||
|
|
696919611c | ||
|
|
ea0f5d8765 | ||
| 0bc77f61e2 | |||
| 083142491f |
16
Dockerfile
16
Dockerfile
@@ -1,16 +1,22 @@
|
||||
# Build stage
|
||||
FROM registry.cn-beijing.aliyuncs.com/bitwsd/golang AS builder
|
||||
FROM crpi-8s2ierii2xan4klg.cn-beijing.personal.cr.aliyuncs.com/texpixel/golang:1.20-apline AS builder
|
||||
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy source code
|
||||
COPY . .
|
||||
|
||||
ENV GOPROXY=https://goproxy.cn,direct
|
||||
ENV GOSUMDB=off
|
||||
|
||||
|
||||
# Build binary
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -mod=vendor -o main ./main.go
|
||||
RUN go mod download && \
|
||||
CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o doc_ai ./main.go
|
||||
|
||||
# Runtime stage
|
||||
FROM registry.cn-beijing.aliyuncs.com/bitwsd/alpine
|
||||
FROM crpi-8s2ierii2xan4klg.cn-beijing.personal.cr.aliyuncs.com/texpixel/alpine:latest
|
||||
|
||||
# Set timezone
|
||||
RUN apk add --no-cache tzdata && \
|
||||
@@ -21,7 +27,7 @@ RUN apk add --no-cache tzdata && \
|
||||
WORKDIR /app
|
||||
|
||||
# Copy binary from builder
|
||||
COPY --from=builder /app/main .
|
||||
COPY --from=builder /app/doc_ai .
|
||||
|
||||
# Copy config files
|
||||
COPY config/config_*.yaml ./config/
|
||||
@@ -34,7 +40,7 @@ RUN mkdir -p /data/formula && \
|
||||
EXPOSE 8024
|
||||
|
||||
# Set entrypoint
|
||||
ENTRYPOINT ["./main"]
|
||||
ENTRYPOINT ["./doc_ai"]
|
||||
|
||||
# Default command (can be overridden)
|
||||
CMD ["-env", "prod"]
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.com/bitwsd/document_ai/config"
|
||||
@@ -59,7 +60,7 @@ func GetSignatureURL(ctx *gin.Context) {
|
||||
ctx.JSON(http.StatusOK, common.SuccessResponse(ctx, gin.H{"sign_url": "", "repeat": true, "path": task.FileURL}))
|
||||
return
|
||||
}
|
||||
extend := filepath.Ext(req.FileName)
|
||||
extend := strings.ToLower(filepath.Ext(req.FileName))
|
||||
if extend == "" {
|
||||
ctx.JSON(http.StatusOK, common.ErrorResponse(ctx, common.CodeParamError, "invalid file name"))
|
||||
return
|
||||
|
||||
@@ -3,7 +3,7 @@ package task
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"gitea.com/bitwsd/document_ai/internal/model/task"
|
||||
"gitea.com/bitwsd/document_ai/internal/service"
|
||||
"gitea.com/bitwsd/document_ai/pkg/common"
|
||||
|
||||
@@ -3,7 +3,7 @@ package user
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"gitea.com/bitwsd/document_ai/config"
|
||||
model "gitea.com/bitwsd/document_ai/internal/model/user"
|
||||
"gitea.com/bitwsd/document_ai/internal/service"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"github.com/spf13/viper"
|
||||
)
|
||||
|
||||
|
||||
@@ -40,6 +40,6 @@ aliyun:
|
||||
oss:
|
||||
endpoint: oss-cn-beijing.aliyuncs.com
|
||||
inner_endpoint: oss-cn-beijing-internal.aliyuncs.com
|
||||
access_key_id: LTAI5tKogxeiBb4gJGWEePWN
|
||||
access_key_secret: l4oCxtt5iLSQ1DAs40guTzKUfrxXwq
|
||||
bucket_name: bitwsd-doc-ai
|
||||
access_key_id: LTAI5t8qXhow6NCdYDtu1saF
|
||||
access_key_secret: qZ2SwYsNCEBckCVSOszH31yYwXU44A
|
||||
bucket_name: texpixel-doc
|
||||
|
||||
5
go.mod
5
go.mod
@@ -3,7 +3,6 @@ module gitea.com/bitwsd/document_ai
|
||||
go 1.20
|
||||
|
||||
require (
|
||||
gitea.com/bitwsd/core v0.0.0-20241128075635-8d72a929b914
|
||||
github.com/alibabacloud-go/darabonba-openapi v0.2.1
|
||||
github.com/alibabacloud-go/dysmsapi-20170525/v2 v2.0.18
|
||||
github.com/alibabacloud-go/tea v1.1.19
|
||||
@@ -13,7 +12,9 @@ require (
|
||||
github.com/gin-gonic/gin v1.10.0
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/redis/go-redis/v9 v9.7.0
|
||||
github.com/rs/zerolog v1.33.0
|
||||
github.com/spf13/viper v1.19.0
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1
|
||||
gorm.io/driver/mysql v1.5.7
|
||||
gorm.io/gorm v1.25.12
|
||||
)
|
||||
@@ -54,7 +55,6 @@ require (
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
||||
github.com/rs/zerolog v1.33.0 // indirect
|
||||
github.com/sagikazarmark/locafero v0.4.0 // indirect
|
||||
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
|
||||
github.com/sourcegraph/conc v0.3.0 // indirect
|
||||
@@ -76,6 +76,5 @@ require (
|
||||
golang.org/x/time v0.5.0 // indirect
|
||||
google.golang.org/protobuf v1.34.1 // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
12
go.sum
12
go.sum
@@ -1,5 +1,3 @@
|
||||
gitea.com/bitwsd/core v0.0.0-20241128075635-8d72a929b914 h1:3aRCeiuq/PWMr2yjEN9Y5NusfmpdMKiO4i/5tM5qc34=
|
||||
gitea.com/bitwsd/core v0.0.0-20241128075635-8d72a929b914/go.mod h1:hbEUo3t/AFGCnQbxwdG4oiw2IHdlRgK02cqd0yicP1Y=
|
||||
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4 h1:iC9YFYKDGEy3n/FtqJnOkZsene9olVspKmkX5A2YBEo=
|
||||
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4/go.mod h1:sCavSAvdzOjul4cEqeVtvlSaSScfNsTQ+46HwlTL1hc=
|
||||
github.com/alibabacloud-go/darabonba-openapi v0.1.18/go.mod h1:PB4HffMhJVmAgNKNq3wYbTUlFvPgxJpTzd1F5pTuUsc=
|
||||
@@ -33,9 +31,7 @@ github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9
|
||||
github.com/aliyun/credentials-go v1.1.2 h1:qU1vwGIBb3UJ8BwunHDRFtAhS6jnQLnde/yk0+Ih2GY=
|
||||
github.com/aliyun/credentials-go v1.1.2/go.mod h1:ozcZaMR5kLM7pwtCMEpVmQ242suV6qTJya2bDq4X1Tw=
|
||||
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
|
||||
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
|
||||
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
|
||||
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
|
||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||
@@ -53,13 +49,11 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
|
||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
||||
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||
@@ -69,7 +63,6 @@ github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm
|
||||
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
||||
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
|
||||
@@ -82,7 +75,6 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
@@ -103,11 +95,9 @@ github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuV
|
||||
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
|
||||
@@ -134,11 +124,9 @@ github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/redis/go-redis/v9 v9.7.0 h1:HhLSs+B6O021gwzl+locl0zEDnyNkxMtf/Z3NNBMa9E=
|
||||
github.com/redis/go-redis/v9 v9.7.0/go.mod h1:f6zhXITC7JUJIlPEiBOTXxJgPLdZcA93GewI7inzyWw=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
||||
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
|
||||
@@ -11,3 +11,8 @@ type GetFormulaTaskResponse struct {
|
||||
Count int `json:"count"`
|
||||
Latex string `json:"latex"`
|
||||
}
|
||||
|
||||
// FormulaRecognitionResponse 公式识别服务返回的响应
|
||||
type FormulaRecognitionResponse struct {
|
||||
Result string `json:"result"`
|
||||
}
|
||||
|
||||
@@ -11,11 +11,11 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/config"
|
||||
"gitea.com/bitwsd/document_ai/internal/model/formula"
|
||||
"gitea.com/bitwsd/document_ai/internal/storage/cache"
|
||||
"gitea.com/bitwsd/document_ai/internal/storage/dao"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
|
||||
"gitea.com/bitwsd/document_ai/pkg/common"
|
||||
"gitea.com/bitwsd/document_ai/pkg/constant"
|
||||
@@ -200,7 +200,7 @@ func (s *RecognitionService) processVLFormula(ctx context.Context, taskID int64)
|
||||
log.Info(ctx, "func", "processVLFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
|
||||
|
||||
// 处理具体任务
|
||||
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL); err != nil {
|
||||
if err := s.processVLFormulaTask(ctx, taskID, task.FileURL, utils.ModelVLQwen3VL32BInstruct); err != nil {
|
||||
log.Error(ctx, "func", "processVLFormulaQueue", "msg", "处理任务失败", "error", err)
|
||||
return
|
||||
}
|
||||
@@ -263,11 +263,11 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
||||
return err
|
||||
}
|
||||
|
||||
downloadURL, err := oss.GetDownloadURL(ctx, fileURL)
|
||||
if err != nil {
|
||||
log.Error(ctx, "func", "processFormulaTask", "msg", "获取下载URL失败", "error", err)
|
||||
return err
|
||||
}
|
||||
// downloadURL, err := oss.GetDownloadURL(ctx, fileURL)
|
||||
// if err != nil {
|
||||
// log.Error(ctx, "func", "processFormulaTask", "msg", "获取下载URL失败", "error", err)
|
||||
// return err
|
||||
// }
|
||||
|
||||
// 将图片转为base64编码
|
||||
base64Image := base64.StdEncoding.EncodeToString(imageData)
|
||||
@@ -275,7 +275,6 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
||||
// 创建JSON请求
|
||||
requestData := map[string]string{
|
||||
"image_base64": base64Image,
|
||||
"img_url": downloadURL,
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(requestData)
|
||||
@@ -288,7 +287,7 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
||||
headers := map[string]string{"Content-Type": "application/json", utils.RequestIDHeaderKey: utils.GetRequestIDFromContext(ctx)}
|
||||
|
||||
// 发送请求时会使用带超时的context
|
||||
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, s.getURL(ctx), bytes.NewReader(jsonData), headers)
|
||||
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, "http://cloud.texpixel.com:1080/formula/predict", bytes.NewReader(jsonData), headers)
|
||||
if err != nil {
|
||||
if ctx.Err() == context.DeadlineExceeded {
|
||||
log.Error(ctx, "func", "processFormulaTask", "msg", "请求超时")
|
||||
@@ -299,13 +298,21 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
log.Info(ctx, "func", "processFormulaTask", "msg", "请求成功", "resp", resp.Body)
|
||||
log.Info(ctx, "func", "processFormulaTask", "msg", "请求成功")
|
||||
body := &bytes.Buffer{}
|
||||
if _, err = body.ReadFrom(resp.Body); err != nil {
|
||||
log.Error(ctx, "func", "processFormulaTask", "msg", "读取响应体失败", "error", err)
|
||||
return err
|
||||
}
|
||||
katex := utils.ToKatex(body.String())
|
||||
log.Info(ctx, "func", "processFormulaTask", "msg", "响应内容", "body", body.String())
|
||||
|
||||
// 解析 JSON 响应
|
||||
var formulaResp formula.FormulaRecognitionResponse
|
||||
if err := json.Unmarshal(body.Bytes(), &formulaResp); err != nil {
|
||||
log.Error(ctx, "func", "processFormulaTask", "msg", "解析响应JSON失败", "error", err)
|
||||
return err
|
||||
}
|
||||
katex := utils.ToKatex(formulaResp.Result)
|
||||
content := &dao.FormulaRecognitionContent{Latex: katex}
|
||||
b, _ := json.Marshal(content)
|
||||
// Save recognition result
|
||||
@@ -322,7 +329,7 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID int64, fileURL string) error {
|
||||
func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID int64, fileURL string, model string) error {
|
||||
isSuccess := false
|
||||
defer func() {
|
||||
if !isSuccess {
|
||||
@@ -349,28 +356,11 @@ func (s *RecognitionService) processVLFormulaTask(ctx context.Context, taskID in
|
||||
log.Error(ctx, "func", "processVLFormulaTask", "msg", "读取图片数据失败", "error", err)
|
||||
return err
|
||||
}
|
||||
prompt := `
|
||||
Please perform OCR on the image and output only LaTeX code.
|
||||
Important instructions:
|
||||
|
||||
* "The image contains mathematical formulas, no plain text."
|
||||
|
||||
* "Preserve all layout, symbols, subscripts, summations, parentheses, etc., exactly as shown."
|
||||
|
||||
* "Use \[ ... \] or align environments to represent multiline math expressions."
|
||||
|
||||
* "Use adaptive symbols such as \left and \right where applicable."
|
||||
|
||||
* "Do not include any extra commentary, template answers, or unrelated equations."
|
||||
|
||||
* "Only output valid LaTeX code based on the actual content of the image, and not change the original mathematical expression."
|
||||
|
||||
* "The output result must be can render by better-react-mathjax."
|
||||
`
|
||||
prompt := `Please perform OCR on the image and output only LaTeX code.`
|
||||
base64Image := base64.StdEncoding.EncodeToString(imageData)
|
||||
|
||||
requestBody := formula.VLFormulaRequest{
|
||||
Model: "Qwen/Qwen2.5-VL-32B-Instruct",
|
||||
Model: model,
|
||||
Stream: false,
|
||||
MaxTokens: 512,
|
||||
Temperature: 0.1,
|
||||
@@ -517,24 +507,12 @@ func (s *RecognitionService) processOneTask(ctx context.Context) {
|
||||
ctx = context.WithValue(ctx, utils.RequestIDKey, task.TaskUUID)
|
||||
log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
|
||||
|
||||
// 处理具体任务
|
||||
if err := s.processFormulaTask(ctx, taskID, task.FileURL); err != nil {
|
||||
// 处理任务
|
||||
err = s.processFormulaTask(ctx, taskID, task.FileURL)
|
||||
if err != nil {
|
||||
log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
log.Info(ctx, "func", "processFormulaQueue", "msg", "处理任务成功", "task_id", taskID)
|
||||
}
|
||||
|
||||
func (s *RecognitionService) getURL(ctx context.Context) string {
|
||||
return "http://cloud.srcstar.com:8045/formula/predict"
|
||||
count, err := cache.IncrURLCount(ctx)
|
||||
if err != nil {
|
||||
log.Error(ctx, "func", "getURL", "msg", "获取URL计数失败", "error", err)
|
||||
return "http://cloud.srcstar.com:8026/formula/predict"
|
||||
}
|
||||
if count%2 == 0 {
|
||||
return "http://cloud.srcstar.com:8026/formula/predict"
|
||||
}
|
||||
return "https://cloud.texpixel.com:1080/formula/predict"
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"gitea.com/bitwsd/document_ai/internal/model/task"
|
||||
"gitea.com/bitwsd/document_ai/internal/storage/dao"
|
||||
"gorm.io/gorm"
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"gitea.com/bitwsd/document_ai/internal/storage/cache"
|
||||
"gitea.com/bitwsd/document_ai/internal/storage/dao"
|
||||
"gitea.com/bitwsd/document_ai/pkg/sms"
|
||||
|
||||
6
main.go
6
main.go
@@ -10,9 +10,9 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"gitea.com/bitwsd/core/common/cors"
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/core/common/middleware"
|
||||
"gitea.com/bitwsd/document_ai/pkg/cors"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/middleware"
|
||||
"gitea.com/bitwsd/document_ai/api"
|
||||
"gitea.com/bitwsd/document_ai/config"
|
||||
"gitea.com/bitwsd/document_ai/internal/storage/cache"
|
||||
|
||||
61
pkg/cors/cors.go
Normal file
61
pkg/cors/cors.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package cors
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
AllowOrigins []string
|
||||
AllowMethods []string
|
||||
AllowHeaders []string
|
||||
ExposeHeaders []string
|
||||
AllowCredentials bool
|
||||
MaxAge int
|
||||
}
|
||||
|
||||
func DefaultConfig() Config {
|
||||
return Config{
|
||||
AllowOrigins: []string{"*"},
|
||||
AllowMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
|
||||
AllowHeaders: []string{"Origin", "Content-Type", "Accept"},
|
||||
ExposeHeaders: []string{"Content-Length"},
|
||||
AllowCredentials: true,
|
||||
MaxAge: 86400, // 24 hours
|
||||
}
|
||||
}
|
||||
|
||||
func Cors(config Config) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
origin := c.Request.Header.Get("Origin")
|
||||
|
||||
// 检查是否允许该来源
|
||||
allowOrigin := "*"
|
||||
for _, o := range config.AllowOrigins {
|
||||
if o == origin {
|
||||
allowOrigin = origin
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
c.Header("Access-Control-Allow-Origin", allowOrigin)
|
||||
c.Header("Access-Control-Allow-Methods", strings.Join(config.AllowMethods, ","))
|
||||
c.Header("Access-Control-Allow-Headers", strings.Join(config.AllowHeaders, ","))
|
||||
c.Header("Access-Control-Expose-Headers", strings.Join(config.ExposeHeaders, ","))
|
||||
c.Header("Access-Control-Max-Age", strconv.Itoa(config.MaxAge))
|
||||
|
||||
if config.AllowCredentials {
|
||||
c.Header("Access-Control-Allow-Credentials", "true")
|
||||
}
|
||||
|
||||
if c.Request.Method == "OPTIONS" {
|
||||
c.AbortWithStatus(204)
|
||||
return
|
||||
}
|
||||
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
)
|
||||
|
||||
// RetryConfig 重试配置
|
||||
|
||||
30
pkg/log/log_config.go
Normal file
30
pkg/log/log_config.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package log
|
||||
|
||||
var (
|
||||
maxSize = 100 // MB
|
||||
outputPath = "/app/logs/app.log"
|
||||
)
|
||||
|
||||
type LogConfig struct {
|
||||
AppName string `yaml:"appName"` // 应用名称
|
||||
Level string `yaml:"level"` // debug, info, warn, error
|
||||
Format string `yaml:"format"` // json, console
|
||||
OutputPath string `yaml:"outputPath"` // 日志文件路径
|
||||
MaxSize int `yaml:"maxSize"` // 单个日志文件最大尺寸,单位MB
|
||||
MaxAge int `yaml:"maxAge"` // 日志保留天数
|
||||
MaxBackups int `yaml:"maxBackups"` // 保留的旧日志文件最大数量
|
||||
Compress bool `yaml:"compress"` // 是否压缩旧日志
|
||||
}
|
||||
|
||||
func DefaultLogConfig() *LogConfig {
|
||||
return &LogConfig{
|
||||
Level: "info",
|
||||
Format: "json",
|
||||
OutputPath: outputPath,
|
||||
MaxSize: maxSize,
|
||||
MaxAge: 7,
|
||||
MaxBackups: 3,
|
||||
Compress: true,
|
||||
}
|
||||
}
|
||||
|
||||
152
pkg/log/logger.go
Normal file
152
pkg/log/logger.go
Normal file
@@ -0,0 +1,152 @@
|
||||
package log
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"gopkg.in/natefinch/lumberjack.v2"
|
||||
)
|
||||
|
||||
type LogType string
|
||||
|
||||
const (
|
||||
TypeAccess LogType = "access"
|
||||
TypeBusiness LogType = "business"
|
||||
TypeError LogType = "error"
|
||||
)
|
||||
|
||||
var (
|
||||
logger zerolog.Logger
|
||||
)
|
||||
|
||||
// Setup 初始化日志配置
|
||||
func Setup(conf LogConfig) error {
|
||||
// 确保日志目录存在
|
||||
if err := os.MkdirAll(filepath.Dir(conf.OutputPath), 0755); err != nil {
|
||||
return fmt.Errorf("create log directory failed: %v", err)
|
||||
}
|
||||
|
||||
// 配置日志轮转
|
||||
writer := &lumberjack.Logger{
|
||||
Filename: conf.OutputPath,
|
||||
MaxSize: conf.MaxSize, // MB
|
||||
MaxAge: conf.MaxAge, // days
|
||||
MaxBackups: conf.MaxBackups,
|
||||
Compress: conf.Compress,
|
||||
}
|
||||
|
||||
// 设置日志级别
|
||||
level, err := zerolog.ParseLevel(conf.Level)
|
||||
if err != nil {
|
||||
level = zerolog.InfoLevel
|
||||
}
|
||||
zerolog.SetGlobalLevel(level)
|
||||
|
||||
// 初始化logger 并添加 app_name
|
||||
logger = zerolog.New(writer).With().
|
||||
Timestamp().
|
||||
Str("app_name", conf.AppName). // 添加 app_name
|
||||
Logger()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// log 统一的日志记录函数
|
||||
func log(ctx context.Context, level zerolog.Level, logType LogType, kv ...interface{}) {
|
||||
if len(kv)%2 != 0 {
|
||||
kv = append(kv, "MISSING")
|
||||
}
|
||||
|
||||
event := logger.WithLevel(level)
|
||||
|
||||
// 添加日志类型
|
||||
event.Str("type", string(logType))
|
||||
|
||||
// 添加请求ID
|
||||
if reqID, exists := ctx.Value("request_id").(string); exists {
|
||||
event.Str("request_id", reqID)
|
||||
}
|
||||
|
||||
// 添加调用位置
|
||||
if pc, file, line, ok := runtime.Caller(2); ok {
|
||||
event.Str("caller", fmt.Sprintf("%s:%d %s", filepath.Base(file), line, runtime.FuncForPC(pc).Name()))
|
||||
}
|
||||
|
||||
// 处理key-value对
|
||||
for i := 0; i < len(kv); i += 2 {
|
||||
key, ok := kv[i].(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
value := kv[i+1]
|
||||
|
||||
switch v := value.(type) {
|
||||
case error:
|
||||
event.AnErr(key, v)
|
||||
case int:
|
||||
event.Int(key, v)
|
||||
case int64:
|
||||
event.Int64(key, v)
|
||||
case float64:
|
||||
event.Float64(key, v)
|
||||
case bool:
|
||||
event.Bool(key, v)
|
||||
case time.Duration:
|
||||
event.Dur(key, v)
|
||||
case time.Time:
|
||||
event.Time(key, v)
|
||||
case []byte:
|
||||
event.Bytes(key, v)
|
||||
case string:
|
||||
event.Str(key, v)
|
||||
default:
|
||||
event.Interface(key, v)
|
||||
}
|
||||
}
|
||||
|
||||
event.Send()
|
||||
}
|
||||
|
||||
// Debug 记录调试日志
|
||||
func Debug(ctx context.Context, kv ...interface{}) {
|
||||
log(ctx, zerolog.DebugLevel, TypeBusiness, kv...)
|
||||
}
|
||||
|
||||
// Info 记录信息日志
|
||||
func Info(ctx context.Context, kv ...interface{}) {
|
||||
log(ctx, zerolog.InfoLevel, TypeBusiness, kv...)
|
||||
}
|
||||
|
||||
// Warn 记录警告日志
|
||||
func Warn(ctx context.Context, kv ...interface{}) {
|
||||
log(ctx, zerolog.WarnLevel, TypeError, kv...)
|
||||
}
|
||||
|
||||
// Error 记录错误日志
|
||||
func Error(ctx context.Context, kv ...interface{}) {
|
||||
log(ctx, zerolog.ErrorLevel, TypeError, kv...)
|
||||
}
|
||||
|
||||
func Fatal(ctx context.Context, kv ...interface{}) {
|
||||
// 获取错误堆栈
|
||||
buf := make([]byte, 4096)
|
||||
n := runtime.Stack(buf, false)
|
||||
|
||||
// 添加堆栈信息到kv
|
||||
newKv := make([]interface{}, 0, len(kv)+2)
|
||||
newKv = append(newKv, kv...)
|
||||
newKv = append(newKv, "stack", string(buf[:n]))
|
||||
|
||||
log(ctx, zerolog.FatalLevel, TypeError, newKv...)
|
||||
}
|
||||
|
||||
// Access 记录访问日志
|
||||
func Access(ctx context.Context, kv ...interface{}) {
|
||||
log(ctx, zerolog.InfoLevel, TypeAccess, kv...)
|
||||
}
|
||||
|
||||
75
pkg/middleware/access.go
Normal file
75
pkg/middleware/access.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
const (
|
||||
maxBodySize = 1024 * 500 // 500KB 限制
|
||||
)
|
||||
|
||||
// 自定义 ResponseWriter 来捕获响应
|
||||
type bodyWriter struct {
|
||||
gin.ResponseWriter
|
||||
body *bytes.Buffer
|
||||
}
|
||||
|
||||
func (w *bodyWriter) Write(b []byte) (int, error) {
|
||||
w.body.Write(b)
|
||||
return w.ResponseWriter.Write(b)
|
||||
}
|
||||
|
||||
func AccessLog() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
start := time.Now()
|
||||
path := c.Request.URL.Path
|
||||
raw := c.Request.URL.RawQuery
|
||||
|
||||
// 处理请求体
|
||||
var reqBody string
|
||||
if c.Request.Body != nil && (c.Request.Method == "POST" || c.Request.Method == "PUT") {
|
||||
// 读取并限制请求体大小
|
||||
bodyBytes, _ := io.ReadAll(io.LimitReader(c.Request.Body, maxBodySize))
|
||||
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) // 重新设置 body
|
||||
reqBody = string(bodyBytes)
|
||||
}
|
||||
|
||||
// 设置自定义 ResponseWriter
|
||||
var responseBody string
|
||||
if !strings.Contains(c.GetHeader("Accept"), "text/event-stream") {
|
||||
bw := &bodyWriter{body: &bytes.Buffer{}, ResponseWriter: c.Writer}
|
||||
c.Writer = bw
|
||||
}
|
||||
|
||||
c.Next()
|
||||
|
||||
// 获取响应体(非 SSE)
|
||||
if writer, ok := c.Writer.(*bodyWriter); ok {
|
||||
responseBody = writer.body.String()
|
||||
if len(responseBody) > maxBodySize {
|
||||
responseBody = responseBody[:maxBodySize] + "... (truncated)"
|
||||
}
|
||||
}
|
||||
|
||||
// 记录访问日志
|
||||
log.Access(c.Request.Context(),
|
||||
"request_id", c.GetString("request_id"),
|
||||
"method", c.Request.Method,
|
||||
"path", path,
|
||||
"query", raw,
|
||||
"ip", c.ClientIP(),
|
||||
"user_agent", c.Request.UserAgent(),
|
||||
"status", c.Writer.Status(),
|
||||
"duration", time.Since(start),
|
||||
"request_body", reqBody,
|
||||
"response_body", responseBody,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
19
pkg/middleware/requestid.go
Normal file
19
pkg/middleware/requestid.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
func RequestID() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
requestID := c.Request.Header.Get("X-Request-ID")
|
||||
if requestID == "" {
|
||||
requestID = uuid.New().String()
|
||||
}
|
||||
c.Request.Header.Set("X-Request-ID", requestID)
|
||||
c.Set("request_id", requestID)
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,8 +12,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/config"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
"github.com/aliyun/aliyun-oss-go-sdk/oss"
|
||||
)
|
||||
|
||||
@@ -125,9 +125,7 @@ func DownloadFile(ctx context.Context, ossPath string) (io.ReadCloser, error) {
|
||||
}
|
||||
|
||||
// Create OSS client
|
||||
client, err := oss.New(endpoint,
|
||||
config.GlobalConfig.Aliyun.OSS.AccessKeyID,
|
||||
config.GlobalConfig.Aliyun.OSS.AccessKeySecret)
|
||||
client, err := oss.New(endpoint, config.GlobalConfig.Aliyun.OSS.AccessKeyID, config.GlobalConfig.Aliyun.OSS.AccessKeySecret)
|
||||
if err != nil {
|
||||
log.Error(ctx, "func", "DownloadFile", "msg", "create oss client failed", "error", err)
|
||||
return nil, err
|
||||
|
||||
@@ -23,6 +23,8 @@ func rmDollarSurr(text string) string {
|
||||
func ToKatex(formula string) string {
|
||||
res := formula
|
||||
|
||||
res = strings.ReplaceAll(res, "\n", "")
|
||||
|
||||
// Remove mbox surrounding
|
||||
res = changeAll(res, `\mbox `, " ", "{", "}", "", "")
|
||||
res = changeAll(res, `\mbox`, " ", "{", "}", "", "")
|
||||
|
||||
6
pkg/utils/model.go
Normal file
6
pkg/utils/model.go
Normal file
@@ -0,0 +1,6 @@
|
||||
package utils
|
||||
|
||||
const (
|
||||
ModelVLDeepSeekOCR = "deepseek-ai/DeepSeek-OCR"
|
||||
ModelVLQwen3VL32BInstruct = "Qwen/Qwen3-VL-32B-Instruct"
|
||||
)
|
||||
@@ -3,7 +3,7 @@ package utils
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitea.com/bitwsd/core/common/log"
|
||||
"gitea.com/bitwsd/document_ai/pkg/log"
|
||||
)
|
||||
|
||||
func SafeGo(fn func()) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
package utils
|
||||
|
||||
const (
|
||||
SiliconFlowToken = "Bearer sk-akbroznlbxikkbiouzasspbbzwgxubnjjtqlujxmxsnvpmhn"
|
||||
SiliconFlowToken = "Bearer sk-wiggxqscvjdveqvwcdywwpipcinglkzkewkcfjnrgjqbdbmc"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user