Compare commits

17 Commits

Author SHA1 Message Date
liuyuanchuang
d1a56a2ab3 fix: panic 2026-01-27 23:23:42 +08:00
liuyuanchuang
41df42dea4 feat: decode uid 2026-01-27 22:28:13 +08:00
liuyuanchuang
be3e82fc2e feat: decode user_id 2026-01-27 22:26:25 +08:00
liuyuanchuang
9e01ee79f1 Merge branch 'master' into test 2026-01-27 22:22:48 +08:00
liuyuanchuang
52c9e48a0f fix: rm router db 2026-01-27 22:22:06 +08:00
liuyuanchuang
9b7657cd73 Merge branch 'master' into test 2026-01-27 22:20:27 +08:00
liuyuanchuang
a04eedc423 feat: add track point 2026-01-27 22:20:07 +08:00
liuyuanchuang
a5f1ad153e refactor: update package path 2026-01-27 21:56:21 +08:00
liuyuanchuang
db3beeddb9 Merge branch 'master' of https://code.texpixel.com/YogeLiu/doc_ai_backed into test 2026-01-27 17:40:47 +08:00
eabfd83fdf feat: add scrip; 2026-01-27 17:40:15 +08:00
97c3617731 feat: replace export url 2026-01-25 09:10:54 +08:00
ece026bea2 feat: add new path for recognize 2026-01-25 09:10:54 +08:00
b9124451d2 feat: update default init env 2026-01-25 09:08:51 +08:00
2e158d3fee feat: add new path for recognize 2025-12-31 17:53:12 +08:00
be1047618e Merge branch 'master' into test 2025-12-27 22:22:15 +08:00
3293f1f8a5 fix: downgrade error 2025-12-27 22:21:34 +08:00
ff6795b469 feat: convert markdown to mml 2025-12-27 22:06:48 +08:00
36 changed files with 1913 additions and 112 deletions

View File

@@ -1,11 +1,12 @@
package api
import (
"gitea.com/bitwsd/document_ai/api/v1/formula"
"gitea.com/bitwsd/document_ai/api/v1/oss"
"gitea.com/bitwsd/document_ai/api/v1/task"
"gitea.com/bitwsd/document_ai/api/v1/user"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/api/v1/analytics"
"gitea.com/texpixel/document_ai/api/v1/formula"
"gitea.com/texpixel/document_ai/api/v1/oss"
"gitea.com/texpixel/document_ai/api/v1/task"
"gitea.com/texpixel/document_ai/api/v1/user"
"gitea.com/texpixel/document_ai/pkg/common"
"github.com/gin-gonic/gin"
)
@@ -47,6 +48,13 @@ func SetupRouter(engine *gin.RouterGroup) {
userRouter.GET("/info", common.MustAuthMiddleware(), userEndpoint.GetUserInfo)
}
}
// 数据埋点路由
analyticsRouter := v1.Group("/analytics", common.GetAuthMiddleware())
{
analyticsHandler := analytics.NewAnalyticsHandler()
analyticsRouter.POST("/track", analyticsHandler.TrackEvent)
}
}
}

View File

@@ -0,0 +1,50 @@
package analytics
import (
"net/http"
"gitea.com/texpixel/document_ai/internal/model/analytics"
"gitea.com/texpixel/document_ai/internal/service"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/log"
"github.com/gin-gonic/gin"
)
// AnalyticsHandler exposes the analytics (event tracking) HTTP endpoints and
// delegates the actual work to the analytics service it holds.
type AnalyticsHandler struct {
// analyticsService performs the event tracking requested by the handlers.
analyticsService *service.AnalyticsService
}
// NewAnalyticsHandler returns an AnalyticsHandler backed by a newly created
// analytics service.
func NewAnalyticsHandler() *AnalyticsHandler {
	handler := new(AnalyticsHandler)
	handler.analyticsService = service.NewAnalyticsService()
	return handler
}
// TrackEvent records a single analytics event.
//
// The JSON body is bound to a TrackEventRequest, after which the user ID
// stored in the gin context overwrites whatever user_id the body carried.
// Every outcome is answered with HTTP 200; success or failure is conveyed by
// the response envelope built by the common package.
//
// @Summary 记录单个埋点事件
// @Description 记录用户行为埋点事件
// @Tags Analytics
// @Accept json
// @Produce json
// @Param request body analytics.TrackEventRequest true "事件信息"
// @Success 200 {object} common.Response
// @Router /api/v1/analytics/track [post]
func (h *AnalyticsHandler) TrackEvent(c *gin.Context) {
	ctx := c.Request.Context()

	payload := analytics.TrackEventRequest{}
	bindErr := c.ShouldBindJSON(&payload)
	if bindErr != nil {
		log.Error(ctx, "bind request failed", "error", bindErr)
		c.JSON(http.StatusOK, common.ErrorResponse(c, common.CodeParamError, "invalid request"))
		return
	}

	// The identity from the request context always wins over the body value.
	payload.UserID = common.GetUserIDFromContext(c)

	trackErr := h.analyticsService.TrackEvent(ctx, &payload)
	if trackErr != nil {
		log.Error(ctx, "track event failed", "error", trackErr)
		c.JSON(http.StatusOK, common.ErrorResponse(c, common.CodeSystemError, "failed to track event"))
		return
	}

	c.JSON(http.StatusOK, common.SuccessResponse(c, "success"))
}

View File

@@ -5,12 +5,12 @@ import (
"path/filepath"
"strings"
"gitea.com/bitwsd/document_ai/internal/model/formula"
"gitea.com/bitwsd/document_ai/internal/service"
"gitea.com/bitwsd/document_ai/internal/storage/dao"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/bitwsd/document_ai/pkg/constant"
"gitea.com/bitwsd/document_ai/pkg/utils"
"gitea.com/texpixel/document_ai/internal/model/formula"
"gitea.com/texpixel/document_ai/internal/service"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/constant"
"gitea.com/texpixel/document_ai/pkg/utils"
"github.com/gin-gonic/gin"
)

View File

@@ -8,11 +8,11 @@ import (
"strings"
"time"
"gitea.com/bitwsd/document_ai/config"
"gitea.com/bitwsd/document_ai/internal/storage/dao"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/bitwsd/document_ai/pkg/oss"
"gitea.com/bitwsd/document_ai/pkg/utils"
"gitea.com/texpixel/document_ai/config"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/oss"
"gitea.com/texpixel/document_ai/pkg/utils"
"github.com/gin-gonic/gin"
"gorm.io/gorm"
)

View File

@@ -3,10 +3,10 @@ package task
import (
"net/http"
"gitea.com/bitwsd/document_ai/internal/model/task"
"gitea.com/bitwsd/document_ai/internal/service"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/internal/model/task"
"gitea.com/texpixel/document_ai/internal/service"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/log"
"github.com/gin-gonic/gin"
)

View File

@@ -3,13 +3,13 @@ package user
import (
"net/http"
"gitea.com/bitwsd/document_ai/config"
model "gitea.com/bitwsd/document_ai/internal/model/user"
"gitea.com/bitwsd/document_ai/internal/service"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/bitwsd/document_ai/pkg/constant"
"gitea.com/bitwsd/document_ai/pkg/jwt"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/config"
model "gitea.com/texpixel/document_ai/internal/model/user"
"gitea.com/texpixel/document_ai/internal/service"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/constant"
"gitea.com/texpixel/document_ai/pkg/jwt"
"gitea.com/texpixel/document_ai/pkg/log"
"github.com/gin-gonic/gin"
)

73
cmd/migrate/README.md Normal file
View File

@@ -0,0 +1,73 @@
# 数据迁移工具
用于将测试数据库的数据迁移到生产数据库,避免ID冲突,使用事务确保数据一致性。
## 功能特性
- ✅ 自动避免ID冲突(使用数据库自增ID)
- ✅ 使用事务确保每个任务和结果数据的一致性
- ✅ 自动跳过已存在的任务(基于task_uuid)
- ✅ 保留原始时间戳
- ✅ 处理NULL值
- ✅ 详细的日志输出和统计信息
## 使用方法
### 基本用法
```bash
# 从dev环境迁移到prod环境
go run cmd/migrate/main.go -test-env=dev -prod-env=prod
# 从prod环境迁移到dev环境(测试反向迁移)
go run cmd/migrate/main.go -test-env=prod -prod-env=dev
```
### 参数说明
- `-test-env`: 测试环境配置文件名(dev/prod),默认值:dev
- `-prod-env`: 生产环境配置文件名(dev/prod),默认值:prod
### 编译后使用
```bash
# 编译
go build -o migrate cmd/migrate/main.go
# 运行
./migrate -test-env=dev -prod-env=prod
```
## 工作原理
1. **连接数据库**:同时连接测试数据库和生产数据库
2. **读取数据**:从测试数据库读取所有任务和结果数据(LEFT JOIN)
3. **检查重复**:基于`task_uuid`检查生产数据库中是否已存在
4. **事务迁移**:为每个任务创建独立事务:
- 创建任务记录(自动生成新ID)
- 如果存在结果数据,创建结果记录(关联新任务ID)
- 提交事务或回滚
5. **统计报告**:输出迁移统计信息
## 注意事项
1. **配置文件**:确保`config/config_dev.yaml`和`config/config_prod.yaml`存在且配置正确
2. **数据库权限**:确保数据库用户有读写权限
3. **网络连接**:确保能同时连接到两个数据库
4. **数据备份**:迁移前建议备份生产数据库
5. **ID冲突**:脚本会自动处理ID冲突(使用数据库自增ID),不会覆盖现有数据
## 输出示例
```
从测试数据库读取到 100 条任务记录
[1/100] 创建任务成功: task_uuid=xxx, 新ID=1001
[1/100] 创建结果成功: task_id=1001
[2/100] 跳过已存在的任务: task_uuid=yyy, id=1002
...
迁移完成统计:
成功: 95 条
跳过: 3 条
失败: 2 条
数据迁移完成!
```

255
cmd/migrate/main.go Normal file
View File

@@ -0,0 +1,255 @@
package main
import (
"context"
"flag"
"fmt"
"log"
"time"
"gitea.com/texpixel/document_ai/config"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"github.com/spf13/viper"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"gorm.io/gorm/logger"
)
func main() {
// 解析命令行参数
testEnv := flag.String("test-env", "dev", "测试环境配置 (dev/prod)")
prodEnv := flag.String("prod-env", "prod", "生产环境配置 (dev/prod)")
flag.Parse()
// 加载测试环境配置
testConfigPath := fmt.Sprintf("./config/config_%s.yaml", *testEnv)
testConfig, err := loadDatabaseConfig(testConfigPath)
if err != nil {
log.Fatalf("加载测试环境配置失败: %v", err)
}
// 连接测试数据库
testDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8mb4&parseTime=True&loc=Asia%%2FShanghai",
testConfig.Username, testConfig.Password, testConfig.Host, testConfig.Port, testConfig.DBName)
testDB, err := gorm.Open(mysql.Open(testDSN), &gorm.Config{
Logger: logger.Default.LogMode(logger.Info),
})
if err != nil {
log.Fatalf("连接测试数据库失败: %v", err)
}
// 加载生产环境配置
prodConfigPath := fmt.Sprintf("./config/config_%s.yaml", *prodEnv)
prodConfig, err := loadDatabaseConfig(prodConfigPath)
if err != nil {
log.Fatalf("加载生产环境配置失败: %v", err)
}
// 连接生产数据库
prodDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8mb4&parseTime=True&loc=Asia%%2FShanghai",
prodConfig.Username, prodConfig.Password, prodConfig.Host, prodConfig.Port, prodConfig.DBName)
prodDB, err := gorm.Open(mysql.Open(prodDSN), &gorm.Config{
Logger: logger.Default.LogMode(logger.Info),
})
if err != nil {
log.Fatalf("连接生产数据库失败: %v", err)
}
// 执行迁移
if err := migrateData(testDB, prodDB); err != nil {
log.Fatalf("数据迁移失败: %v", err)
}
log.Println("数据迁移完成!")
}
// migrateData copies every recognition task, together with its recognition
// result when one exists, from testDB into prodDB.
//
// All tasks are read with a single query (tasks LEFT JOIN results, ordered by
// task ID). Each row is then written inside its own transaction on prodDB so
// that a failure on one task does not affect the others. A task whose
// task_uuid already exists in prodDB is skipped. New rows do not carry the
// source ID, and the source created/updated timestamps are copied onto them.
//
// Only a failure to read the source data is returned as an error (wrapped so
// callers can inspect the cause). Per-task failures are logged and counted in
// the final summary, after which nil is returned.
func migrateData(testDB, prodDB *gorm.DB) error {
	_ = context.Background() // reserved for future use

	// TaskWithResult is one row of the join query below. The result columns
	// are pointers because they are NULL for tasks without a result row.
	type TaskWithResult struct {
		// Task columns.
		TaskID        int64     `gorm:"column:id"`
		UserID        int64     `gorm:"column:user_id"`
		TaskUUID      string    `gorm:"column:task_uuid"`
		FileName      string    `gorm:"column:file_name"`
		FileHash      string    `gorm:"column:file_hash"`
		FileURL       string    `gorm:"column:file_url"`
		TaskType      string    `gorm:"column:task_type"`
		Status        int       `gorm:"column:status"`
		CompletedAt   time.Time `gorm:"column:completed_at"`
		Remark        string    `gorm:"column:remark"`
		IP            string    `gorm:"column:ip"`
		TaskCreatedAt time.Time `gorm:"column:created_at"`
		TaskUpdatedAt time.Time `gorm:"column:updated_at"`
		// Result columns.
		ResultID        *int64     `gorm:"column:result_id"`
		ResultTaskID    *int64     `gorm:"column:result_task_id"`
		ResultTaskType  *string    `gorm:"column:result_task_type"`
		Latex           *string    `gorm:"column:latex"`
		Markdown        *string    `gorm:"column:markdown"`
		MathML          *string    `gorm:"column:mathml"`
		ResultCreatedAt *time.Time `gorm:"column:result_created_at"`
		ResultUpdatedAt *time.Time `gorm:"column:result_updated_at"`
	}

	var tasksWithResults []TaskWithResult
	query := `
		SELECT
			t.id,
			t.user_id,
			t.task_uuid,
			t.file_name,
			t.file_hash,
			t.file_url,
			t.task_type,
			t.status,
			t.completed_at,
			t.remark,
			t.ip,
			t.created_at,
			t.updated_at,
			r.id as result_id,
			r.task_id as result_task_id,
			r.task_type as result_task_type,
			r.latex,
			r.markdown,
			r.mathml,
			r.created_at as result_created_at,
			r.updated_at as result_updated_at
		FROM recognition_tasks t
		LEFT JOIN recognition_results r ON t.id = r.task_id
		ORDER BY t.id
	`
	if err := testDB.Raw(query).Scan(&tasksWithResults).Error; err != nil {
		return fmt.Errorf("读取测试数据失败: %w", err)
	}

	total := len(tasksWithResults)
	log.Printf("从测试数据库读取到 %d 条任务记录", total)

	successCount := 0
	skipCount := 0
	errorCount := 0

	// One transaction per task: a failing task is rolled back on its own and
	// the loop moves on to the next one.
	for i, item := range tasksWithResults {
		seq := i + 1

		tx := prodDB.Begin()
		if err := tx.Error; err != nil {
			// Without a transaction there is nothing to roll back.
			log.Printf("[%d/%d] 开启事务失败: task_uuid=%s, error=%v", seq, total, item.TaskUUID, err)
			errorCount++
			continue
		}

		// Skip tasks whose task_uuid is already present in the destination.
		var existingTask dao.RecognitionTask
		err := tx.Where("task_uuid = ?", item.TaskUUID).First(&existingTask).Error
		if err == nil {
			log.Printf("[%d/%d] 跳过已存在的任务: task_uuid=%s, id=%d", seq, total, item.TaskUUID, existingTask.ID)
			tx.Rollback()
			skipCount++
			continue
		}
		if err != gorm.ErrRecordNotFound {
			log.Printf("[%d/%d] 检查任务是否存在时出错: task_uuid=%s, error=%v", seq, total, item.TaskUUID, err)
			tx.Rollback()
			errorCount++
			continue
		}

		// Create the task without an ID so the destination assigns one.
		newTask := &dao.RecognitionTask{
			UserID:      item.UserID,
			TaskUUID:    item.TaskUUID,
			FileName:    item.FileName,
			FileHash:    item.FileHash,
			FileURL:     item.FileURL,
			TaskType:    dao.TaskType(item.TaskType),
			Status:      dao.TaskStatus(item.Status),
			CompletedAt: item.CompletedAt,
			Remark:      item.Remark,
			IP:          item.IP,
		}
		// Carry over the source timestamps.
		newTask.CreatedAt = item.TaskCreatedAt
		newTask.UpdatedAt = item.TaskUpdatedAt

		if err := tx.Create(newTask).Error; err != nil {
			log.Printf("[%d/%d] 创建任务失败: task_uuid=%s, error=%v", seq, total, item.TaskUUID, err)
			tx.Rollback()
			errorCount++
			continue
		}
		log.Printf("[%d/%d] 创建任务成功: task_uuid=%s, 新ID=%d", seq, total, item.TaskUUID, newTask.ID)

		// A non-nil result ID means the LEFT JOIN found a result row.
		if item.ResultID != nil {
			// NULL text columns become empty strings.
			latex := ""
			if item.Latex != nil {
				latex = *item.Latex
			}
			markdown := ""
			if item.Markdown != nil {
				markdown = *item.Markdown
			}
			mathml := ""
			if item.MathML != nil {
				mathml = *item.MathML
			}

			newResult := dao.RecognitionResult{
				TaskID:   newTask.ID, // link to the newly created task
				TaskType: dao.TaskType(item.TaskType),
				Latex:    latex,
				Markdown: markdown,
				MathML:   mathml,
			}
			// Carry over the source timestamps when present.
			if item.ResultCreatedAt != nil {
				newResult.CreatedAt = *item.ResultCreatedAt
			}
			if item.ResultUpdatedAt != nil {
				newResult.UpdatedAt = *item.ResultUpdatedAt
			}

			if err := tx.Create(&newResult).Error; err != nil {
				log.Printf("[%d/%d] 创建结果失败: task_id=%d, error=%v", seq, total, newTask.ID, err)
				tx.Rollback() // undoes the task insert as well
				errorCount++
				continue
			}
			log.Printf("[%d/%d] 创建结果成功: task_id=%d", seq, total, newTask.ID)
		}

		if err := tx.Commit().Error; err != nil {
			log.Printf("[%d/%d] 提交事务失败: task_uuid=%s, error=%v", seq, total, item.TaskUUID, err)
			errorCount++
			continue
		}
		successCount++
	}

	log.Printf("迁移完成统计:")
	log.Printf(" 成功: %d 条", successCount)
	log.Printf(" 跳过: %d 条", skipCount)
	log.Printf(" 失败: %d 条", errorCount)
	return nil
}
// loadDatabaseConfig reads the configuration file at configPath and decodes
// its "database" section. On any error the zero DatabaseConfig is returned
// together with that error.
func loadDatabaseConfig(configPath string) (config.DatabaseConfig, error) {
	reader := viper.New()
	reader.SetConfigFile(configPath)

	readErr := reader.ReadInConfig()
	if readErr != nil {
		return config.DatabaseConfig{}, readErr
	}

	parsed := config.DatabaseConfig{}
	if decodeErr := reader.UnmarshalKey("database", &parsed); decodeErr != nil {
		// Return a clean zero value rather than a partially decoded struct.
		return config.DatabaseConfig{}, decodeErr
	}
	return parsed, nil
}

View File

@@ -1,7 +1,7 @@
package config
import (
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/pkg/log"
"github.com/spf13/viper"
)

View File

@@ -0,0 +1,284 @@
# 数据埋点 API 调用示例
## 基础信息
- **接口路径**: `/doc_ai/v1/analytics/track`
- **请求方法**: `POST`
- **Content-Type**: `application/json`
- **认证**: 可选(Bearer Token)
## 1. 基础埋点事件(最小参数)
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{
"user_id": 12345,
"event_name": "button_click"
}'
```
## 2. 完整埋点事件(包含所有字段)
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{
"user_id": 12345,
"event_name": "formula_recognition_start",
"properties": {
"file_name": "math_formula.png",
"file_size": 102400,
"file_type": "image/png",
"upload_method": "drag_drop"
},
"device_info": {
"user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
"screen_width": 1920,
"screen_height": 1080,
"language": "zh-CN",
"timezone": "Asia/Shanghai",
"platform": "MacIntel"
},
"meta_data": {
"task_id": "task_123456",
"timestamp": 1706342400000
}
}'
```
## 3. 页面浏览事件
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{
"user_id": 12345,
"event_name": "page_view",
"properties": {
"page_url": "https://example.com/home",
"page_title": "首页",
"page_name": "home",
"referrer": "https://example.com/login"
},
"device_info": {
"user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
"screen_width": 1920,
"screen_height": 1080
}
}'
```
## 4. 任务相关事件
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{
"user_id": 12345,
"event_name": "task_create",
"properties": {
"task_type": "formula_recognition",
"file_name": "equation.png",
"file_size": 204800
},
"meta_data": {
"task_id": "task_789012"
}
}'
```
## 5. 任务完成事件
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{
"user_id": 12345,
"event_name": "task_complete",
"properties": {
"duration_seconds": 5.2,
"success": true,
"result_type": "latex"
},
"meta_data": {
"task_id": "task_789012"
}
}'
```
## 6. 表单提交事件
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{
"user_id": 12345,
"event_name": "form_submit",
"properties": {
"form_name": "user_registration",
"form_fields": ["email", "password", "phone"],
"success": true,
"validation_errors": 0
}
}'
```
## 7. 文件上传事件
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{
"user_id": 12345,
"event_name": "file_upload",
"properties": {
"file_name": "document.pdf",
"file_size": 5242880,
"file_type": "application/pdf",
"upload_source": "drag_drop",
"upload_duration_ms": 1200
}
}'
```
## 8. 错误追踪事件
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{
"user_id": 12345,
"event_name": "error_occurred",
"properties": {
"error_type": "network_error",
"error_message": "Failed to fetch data",
"error_code": "NET_001",
"page_url": "https://example.com/tasks",
"user_action": "click_submit_button"
}
}'
```
## 9. 使用环境变量(推荐)
```bash
# 设置环境变量
export API_BASE_URL="http://localhost:8080"
export JWT_TOKEN="YOUR_JWT_TOKEN"
export USER_ID=12345
# 调用接口
curl -X POST ${API_BASE_URL}/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-H "Authorization: Bearer ${JWT_TOKEN}" \
-d "{
\"user_id\": ${USER_ID},
\"event_name\": \"button_click\",
\"properties\": {
\"button_name\": \"submit\",
\"button_position\": \"bottom\"
}
}"
```
## 10. 使用 JSON 文件
创建 `event.json` 文件:
```json
{
"user_id": 12345,
"event_name": "custom_event",
"properties": {
"action": "click",
"element": "button",
"value": "submit"
},
"device_info": {
"user_agent": "Mozilla/5.0",
"screen_width": 1920,
"screen_height": 1080
},
"meta_data": {
"task_id": "task_123"
}
}
```
然后执行:
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d @event.json
```
## 11. 批量埋点接口
```bash
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track/batch \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{
"events": [
{
"user_id": 12345,
"event_name": "page_view",
"properties": {
"page_name": "home"
}
},
{
"user_id": 12345,
"event_name": "button_click",
"properties": {
"button_name": "start"
}
}
]
}'
```
## 响应示例
### 成功响应
```json
{
"code": 200,
"message": "success",
"data": null
}
```
### 错误响应
```json
{
"code": 400,
"message": "invalid request",
"data": null
}
```
## 注意事项
1. **user_id** 和 **event_name** 是必填字段
2. **properties**、**device_info**、**meta_data** 都是可选字段,类型为 JSON 对象
3. 如果提供了 Authorization header,token 中的 user_id 会被设置到上下文中,但请求体中的 user_id 仍然需要提供(注意:服务端处理请求时会用上下文中的 user_id 覆盖请求体中的 user_id)
4. 建议在生产环境中始终使用 HTTPS
5. 批量接口最多支持 100 个事件
## 测试命令(本地开发)
```bash
# 最简单的测试
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{"user_id": 1, "event_name": "test_event"}'
# 查看响应详情
curl -X POST http://localhost:8080/doc_ai/v1/analytics/track \
-H "Content-Type: application/json" \
-d '{"user_id": 1, "event_name": "test_event"}' \
-v
```

308
frontend-sdk/analytics.ts Normal file
View File

@@ -0,0 +1,308 @@
// Analytics SDK for Frontend
// 前端数据埋点 SDK
/** Free-form key/value properties attached to a tracked event. */
interface EventProperties {
[key: string]: any;
}
/**
 * Device/browser description sent with each event. The SDK fills every field
 * from `navigator`, `window.screen` and `Intl`; callers may override any of
 * them per event.
 */
interface DeviceInfo {
user_agent?: string;
screen_width?: number;
screen_height?: number;
language?: string;
timezone?: string;
platform?: string;
}
/**
 * Extra metadata sent with an event. `task_id` is the one named key; any
 * other keys are passed through unchanged.
 */
interface MetaData {
task_id?: string | number;
[key: string]: any;
}
/**
 * Caller-supplied description of one event. Only `event_name` is required;
 * the SDK adds the user ID, device information and a timestamp when sending.
 */
interface TrackEventParams {
event_name: string;
properties?: EventProperties;
device_info?: DeviceInfo;
meta_data?: MetaData;
}
/** Options accepted by the Analytics constructor. */
interface AnalyticsConfig {
// Base URL of the analytics API; "/track" and "/track/batch" are appended to it.
apiUrl: string;
// When set, sent as an "Authorization: Bearer <token>" header.
token?: string;
// Initial user ID; it can also be supplied later through setUserId.
userId?: number | string;
// Registers the automatic page-view and queue-flush listeners. Defaults to true.
enableAutoTrack?: boolean;
// Logs payloads and results to the console. Defaults to false.
debug?: boolean;
}
/**
 * Browser-side analytics client.
 *
 * Events are posted to `${apiUrl}/track` (single) or `${apiUrl}/track/batch`
 * (batch). A single event that fails to send is kept in an in-memory retry
 * queue, which is flushed as one batch when the page is hidden or unloaded
 * (with auto tracking enabled) or when flush() is called.
 *
 * Nothing is sent until a user ID is known, via the config or setUserId().
 */
class Analytics {
  /** Upper bound on events held for retry; the oldest are dropped beyond it. */
  private static readonly MAX_QUEUE_SIZE = 1000;

  private config: AnalyticsConfig;
  private userId: number | string | null = null;
  private eventQueue: TrackEventParams[] = [];
  private isSending: boolean = false;

  constructor(config: AnalyticsConfig) {
    // Caller-supplied values override the defaults.
    this.config = {
      enableAutoTrack: true,
      debug: false,
      ...config,
    };
    if (this.config.userId) {
      this.userId = this.config.userId;
    }
    // Automatic page-view tracking and queue flushing.
    if (this.config.enableAutoTrack) {
      this.initAutoTrack();
    }
  }

  /**
   * Sets the user ID attached to all subsequent events.
   */
  setUserId(userId: number | string) {
    this.userId = userId;
  }

  /**
   * Collects device information from the browser environment.
   */
  private getDeviceInfo(): DeviceInfo {
    return {
      user_agent: navigator.userAgent,
      screen_width: window.screen.width,
      screen_height: window.screen.height,
      language: navigator.language,
      timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
      platform: navigator.platform,
    };
  }

  /**
   * Builds the wire payload for one event. Caller-supplied device_info and
   * meta_data override the automatically collected values, so an event that
   * already carries a timestamp keeps it.
   */
  private buildEvent(params: TrackEventParams) {
    return {
      user_id: this.userId,
      event_name: params.event_name,
      properties: params.properties || {},
      device_info: {
        ...this.getDeviceInfo(),
        ...params.device_info,
      },
      meta_data: {
        timestamp: Date.now(),
        ...params.meta_data,
      },
    };
  }

  /**
   * POSTs a JSON payload to `${apiUrl}${path}`. Rejects on network errors and
   * on non-2xx responses; failureMessage prefixes the error for the latter.
   */
  private async post(path: string, payload: unknown, failureMessage: string): Promise<void> {
    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
    if (this.config.token) {
      headers.Authorization = `Bearer ${this.config.token}`;
    }
    const response = await fetch(`${this.config.apiUrl}${path}`, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      throw new Error(`${failureMessage}: ${response.statusText}`);
    }
  }

  /**
   * Appends events to the retry queue, dropping the oldest entries once the
   * queue exceeds MAX_QUEUE_SIZE so a long outage cannot grow memory without
   * bound.
   */
  private enqueue(events: TrackEventParams[]) {
    this.eventQueue.push(...events);
    const overflow = this.eventQueue.length - Analytics.MAX_QUEUE_SIZE;
    if (overflow > 0) {
      this.eventQueue.splice(0, overflow);
      if (this.config.debug) {
        console.warn(`Analytics: retry queue full, dropped ${overflow} oldest event(s)`);
      }
    }
  }

  /**
   * Sends a batch and rejects on failure. Callers decide how to handle it.
   */
  private async sendBatch(events: TrackEventParams[]): Promise<void> {
    const batchData = {
      events: events.map((params) => this.buildEvent(params)),
    };
    if (this.config.debug) {
      console.log('Analytics Track Batch:', batchData);
    }
    await this.post('/track/batch', batchData, 'Failed to track batch events');
    if (this.config.debug) {
      console.log('Analytics: Batch events tracked successfully');
    }
  }

  /**
   * Records a single event. Never rejects: a failed send is logged and the
   * event is queued for a later batch retry.
   */
  async track(params: TrackEventParams): Promise<void> {
    if (!this.userId) {
      console.warn('Analytics: userId not set, event will not be tracked');
      return;
    }
    const eventData = this.buildEvent(params);
    if (this.config.debug) {
      console.log('Analytics Track:', eventData);
    }
    try {
      await this.post('/track', eventData, 'Failed to track event');
      if (this.config.debug) {
        console.log('Analytics: Event tracked successfully');
      }
    } catch (error) {
      console.error('Analytics: Failed to track event', error);
      // Queue for retry with the device info and timestamp captured now, so
      // the retried event reports when it happened, not when it was re-sent.
      this.enqueue([
        {
          ...params,
          device_info: eventData.device_info,
          meta_data: eventData.meta_data,
        },
      ]);
    }
  }

  /**
   * Records several events in one request. Never rejects: a failed send is
   * logged and the events are not retried.
   */
  async trackBatch(events: TrackEventParams[]): Promise<void> {
    if (!this.userId) {
      console.warn('Analytics: userId not set, events will not be tracked');
      return;
    }
    try {
      await this.sendBatch(events);
    } catch (error) {
      console.error('Analytics: Failed to track batch events', error);
    }
  }

  /**
   * Records a page view for the current location.
   */
  trackPageView(pageName?: string) {
    this.track({
      event_name: 'page_view',
      properties: {
        page_url: window.location.href,
        page_title: document.title,
        page_name: pageName || document.title,
        referrer: document.referrer,
      },
    });
  }

  /**
   * Records a click on the named element.
   */
  trackClick(elementName: string, properties?: EventProperties) {
    this.track({
      event_name: 'click',
      properties: {
        element_name: elementName,
        page_url: window.location.href,
        ...properties,
      },
    });
  }

  /**
   * Records a form submission.
   */
  trackFormSubmit(formName: string, properties?: EventProperties) {
    this.track({
      event_name: 'form_submit',
      properties: {
        form_name: formName,
        page_url: window.location.href,
        ...properties,
      },
    });
  }

  /**
   * Records a task event named `task_<action>` tagged with the task ID.
   */
  trackTask(taskId: string | number, action: string, properties?: EventProperties) {
    this.track({
      event_name: `task_${action}`,
      properties: {
        action,
        ...properties,
      },
      meta_data: {
        task_id: taskId,
      },
    });
  }

  /**
   * Registers the automatic page-view and queue-flush listeners.
   */
  private initAutoTrack() {
    // Record the page view once the page has finished loading.
    if (document.readyState === 'complete') {
      this.trackPageView();
    } else {
      window.addEventListener('load', () => this.trackPageView());
    }
    // Try to send queued events before the page goes away.
    window.addEventListener('beforeunload', () => {
      if (this.eventQueue.length > 0) {
        this.flushQueue();
      }
    });
    // Also flush when the page becomes hidden.
    document.addEventListener('visibilitychange', () => {
      if (document.hidden && this.eventQueue.length > 0) {
        this.flushQueue();
      }
    });
  }

  /**
   * Sends the queued events as one batch. If the send fails, the events go
   * back to the front of the queue so a later flush can retry them.
   */
  private flushQueue() {
    if (this.isSending || this.eventQueue.length === 0) {
      return;
    }
    if (!this.userId) {
      console.warn('Analytics: userId not set, events will not be tracked');
      return;
    }
    this.isSending = true;
    const eventsToSend = this.eventQueue;
    this.eventQueue = [];
    this.sendBatch(eventsToSend)
      .catch((error) => {
        console.error('Analytics: Failed to track batch events', error);
        // Keep the original order: retried events go before newer ones.
        const newer = this.eventQueue;
        this.eventQueue = [];
        this.enqueue([...eventsToSend, ...newer]);
      })
      .finally(() => {
        this.isSending = false;
      });
  }

  /**
   * Flushes the retry queue on demand.
   */
  flush() {
    this.flushQueue();
  }
}
// 导出单例实例
let analyticsInstance: Analytics | null = null;
/**
 * Creates the shared Analytics instance, replacing any previous one, and
 * returns it.
 */
export function initAnalytics(config: AnalyticsConfig): Analytics {
  const created = new Analytics(config);
  analyticsInstance = created;
  return created;
}
/**
 * Returns the shared Analytics instance.
 * Throws if initAnalytics has not been called yet.
 */
export function getAnalytics(): Analytics {
  const current = analyticsInstance;
  if (current === null) {
    throw new Error('Analytics not initialized. Call initAnalytics first.');
  }
  return current;
}
export default Analytics;

View File

@@ -0,0 +1,217 @@
// Analytics SDK 使用示例
import { initAnalytics, getAnalytics } from './analytics';
// 1. Initialize the SDK
const analytics = initAnalytics({
apiUrl: 'https://your-api-domain.com/doc_ai/v1/analytics',
token: 'your-auth-token', // obtained after login
userId: 12345, // user ID
enableAutoTrack: true, // enable automatic tracking (page views, etc.)
debug: true, // enable debug logging during development
});
// 2. Set the user ID (after login)
analytics.setUserId(12345);
// 3. Record a page view
analytics.trackPageView('首页');
// 4. Record a click event
const handleButtonClick = () => {
  const details = { button_text: '提交', button_position: 'bottom' };
  analytics.trackClick('提交按钮', details);
};
// 5. Record a form submission
const handleFormSubmit = (formData: any) => {
  const fieldNames = Object.keys(formData);
  analytics.trackFormSubmit('用户注册表单', { form_fields: fieldNames, success: true });
};
// 6. Record task-related events: task created
const handleTaskCreate = (taskId: string) => {
  const details = { task_type: 'formula_recognition', file_type: 'image/png' };
  analytics.trackTask(taskId, 'create', details);
};
// Task-related events: task completed
const handleTaskComplete = (taskId: string) => {
  const outcome = { duration_seconds: 5.2, success: true };
  analytics.trackTask(taskId, 'complete', outcome);
};
// 7. Record a custom event
const handleFileUpload = (file: File) => {
  const { name, size, type } = file;
  analytics.track({
    event_name: 'file_upload',
    properties: { file_name: name, file_size: size, file_type: type },
    meta_data: { upload_source: 'drag_drop' },
  });
};
// 8. Record several events in one batch
const handleBatchActions = () => {
  const saveClick = { event_name: 'button_click', properties: { button_name: 'save' } };
  const pdfExport = { event_name: 'data_export', properties: { format: 'pdf' } };
  analytics.trackBatch([saveClick, pdfExport]);
};
// 9. React 组件中使用
import React, { useEffect } from 'react';
// Example React component: records a page view on mount and a click on the
// button.
function HomePage() {
  const handleClick = () => {
    getAnalytics().trackClick('首页-开始按钮');
  };

  // Runs once, when the component mounts.
  useEffect(() => {
    getAnalytics().trackPageView('首页');
  }, []);

  return (
    <div>
      <h1></h1>
      <button onClick={handleClick}></button>
    </div>
  );
}
// 10. Usage in a Vue component: records a page view when mounted and a click
// from the handleClick method.
export default {
name: 'HomePage',
mounted() {
getAnalytics().trackPageView('首页');
},
methods: {
handleClick() {
getAnalytics().trackClick('首页-开始按钮');
},
},
};
// 11. Tracking a multi-step user flow.
// Every step after the upload is reported only once a task ID is known.
class FormulaRecognitionFlow {
  private analytics = getAnalytics();
  private taskId: string | null = null;

  // Start of the recognition flow.
  startRecognition(file: File) {
    const { name, size } = file;
    this.analytics.track({
      event_name: 'formula_recognition_start',
      properties: { file_name: name, file_size: size },
    });
  }

  // Upload succeeded; remember the task ID for the following steps.
  uploadSuccess(taskId: string) {
    this.taskId = taskId;
    this.analytics.trackTask(taskId, 'upload_success', { step: 'upload' });
  }

  // Recognition in progress.
  recognitionProcessing() {
    if (!this.taskId) {
      return;
    }
    this.analytics.trackTask(this.taskId, 'processing', { step: 'recognition' });
  }

  // Recognition finished.
  recognitionComplete(result: any) {
    if (!this.taskId) {
      return;
    }
    this.analytics.trackTask(this.taskId, 'complete', {
      step: 'complete',
      has_result: !!result,
    });
  }

  // Recognition failed.
  recognitionFailed(error: string) {
    if (!this.taskId) {
      return;
    }
    this.analytics.trackTask(this.taskId, 'failed', {
      step: 'error',
      error_message: error,
    });
  }

  // Result viewed.
  viewResult() {
    if (!this.taskId) {
      return;
    }
    this.analytics.trackTask(this.taskId, 'view_result', { step: 'view' });
  }

  // Result exported.
  exportResult(format: string) {
    if (!this.taskId) {
      return;
    }
    this.analytics.trackTask(this.taskId, 'export', {
      step: 'export',
      export_format: format,
    });
  }
}
// 12. Error tracking: report uncaught JavaScript errors.
window.addEventListener('error', ({ message, filename, lineno, colno }) => {
  const details = {
    error_message: message,
    error_filename: filename,
    error_line: lineno,
    error_column: colno,
  };
  getAnalytics().track({ event_name: 'javascript_error', properties: details });
});
// 13. Performance tracking.
// The timings are read in a task queued from the load handler rather than in
// the handler itself: loadEventEnd is still 0 while load handlers are running,
// which would make page_load_time a large negative number.
// NOTE(review): performance.timing is deprecated in favour of
// PerformanceNavigationTiming; consider migrating.
window.addEventListener('load', () => {
  setTimeout(() => {
    const perfData = performance.timing;
    const pageLoadTime = perfData.loadEventEnd - perfData.navigationStart;
    getAnalytics().track({
      event_name: 'page_performance',
      properties: {
        page_load_time: pageLoadTime,
        dns_time: perfData.domainLookupEnd - perfData.domainLookupStart,
        tcp_time: perfData.connectEnd - perfData.connectStart,
        request_time: perfData.responseEnd - perfData.requestStart,
        dom_parse_time: perfData.domComplete - perfData.domLoading,
      },
    });
  }, 0);
});
export {};

10
go.mod
View File

@@ -1,4 +1,4 @@
module gitea.com/bitwsd/document_ai
module gitea.com/texpixel/document_ai
go 1.20
@@ -17,13 +17,15 @@ require (
github.com/spf13/viper v1.19.0
golang.org/x/crypto v0.23.0
gopkg.in/natefinch/lumberjack.v2 v2.2.1
gorm.io/datatypes v1.2.7
gorm.io/driver/mysql v1.5.7
gorm.io/gorm v1.25.12
gorm.io/gorm v1.30.0
)
require github.com/go-sql-driver/mysql v1.7.0 // indirect
require github.com/go-sql-driver/mysql v1.8.1 // indirect
require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4 // indirect
github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect
github.com/alibabacloud-go/endpoint-util v1.1.0 // indirect
@@ -74,7 +76,7 @@ require (
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
golang.org/x/text v0.20.0 // indirect
golang.org/x/time v0.5.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect

27
go.sum
View File

@@ -1,3 +1,5 @@
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4 h1:iC9YFYKDGEy3n/FtqJnOkZsene9olVspKmkX5A2YBEo=
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4/go.mod h1:sCavSAvdzOjul4cEqeVtvlSaSScfNsTQ+46HwlTL1hc=
github.com/alibabacloud-go/darabonba-openapi v0.1.18/go.mod h1:PB4HffMhJVmAgNKNq3wYbTUlFvPgxJpTzd1F5pTuUsc=
@@ -69,11 +71,14 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc=
github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA=
github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
@@ -83,6 +88,10 @@ github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 h1:l5lAOZEym3oK3
github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA=
github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
@@ -110,6 +119,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
github.com/microsoft/go-mssqldb v1.7.2 h1:CHkFJiObW7ItKTJfHo1QX7QBBD1iV+mn1eOyRP3b/PA=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -199,6 +210,7 @@ golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -211,8 +223,8 @@ golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -237,10 +249,15 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gorm.io/datatypes v1.2.7 h1:ww9GAhF1aGXZY3EB3cJPJ7//JiuQo7DlQA7NNlVaTdk=
gorm.io/datatypes v1.2.7/go.mod h1:M2iO+6S3hhi4nAyYe444Pcb0dcIiOMJ7QHaUXxyiNZY=
gorm.io/driver/mysql v1.5.7 h1:MndhOPYOfEp2rHKgkZIhJ16eVUIRf2HmzgoPmh7FCWo=
gorm.io/driver/mysql v1.5.7/go.mod h1:sEtPWMiqiN1N1cMXoXmBbd8C6/l+TESwriotuRRpkDM=
gorm.io/driver/postgres v1.5.0 h1:u2FXTy14l45qc3UeCJ7QaAXZmZfDDv0YrthvmRq1l0U=
gorm.io/driver/sqlite v1.4.3 h1:HBBcZSDnWi5BW3B3rwvVTc510KGkBkexlOg0QrmLUuU=
gorm.io/driver/sqlserver v1.6.0 h1:VZOBQVsVhkHU/NzNhRJKoANt5pZGQAS1Bwc6m6dgfnc=
gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8=
gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ=
gorm.io/gorm v1.30.0 h1:qbT5aPv1UH8gI99OsRlvDToLxW5zR7FzS9acZDOZcgs=
gorm.io/gorm v1.30.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

View File

@@ -0,0 +1,34 @@
package analytics
import "time"
// TrackEventRequest is the request payload for recording a single analytics
// (tracking) event.
//
// TaskNo and EventName carry the `binding:"required"` tag; the other fields
// have no binding constraint.
// NOTE(review): the analytics service code visible alongside this type does
// not copy TaskNo into the stored event — confirm whether callers are expected
// to repeat it inside MetaData.
type TrackEventRequest struct {
// TaskNo is the task number the event relates to (required).
TaskNo string `json:"task_no" binding:"required"`
// UserID is the ID of the user the event belongs to.
UserID int64 `json:"user_id"`
// EventName is the name of the event (required).
EventName string `json:"event_name" binding:"required"`
// Properties holds free-form event attributes.
Properties map[string]interface{} `json:"properties"`
// DeviceInfo holds free-form device information.
DeviceInfo map[string]interface{} `json:"device_info"`
// MetaData holds free-form metadata.
MetaData map[string]interface{} `json:"meta_data"`
}
// BatchTrackEventRequest is the request payload for recording several
// analytics events in one call.
type BatchTrackEventRequest struct {
// Events is the list of events to record; the binding tag requires between
// 1 and 100 entries.
Events []TrackEventRequest `json:"events" binding:"required,min=1,max=100"`
}
// QueryEventsRequest is the request payload for a paginated event query.
//
// Fields can be bound from JSON or from form/query parameters (both tag sets
// are present). UserID, StartTime and EndTime are pointers so that "not
// provided" can be told apart from a zero value.
// NOTE(review): in the service code visible alongside this type, the time
// range is only applied when neither UserID nor EventName is set — confirm
// that this is the intended filter precedence.
type QueryEventsRequest struct {
// UserID optionally restricts the query to one user.
UserID *int64 `json:"user_id" form:"user_id"`
// EventName optionally restricts the query to one event name.
EventName string `json:"event_name" form:"event_name"`
// StartTime is the optional start of the time range.
StartTime *time.Time `json:"start_time" form:"start_time"`
// EndTime is the optional end of the time range.
EndTime *time.Time `json:"end_time" form:"end_time"`
// Page is the page number (required, at least 1).
Page int `json:"page" form:"page" binding:"required,min=1"`
// PageSize is the number of events per page (required, 1 to 100).
PageSize int `json:"page_size" form:"page_size" binding:"required,min=1,max=100"`
}
// EventStatsRequest is the request payload for per-event statistics over a
// time window. Both bounds carry the `binding:"required"` tag.
type EventStatsRequest struct {
// StartTime is the start of the statistics window.
StartTime time.Time `json:"start_time" form:"start_time" binding:"required"`
// EndTime is the end of the statistics window.
EndTime time.Time `json:"end_time" form:"end_time" binding:"required"`
}

View File

@@ -0,0 +1,36 @@
package analytics
import "time"
// EventResponse is the API representation of one stored analytics event.
type EventResponse struct {
// ID is the event's primary key.
ID int64 `json:"id"`
// UserID is the ID of the user the event belongs to.
UserID int64 `json:"user_id"`
// EventName is the name of the event.
EventName string `json:"event_name"`
// Properties holds the decoded event attributes.
Properties map[string]interface{} `json:"properties"`
// DeviceInfo holds the decoded device information.
DeviceInfo map[string]interface{} `json:"device_info"`
// MetaData holds the decoded metadata.
MetaData map[string]interface{} `json:"meta_data"`
// CreatedAt is the time the event was created.
CreatedAt time.Time `json:"created_at"`
}
// EventListResponse is the response for a paginated event query.
type EventListResponse struct {
// Events is the current page of events.
Events []*EventResponse `json:"events"`
// Total is the total number of events matching the query.
Total int64 `json:"total"`
// Page is the page number that was requested.
Page int `json:"page"`
// Size is the page size that was requested.
Size int `json:"size"`
}
// EventStatsResponse holds the statistics for a single event name.
type EventStatsResponse struct {
// EventName is the event name the figures refer to.
EventName string `json:"event_name"`
// Count is the number of events with this name.
Count int64 `json:"count"`
// UniqueUsers is the number of distinct users that produced this event.
UniqueUsers int64 `json:"unique_users"`
}
// EventStatsListResponse is the response for an event statistics request: one
// entry per event name plus the time window the request asked for.
type EventStatsListResponse struct {
// Stats lists the per-event statistics.
Stats []*EventStatsResponse `json:"stats"`
// StartTime is the start of the requested window.
StartTime time.Time `json:"start_time"`
// EndTime is the end of the requested window.
EndTime time.Time `json:"end_time"`
}

View File

@@ -22,3 +22,10 @@ type GetFormulaTaskResponse struct {
type FormulaRecognitionResponse struct {
Result string `json:"result"`
}
// ImageOCRResponse is the response body returned by the image OCR endpoint.
type ImageOCRResponse struct {
Latex string `json:"latex"` // LaTeX-formatted content
Markdown string `json:"markdown"` // Markdown-formatted content
MathML string `json:"mathml"` // MathML output (empty when there is no formula)
}

View File

@@ -0,0 +1,232 @@
package service
import (
"context"
"encoding/json"
"fmt"
"time"
"gitea.com/texpixel/document_ai/internal/model/analytics"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"gitea.com/texpixel/document_ai/pkg/log"
"gorm.io/datatypes"
)
// AnalyticsService implements the analytics (event tracking) use cases on top
// of the analytics event DAO.
type AnalyticsService struct {
// eventDao performs all database access for analytics events.
eventDao *dao.AnalyticsEventDao
}
// NewAnalyticsService returns an AnalyticsService wired to a freshly
// constructed analytics event DAO.
func NewAnalyticsService() *AnalyticsService {
	svc := new(AnalyticsService)
	svc.eventDao = dao.NewAnalyticsEventDao()
	return svc
}
// TrackEvent persists a single analytics event.
//
// The three free-form maps on req are JSON-encoded in order (properties,
// device_info, meta_data); the first encoding failure is logged and reported
// as an "invalid ... format" error. The encoded event is then inserted through
// the event DAO. A failed insert is logged and reported as
// "failed to track event"; success is logged together with the event's ID.
func (s *AnalyticsService) TrackEvent(ctx context.Context, req *analytics.TrackEventRequest) error {
	// One row per JSON column: the map to encode plus the messages used when
	// encoding fails.
	columns := [3]struct {
		payload map[string]interface{}
		logMsg  string
		errMsg  string
	}{
		{req.Properties, "marshal properties failed", "invalid properties format"},
		{req.DeviceInfo, "marshal device_info failed", "invalid device_info format"},
		{req.MetaData, "marshal meta_data failed", "invalid meta_data format"},
	}

	var encoded [3]datatypes.JSON
	for i, col := range columns {
		raw, marshalErr := json.Marshal(col.payload)
		if marshalErr != nil {
			log.Error(ctx, col.logMsg, "error", marshalErr)
			return fmt.Errorf("%s", col.errMsg)
		}
		encoded[i] = datatypes.JSON(raw)
	}

	record := &dao.AnalyticsEvent{
		UserID:     req.UserID,
		EventName:  req.EventName,
		Properties: encoded[0],
		DeviceInfo: encoded[1],
		MetaData:   encoded[2],
		CreatedAt:  time.Now(),
	}

	createErr := s.eventDao.Create(dao.DB.WithContext(ctx), record)
	if createErr != nil {
		log.Error(ctx, "create analytics event failed", "error", createErr)
		return fmt.Errorf("failed to track event")
	}

	log.Info(ctx, "event tracked successfully",
		"event_id", record.ID,
		"user_id", req.UserID,
		"event_name", req.EventName)
	return nil
}
// BatchTrackEvents persists several analytics events with one DAO call.
//
// Each request entry is JSON-encoded field by field; an entry whose
// properties, device_info or meta_data cannot be encoded is logged and
// skipped. If nothing is left to store the function returns
// "no valid events to track". A failed batch insert is logged and reported as
// "failed to batch track events".
func (s *AnalyticsService) BatchTrackEvents(ctx context.Context, req *analytics.BatchTrackEventRequest) error {
	// encode marshals one map; on failure it logs failMsg and reports false so
	// the caller can skip the entry.
	encode := func(payload map[string]interface{}, failMsg string) (datatypes.JSON, bool) {
		raw, marshalErr := json.Marshal(payload)
		if marshalErr != nil {
			log.Error(ctx, failMsg, "error", marshalErr)
			return nil, false
		}
		return datatypes.JSON(raw), true
	}

	batch := make([]*dao.AnalyticsEvent, 0, len(req.Events))
	for i := range req.Events {
		item := &req.Events[i]

		props, ok := encode(item.Properties, "marshal properties failed")
		if !ok {
			continue
		}
		device, ok := encode(item.DeviceInfo, "marshal device_info failed")
		if !ok {
			continue
		}
		meta, ok := encode(item.MetaData, "marshal meta_data failed")
		if !ok {
			continue
		}

		batch = append(batch, &dao.AnalyticsEvent{
			UserID:     item.UserID,
			EventName:  item.EventName,
			Properties: props,
			DeviceInfo: device,
			MetaData:   meta,
			CreatedAt:  time.Now(),
		})
	}

	if len(batch) == 0 {
		return fmt.Errorf("no valid events to track")
	}

	saveErr := s.eventDao.BatchCreate(dao.DB.WithContext(ctx), batch)
	if saveErr != nil {
		log.Error(ctx, "batch create analytics events failed", "error", saveErr)
		return fmt.Errorf("failed to batch track events")
	}

	log.Info(ctx, "batch events tracked successfully", "count", len(batch))
	return nil
}
// QueryEvents returns one page of stored events.
//
// Exactly one filter is applied, chosen in this order of precedence:
//  1. UserID and EventName together,
//  2. UserID alone,
//  3. EventName alone,
//  4. StartTime and EndTime together.
//
// If none of these is present the function returns "invalid query parameters".
// A DAO failure is logged and reported as "failed to query events".
//
// The JSON columns of each event are decoded into maps for the response. A
// column that cannot be decoded is logged and left nil in the response rather
// than failing the whole query.
func (s *AnalyticsService) QueryEvents(ctx context.Context, req *analytics.QueryEventsRequest) (*analytics.EventListResponse, error) {
	var (
		events []*dao.AnalyticsEvent
		total  int64
		err    error
	)

	switch {
	case req.UserID != nil && req.EventName != "":
		// Events of one user with one event name.
		events, total, err = s.eventDao.GetUserEventsByName(dao.DB.WithContext(ctx), *req.UserID, req.EventName, req.Page, req.PageSize)
	case req.UserID != nil:
		// All events of one user.
		events, total, err = s.eventDao.GetUserEvents(dao.DB.WithContext(ctx), *req.UserID, req.Page, req.PageSize)
	case req.EventName != "":
		// All events with one event name.
		events, total, err = s.eventDao.GetEventsByName(dao.DB.WithContext(ctx), req.EventName, req.Page, req.PageSize)
	case req.StartTime != nil && req.EndTime != nil:
		// All events inside the time range.
		events, total, err = s.eventDao.GetEventsByTimeRange(dao.DB.WithContext(ctx), *req.StartTime, *req.EndTime, req.Page, req.PageSize)
	default:
		return nil, fmt.Errorf("invalid query parameters")
	}
	if err != nil {
		log.Error(ctx, "query events failed", "error", err)
		return nil, fmt.Errorf("failed to query events")
	}

	// decode turns one stored JSON column into a map. Empty columns and
	// columns that fail to decode both yield nil; failures are logged so that
	// corrupt rows are visible instead of silently looking empty.
	decode := func(eventID int64, column string, raw datatypes.JSON) map[string]interface{} {
		if len(raw) == 0 {
			return nil
		}
		var decoded map[string]interface{}
		if decodeErr := json.Unmarshal(raw, &decoded); decodeErr != nil {
			log.Error(ctx, "unmarshal event column failed",
				"error", decodeErr,
				"event_id", eventID,
				"column", column)
			return nil
		}
		return decoded
	}

	// Convert the DAO rows into the response format.
	eventResponses := make([]*analytics.EventResponse, 0, len(events))
	for _, event := range events {
		eventResponses = append(eventResponses, &analytics.EventResponse{
			ID:         event.ID,
			UserID:     event.UserID,
			EventName:  event.EventName,
			Properties: decode(event.ID, "properties", event.Properties),
			DeviceInfo: decode(event.ID, "device_info", event.DeviceInfo),
			MetaData:   decode(event.ID, "meta_data", event.MetaData),
			CreatedAt:  event.CreatedAt,
		})
	}

	return &analytics.EventListResponse{
		Events: eventResponses,
		Total:  total,
		Page:   req.Page,
		Size:   req.PageSize,
	}, nil
}
// GetEventStats returns per-event-name statistics (event count and number of
// distinct users) for the time window in req.
//
// The DAO returns each row as a map of scanned column values. The concrete Go
// type of such values depends on the database driver, so the values are
// converted defensively instead of with unchecked type assertions, which would
// panic on an unexpected type. A DAO failure or an unconvertible value is
// logged and reported as "failed to get event stats".
func (s *AnalyticsService) GetEventStats(ctx context.Context, req *analytics.EventStatsRequest) (*analytics.EventStatsListResponse, error) {
	results, err := s.eventDao.GetEventStats(dao.DB.WithContext(ctx), req.StartTime, req.EndTime)
	if err != nil {
		log.Error(ctx, "get event stats failed", "error", err)
		return nil, fmt.Errorf("failed to get event stats")
	}

	stats := make([]*analytics.EventStatsResponse, 0, len(results))
	for _, result := range results {
		name, okName := analyticsStatString(result["event_name"])
		count, okCount := analyticsStatInt64(result["count"])
		uniqueUsers, okUsers := analyticsStatInt64(result["unique_users"])
		if !okName || !okCount || !okUsers {
			log.Error(ctx, "unexpected column type in event stats row",
				"event_name_type", fmt.Sprintf("%T", result["event_name"]),
				"count_type", fmt.Sprintf("%T", result["count"]),
				"unique_users_type", fmt.Sprintf("%T", result["unique_users"]))
			return nil, fmt.Errorf("failed to get event stats")
		}
		stats = append(stats, &analytics.EventStatsResponse{
			EventName:   name,
			Count:       count,
			UniqueUsers: uniqueUsers,
		})
	}

	return &analytics.EventStatsListResponse{
		Stats:     stats,
		StartTime: req.StartTime,
		EndTime:   req.EndTime,
	}, nil
}

// analyticsStatString converts a scanned text column value to a string. It
// reports false when the value is neither a string nor a byte slice.
func analyticsStatString(v interface{}) (string, bool) {
	switch val := v.(type) {
	case string:
		return val, true
	case []byte:
		return string(val), true
	default:
		return "", false
	}
}

// analyticsStatInt64 converts a scanned numeric column value to int64. It
// accepts the common integer types, float64, and decimal text; it reports
// false for anything else (including nil).
func analyticsStatInt64(v interface{}) (int64, bool) {
	switch val := v.(type) {
	case int64:
		return val, true
	case int:
		return int64(val), true
	case int32:
		return int64(val), true
	case uint64:
		return int64(val), true
	case uint32:
		return int64(val), true
	case float64:
		return int64(val), true
	case []byte:
		// json.Number.Int64 is a strict base-10 integer parse.
		n, err := json.Number(string(val)).Int64()
		return n, err == nil
	case string:
		n, err := json.Number(val).Int64()
		return n, err == nil
	default:
		return 0, false
	}
}
// CountUserEvents returns how many events are stored for the given user.
// A DAO failure is logged and reported as "failed to count user events".
func (s *AnalyticsService) CountUserEvents(ctx context.Context, userID int64) (int64, error) {
	n, err := s.eventDao.CountUserEvents(dao.DB.WithContext(ctx), userID)
	if err == nil {
		return n, nil
	}
	log.Error(ctx, "count user events failed", "error", err, "user_id", userID)
	return 0, fmt.Errorf("failed to count user events")
}
// CountEventsByName returns how many events are stored under the given event
// name. A DAO failure is logged and reported as "failed to count events".
func (s *AnalyticsService) CountEventsByName(ctx context.Context, eventName string) (int64, error) {
	n, err := s.eventDao.CountEventsByName(dao.DB.WithContext(ctx), eventName)
	if err == nil {
		return n, nil
	}
	log.Error(ctx, "count events by name failed", "error", err, "event_name", eventName)
	return 0, fmt.Errorf("failed to count events")
}
// CleanOldEvents deletes events created more than retentionDays days ago. It
// is intended to be run periodically.
//
// retentionDays must not be negative: a negative value would move the cutoff
// into the future and delete every stored event, so it is rejected with an
// error before the database is touched. A DAO failure is logged and reported
// as "failed to clean old events".
func (s *AnalyticsService) CleanOldEvents(ctx context.Context, retentionDays int) error {
	if retentionDays < 0 {
		log.Error(ctx, "clean old events rejected: negative retention", "retention_days", retentionDays)
		return fmt.Errorf("retention days must not be negative")
	}

	// Everything created strictly before this instant is removed.
	beforeTime := time.Now().AddDate(0, 0, -retentionDays)
	if err := s.eventDao.DeleteOldEvents(dao.DB.WithContext(ctx), beforeTime); err != nil {
		log.Error(ctx, "clean old events failed", "error", err, "before_time", beforeTime)
		return fmt.Errorf("failed to clean old events")
	}

	log.Info(ctx, "old events cleaned successfully", "retention_days", retentionDays)
	return nil
}

View File

@@ -7,21 +7,23 @@ import (
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"strings"
"time"
"gitea.com/bitwsd/document_ai/config"
"gitea.com/bitwsd/document_ai/internal/model/formula"
"gitea.com/bitwsd/document_ai/internal/storage/cache"
"gitea.com/bitwsd/document_ai/internal/storage/dao"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/config"
"gitea.com/texpixel/document_ai/internal/model/formula"
"gitea.com/texpixel/document_ai/internal/storage/cache"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"gitea.com/texpixel/document_ai/pkg/log"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/bitwsd/document_ai/pkg/constant"
"gitea.com/bitwsd/document_ai/pkg/httpclient"
"gitea.com/bitwsd/document_ai/pkg/oss"
"gitea.com/bitwsd/document_ai/pkg/utils"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/constant"
"gitea.com/texpixel/document_ai/pkg/httpclient"
"gitea.com/texpixel/document_ai/pkg/oss"
"gitea.com/texpixel/document_ai/pkg/requestid"
"gitea.com/texpixel/document_ai/pkg/utils"
"gorm.io/gorm"
)
@@ -510,8 +512,8 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
// 设置Content-Type头为application/json
headers := map[string]string{"Content-Type": "application/json", utils.RequestIDHeaderKey: utils.GetRequestIDFromContext(ctx)}
// 发送请求时会使用带超时的context
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/vlm/formula/predict", bytes.NewReader(jsonData), headers)
// 发送请求到新的 OCR 接口
resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/doc_process/v1/image/ocr", bytes.NewReader(jsonData), headers)
if err != nil {
if ctx.Err() == context.DeadlineExceeded {
log.Error(ctx, "func", "processFormulaTask", "msg", "请求超时")
@@ -531,12 +533,18 @@ func (s *RecognitionService) processFormulaTask(ctx context.Context, taskID int6
log.Info(ctx, "func", "processFormulaTask", "msg", "响应内容", "body", body.String())
// 解析 JSON 响应
var formulaResp formula.FormulaRecognitionResponse
if err := json.Unmarshal(body.Bytes(), &formulaResp); err != nil {
var ocrResp formula.ImageOCRResponse
if err := json.Unmarshal(body.Bytes(), &ocrResp); err != nil {
log.Error(ctx, "func", "processFormulaTask", "msg", "解析响应JSON失败", "error", err)
return err
}
err = resultDao.Create(tx, dao.RecognitionResult{TaskID: taskID, TaskType: dao.TaskTypeFormula, Latex: formulaResp.Result})
err = resultDao.Create(tx, dao.RecognitionResult{
TaskID: taskID,
TaskType: dao.TaskTypeFormula,
Latex: ocrResp.Latex,
Markdown: ocrResp.Markdown,
MathML: ocrResp.MathML,
})
if err != nil {
log.Error(ctx, "func", "processFormulaTask", "msg", "保存任务结果失败", "error", err)
return err
@@ -704,15 +712,19 @@ func (s *RecognitionService) processOneTask(ctx context.Context) {
}
ctx = context.WithValue(ctx, utils.RequestIDKey, task.TaskUUID)
log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
err = s.processBaiduOCRTask(ctx, taskID, task.FileURL)
if err != nil {
log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err)
return
}
// 使用 gls 设置 request_id确保在整个任务处理过程中可用
requestid.SetRequestID(task.TaskUUID, func() {
log.Info(ctx, "func", "processFormulaQueue", "msg", "获取任务成功", "task_id", taskID)
log.Info(ctx, "func", "processFormulaQueue", "msg", "处理任务成功", "task_id", taskID)
err = s.processFormulaTask(ctx, taskID, task.FileURL)
if err != nil {
log.Error(ctx, "func", "processFormulaQueue", "msg", "处理任务失败", "error", err)
return
}
log.Info(ctx, "func", "processFormulaQueue", "msg", "处理任务成功", "task_id", taskID)
})
}
// processMathpixTask 使用 Mathpix API 处理公式识别任务(用于增强识别)
@@ -996,6 +1008,11 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
}
markdownResult := strings.Join(markdownTexts, "\n\n---\n\n")
latex, mml, e := s.HandleConvert(ctx, markdownResult)
if e != nil {
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "转换失败", "error", err)
}
// 更新或创建识别结果
resultDao := dao.NewRecognitionResultDao()
result, err := resultDao.GetByTaskID(dao.DB.WithContext(ctx), taskID)
@@ -1012,6 +1029,8 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
TaskID: taskID,
TaskType: dao.TaskTypeFormula,
Markdown: markdownResult,
Latex: latex,
MathML: mml,
})
if err != nil {
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "创建任务结果失败", "error", err)
@@ -1021,6 +1040,8 @@ func (s *RecognitionService) processBaiduOCRTask(ctx context.Context, taskID int
// 更新现有结果
err = resultDao.Update(dao.DB.WithContext(ctx), result.ID, map[string]interface{}{
"markdown": markdownResult,
"latex": latex,
"mathml": mml,
})
if err != nil {
log.Error(ctx, "func", "processBaiduOCRTask", "msg", "更新任务结果失败", "error", err)
@@ -1044,3 +1065,55 @@ func (s *RecognitionService) TestProcessMathpixTask(ctx context.Context, taskID
}
return s.processMathpixTask(ctx, taskID, task.FileURL)
}
// ConvertResponse is the response structure returned by the Python conversion
// endpoint.
type ConvertResponse struct {
// Latex is the converted LaTeX text.
Latex string `json:"latex"`
// MathML is the converted MathML text.
MathML string `json:"mathml"`
// Error is a business-level error message; empty when the conversion succeeded.
Error string `json:"error,omitempty"`
}
// HandleConvert sends markdown to the document converter endpoint and returns
// the LaTeX and MathML it produces.
//
// The markdown is posted as the "markdown_input" field of a multipart form.
// An error is returned when the form cannot be built, the request fails, the
// response body cannot be read, the HTTP status is not 200, the body is not
// valid JSON, or the service reports a business error in its "error" field.
// On any error both returned strings are empty.
func (s *RecognitionService) HandleConvert(ctx context.Context, markdown string) (latex string, mml string, err error) {
	const endpoint = "https://cloud.texpixel.com:10443/doc_converter/v1/convert"

	// Build the multipart form. Close writes the terminating boundary, so its
	// error matters as much as WriteField's.
	var form bytes.Buffer
	writer := multipart.NewWriter(&form)
	if werr := writer.WriteField("markdown_input", markdown); werr != nil {
		return "", "", fmt.Errorf("build convert form: %w", werr)
	}
	if cerr := writer.Close(); cerr != nil {
		return "", "", fmt.Errorf("finalize convert form: %w", cerr)
	}

	// The Content-Type must carry the multipart boundary.
	headers := map[string]string{
		"Content-Type": writer.FormDataContentType(),
	}

	// Pass a bytes.Reader, as the JSON request to RequestWithRetry elsewhere in
	// this service does.
	// NOTE(review): presumably a seekable reader lets the client resend the
	// body on retry — confirm against the httpclient implementation.
	resp, err := s.httpClient.RequestWithRetry(ctx, http.MethodPost, endpoint, bytes.NewReader(form.Bytes()), headers)
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()

	// Read the body before checking the status so it can be included in the
	// error message.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", "", err
	}

	if resp.StatusCode != http.StatusOK {
		return "", "", fmt.Errorf("convert failed: status %d, body: %s", resp.StatusCode, string(respBody))
	}

	var convertResp ConvertResponse
	if uerr := json.Unmarshal(respBody, &convertResp); uerr != nil {
		return "", "", fmt.Errorf("unmarshal response failed: %v, body: %s", uerr, string(respBody))
	}

	// A 200 response can still carry a business-level error.
	if convertResp.Error != "" {
		return "", "", fmt.Errorf("convert error: %s", convertResp.Error)
	}

	return convertResp.Latex, convertResp.MathML, nil
}

View File

@@ -3,17 +3,17 @@ package service
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"mime/multipart"
"net/http"
"strings"
"gitea.com/bitwsd/document_ai/internal/model/task"
"gitea.com/bitwsd/document_ai/internal/storage/dao"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/bitwsd/document_ai/pkg/oss"
"gitea.com/texpixel/document_ai/internal/model/task"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"gitea.com/texpixel/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/pkg/oss"
)
type TaskService struct {
@@ -151,19 +151,29 @@ func (svc *TaskService) ExportTask(ctx context.Context, req *task.ExportTaskRequ
return nil, "", errors.New("markdown not found")
}
// call http://localhost:8055/export
body := &bytes.Buffer{}
writer := multipart.NewWriter(body)
_ = writer.WriteField("markdown_input", markdown)
_ = writer.WriteField("type", req.Type)
writer.Close()
// 获取文件名(去掉扩展名)
filename := strings.TrimSuffix(recognitionTask.FileName, "."+strings.ToLower(strings.Split(recognitionTask.FileName, ".")[len(strings.Split(recognitionTask.FileName, "."))-1]))
if filename == "" {
filename = "texpixel"
}
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/export", body)
// 构建 JSON 请求体
requestBody := map[string]string{
"markdown": markdown,
"filename": filename,
}
jsonData, err := json.Marshal(requestBody)
if err != nil {
log.Error(ctx, "func", "ExportTask", "msg", "json marshal failed", "error", err)
return nil, "", err
}
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://cloud.texpixel.com:10443/doc_process/v1/convert/file", bytes.NewReader(jsonData))
if err != nil {
log.Error(ctx, "func", "ExportTask", "msg", "create http request failed", "error", err)
return nil, "", err
}
httpReq.Header.Set("Content-Type", writer.FormDataContentType())
httpReq.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(httpReq)
@@ -184,16 +194,8 @@ func (svc *TaskService) ExportTask(ctx context.Context, req *task.ExportTaskRequ
return nil, "", err
}
// determine content type based on export type
var contentType string
switch req.Type {
case "pdf":
contentType = "application/pdf"
case "docx":
contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
default:
contentType = "application/octet-stream"
}
// 新接口只返回 DOCX 格式
contentType := "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
return fileData, contentType, nil
}

View File

@@ -6,11 +6,11 @@ import (
"fmt"
"math/rand"
"gitea.com/bitwsd/document_ai/internal/storage/cache"
"gitea.com/bitwsd/document_ai/internal/storage/dao"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/bitwsd/document_ai/pkg/sms"
"gitea.com/texpixel/document_ai/internal/storage/cache"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/pkg/sms"
"golang.org/x/crypto/bcrypt"
)

View File

@@ -5,7 +5,7 @@ import (
"fmt"
"time"
"gitea.com/bitwsd/document_ai/config"
"gitea.com/texpixel/document_ai/config"
"github.com/redis/go-redis/v9"
)

View File

@@ -0,0 +1,170 @@
package dao
import (
	"errors"
	"time"

	"gorm.io/datatypes"
	"gorm.io/gorm"
	"gorm.io/gorm/clause"
)
// AnalyticsEvent is the GORM model for one row of the analytics (tracking)
// event table.
//
// Column data types are declared with the `type:` tag key. GORM only honors a
// data type given as `type:<sql type>`; a bare `bigint` or `varchar(128)`
// entry in the tag is not a recognized setting and is ignored.
type AnalyticsEvent struct {
	// ID is the auto-increment primary key.
	ID int64 `gorm:"type:bigint;primaryKey;autoIncrement;column:id;comment:主键ID" json:"id"`
	// UserID is the ID of the user the event belongs to.
	UserID int64 `gorm:"column:user_id;not null;index:idx_user_id;comment:用户ID" json:"user_id"`
	// EventName is the name of the event.
	EventName string `gorm:"column:event_name;type:varchar(128);not null;index:idx_event_name;comment:事件名称" json:"event_name"`
	// Properties holds the event attributes as JSON.
	Properties datatypes.JSON `gorm:"column:properties;type:json;comment:事件属性(JSON)" json:"properties"`
	// DeviceInfo holds the device information as JSON.
	DeviceInfo datatypes.JSON `gorm:"column:device_info;type:json;comment:设备信息(JSON)" json:"device_info"`
	// MetaData holds additional metadata as JSON.
	MetaData datatypes.JSON `gorm:"column:meta_data;type:json;comment:元数据(JSON包含task_id等)" json:"meta_data"`
	// CreatedAt is the creation time of the event.
	CreatedAt time.Time `gorm:"column:created_at;comment:创建时间;not null;default:current_timestamp;index:idx_created_at" json:"created_at"`
}
// TableName returns the database table that stores AnalyticsEvent rows.
func (e *AnalyticsEvent) TableName() string {
	const table = "analytics_events"
	return table
}
// AnalyticsEventDao is the data access object for analytics (tracking) events.
// It holds no state; every method receives the *gorm.DB handle to run on.
type AnalyticsEventDao struct{}
// NewAnalyticsEventDao returns a new AnalyticsEventDao.
func NewAnalyticsEventDao() *AnalyticsEventDao {
	return new(AnalyticsEventDao)
}
// Create inserts one event record using tx and returns the resulting error,
// if any.
func (dao *AnalyticsEventDao) Create(tx *gorm.DB, event *AnalyticsEvent) error {
	result := tx.Create(event)
	return result.Error
}
// BatchCreate inserts the given event records using tx, in batches of 100.
// An empty slice is a no-op that returns nil.
func (dao *AnalyticsEventDao) BatchCreate(tx *gorm.DB, events []*AnalyticsEvent) error {
	if len(events) > 0 {
		result := tx.CreateInBatches(events, 100)
		return result.Error
	}
	return nil
}
// GetByID loads the event with the given primary key.
//
// It returns (nil, nil) when no such event exists, and (nil, err) for any
// other database error. The not-found case is detected with errors.Is so that
// it is still recognized if the sentinel error arrives wrapped.
func (dao *AnalyticsEventDao) GetByID(tx *gorm.DB, id int64) (*AnalyticsEvent, error) {
	event := &AnalyticsEvent{}
	if err := tx.Where("id = ?", id).First(event).Error; err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return nil, nil
		}
		return nil, err
	}
	return event, nil
}
// GetUserEvents returns one page of the given user's events, newest first,
// together with the total number of that user's events.
//
// The row offset is (page-1)*pageSize. If counting fails the function returns
// (nil, 0, err); if the page query fails the total is still returned alongside
// the error.
func (dao *AnalyticsEventDao) GetUserEvents(tx *gorm.DB, userID int64, page, pageSize int) ([]*AnalyticsEvent, int64, error) {
	var (
		rows  []*AnalyticsEvent
		count int64
	)

	scope := tx.Model(&AnalyticsEvent{}).Where("user_id = ?", userID)
	if err := scope.Count(&count).Error; err != nil {
		return nil, 0, err
	}

	skip := (page - 1) * pageSize
	newestFirst := clause.OrderByColumn{Column: clause.Column{Name: "created_at"}, Desc: true}
	err := scope.Offset(skip).Limit(pageSize).Order(newestFirst).Find(&rows).Error
	return rows, count, err
}
// GetEventsByName returns one page of the events with the given name, newest
// first, together with the total number of events with that name.
//
// The row offset is (page-1)*pageSize. If counting fails the function returns
// (nil, 0, err); if the page query fails the total is still returned alongside
// the error.
func (dao *AnalyticsEventDao) GetEventsByName(tx *gorm.DB, eventName string, page, pageSize int) ([]*AnalyticsEvent, int64, error) {
	var (
		rows  []*AnalyticsEvent
		count int64
	)

	scope := tx.Model(&AnalyticsEvent{}).Where("event_name = ?", eventName)
	if err := scope.Count(&count).Error; err != nil {
		return nil, 0, err
	}

	skip := (page - 1) * pageSize
	newestFirst := clause.OrderByColumn{Column: clause.Column{Name: "created_at"}, Desc: true}
	err := scope.Offset(skip).Limit(pageSize).Order(newestFirst).Find(&rows).Error
	return rows, count, err
}
// GetUserEventsByName returns one page of the given user's events with the
// given name, newest first, together with the total number of matching events.
//
// The row offset is (page-1)*pageSize. If counting fails the function returns
// (nil, 0, err); if the page query fails the total is still returned alongside
// the error.
func (dao *AnalyticsEventDao) GetUserEventsByName(tx *gorm.DB, userID int64, eventName string, page, pageSize int) ([]*AnalyticsEvent, int64, error) {
	var (
		rows  []*AnalyticsEvent
		count int64
	)

	scope := tx.Model(&AnalyticsEvent{}).Where("user_id = ? AND event_name = ?", userID, eventName)
	if err := scope.Count(&count).Error; err != nil {
		return nil, 0, err
	}

	skip := (page - 1) * pageSize
	newestFirst := clause.OrderByColumn{Column: clause.Column{Name: "created_at"}, Desc: true}
	err := scope.Offset(skip).Limit(pageSize).Order(newestFirst).Find(&rows).Error
	return rows, count, err
}
// GetEventsByTimeRange returns one page of the events whose created_at lies
// BETWEEN startTime AND endTime, newest first, together with the total number
// of events in that range.
//
// The row offset is (page-1)*pageSize. If counting fails the function returns
// (nil, 0, err); if the page query fails the total is still returned alongside
// the error.
func (dao *AnalyticsEventDao) GetEventsByTimeRange(tx *gorm.DB, startTime, endTime time.Time, page, pageSize int) ([]*AnalyticsEvent, int64, error) {
	var (
		rows  []*AnalyticsEvent
		count int64
	)

	scope := tx.Model(&AnalyticsEvent{}).Where("created_at BETWEEN ? AND ?", startTime, endTime)
	if err := scope.Count(&count).Error; err != nil {
		return nil, 0, err
	}

	skip := (page - 1) * pageSize
	newestFirst := clause.OrderByColumn{Column: clause.Column{Name: "created_at"}, Desc: true}
	err := scope.Offset(skip).Limit(pageSize).Order(newestFirst).Find(&rows).Error
	return rows, count, err
}
// CountEventsByName returns the number of stored events with the given event
// name, along with any query error.
func (dao *AnalyticsEventDao) CountEventsByName(tx *gorm.DB, eventName string) (int64, error) {
	var n int64
	scope := tx.Model(&AnalyticsEvent{}).Where("event_name = ?", eventName)
	result := scope.Count(&n)
	return n, result.Error
}
// CountUserEvents returns the number of stored events for the given user,
// along with any query error.
func (dao *AnalyticsEventDao) CountUserEvents(tx *gorm.DB, userID int64) (int64, error) {
	var n int64
	scope := tx.Model(&AnalyticsEvent{}).Where("user_id = ?", userID)
	result := scope.Count(&n)
	return n, result.Error
}
// GetEventStats aggregates events whose created_at lies BETWEEN startTime AND
// endTime, grouped by event name.
//
// Each returned map has the keys "event_name", "count" (number of events) and
// "unique_users" (number of distinct user IDs); rows are ordered by count,
// highest first.
func (dao *AnalyticsEventDao) GetEventStats(tx *gorm.DB, startTime, endTime time.Time) ([]map[string]interface{}, error) {
	var rows []map[string]interface{}

	stmt := tx.Model(&AnalyticsEvent{})
	stmt = stmt.Select("event_name, COUNT(*) as count, COUNT(DISTINCT user_id) as unique_users")
	stmt = stmt.Where("created_at BETWEEN ? AND ?", startTime, endTime)
	stmt = stmt.Group("event_name")
	stmt = stmt.Order("count DESC")

	queryErr := stmt.Find(&rows).Error
	return rows, queryErr
}
// DeleteOldEvents removes every event created strictly before beforeTime
// (data clean-up) and returns the resulting error, if any.
func (dao *AnalyticsEventDao) DeleteOldEvents(tx *gorm.DB, beforeTime time.Time) error {
	result := tx.Where("created_at < ?", beforeTime).Delete(&AnalyticsEvent{})
	return result.Error
}

View File

@@ -3,16 +3,19 @@ package dao
import (
"fmt"
"gitea.com/bitwsd/document_ai/config"
"gitea.com/texpixel/document_ai/config"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"gorm.io/gorm/logger"
)
var DB *gorm.DB
func InitDB(conf config.DatabaseConfig) {
dns := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8mb4&parseTime=True&loc=Asia%%2FShanghai", conf.Username, conf.Password, conf.Host, conf.Port, conf.DBName)
db, err := gorm.Open(mysql.Open(dns), &gorm.Config{})
db, err := gorm.Open(mysql.Open(dns), &gorm.Config{
Logger: logger.Default.LogMode(logger.Silent), // 禁用 GORM 日志输出
})
if err != nil {
panic(err)
}

23
main.go
View File

@@ -10,23 +10,26 @@ import (
"syscall"
"time"
"gitea.com/bitwsd/document_ai/api"
"gitea.com/bitwsd/document_ai/config"
"gitea.com/bitwsd/document_ai/internal/storage/cache"
"gitea.com/bitwsd/document_ai/internal/storage/dao"
"gitea.com/bitwsd/document_ai/pkg/common"
"gitea.com/bitwsd/document_ai/pkg/cors"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/bitwsd/document_ai/pkg/middleware"
"gitea.com/bitwsd/document_ai/pkg/sms"
"gitea.com/texpixel/document_ai/api"
"gitea.com/texpixel/document_ai/config"
"gitea.com/texpixel/document_ai/internal/storage/cache"
"gitea.com/texpixel/document_ai/internal/storage/dao"
"gitea.com/texpixel/document_ai/pkg/common"
"gitea.com/texpixel/document_ai/pkg/cors"
"gitea.com/texpixel/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/pkg/middleware"
"gitea.com/texpixel/document_ai/pkg/sms"
"github.com/gin-gonic/gin"
)
func main() {
// 加载配置
env := "dev"
env := ""
flag.StringVar(&env, "env", "dev", "environment (dev/prod)")
flag.Parse()
fmt.Println("env:", env)
configPath := fmt.Sprintf("./config/config_%s.yaml", env)
if err := config.Init(configPath); err != nil {
panic(err)

View File

@@ -0,0 +1,18 @@
-- Analytics (tracking) event table.
--
-- All indexes, including the composite ones, are declared inside CREATE TABLE
-- so that the whole script is covered by IF NOT EXISTS and can be re-run
-- without failing on a duplicate index name.
-- NOTE: if the table already exists, nothing in this script is applied to it.
CREATE TABLE IF NOT EXISTS `analytics_events` (
`id` BIGINT NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`user_id` BIGINT NOT NULL COMMENT '用户ID',
`event_name` VARCHAR(128) NOT NULL COMMENT '事件名称',
`properties` JSON DEFAULT NULL COMMENT '事件属性(JSON)',
`device_info` JSON DEFAULT NULL COMMENT '设备信息(JSON)',
`meta_data` JSON DEFAULT NULL COMMENT '元数据(JSON包含task_id等)',
`created_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
PRIMARY KEY (`id`),
INDEX `idx_user_id` (`user_id`),
INDEX `idx_event_name` (`event_name`),
INDEX `idx_created_at` (`created_at`),
-- Composite indexes to speed up the combined-filter queries.
INDEX `idx_user_event` (`user_id`, `event_name`),
INDEX `idx_event_time` (`event_name`, `created_at`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='数据埋点事件表';

View File

@@ -6,8 +6,8 @@ import (
"strings"
"time"
"gitea.com/bitwsd/document_ai/pkg/constant"
"gitea.com/bitwsd/document_ai/pkg/jwt"
"gitea.com/texpixel/document_ai/pkg/constant"
"gitea.com/texpixel/document_ai/pkg/jwt"
"github.com/gin-gonic/gin"
)

View File

@@ -3,7 +3,7 @@ package common
import (
"context"
"gitea.com/bitwsd/document_ai/pkg/constant"
"gitea.com/texpixel/document_ai/pkg/constant"
)
type Response struct {

View File

@@ -10,7 +10,7 @@ import (
"net/http"
"time"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/pkg/log"
)
// RetryConfig 重试配置

View File

@@ -8,7 +8,7 @@ import (
"runtime"
"time"
"gitea.com/bitwsd/document_ai/pkg/requestid"
"gitea.com/texpixel/document_ai/pkg/requestid"
"github.com/rs/zerolog"
"gopkg.in/natefinch/lumberjack.v2"

View File

@@ -6,7 +6,7 @@ import (
"strings"
"time"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/pkg/log"
"github.com/gin-gonic/gin"
)
@@ -72,4 +72,3 @@ func AccessLog() gin.HandlerFunc {
)
}
}

View File

@@ -1,7 +1,7 @@
package middleware
import (
"gitea.com/bitwsd/document_ai/pkg/requestid"
"gitea.com/texpixel/document_ai/pkg/requestid"
"github.com/gin-gonic/gin"
"github.com/google/uuid"

View File

@@ -12,8 +12,8 @@ import (
"strings"
"time"
"gitea.com/bitwsd/document_ai/config"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/config"
"gitea.com/texpixel/document_ai/pkg/log"
"github.com/aliyun/aliyun-oss-go-sdk/oss"
)

View File

@@ -4,7 +4,7 @@ import (
"errors"
"sync"
"gitea.com/bitwsd/document_ai/config"
"gitea.com/texpixel/document_ai/config"
openapi "github.com/alibabacloud-go/darabonba-openapi/client"
dysmsapi "github.com/alibabacloud-go/dysmsapi-20170525/v2/client"
aliutil "github.com/alibabacloud-go/tea-utils/service"

View File

@@ -3,7 +3,7 @@ package utils
import (
"context"
"gitea.com/bitwsd/document_ai/pkg/log"
"gitea.com/texpixel/document_ai/pkg/log"
)
func SafeGo(fn func()) {

10
prod_deploy.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Build the backend image, push it to the registry, then redeploy it on the
# host reachable as the SSH alias "ecs".
#
# Fail fast: without this, a failed build/push would still fall through to the
# ssh step, which stops the running container and restarts it from whatever
# ":latest" currently is in the registry.
set -euo pipefail

# Single source of truth for the image reference used by every step below.
IMAGE="crpi-8s2ierii2xan4klg.cn-beijing.personal.cr.aliyuncs.com/texpixel/doc_ai_backend:latest"

docker build -t "$IMAGE" .
docker push "$IMAGE"

# The heredoc delimiter is intentionally unquoted so that $IMAGE is expanded
# locally before the script is sent to the remote shell.
ssh ecs << ENDSSH
set -e
# Pull first: if the pull fails, the currently running container is left untouched.
docker pull $IMAGE
# Either container name may exist from an earlier deploy; a missing one is not an error.
docker stop doc_ai doc_ai_backend 2>/dev/null || true
docker rm doc_ai doc_ai_backend 2>/dev/null || true
docker run -d --name doc_ai -p 8024:8024 --restart unless-stopped $IMAGE -env=prod
ENDSSH