feat: add PDF document recognition with 10-page pre-hook
- Migrate recognition_results table to JSON schema (meta_data + content),
replacing flat latex/markdown/mathml/mml columns
- Add TaskTypePDF constant and update all formula read/write paths
- Add PDFRecognitionService using pdftoppm (Poppler) for CGO-free page
rendering; limits processing to first 10 pages (pre-hook)
- Reuse existing downstream OCR endpoint (cloud.texpixel.com) for each
page image; stores results as [{page_number, markdown}] JSON array
- Add Redis queue + distributed lock for PDF worker goroutine
- Add REST endpoints: POST /v1/pdf/recognition, GET /v1/pdf/recognition/:task_no
- Add .pdf to OSS upload file type whitelist
- Add migrations/pdf_recognition.sql for safe data migration
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
34
internal/model/pdf/request.go
Normal file
34
internal/model/pdf/request.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package pdf
|
||||
|
||||
// CreatePDFRecognitionRequest is the JSON request body used to create a PDF
// recognition task.
//
// FileURL, FileHash and FileName carry a `binding:"required"` tag; UserID has
// no binding constraint, so it is left at its zero value when absent.
//
// NOTE(review): UserID is decodable from the client-supplied JSON body
// ("user_id"). Confirm whether callers are expected to send it or whether the
// handler overwrites it from the authenticated session.
|
||||
type CreatePDFRecognitionRequest struct {
|
||||
FileURL string `json:"file_url" binding:"required"`
|
||||
FileHash string `json:"file_hash" binding:"required"`
|
||||
FileName string `json:"file_name" binding:"required"`
|
||||
UserID int64 `json:"user_id"`
|
||||
}
|
||||
|
||||
// GetPDFTaskRequest holds the URI parameters for looking up a PDF task.
//
// TaskNo is bound from the "task_no" URI parameter and is marked required.
|
||||
type GetPDFTaskRequest struct {
|
||||
TaskNo string `uri:"task_no" binding:"required"`
|
||||
}
|
||||
|
||||
// CreatePDFTaskResponse is the response returned after creating a task.
//
// It serializes the task number as "task_no" and the task status as "status".
// The meaning of individual Status values is not defined in this file —
// TODO confirm against the task status constants.
|
||||
type CreatePDFTaskResponse struct {
|
||||
TaskNo string `json:"task_no"`
|
||||
Status int `json:"status"`
|
||||
}
|
||||
|
||||
// PDFPageResult is the recognition result for a single page.
//
// It serializes as {"page_number": ..., "markdown": ...}.
// NOTE(review): whether PageNumber is 0- or 1-based is not visible here —
// confirm against the code that populates it.
|
||||
type PDFPageResult struct {
|
||||
PageNumber int `json:"page_number"`
|
||||
Markdown string `json:"markdown"`
|
||||
}
|
||||
|
||||
// GetPDFTaskResponse is the response for querying a task's status and results.
//
// It serializes the task number, status, total page count and the per-page
// results as "task_no", "status", "total_pages" and "pages" respectively.
// Note that a nil Pages slice is encoded by encoding/json as null rather than
// an empty array. The meaning of individual Status values is not defined in
// this file — TODO confirm against the task status constants.
|
||||
type GetPDFTaskResponse struct {
|
||||
TaskNo string `json:"task_no"`
|
||||
Status int `json:"status"`
|
||||
TotalPages int `json:"total_pages"`
|
||||
Pages []PDFPageResult `json:"pages"`
|
||||
}
|
||||
Reference in New Issue
Block a user