- Migrate recognition_results table to JSON schema (meta_data + content),
replacing flat latex/markdown/mathml/mml columns
- Add TaskTypePDF constant and update all formula read/write paths
- Add PDFRecognitionService using pdftoppm (Poppler) for CGO-free page
rendering; limit processing to the first 10 pages (pre-hook)
- Reuse existing downstream OCR endpoint (cloud.texpixel.com) for each
page image; store results as a [{page_number, markdown}] JSON array
- Add Redis queue + distributed lock for PDF worker goroutine
- Add REST endpoints: POST /v1/pdf/recognition, GET /v1/pdf/recognition/:task_no
- Add .pdf to OSS upload file type whitelist
- Add migrations/pdf_recognition.sql for safe data migration
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
28 lines
684 B
Go
28 lines
684 B
Go
package cache
|
|
|
|
import (
|
|
"context"
|
|
"strconv"
|
|
)
|
|
|
|
// Redis key names used by the PDF recognition helpers in this file.
const (
	// PDFRecognitionDistLock is the key written by GetPDFDistributedLock
	// when it attempts to take the PDF worker lock.
	PDFRecognitionDistLock = "pdf_recognition_dist_lock"

	// PDFRecognitionTaskQueue is the key of the list that PushPDFTask
	// pushes task IDs onto and PopPDFTask pops them from.
	PDFRecognitionTaskQueue = "pdf_recognition_queue"
)
|
|
|
|
func PushPDFTask(ctx context.Context, taskID int64) (int64, error) {
|
|
return RedisClient.LPush(ctx, PDFRecognitionTaskQueue, taskID).Result()
|
|
}
|
|
|
|
func PopPDFTask(ctx context.Context) (int64, error) {
|
|
result, err := RedisClient.BRPop(ctx, 0, PDFRecognitionTaskQueue).Result()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return strconv.ParseInt(result[1], 10, 64)
|
|
}
|
|
|
|
func GetPDFDistributedLock(ctx context.Context) (bool, error) {
|
|
return RedisClient.SetNX(ctx, PDFRecognitionDistLock, "locked", DefaultLockTimeout).Result()
|
|
}
|