-- migrations/pdf_recognition.sql
-- 将 recognition_results 表重构为 JSON content schema
-- 执行顺序:加新列 → 洗历史数据 → 删旧列

-- Step 1: add the two JSON columns.
-- The legacy columns are intentionally left in place here; they are only
-- removed after their data has been copied into the new columns.
-- Both columns are nullable, so existing rows start out with NULL in each.
-- Column order: `meta_data` directly after `task_type`, `content` directly
-- after `meta_data`.
ALTER TABLE `recognition_results`
    ADD COLUMN `meta_data` JSON NULL DEFAULT NULL
        COMMENT '元数据 {"total_num":1}'
        AFTER `task_type`,
    ADD COLUMN `content` JSON NULL DEFAULT NULL
        COMMENT '识别内容 JSON'
        AFTER `meta_data`;

-- Step 2: backfill the new JSON columns from the legacy columns.
-- Per the migration author, every existing record is a single-page FORMULA
-- result, which is why meta_data.total_num is written as the constant 1.
-- Resulting content shape:
--   {"latex":"...","markdown":"...","mathml":"...","mml":"..."}
-- A NULL legacy value is stored as an empty string rather than JSON null.
-- Only rows whose `content` is still NULL are touched, so re-running this
-- statement does not overwrite rows that were already backfilled.
UPDATE `recognition_results`
SET `meta_data` = JSON_OBJECT('total_num', 1),
    `content`   = JSON_OBJECT(
                      'latex',    COALESCE(`latex`, ''),
                      'markdown', COALESCE(`markdown`, ''),
                      'mathml',   COALESCE(`mathml`, ''),
                      'mml',      COALESCE(`mml`, '')
                  )
WHERE `content` IS NULL;

-- Step 3: verify that the backfill is complete before anything destructive
-- runs. @unmigrated_rows is the number of rows whose `content` is still NULL;
-- it must be 0 for the legacy columns to be safe to drop.
SET @unmigrated_rows = (
    SELECT COUNT(*)
    FROM `recognition_results`
    WHERE `content` IS NULL
);

-- Step 4: drop the legacy columns, but only if Step 3 found nothing left to
-- migrate. DROP COLUMN is irreversible, so it must not run while any row
-- still depends on the old columns.
--
-- Outside a stored program MySQL has no IF ... THEN for statements, so the
-- statement text is selected with CASE and executed through PREPARE/EXECUTE.
-- When unmigrated rows remain, the selected statement deliberately references
-- a table that does not exist: PREPARE then fails with "table doesn't exist",
-- the table name carries the reason, and the script stops with the legacy
-- columns untouched instead of silently reporting success.
--
-- NOTE(review): this does not protect against rows written between the check
-- and the ALTER TABLE; make sure writers no longer depend on the legacy
-- columns before running this step.
SET @drop_legacy_sql = CASE
    WHEN @unmigrated_rows = 0 THEN
        'ALTER TABLE `recognition_results`
             DROP COLUMN `latex`,
             DROP COLUMN `markdown`,
             DROP COLUMN `mathml`,
             DROP COLUMN `mml`'
    ELSE
        'SELECT 1 FROM `migration_aborted__recognition_results_has_null_content`'
END;

PREPARE drop_legacy_stmt FROM @drop_legacy_sql;
EXECUTE drop_legacy_stmt;
DEALLOCATE PREPARE drop_legacy_stmt;