-- migrations/pdf_recognition.sql
-- Restructure the `recognition_results` table to a JSON content schema.
-- Execution order: add new columns -> backfill historical data -> verify -> drop old columns.
--
-- Dialect: MySQL (JSON column type, JSON_OBJECT, ALTER ... AFTER, PREPARE/EXECUTE).
-- NOTE: ALTER TABLE commits implicitly in MySQL, so this script cannot be rolled
-- back as a unit. The destructive step is therefore gated on an explicit check.

-- Step 1: add the JSON columns. The legacy columns are kept until the data has
-- been migrated, and are only removed in Step 4.
ALTER TABLE `recognition_results`
    ADD COLUMN `meta_data` JSON DEFAULT NULL COMMENT '元数据 {"total_num":1}' AFTER `task_type`,
    ADD COLUMN `content` JSON DEFAULT NULL COMMENT '识别内容 JSON' AFTER `meta_data`;

-- Step 2: backfill the new JSON columns from the legacy columns.
-- Per the original migration note, every existing record is of the FORMULA type
-- (single page), hence meta_data.total_num = 1.
-- content shape: {"latex":"...","markdown":"...","mathml":"...","mml":"..."}
-- A NULL legacy value is stored as an empty string, not as JSON null.
-- The WHERE clause limits the update to rows that have not been backfilled yet,
-- so re-running this statement does not overwrite existing content.
UPDATE `recognition_results`
SET
    `meta_data` = JSON_OBJECT('total_num', 1),
    `content` = JSON_OBJECT(
        'latex', COALESCE(`latex`, ''),
        'markdown', COALESCE(`markdown`, ''),
        'mathml', COALESCE(`mathml`, ''),
        'mml', COALESCE(`mml`, '')
    )
WHERE `content` IS NULL;

-- Step 3: verify that the backfill is complete (expected value: 0).
-- Rows can still have NULL content here if something wrote to the table through
-- the legacy columns after Step 2 ran.
SET @unmigrated_rows = (
    SELECT COUNT(*)
    FROM `recognition_results`
    WHERE `content` IS NULL
);

-- Step 4: drop the legacy columns, but only when Step 3 found nothing left to
-- migrate. Otherwise the legacy columns are left untouched and a status row is
-- returned so that the skip is visible in the client output; in that case re-run
-- Steps 2-4 once the remaining rows have been dealt with.
-- NOTE(review): writers should already be stopped or switched to the JSON columns
-- before this point; the check cannot protect rows written after it has run.
SET @drop_legacy_columns_sql = CASE
    WHEN @unmigrated_rows = 0
        THEN 'ALTER TABLE `recognition_results` DROP COLUMN `latex`, DROP COLUMN `markdown`, DROP COLUMN `mathml`, DROP COLUMN `mml`'
    ELSE 'SELECT ''SKIPPED: recognition_results still has rows with NULL content; legacy columns were not dropped'' AS migration_status'
END;

PREPARE drop_legacy_columns_stmt FROM @drop_legacy_columns_sql;
EXECUTE drop_legacy_columns_stmt;
DEALLOCATE PREPARE drop_legacy_columns_stmt;