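初步修改完成，但仍然有问题 (Initial changes complete, but problems remain; debug_mode is left enabled and a temporary debug probe of train_dataset is left in train.py for investigation)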

2024-03-27 04:54:49 +00:00
parent 6373e19132
commit dbf35fe9c4
3 changed files with 28 additions and 8 deletions
--- a/src/models/ocr_model/train/train.py
+++ b/src/models/ocr_model/train/train.py
@@ -15,7 +15,7 @@ from ...globals import MAX_TOKEN_SIZE, MIN_WIDTH, MIN_HEIGHT

 def train(model, tokenizer, train_dataset, eval_dataset, collate_fn_with_tokenizer):
    training_args = TrainingArguments(**CONFIG)
-    debug_mode = False
+    debug_mode = True
    if debug_mode:
        training_args.auto_find_batch_size = False
        training_args.num_train_epochs = 2
@@ -96,6 +96,10 @@ if __name__ == '__main__':
    # model = TexTeller()
    model = TexTeller.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/model/ckpt')

+    # =================  debug  =======================
+    foo = train_dataset[:3]
+    # =================  debug  =======================
+
    enable_train    = True
    enable_evaluate = True
    if enable_train: