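update: raise VOCAB_SIZE (10000 -> 15000) and MAX_TOKEN_SIZE (512 -> 1024); point TexTeller.REPO_NAME at a local training checkpoint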

2024-03-18 15:48:04 +00:00
parent 5d089b5a7f
commit 74341c7e8a
6 changed files with 330 additions and 118 deletions
--- a/src/models/globals.py
+++ b/src/models/globals.py
@@ -3,7 +3,7 @@ IMAGE_MEAN = 0.9545467
 IMAGE_STD  = 0.15394445

 # Vocabulary size for TexTeller
-VOCAB_SIZE = 10000
+VOCAB_SIZE = 15000

 # Fixed size for input image for TexTeller
 FIXED_IMG_SIZE = 448
@@ -12,7 +12,7 @@ FIXED_IMG_SIZE = 448
 IMG_CHANNELS = 1  # grayscale image

 # Max size of token for embedding
-MAX_TOKEN_SIZE = 512
+MAX_TOKEN_SIZE = 1024

 # Scaling ratio for random resizing when training
 MAX_RESIZE_RATIO = 1.15
--- a/src/models/ocr_model/model/TexTeller.py
+++ b/src/models/ocr_model/model/TexTeller.py
@@ -17,7 +17,7 @@ from transformers import (


 class TexTeller(VisionEncoderDecoderModel):
-    REPO_NAME = 'OleehyO/TexTeller'
+    REPO_NAME = '/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv2/checkpoint-356000'
    def __init__(self, decoder_path=None, tokenizer_path=None):
        encoder = ViTModel(ViTConfig(
            image_size=FIXED_IMG_SIZE,