This commit is contained in:
三洋三洋
2024-03-18 15:48:04 +00:00
parent 5d089b5a7f
commit 74341c7e8a
6 changed files with 330 additions and 118 deletions

View File

@@ -3,7 +3,7 @@ IMAGE_MEAN = 0.9545467
IMAGE_STD = 0.15394445
# Vocabulary size for TexTeller
-VOCAB_SIZE = 10000
+VOCAB_SIZE = 15000
# Fixed size for input image for TexTeller
FIXED_IMG_SIZE = 448
@@ -12,7 +12,7 @@ FIXED_IMG_SIZE = 448
IMG_CHANNELS = 1 # grayscale image
# Max size of token for embedding
-MAX_TOKEN_SIZE = 512
+MAX_TOKEN_SIZE = 1024
# Scaling ratio for random resizing when training
MAX_RESIZE_RATIO = 1.15

View File

@@ -17,7 +17,7 @@ from transformers import (
class TexTeller(VisionEncoderDecoderModel):
-REPO_NAME = 'OleehyO/TexTeller'
+REPO_NAME = '/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv2/checkpoint-356000'
def __init__(self, decoder_path=None, tokenizer_path=None):
encoder = ViTModel(ViTConfig(
image_size=FIXED_IMG_SIZE,