src/models/ocr_model/model/TexTeller.py

from pathlib import Path

from ...globals import (
    VOCAB_SIZE,
    FIXED_IMG_SIZE,
    IMG_CHANNELS,
    MAX_TOKEN_SIZE
)

from transformers import (
    RobertaTokenizerFast,
    VisionEncoderDecoderModel,
    VisionEncoderDecoderConfig,
)


class TexTeller(VisionEncoderDecoderModel):
    """Vision encoder-decoder model configured from the bundled ``config.json``.

    Constructing the class directly builds a model from the configuration
    file that sits next to this module, with the image size, channel count,
    vocabulary size and maximum position embeddings overridden by the
    project-wide constants. The two classmethods load an already saved
    model or tokenizer instead.
    """

    # Identifier handed to ``from_pretrained`` when the caller gives no
    # explicit path (presumably a Hugging Face Hub repository id -- confirm).
    REPO_NAME = 'OleehyO/TexTeller'

    def __init__(self):
        """Build the model from ``config.json`` located beside this file."""
        config_file = Path(__file__).resolve().parent / "config.json"
        config = VisionEncoderDecoderConfig.from_pretrained(config_file)

        # Override the stored settings with the project-wide constants.
        encoder_cfg = config.encoder
        encoder_cfg.image_size = FIXED_IMG_SIZE
        encoder_cfg.num_channels = IMG_CHANNELS

        decoder_cfg = config.decoder
        decoder_cfg.vocab_size = VOCAB_SIZE
        decoder_cfg.max_position_embeddings = MAX_TOKEN_SIZE

        super().__init__(config=config)

    @classmethod
    def from_pretrained(cls, model_path: str = None):
        """Load a saved model.

        Args:
            model_path: ``None`` or ``'default'`` selects ``cls.REPO_NAME``;
                any other value is treated as a filesystem path and resolved
                to an absolute path before loading.

        Returns:
            The object returned by
            ``VisionEncoderDecoderModel.from_pretrained``. Note that the
            loader is called on ``VisionEncoderDecoderModel`` itself, not on
            ``cls``.
        """
        use_default = model_path is None or model_path == 'default'
        source = cls.REPO_NAME if use_default else str(Path(model_path).resolve())
        return VisionEncoderDecoderModel.from_pretrained(source)

    @classmethod
    def get_tokenizer(cls, tokenizer_path: str = None) -> RobertaTokenizerFast:
        """Load the tokenizer that accompanies the model.

        Args:
            tokenizer_path: ``None`` or ``'default'`` selects
                ``cls.REPO_NAME``; any other value is treated as a filesystem
                path and resolved to an absolute path before loading.

        Returns:
            The tokenizer returned by ``RobertaTokenizerFast.from_pretrained``.
        """
        use_default = tokenizer_path is None or tokenizer_path == 'default'
        source = cls.REPO_NAME if use_default else str(Path(tokenizer_path).resolve())
        return RobertaTokenizerFast.from_pretrained(source)
Initial commit 2024-02-11 08:06:50 +00:00			`from pathlib import Path`

checkpoint 2024-04-16 13:56:56 +00:00			`from ...globals import (`
Initial commit 2024-02-11 08:06:50 +00:00			`VOCAB_SIZE,`
			`FIXED_IMG_SIZE,`
			`IMG_CHANNELS,`
Eliminated dependency on paddleocr Change to trocr 2024-05-27 16:45:33 +00:00			`MAX_TOKEN_SIZE`
Initial commit 2024-02-11 08:06:50 +00:00			`)`

			`from transformers import (`
			`RobertaTokenizerFast,`
			`VisionEncoderDecoderModel,`
Change the model configuration to trocr 2024-05-28 04:20:07 +00:00			`VisionEncoderDecoderConfig,`
Initial commit 2024-02-11 08:06:50 +00:00			`)`


			`class TexTeller(VisionEncoderDecoderModel):`
TexTellerv2 release 2024-03-25 11:23:54 +00:00			`REPO_NAME = 'OleehyO/TexTeller'`
Eliminated dependency on paddleocr Change to trocr 2024-05-27 16:45:33 +00:00			`def __init__(self):`
Change the model configuration to trocr 2024-05-28 04:20:07 +00:00			`config = VisionEncoderDecoderConfig.from_pretrained(Path(__file__).resolve().parent / "config.json")`
			`config.encoder.image_size = FIXED_IMG_SIZE`
			`config.encoder.num_channels = IMG_CHANNELS`
			`config.decoder.vocab_size = VOCAB_SIZE`
			`config.decoder.max_position_embeddings = MAX_TOKEN_SIZE`
Eliminated dependency on paddleocr Change to trocr 2024-05-27 16:45:33 +00:00
			`super().__init__(config=config)`
Initial commit 2024-02-11 08:06:50 +00:00
			`@classmethod`
			`def from_pretrained(cls, model_path: str = None):`
Update files 2024-02-12 11:40:51 +00:00			`if model_path is None or model_path == 'default':`
Initial commit 2024-02-11 08:06:50 +00:00			`return VisionEncoderDecoderModel.from_pretrained(cls.REPO_NAME)`
			`model_path = Path(model_path).resolve()`
			`return VisionEncoderDecoderModel.from_pretrained(str(model_path))`

			`@classmethod`
			`def get_tokenizer(cls, tokenizer_path: str = None) -> RobertaTokenizerFast:`
Update files 2024-02-12 11:40:51 +00:00			`if tokenizer_path is None or tokenizer_path == 'default':`
Initial commit 2024-02-11 08:06:50 +00:00			`return RobertaTokenizerFast.from_pretrained(cls.REPO_NAME)`
			`tokenizer_path = Path(tokenizer_path).resolve()`
			`return RobertaTokenizerFast.from_pretrained(str(tokenizer_path))`