Eliminate the dependency on paddleocr

Switch to building the model from a TrOCR config
This commit is contained in:
三洋三洋
2024-05-27 16:45:33 +00:00
parent cc602f5a82
commit 5c9cff2125
2 changed files with 13 additions and 15 deletions

View File

@@ -9,9 +9,10 @@ nltk
python-multipart python-multipart
augraphy augraphy
onnxruntime-gpu
streamlit==1.30 streamlit==1.30
streamlit-paste-button streamlit-paste-button
paddleocr shapely
pyclipper
onnxruntime-gpu

View File

@@ -4,29 +4,26 @@ from ...globals import (
VOCAB_SIZE, VOCAB_SIZE,
FIXED_IMG_SIZE, FIXED_IMG_SIZE,
IMG_CHANNELS, IMG_CHANNELS,
MAX_TOKEN_SIZE
) )
from transformers import ( from transformers import (
ViTConfig,
ViTModel,
TrOCRConfig,
TrOCRForCausalLM,
RobertaTokenizerFast, RobertaTokenizerFast,
VisionEncoderDecoderModel, VisionEncoderDecoderModel,
VisionEncoderDecoderConfig
) )
class TexTeller(VisionEncoderDecoderModel): class TexTeller(VisionEncoderDecoderModel):
REPO_NAME = 'OleehyO/TexTeller' REPO_NAME = 'OleehyO/TexTeller'
def __init__(self, decoder_path=None, tokenizer_path=None): def __init__(self):
encoder = ViTModel(ViTConfig( config = VisionEncoderDecoderConfig.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/model/trocr-small')
image_size=FIXED_IMG_SIZE, config.encoder.image_size = FIXED_IMG_SIZE
num_channels=IMG_CHANNELS config.encoder.num_channels = IMG_CHANNELS
)) config.decoder.vocab_size=VOCAB_SIZE
decoder = TrOCRForCausalLM(TrOCRConfig( config.decoder.max_position_embeddings=MAX_TOKEN_SIZE
vocab_size=VOCAB_SIZE,
)) super().__init__(config=config)
super().__init__(encoder=encoder, decoder=decoder)
@classmethod @classmethod
def from_pretrained(cls, model_path: str = None): def from_pretrained(cls, model_path: str = None):