Eliminated dependency on paddleocr

Switch to TrOCR: build the TexTeller model from the `trocr-small` configuration.
2024-05-27 16:45:33 +00:00
parent cc602f5a82
commit 5c9cff2125
2 changed files with 13 additions and 15 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,9 +9,10 @@ nltk
 python-multipart
 augraphy
 onnxruntime-gpu
 streamlit==1.30
 streamlit-paste-button
-paddleocr
+shapely
 pyclipper
 onnxruntime-gpu
--- a/src/models/ocr_model/model/TexTeller.py
+++ b/src/models/ocr_model/model/TexTeller.py
@@ -4,29 +4,26 @@ from ...globals import (
    VOCAB_SIZE,
    FIXED_IMG_SIZE,
    IMG_CHANNELS,
    MAX_TOKEN_SIZE
 )
 from transformers import (
    ViTConfig,
    ViTModel,
    TrOCRConfig,
    TrOCRForCausalLM,
    RobertaTokenizerFast,
    VisionEncoderDecoderModel,
    VisionEncoderDecoderConfig
 )
 class TexTeller(VisionEncoderDecoderModel):
    REPO_NAME = 'OleehyO/TexTeller'
-    def __init__(self, decoder_path=None, tokenizer_path=None):
+    def __init__(self):
-        encoder = ViTModel(ViTConfig(
+        config = VisionEncoderDecoderConfig.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/model/trocr-small')
-            image_size=FIXED_IMG_SIZE,
+        config.encoder.image_size = FIXED_IMG_SIZE
-            num_channels=IMG_CHANNELS
+        config.encoder.num_channels = IMG_CHANNELS
-        ))
+        config.decoder.vocab_size=VOCAB_SIZE
-        decoder = TrOCRForCausalLM(TrOCRConfig(
+        config.decoder.max_position_embeddings=MAX_TOKEN_SIZE
-            vocab_size=VOCAB_SIZE,
+
-        ))
+        super().__init__(config=config)
        super().__init__(encoder=encoder, decoder=decoder)
    @classmethod
    def from_pretrained(cls, model_path: str = None):