Updated the v3 inference.py template (adds support for natural-scene and mixed-text scene recognition)
src/inference.py

@@ -1,9 +1,11 @@
 import os
 import argparse
 
+import cv2 as cv
 from pathlib import Path
-from models.ocr_model.utils.inference import inference
+from models.ocr_model.utils.inference import inference as latex_inference
 from models.ocr_model.model.TexTeller import TexTeller
 
+from utils import load_det_tex_model, load_lang_models
 
 if __name__ == '__main__':
@@ -21,16 +23,31 @@ if __name__ == '__main__':
         action='store_true',
         help='use cuda or not'
     )
+    # ================= new feature ==================
+    parser.add_argument(
+        '-mix',
+        type=str,
+        help='use mix mode, only Chinese and English are supported.'
+    )
+    # ==================================================
 
     args = parser.parse_args()
 
     # You can use your own checkpoint and tokenizer path.
     print('Loading model and tokenizer...')
-    model = TexTeller.from_pretrained()
+    latex_rec_model = TexTeller.from_pretrained()
     tokenizer = TexTeller.get_tokenizer()
     print('Model and tokenizer loaded.')
 
-    img_path = [args.img]
+    # img_path = [args.img]
+    img = cv.imread(args.img)
     print('Inference...')
-    res = inference(model, tokenizer, img_path, args.cuda)
-    print(res[0])
+    if not args.mix:
+        res = latex_inference(latex_rec_model, tokenizer, [img], args.cuda)
+        print(res[0])
+    else:
+        # latex_det_model = load_det_tex_model()
+        # lang_model = load_lang_models()...
+        ...
+        # res: str = mix_inference(latex_det_model, latex_rec_model, lang_model, img, args.cuda)
+        # print(res)
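
One detail worth calling out: `-mix` is declared with `type=str` rather than `action='store_true'`, so it carries a string value (a language code) and defaults to None when omitted; that default is what makes `if not args.mix` select the plain LaTeX branch. A minimal, self-contained sketch of those semantics (the parser mirrors the diff; `sample.png` is a placeholder path):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-img', type=str)
parser.add_argument('-cuda', action='store_true', help='use cuda or not')
parser.add_argument(
    '-mix',
    type=str,
    help='use mix mode, only Chinese and English are supported.'
)

# Flag omitted: argparse leaves args.mix as None, so `not args.mix`
# is True and the script takes the plain LaTeX-recognition branch.
args = parser.parse_args(['-img', 'sample.png'])
assert args.mix is None

# Flag given a language code: the mixed-content branch runs instead.
args = parser.parse_args(['-img', 'sample.png', '-mix', 'ch'])
assert args.mix == 'ch'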
src/models/ocr_model/utils/inference.py

@@ -13,16 +13,16 @@ from models.globals import MAX_TOKEN_SIZE
 def inference(
     model: TexTeller,
     tokenizer: RobertaTokenizerFast,
-    imgs_path: Union[List[str], List[np.ndarray]],
+    imgs: Union[List[str], List[np.ndarray]],
     use_cuda: bool,
     num_beams: int = 1,
 ) -> List[str]:
     model.eval()
-    if isinstance(imgs_path[0], str):
-        imgs = convert2rgb(imgs_path)
+    if isinstance(imgs[0], str):
+        imgs = convert2rgb(imgs)
     else:  # already numpy array(rgb format)
-        assert isinstance(imgs_path[0], np.ndarray)
-        imgs = imgs_path
+        assert isinstance(imgs[0], np.ndarray)
+        imgs = imgs
     imgs = inference_transform(imgs)
     pixel_values = torch.stack(imgs)
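
With the parameter renamed from `imgs_path` to `imgs`, the function now advertises that it accepts either file paths or in-memory arrays. One caveat for the array form, per the `else` branch's comment: the arrays must already be RGB, while `cv.imread` (as used by the script above) returns BGR. A hedged usage sketch, assuming the repo's `src` directory is on the path and `formula.png` is a placeholder image:

import cv2 as cv

from models.ocr_model.model.TexTeller import TexTeller
from models.ocr_model.utils.inference import inference as latex_inference

latex_rec_model = TexTeller.from_pretrained()
tokenizer = TexTeller.get_tokenizer()

# 1) a list of file paths: convert2rgb() loads and converts internally
res = latex_inference(latex_rec_model, tokenizer, ['formula.png'], use_cuda=False)

# 2) a list of numpy arrays: the caller owns the channel order;
#    cv.imread gives BGR, hence the explicit conversion
img = cv.cvtColor(cv.imread('formula.png'), cv.COLOR_BGR2RGB)
res = latex_inference(latex_rec_model, tokenizer, [img], use_cuda=False)
print(res[0])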
src/utils.py (new file, 19 lines)

@@ -0,0 +1,19 @@
+import numpy as np
+
+from models.ocr_model.utils.inference import inference as latex_inference
+
+
+def load_lang_models(language: str):
+    ...
+    # language: 'ch' or 'en'
+    # return det_model, rec_model (or model)
+
+
+def load_det_tex_model():
+    ...
+    # return the loaded latex detection model
+
+
+def mix_inference(latex_det_model, latex_rec_model, lang_model, img: np.ndarray, use_cuda: bool) -> str:
+    ...
+    # latex_inference(...)
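
The three stubs sketch the intended mix-mode pipeline: detect formula regions (load_det_tex_model), recognize them with TexTeller (latex_inference), OCR the surrounding Chinese/English text (load_lang_models), and merge everything back in reading order. That merge step is the one piece that can be illustrated without committing to a particular detector or OCR backend; below is a minimal, self-contained sketch (merge_fragments and its inputs are hypothetical, not part of this commit):

# Each fragment is (recognized_text, (x0, y0, x1, y1)) in image coordinates,
# coming either from latex_rec_model (formula crops) or lang_model (text crops).
def merge_fragments(fragments):
    # reading order: top-to-bottom by vertical center, then left-to-right
    def key(fragment):
        _, (x0, y0, x1, y1) = fragment
        return ((y0 + y1) / 2, x0)
    return ' '.join(text for text, _ in sorted(fragments, key=key))

fragments = [
    ('$E=mc^2$', (120, 10, 200, 40)),    # formula crop
    ('mass-energy:', (10, 12, 110, 38)),  # text crop
]
print(merge_fragments(fragments))  # -> 'mass-energy: $E=mc^2$'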