From 2ae59776fa9149f9bfc518e41d924f743f5f67c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=89=E6=B4=8B=E4=B8=89=E6=B4=8B?= <1258009915@qq.com>
Date: Sat, 22 Jun 2024 21:49:47 +0800
Subject: [PATCH 1/4] Add optimum

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 8721ff9..fd37181 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@ install_requires = [
     "streamlit-paste-button",
     "shapely",
     "pyclipper",
+    "optimum[exporters]"
 ]
 
 # Add platform-specific dependencies

From cd519d8e9902dee379804e3ba1cfd8edf1c64e6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=89=E6=B4=8B=E4=B8=89=E6=B4=8B?= <1258009915@qq.com>
Date: Sat, 22 Jun 2024 21:51:51 +0800
Subject: [PATCH 2/4] Support onnx runtime

---
 src/client_demo.py                      |  4 +--
 src/models/ocr_model/model/TexTeller.py | 21 ++++++++-----
 src/models/ocr_model/utils/inference.py |  8 +++--
 src/server.py                           | 39 ++++++++++++++++++++-----
 src/web.py                              | 19 ++++++++----
 5 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/src/client_demo.py b/src/client_demo.py
index 8e28ebf..bfd8d95 100644
--- a/src/client_demo.py
+++ b/src/client_demo.py
@@ -6,7 +6,7 @@ det_server_url = "http://127.0.0.1:8000/fdet"
 img_path = "/your/image/path/"
 with open(img_path, 'rb') as img:
     files = {'img': img}
-    response = requests.post(det_server_url, files=files)
-    # response = requests.post(rec_server_url, files=files)
+    response = requests.post(rec_server_url, files=files)
+    # response = requests.post(det_server_url, files=files)
 
     print(response.text)

diff --git a/src/models/ocr_model/model/TexTeller.py b/src/models/ocr_model/model/TexTeller.py
index f7fab2a..381bcf8 100644
--- a/src/models/ocr_model/model/TexTeller.py
+++ b/src/models/ocr_model/model/TexTeller.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+from optimum.onnxruntime import ORTModelForVision2Seq
 
 from ...globals import (
     VOCAB_SIZE,
@@ -10,25 +11,29 @@ from ...globals import (
 from transformers import (
     RobertaTokenizerFast,
     VisionEncoderDecoderModel,
-    VisionEncoderDecoderConfig,
+    VisionEncoderDecoderConfig
 )
 
 
 class TexTeller(VisionEncoderDecoderModel):
     REPO_NAME = 'OleehyO/TexTeller'
 
     def __init__(self):
-        config = VisionEncoderDecoderConfig.from_pretrained(Path(__file__).resolve().parent / "config.json")
-        config.encoder.image_size = FIXED_IMG_SIZE
-        config.encoder.num_channels = IMG_CHANNELS
-        config.decoder.vocab_size = VOCAB_SIZE
-        config.decoder.max_position_embeddings = MAX_TOKEN_SIZE
+        config = VisionEncoderDecoderConfig.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/model/trocr-small')
+        config.encoder.image_size = FIXED_IMG_SIZE
+        config.encoder.num_channels = IMG_CHANNELS
+        config.decoder.vocab_size=VOCAB_SIZE
+        config.decoder.max_position_embeddings=MAX_TOKEN_SIZE
         super().__init__(config=config)
 
     @classmethod
-    def from_pretrained(cls, model_path: str = None):
+    def from_pretrained(cls, model_path: str = None, use_onnx=False, onnx_provider=None):
         if model_path is None or model_path == 'default':
-            return VisionEncoderDecoderModel.from_pretrained(cls.REPO_NAME)
+            if not use_onnx:
+                return VisionEncoderDecoderModel.from_pretrained(cls.REPO_NAME)
+            else:
+                use_gpu = True if onnx_provider == 'cuda' else False
+                return ORTModelForVision2Seq.from_pretrained(cls.REPO_NAME, provider="CUDAExecutionProvider" if use_gpu else "CPUExecutionProvider")
         model_path = Path(model_path).resolve()
         return VisionEncoderDecoderModel.from_pretrained(str(model_path))
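The new `use_onnx` branch delegates model loading to `optimum.onnxruntime`. In isolation, that code path looks roughly like the sketch below (illustrative only; if the Hub repo ships no pre-exported ONNX weights, recent `optimum` releases may additionally need `export=True` to convert on the fly, which is the tooling the `optimum[exporters]` extra from PATCH 1/4 pulls in):

    from optimum.onnxruntime import ORTModelForVision2Seq

    def load_onnx_texteller(onnx_provider: str = 'cpu'):
        # 'cuda' maps to CUDAExecutionProvider; anything else falls back to
        # CPUExecutionProvider, mirroring the mapping in from_pretrained above.
        provider = 'CUDAExecutionProvider' if onnx_provider == 'cuda' else 'CPUExecutionProvider'
        return ORTModelForVision2Seq.from_pretrained('OleehyO/TexTeller', provider=provider)

Note that the CUDA provider is only available from the `onnxruntime-gpu` package; the plain `onnxruntime` wheel ships CPUExecutionProvider only.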
diff --git a/src/models/ocr_model/utils/inference.py b/src/models/ocr_model/utils/inference.py
index 10f1b0d..7d8e4d9 100644
--- a/src/models/ocr_model/utils/inference.py
+++ b/src/models/ocr_model/utils/inference.py
@@ -20,7 +20,9 @@ def inference(
 ) -> List[str]:
     if imgs == []:
         return []
-    model.eval()
+    if hasattr(model, 'eval'):
+        # not an ONNX session, so switch the torch model to eval mode
+        model.eval()
     if isinstance(imgs[0], str):
         imgs = convert2rgb(imgs)
     else:  # already numpy array(rgb format)
@@ -29,7 +31,9 @@ def inference(
 
     imgs = inference_transform(imgs)
     pixel_values = torch.stack(imgs)
-    model = model.to(accelerator)
+    if hasattr(model, 'eval'):
+        # not an ONNX session, so move the weights to the target device
+        model = model.to(accelerator)
     pixel_values = pixel_values.to(accelerator)
 
     generate_config = GenerationConfig(
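`ORTModelForVision2Seq` wraps an ONNX Runtime session rather than a `torch.nn.Module`, so it exposes neither `.eval()` nor `.to()`; the `hasattr(model, 'eval')` checks above are a duck-typing guard around the torch-only calls. The same pattern as a standalone sketch (assuming only torch modules carry an `eval` attribute):

    def prepare_model(model, accelerator: str):
        # A torch module needs eval mode and an explicit device move; an
        # ONNX-backed model already bound its execution provider at load time.
        if hasattr(model, 'eval'):
            model.eval()
            model = model.to(accelerator)
        return model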
diff --git a/src/server.py b/src/server.py
index b5e83ac..e6c0320 100644
--- a/src/server.py
+++ b/src/server.py
@@ -1,3 +1,4 @@
+import sys
 import argparse
 import tempfile
 import time
@@ -17,6 +18,10 @@ from models.det_model.inference import PredictConfig
 from models.ocr_model.utils.to_katex import to_katex
 
 
+PYTHON_VERSION = str(sys.version_info.major) + '.' + str(sys.version_info.minor)
+LIBPATH = Path(sys.executable).parent.parent / 'lib' / ('python' + PYTHON_VERSION) / 'site-packages'
+CUDNNPATH = LIBPATH / 'nvidia' / 'cudnn' / 'lib'
+
 parser = argparse.ArgumentParser()
 parser.add_argument(
     '-ckpt', '--checkpoint_dir', type=str
@@ -31,6 +36,7 @@
 parser.add_argument('--ngpu_per_replica', type=float, default=0.0)
 parser.add_argument('--inference-mode', type=str, default='cpu')
 parser.add_argument('--num_beams', type=int, default=1)
+parser.add_argument('-onnx', action='store_true', help='use ONNX Runtime for inference')
 
 args = parser.parse_args()
 if args.ngpu_per_replica > 0 and not args.inference_mode == 'cuda':
@@ -41,7 +47,7 @@
     num_replicas=args.num_replicas,
     ray_actor_options={
         "num_cpus": args.ncpu_per_replica,
-        "num_gpus": args.ngpu_per_replica
+        "num_gpus": args.ngpu_per_replica * 1.0 / 2
     }
 )
 class TexTellerRecServer:
@@ -50,14 +56,16 @@ class TexTellerRecServer:
     def __init__(
         self,
         checkpoint_path: str,
         tokenizer_path: str,
         inf_mode: str = 'cpu',
+        use_onnx: bool = False,
         num_beams: int = 1
     ) -> None:
-        self.model = TexTeller.from_pretrained(checkpoint_path)
+        self.model = TexTeller.from_pretrained(checkpoint_path, use_onnx=use_onnx, onnx_provider=inf_mode)
         self.tokenizer = TexTeller.get_tokenizer(tokenizer_path)
         self.inf_mode = inf_mode
         self.num_beams = num_beams
 
-        self.model = self.model.to(inf_mode) if inf_mode != 'cpu' else self.model
+        if not use_onnx:
+            self.model = self.model.to(inf_mode) if inf_mode != 'cpu' else self.model
 
     def predict(self, image_nparray) -> str:
         return to_katex(rec_inference(
@@ -65,14 +73,28 @@ class TexTellerRecServer:
             accelerator=self.inf_mode,
             num_beams=self.num_beams
         )[0])
-
-@serve.deployment(num_replicas=args.num_replicas)
+@serve.deployment(
+    num_replicas=args.num_replicas,
+    ray_actor_options={
+        "num_cpus": args.ncpu_per_replica,
+        "num_gpus": args.ngpu_per_replica * 1.0 / 2,
+        "runtime_env": {
+            "env_vars": {
+                "LD_LIBRARY_PATH": f"{str(CUDNNPATH)}/:$LD_LIBRARY_PATH"
+            }
+        }
+    },
+)
 class TexTellerDetServer:
     def __init__(
-        self
+        self,
+        inf_mode='cpu'
     ):
         self.infer_config = PredictConfig("./models/det_model/model/infer_cfg.yml")
-        self.latex_det_model = InferenceSession("./models/det_model/model/rtdetr_r50vd_6x_coco.onnx")
+        self.latex_det_model = InferenceSession(
+            "./models/det_model/model/rtdetr_r50vd_6x_coco.onnx",
+            providers=['CUDAExecutionProvider'] if inf_mode == 'cuda' else ['CPUExecutionProvider']
+        )
 
     async def predict(self, image_nparray) -> str:
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -120,11 +142,12 @@ if __name__ == '__main__':
     rec_server = TexTellerRecServer.bind(
         ckpt_dir, tknz_dir,
         inf_mode=args.inference_mode,
+        use_onnx=args.onnx,
         num_beams=args.num_beams
     )
 
     det_server = None
     if Path('./models/det_model/model/rtdetr_r50vd_6x_coco.onnx').exists():
-        det_server = TexTellerDetServer.bind()
+        det_server = TexTellerDetServer.bind(args.inference_mode)
 
     ingress = Ingress.bind(det_server, rec_server)
     # ingress_handle = serve.run(ingress, route_prefix="/predict")

diff --git a/src/web.py b/src/web.py
index 4c37e36..a0fe030 100644
--- a/src/web.py
+++ b/src/web.py
@@ -50,17 +50,20 @@ fail_gif_html = '''
 '''
 
 @st.cache_resource
-def get_texteller():
-    return TexTeller.from_pretrained(os.environ['CHECKPOINT_DIR'])
+def get_texteller(use_onnx, accelerator):
+    return TexTeller.from_pretrained(os.environ['CHECKPOINT_DIR'], use_onnx=use_onnx, onnx_provider=accelerator)
 
 @st.cache_resource
 def get_tokenizer():
     return TexTeller.get_tokenizer(os.environ['TOKENIZER_DIR'])
 
 @st.cache_resource
-def get_det_models():
+def get_det_models(accelerator):
     infer_config = PredictConfig("./models/det_model/model/infer_cfg.yml")
-    latex_det_model = InferenceSession("./models/det_model/model/rtdetr_r50vd_6x_coco.onnx")
+    latex_det_model = InferenceSession(
+        "./models/det_model/model/rtdetr_r50vd_6x_coco.onnx",
+        providers=['CUDAExecutionProvider'] if accelerator == 'cuda' else ['CPUExecutionProvider']
+    )
     return infer_config, latex_det_model
 
 @st.cache_resource()
@@ -141,18 +144,22 @@ with st.sidebar:
         on_change=change_side_bar
     )
 
+    st.markdown("## Speedup Setting")
+    use_onnx = st.toggle("ONNX Runtime")
+
+
 ############################## ##############################
 ################################ ################################
 
-texteller = get_texteller()
+texteller = get_texteller(use_onnx, accelerator)
 tokenizer = get_tokenizer()
 latex_rec_models = [texteller, tokenizer]
 
 if inf_mode == "Paragraph recognition":
-    infer_config, latex_det_model = get_det_models()
+    infer_config, latex_det_model = get_det_models(accelerator)
     lang_ocr_models = get_ocr_models(accelerator)
 
 st.markdown(html_string, unsafe_allow_html=True)
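End to end, the ONNX path is exercised exactly like the PyTorch one. A hypothetical smoke test against a server started with `python server.py -onnx` (the `/predict` route is taken from the commented-out `serve.run(ingress, route_prefix="/predict")` line in `server.py`; adjust it to your actual deployment):

    import requests

    rec_server_url = 'http://127.0.0.1:8000/predict'  # formula recognition endpoint
    with open('/your/image/path/', 'rb') as img:
        response = requests.post(rec_server_url, files={'img': img})
    print(response.text)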
From 25f6cddf72b537147a0be7c943a06dc2389eb1f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=89=E6=B4=8B=E4=B8=89=E6=B4=8B?= <1258009915@qq.com>
Date: Sat, 22 Jun 2024 21:52:30 +0800
Subject: [PATCH 3/4] Update README

---
 README.md           | 3 ++-
 assets/README_zh.md | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index e16d206..d4499da 100644
--- a/README.md
+++ b/README.md
@@ -209,11 +209,12 @@ python server.py
 | `-ckpt` | The path to the weights file,*default is TexTeller's pretrained weights*. |
 | `-tknz` | The path to the tokenizer,*default is TexTeller's tokenizer*. |
 | `-port` | The server's service port,*default is 8000*. |
-| `--inference-mode` | Whether to use GPU(cuda or mps) for inference,*default is CPU*. |
+| `--inference-mode` | The device to use for inference ("cuda" or "mps"),*default is "cpu"*. |
 | `--num_beams` | The number of beams for beam search,*default is 1*. |
 | `--num_replicas` | The number of service replicas to run on the server,*default is 1 replica*. You can use more replicas to achieve greater throughput.|
 | `--ncpu_per_replica` | The number of CPU cores used per service replica,*default is 1*.|
 | `--ngpu_per_replica` | The number of GPUs used per service replica,*default is 1*. You can set this value between 0 and 1 to run multiple service replicas on one GPU to share the GPU, thereby improving GPU utilization. (Note, if --num_replicas is 2, --ngpu_per_replica is 0.7, then 2 GPUs must be available) |
+| `-onnx` | Perform inference with ONNX Runtime,*disabled by default*. |
 
 > [!NOTE]
 > A client demo can be found at `src/client/demo.py`, you can refer to `demo.py` to send requests to the server

diff --git a/assets/README_zh.md b/assets/README_zh.md
index 1f9a0d9..4bbc907 100644
--- a/assets/README_zh.md
+++ b/assets/README_zh.md
@@ -247,11 +247,12 @@ python server.py
 | `-ckpt` | 权重文件的路径,*默认为TexTeller的预训练权重*。|
 | `-tknz` | 分词器的路径,*默认为TexTeller的分词器*。|
 | `-port` | 服务器的服务端口,*默认是8000*。|
-| `--inference-mode` | 是否使用GPU(cuda或mps)推理,*默认为CPU*。|
+| `--inference-mode` | 使用"cuda"或"mps"推理,*默认为"cpu"*。|
 | `--num_beams` | beam search的beam数量,*默认是1*。|
 | `--num_replicas` | 在服务器上运行的服务副本数量,*默认1个副本*。你可以使用更多的副本来获取更大的吞吐量。|
 | `--ncpu_per_replica` | 每个服务副本所用的CPU核心数,*默认为1*。|
 | `--ngpu_per_replica` | 每个服务副本所用的GPU数量,*默认为1*。你可以把这个值设置成 0~1之间的数,这样会在一个GPU上运行多个服务副本来共享GPU,从而提高GPU的利用率。(注意,如果 --num_replicas 2, --ngpu_per_replica 0.7, 那么就必须要有2个GPU可用) |
+| `-onnx` | 使用ONNX Runtime进行推理,*默认不使用*。|
 
 > [!NOTE]
 > 一个客户端demo可以在 `TexTeller/client/demo.py`找到,你可以参考 `demo.py`来给server发送请求
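Putting the documented flags together, a hypothetical launch that pairs the new switch with the existing GPU-sharing knobs (values are examples only; per the table, `--ngpu_per_replica 0.5` lets two replicas share a single GPU):

    python server.py -onnx --inference-mode cuda --num_replicas 2 --ngpu_per_replica 0.5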
"forced_bos_token_id": null, "forced_eos_token_id": null, @@ -40,9 +43,10 @@ "LABEL_0": 0, "LABEL_1": 1 }, + "layernorm_embedding": true, "length_penalty": 1.0, "max_length": 20, - "max_position_embeddings": 512, + "max_position_embeddings": 1024, "min_length": 0, "model_type": "trocr", "no_repeat_ngram_size": 0, @@ -62,8 +66,10 @@ "return_dict_in_generate": false, "scale_embedding": false, "sep_token_id": null, + "suppress_tokens": null, "task_specific_params": null, "temperature": 1.0, + "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, @@ -71,10 +77,11 @@ "top_p": 1.0, "torch_dtype": null, "torchscript": false, - "transformers_version": "4.12.0.dev0", + "typical_p": 1.0, "use_bfloat16": false, "use_cache": false, - "vocab_size": 50265 + "use_learned_position_embeddings": true, + "vocab_size": 15000 }, "encoder": { "_name_or_path": "", @@ -82,15 +89,18 @@ "architectures": null, "attention_probs_dropout_prob": 0.0, "bad_words_ids": null, + "begin_suppress_tokens": null, "bos_token_id": null, "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, "decoder_start_token_id": null, "diversity_penalty": 0.0, "do_sample": false, "early_stopping": false, - "cross_attention_hidden_size": null, "encoder_no_repeat_ngram_size": 0, + "encoder_stride": 16, "eos_token_id": null, + "exponential_decay_length_penalty": null, "finetuning_task": null, "forced_bos_token_id": null, "forced_eos_token_id": null, @@ -101,7 +111,7 @@ "0": "LABEL_0", "1": "LABEL_1" }, - "image_size": 384, + "image_size": 448, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, @@ -119,7 +129,7 @@ "num_attention_heads": 12, "num_beam_groups": 1, "num_beams": 1, - "num_channels": 3, + "num_channels": 1, "num_hidden_layers": 12, "num_return_sequences": 1, "output_attentions": false, @@ -136,8 +146,10 @@ "return_dict": true, "return_dict_in_generate": false, "sep_token_id": null, + "suppress_tokens": null, "task_specific_params": null, "temperature": 1.0, + "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, @@ -145,12 +157,12 @@ "top_p": 1.0, "torch_dtype": null, "torchscript": false, - "transformers_version": "4.12.0.dev0", + "typical_p": 1.0, "use_bfloat16": false }, "is_encoder_decoder": true, "model_type": "vision-encoder-decoder", "tie_word_embeddings": false, - "torch_dtype": "float32", - "transformers_version": null + "transformers_version": "4.41.2", + "use_cache": true }