@@ -209,11 +209,12 @@ python server.py
 | `-ckpt` | The path to the weights file, *default is TexTeller's pretrained weights*. |
 | `-tknz` | The path to the tokenizer, *default is TexTeller's tokenizer*. |
 | `-port` | The server's service port, *default is 8000*. |
-| `--inference-mode` | Whether to use a GPU (cuda or mps) for inference, *default is CPU*. |
+| `--inference-mode` | Whether to use "cuda" or "mps" for inference, *default is "cpu"*. |
 | `--num_beams` | The number of beams for beam search, *default is 1*. |
 | `--num_replicas` | The number of service replicas to run on the server, *default is 1 replica*. You can use more replicas to achieve greater throughput. |
 | `--ncpu_per_replica` | The number of CPU cores used per service replica, *default is 1*. |
 | `--ngpu_per_replica` | The number of GPUs used per service replica, *default is 1*. You can set this value between 0 and 1 to run multiple service replicas on one GPU and share it, thereby improving GPU utilization. (Note: if --num_replicas is 2 and --ngpu_per_replica is 0.7, then 2 GPUs must be available.) |
+| `-onnx` | Perform inference with ONNX Runtime, *disabled by default*. |

 > [!NOTE]
 > A client demo can be found at `src/client/demo.py`; you can refer to `demo.py` to send requests to the server.
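For reference, these options compose in the obvious way. An illustrative launch command (not taken from the diff): two replicas can share a single GPU because Ray only needs num_replicas × ngpu_per_replica GPUs in total (2 × 0.5 = 1 here, whereas the 2 × 0.7 case noted in the table rounds up to 2 GPUs).

```bash
# Illustrative launch: two replicas sharing one GPU, beam search with 3 beams
python server.py --inference-mode cuda --num_replicas 2 --ngpu_per_replica 0.5 --num_beams 3
```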
@@ -247,11 +247,12 @@ python server.py
 | `-ckpt` | The path to the weights file, *default is TexTeller's pretrained weights*. |
 | `-tknz` | The path to the tokenizer, *default is TexTeller's tokenizer*. |
 | `-port` | The server's service port, *default is 8000*. |
-| `--inference-mode` | Whether to use a GPU (cuda or mps) for inference, *default is CPU*. |
+| `--inference-mode` | Use "cuda" or "mps" for inference, *default is "cpu"*. |
 | `--num_beams` | The number of beams for beam search, *default is 1*. |
 | `--num_replicas` | The number of service replicas to run on the server, *default is 1 replica*. You can use more replicas to achieve greater throughput. |
 | `--ncpu_per_replica` | The number of CPU cores used per service replica, *default is 1*. |
 | `--ngpu_per_replica` | The number of GPUs used per service replica, *default is 1*. You can set this to a value between 0 and 1 so that multiple service replicas share one GPU, improving GPU utilization. (Note: with --num_replicas 2 and --ngpu_per_replica 0.7, 2 GPUs must be available.) |
+| `-onnx` | Perform inference with ONNX Runtime, *disabled by default*. |

 > [!NOTE]
 > A client demo can be found at `TexTeller/client/demo.py`; you can refer to `demo.py` to send requests to the server.
setup.py
@@ -20,6 +20,7 @@ install_requires = [
     "streamlit-paste-button",
     "shapely",
     "pyclipper",
+    "optimum[exporters]"
 ]

 # Add platform-specific dependencies
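The added requirement is the package that provides `ORTModelForVision2Seq`, which the model-loading code below imports. On an existing environment it can be installed directly (standard pip usage, shown only for convenience):

```bash
pip install "optimum[exporters]"
```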
@@ -6,7 +6,7 @@ det_server_url = "http://127.0.0.1:8000/fdet"
 img_path = "/your/image/path/"
 with open(img_path, 'rb') as img:
     files = {'img': img}
-    response = requests.post(det_server_url, files=files)
-    # response = requests.post(rec_server_url, files=files)
+    response = requests.post(rec_server_url, files=files)
+    # response = requests.post(det_server_url, files=files)

 print(response.text)
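The default request now targets the recognition route, with the detection call left commented out. A slightly more defensive variant of the same client (an illustrative sketch; `rec_server_url` is defined earlier in `demo.py` and is not shown in this hunk, so a placeholder is used here):

```python
import requests

det_server_url = "http://127.0.0.1:8000/fdet"
rec_server_url = "http://127.0.0.1:8000/..."  # placeholder: use the recognition route from demo.py

with open("/your/image/path/", "rb") as img:
    # switch endpoints by swapping the URL: formula recognition vs. formula detection
    response = requests.post(rec_server_url, files={"img": img})

response.raise_for_status()  # fail loudly on HTTP errors instead of printing an error page
print(response.text)
```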
@@ -1,4 +1,5 @@
 from pathlib import Path
+from optimum.onnxruntime import ORTModelForVision2Seq

 from ...globals import (
     VOCAB_SIZE,
@@ -10,7 +11,7 @@ from ...globals import (
 from transformers import (
     RobertaTokenizerFast,
     VisionEncoderDecoderModel,
-    VisionEncoderDecoderConfig,
+    VisionEncoderDecoderConfig
 )


@@ -26,9 +27,13 @@ class TexTeller(VisionEncoderDecoderModel):
         super().__init__(config=config)

     @classmethod
-    def from_pretrained(cls, model_path: str = None):
+    def from_pretrained(cls, model_path: str = None, use_onnx=False, onnx_provider=None):
         if model_path is None or model_path == 'default':
-            return VisionEncoderDecoderModel.from_pretrained(cls.REPO_NAME)
+            if not use_onnx:
+                return VisionEncoderDecoderModel.from_pretrained(cls.REPO_NAME)
+            else:
+                use_gpu = True if onnx_provider == 'cuda' else False
+                return ORTModelForVision2Seq.from_pretrained(cls.REPO_NAME, provider="CUDAExecutionProvider" if use_gpu else "CPUExecutionProvider")
         model_path = Path(model_path).resolve()
         return VisionEncoderDecoderModel.from_pretrained(str(model_path))

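With this change, ONNX inference is opted into through the same classmethod. A minimal sketch of the two loading paths for the default checkpoint (the import path is assumed from the repo layout; `model_path=None` falls back to `cls.REPO_NAME`):

```python
from models.ocr_model.model.TexTeller import TexTeller  # import path assumed

# Default path: regular PyTorch weights via transformers
pt_model = TexTeller.from_pretrained()

# New path: an ONNX Runtime model, placed on GPU when onnx_provider == 'cuda'
ort_model = TexTeller.from_pretrained(use_onnx=True, onnx_provider='cuda')
```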
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "OleehyO/TexTeller",
   "architectures": [
     "VisionEncoderDecoderModel"
   ],
@@ -10,9 +11,11 @@
     "architectures": null,
     "attention_dropout": 0.0,
     "bad_words_ids": null,
+    "begin_suppress_tokens": null,
     "bos_token_id": 0,
     "chunk_size_feed_forward": 0,
     "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": 768,
     "d_model": 1024,
     "decoder_attention_heads": 16,
     "decoder_ffn_dim": 4096,
@@ -23,9 +26,9 @@
     "do_sample": false,
     "dropout": 0.1,
     "early_stopping": false,
-    "cross_attention_hidden_size": 768,
     "encoder_no_repeat_ngram_size": 0,
     "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
@@ -40,9 +43,10 @@
       "LABEL_0": 0,
       "LABEL_1": 1
     },
+    "layernorm_embedding": true,
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 512,
+    "max_position_embeddings": 1024,
     "min_length": 0,
     "model_type": "trocr",
     "no_repeat_ngram_size": 0,
@@ -62,8 +66,10 @@
     "return_dict_in_generate": false,
     "scale_embedding": false,
     "sep_token_id": null,
+    "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
+    "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -71,10 +77,11 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.12.0.dev0",
+    "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": false,
-    "vocab_size": 50265
+    "use_learned_position_embeddings": true,
+    "vocab_size": 15000
   },
   "encoder": {
     "_name_or_path": "",
@@ -82,15 +89,18 @@
     "architectures": null,
     "attention_probs_dropout_prob": 0.0,
     "bad_words_ids": null,
+    "begin_suppress_tokens": null,
     "bos_token_id": null,
     "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
     "do_sample": false,
     "early_stopping": false,
-    "cross_attention_hidden_size": null,
     "encoder_no_repeat_ngram_size": 0,
+    "encoder_stride": 16,
     "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
@@ -101,7 +111,7 @@
       "0": "LABEL_0",
       "1": "LABEL_1"
     },
-    "image_size": 384,
+    "image_size": 448,
     "initializer_range": 0.02,
     "intermediate_size": 3072,
     "is_decoder": false,
@@ -119,7 +129,7 @@
     "num_attention_heads": 12,
     "num_beam_groups": 1,
     "num_beams": 1,
-    "num_channels": 3,
+    "num_channels": 1,
     "num_hidden_layers": 12,
     "num_return_sequences": 1,
     "output_attentions": false,
@@ -136,8 +146,10 @@
     "return_dict": true,
     "return_dict_in_generate": false,
     "sep_token_id": null,
+    "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
+    "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -145,12 +157,12 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.12.0.dev0",
+    "typical_p": 1.0,
     "use_bfloat16": false
   },
   "is_encoder_decoder": true,
   "model_type": "vision-encoder-decoder",
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": null
+  "transformers_version": "4.41.2",
+  "use_cache": true
 }
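The fields that actually change in this config are easy to sanity-check once the updated checkpoint is pulled (a quick verification sketch; it assumes the published `OleehyO/TexTeller` repo ships this config):

```python
from transformers import VisionEncoderDecoderConfig

cfg = VisionEncoderDecoderConfig.from_pretrained("OleehyO/TexTeller")
print(cfg.decoder.vocab_size)               # 15000 (was 50265)
print(cfg.decoder.max_position_embeddings)  # 1024  (was 512)
print(cfg.encoder.image_size)               # 448   (was 384)
print(cfg.encoder.num_channels)             # 1     (was 3)
```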
@@ -20,7 +20,9 @@ def inference(
 ) -> List[str]:
     if imgs == []:
         return []
-    model.eval()
+    if hasattr(model, 'eval'):
+        # not onnx session, turn model.eval()
+        model.eval()
     if isinstance(imgs[0], str):
         imgs = convert2rgb(imgs)
     else:  # already numpy array (rgb format)
@@ -29,7 +31,9 @@ def inference(
     imgs = inference_transform(imgs)
     pixel_values = torch.stack(imgs)

-    model = model.to(accelerator)
+    if hasattr(model, 'eval'):
+        # not onnx session, move weights to device
+        model = model.to(accelerator)
     pixel_values = pixel_values.to(accelerator)

     generate_config = GenerationConfig(
@@ -1,3 +1,4 @@
+import sys
 import argparse
 import tempfile
 import time
@@ -17,6 +18,10 @@ from models.det_model.inference import PredictConfig
 from models.ocr_model.utils.to_katex import to_katex


+PYTHON_VERSION = str(sys.version_info.major) + '.' + str(sys.version_info.minor)
+LIBPATH = Path(sys.executable).parent.parent / 'lib' / ('python' + PYTHON_VERSION) / 'site-packages'
+CUDNNPATH = LIBPATH / 'nvidia' / 'cudnn' / 'lib'
+
 parser = argparse.ArgumentParser()
 parser.add_argument(
     '-ckpt', '--checkpoint_dir', type=str
@@ -31,6 +36,7 @@ parser.add_argument('--ngpu_per_replica', type=float, default=0.0)

 parser.add_argument('--inference-mode', type=str, default='cpu')
 parser.add_argument('--num_beams', type=int, default=1)
+parser.add_argument('-onnx', action='store_true', help='using onnx runtime')

 args = parser.parse_args()
 if args.ngpu_per_replica > 0 and not args.inference_mode == 'cuda':
@@ -41,7 +47,7 @@ if args.ngpu_per_replica > 0 and not args.inference_mode == 'cuda':
     num_replicas=args.num_replicas,
     ray_actor_options={
         "num_cpus": args.ncpu_per_replica,
-        "num_gpus": args.ngpu_per_replica
+        "num_gpus": args.ngpu_per_replica * 1.0 / 2
     }
 )
 class TexTellerRecServer:
@@ -50,14 +56,16 @@ class TexTellerRecServer:
         checkpoint_path: str,
         tokenizer_path: str,
         inf_mode: str = 'cpu',
+        use_onnx: bool = False,
         num_beams: int = 1
     ) -> None:
-        self.model = TexTeller.from_pretrained(checkpoint_path)
+        self.model = TexTeller.from_pretrained(checkpoint_path, use_onnx=use_onnx, onnx_provider=inf_mode)
         self.tokenizer = TexTeller.get_tokenizer(tokenizer_path)
         self.inf_mode = inf_mode
         self.num_beams = num_beams

-        self.model = self.model.to(inf_mode) if inf_mode != 'cpu' else self.model
+        if not use_onnx:
+            self.model = self.model.to(inf_mode) if inf_mode != 'cpu' else self.model

     def predict(self, image_nparray) -> str:
         return to_katex(rec_inference(
@@ -65,14 +73,28 @@ class TexTellerRecServer:
             accelerator=self.inf_mode, num_beams=self.num_beams
         )[0])

-@serve.deployment(num_replicas=args.num_replicas)
+@serve.deployment(
+    num_replicas=args.num_replicas,
+    ray_actor_options={
+        "num_cpus": args.ncpu_per_replica,
+        "num_gpus": args.ngpu_per_replica * 1.0 / 2,
+        "runtime_env": {
+            "env_vars": {
+                "LD_LIBRARY_PATH": f"{str(CUDNNPATH)}/:$LD_LIBRARY_PATH"
+            }
+        }
+    },
+)
 class TexTellerDetServer:
     def __init__(
-        self
+        self,
+        inf_mode='cpu'
     ):
         self.infer_config = PredictConfig("./models/det_model/model/infer_cfg.yml")
-        self.latex_det_model = InferenceSession("./models/det_model/model/rtdetr_r50vd_6x_coco.onnx")
+        self.latex_det_model = InferenceSession(
+            "./models/det_model/model/rtdetr_r50vd_6x_coco.onnx",
+            providers=['CUDAExecutionProvider'] if inf_mode == 'cuda' else ['CPUExecutionProvider']
+        )

     async def predict(self, image_nparray) -> str:
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -120,11 +142,12 @@ if __name__ == '__main__':
     rec_server = TexTellerRecServer.bind(
         ckpt_dir, tknz_dir,
         inf_mode=args.inference_mode,
+        use_onnx=args.onnx,
         num_beams=args.num_beams
     )
     det_server = None
     if Path('./models/det_model/model/rtdetr_r50vd_6x_coco.onnx').exists():
-        det_server = TexTellerDetServer.bind()
+        det_server = TexTellerDetServer.bind(args.inference_mode)
     ingress = Ingress.bind(det_server, rec_server)

     # ingress_handle = serve.run(ingress, route_prefix="/predict")
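Putting the server-side pieces together, an ONNX deployment on CUDA would be launched roughly as follows (an illustrative command; the cuDNN directory computed above is injected into each replica's `LD_LIBRARY_PATH` through the `runtime_env` added to the deployment):

```bash
# Illustrative: ONNX Runtime with the CUDA execution provider
python server.py -onnx --inference-mode cuda
```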
src/web.py
@@ -50,17 +50,20 @@ fail_gif_html = '''
 '''

 @st.cache_resource
-def get_texteller():
-    return TexTeller.from_pretrained(os.environ['CHECKPOINT_DIR'])
+def get_texteller(use_onnx, accelerator):
+    return TexTeller.from_pretrained(os.environ['CHECKPOINT_DIR'], use_onnx=use_onnx, onnx_provider=accelerator)

 @st.cache_resource
 def get_tokenizer():
     return TexTeller.get_tokenizer(os.environ['TOKENIZER_DIR'])

 @st.cache_resource
-def get_det_models():
+def get_det_models(accelerator):
     infer_config = PredictConfig("./models/det_model/model/infer_cfg.yml")
-    latex_det_model = InferenceSession("./models/det_model/model/rtdetr_r50vd_6x_coco.onnx")
+    latex_det_model = InferenceSession(
+        "./models/det_model/model/rtdetr_r50vd_6x_coco.onnx",
+        providers=['CUDAExecutionProvider'] if accelerator == 'cuda' else ['CPUExecutionProvider']
+    )
     return infer_config, latex_det_model

 @st.cache_resource()
@@ -141,18 +144,22 @@ with st.sidebar:
         on_change=change_side_bar
     )

+    st.markdown("## Seepup Setting")
+    use_onnx = st.toggle("ONNX Runtime ")
+
+

 ############################## </sidebar> ##############################


 ################################ <page> ################################

-texteller = get_texteller()
+texteller = get_texteller(use_onnx, accelerator)
 tokenizer = get_tokenizer()
 latex_rec_models = [texteller, tokenizer]

 if inf_mode == "Paragraph recognition":
-    infer_config, latex_det_model = get_det_models()
+    infer_config, latex_det_model = get_det_models(accelerator)
     lang_ocr_models = get_ocr_models(accelerator)

 st.markdown(html_string, unsafe_allow_html=True)
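Since `get_texteller` and `get_det_models` now take arguments, `st.cache_resource` keys its cache on those arguments: flipping the sidebar toggle loads a separate model instance rather than mutating the cached one. A tiny standalone illustration of that behaviour (dummy loader, not the app's code):

```python
import streamlit as st

@st.cache_resource
def get_model(use_onnx: bool, accelerator: str):
    # stand-in for TexTeller.from_pretrained(...): each distinct
    # (use_onnx, accelerator) pair is loaded once and then reused
    return f"model(use_onnx={use_onnx}, accelerator={accelerator})"

st.write(get_model(st.toggle("ONNX Runtime"), "cpu"))
```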