diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ac95090..6ada4fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,8 +4,10 @@ repos: hooks: - id: ruff args: [--fix, --respect-gitignore, --config=pyproject.toml] + exclude: ^texteller/models/thrid_party/paddleocr/ - id: ruff-format args: [--config=pyproject.toml] + exclude: ^texteller/models/thrid_party/paddleocr/ - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 diff --git a/src/models/ocr_model/train/dataset/formulas.jsonl b/src/models/ocr_model/train/dataset/formulas.jsonl deleted file mode 100644 index 5a07425..0000000 --- a/src/models/ocr_model/train/dataset/formulas.jsonl +++ /dev/null @@ -1,35 +0,0 @@ -{"img_name": "0.png", "formula": "\\[\\mathbb{C}^{4}\\stackrel{{\\pi_{1}}}{{\\longleftarrow}}\\mathcal{ F}\\stackrel{{\\pi_{2}}}{{\\rightarrow}}\\mathcal{PT},\\]"} -{"img_name": "1.png", "formula": "\\[W^{*}_{Z}(x_{1},x_{2})=W_{f\\lrcorner Z}(y_{1},y_{2})=\\mathcal{P}\\exp\\left( \\int_{\\gamma}A_{\\mu}dx^{\\mu}\\right).\\]"} -{"img_name": "2.png", "formula": "\\[G=W^{*}_{Z}(q,p)=\\tilde{H}H^{-1}\\]"} -{"img_name": "3.png", "formula": "\\[H=W^{*}_{Z}(p,x),\\ \\ \\tilde{H}=W^{*}_{Z}(q,x).\\]"} -{"img_name": "4.png", "formula": "\\[v\\cdot f^{*}A|_{x}=(f\\lrcorner Z)_{*}v\\cdot A|_{f\\lrcorner Z(x)},\\quad x\\in Z, \\ v\\in T_{x}Z.\\]"} -{"img_name": "5.png", "formula": "\\[(f\\lrcorner Z)_{*}v\\cdot A|_{f\\lrcorner Z(x)}=v^{\\alpha\\dot{\\alpha}}\\Big{(} \\frac{\\partial y^{\\beta\\dot{\\beta}}}{\\partial x^{\\alpha\\dot{\\alpha}}}A_{\\beta \\dot{\\beta}}\\Big{)}\\Big{|}_{f\\lrcorner Z(x)},\\ x\\in Z,\\ v\\in T_{x}Z,\\]"} -{"img_name": "6.png", "formula": "\\[\\{T_{i},T_{j}\\}=\\{\\tilde{T}^{i},\\tilde{T}^{j}\\}=0,\\ \\ \\{T_{i},\\tilde{T}^{j}\\}=2i \\delta^{j}_{i}D,\\]"} -{"img_name": "7.png", "formula": "\\[(\\partial_{s},q_{i},\\tilde{q}^{k})\\rightarrow(D,M^{j}_{i}T_{j},\\tilde{M}^{k}_ {l}\\tilde{T}^{l}),\\]"} -{"img_name": "8.png", "formula": "\\[M^{i}_{j}\\tilde{M}^{j}_{k}=\\delta^{i}_{k}.\\]"} -{"img_name": "9.png", "formula": "\\[Q_{i\\alpha}=q_{i\\alpha}+\\omega_{i\\alpha},\\ \\tilde{Q}^{i}_{\\dot{\\alpha}}=q^{i}_{ \\dot{\\alpha}}+\\tilde{\\omega}^{i}_{\\dot{\\alpha}},\\ D_{\\alpha\\dot{\\alpha}}= \\partial_{\\alpha\\dot{\\alpha}}+A_{\\alpha\\dot{\\alpha}}.\\]"} -{"img_name": "10.png", "formula": "\\[\\hat{f}(g,\\theta^{i\\alpha},\\tilde{\\theta}^{\\dot{\\alpha}}_{j})=(f(g),[V^{-1}]^ {\\alpha}_{\\beta}\\theta^{i\\beta},[\\tilde{V}^{-1}]^{\\dot{\\alpha}}_{\\dot{\\beta}} \\tilde{\\theta}^{\\dot{\\beta}}_{j}),\\ g\\in{\\cal G},\\]"} -{"img_name": "11.png", "formula": "\\[v^{\\beta\\dot{\\beta}}V^{\\alpha}_{\\beta}\\tilde{V}^{\\dot{\\alpha}}_{\\dot{\\beta}} =((f\\lrcorner L_{0})_{*}v)^{\\alpha\\dot{\\alpha}},\\]"} -{"img_name": "12.png", "formula": "\\[\\omega_{i\\alpha}=\\tilde{\\theta}^{\\dot{\\alpha}}_{i}h_{\\alpha\\dot{\\alpha}}(x^{ \\beta\\dot{\\beta}},\\tau^{\\beta\\dot{\\beta}}),\\ \\ \\tilde{\\omega}^{i}_{\\alpha}=\\theta^{i\\alpha}\\tilde{h}_{\\alpha\\dot{\\alpha}}(x^{ \\beta\\dot{\\beta}},\\tau^{\\beta\\dot{\\beta}}),\\]"} -{"img_name": "13.png", "formula": "\\[\\begin{split}&\\lambda^{\\alpha}\\hat{f}^{*}\\omega_{i\\alpha}(z)= \\tilde{\\theta}^{\\dot{\\beta}}_{i}\\lambda^{\\alpha}\\left(V^{\\beta}_{\\alpha}h_{ \\beta\\dot{\\beta}}(x^{\\prime},\\tau^{\\prime})\\right),\\\\ &\\tilde{\\lambda}^{\\dot{\\alpha}}\\hat{f}^{*}\\tilde{\\omega}^{i}_{ \\dot{\\alpha}}(z)=\\theta^{i\\beta}\\tilde{\\lambda}^{\\dot{\\alpha}}\\left(\\tilde{V}^ 
{\\dot{\\beta}}_{\\dot{\\alpha}}\\tilde{h}_{\\beta\\dot{\\beta}}(x^{\\prime},\\tau^{ \\prime})\\right),\\end{split}\\]"} -{"img_name": "14.png", "formula": "\\[A_{\\alpha\\dot{\\alpha}}=A_{\\alpha\\dot{\\alpha}}(x^{\\beta\\dot{\\beta}},\\tau^{ \\beta\\dot{\\beta}})\\]"} -{"img_name": "15.png", "formula": "\\[D=\\lambda^{\\alpha}\\tilde{\\lambda}^{\\dot{\\alpha}}D_{\\alpha\\dot{\\alpha}}\\]"} -{"img_name": "16.png", "formula": "\\[D=\\lambda^{\\alpha}\\tilde{\\lambda}^{\\dot{\\alpha}}\\partial_{\\alpha\\dot{\\alpha}}\\]"} -{"img_name": "17.png", "formula": "\\[[v_{1}\\cdot D^{*},v_{2}\\cdot D^{*}]=0\\]"} -{"img_name": "18.png", "formula": "\\[\\Phi_{A}=(\\omega_{i\\alpha},\\tilde{\\omega}^{i}_{\\dot{\\alpha}},A_{\\alpha\\dot{ \\alpha}})\\]"} -{"img_name": "19.png", "formula": "\\[\\hat{f}:{\\cal F}^{6|4N}\\rightarrow{\\cal F}^{6|4N}\\]"} -{"img_name": "20.png", "formula": "\\[\\sigma=(s,\\xi^{i},\\tilde{\\xi}_{j})\\in\\mathbb{C}^{1|2N}\\]"} -{"img_name": "21.png", "formula": "\\[\\tau^{\\alpha\\dot{\\alpha}}(h_{\\alpha\\dot{\\alpha}}+\\tilde{h}_{\\alpha\\dot{\\alpha} })=0\\]"} -{"img_name": "22.png", "formula": "\\[\\tau^{\\alpha\\dot{\\alpha}}\\rightarrow[V^{-1}]^{\\alpha}_{\\beta}[\\tilde{V}^{-1}]^{ \\dot{\\alpha}}_{\\dot{\\beta}}\\tau^{\\beta\\dot{\\beta}}\\]"} -{"img_name": "23.png", "formula": "\\[\\tau^{\\beta\\dot{\\beta}}=\\sum_{i}\\theta^{i\\beta}\\tilde{\\theta}^{\\dot{\\beta}}_{i}\\]"} -{"img_name": "24.png", "formula": "\\[\\theta^{i\\alpha}\\omega_{i\\alpha}+\\tilde{\\theta}^{i}_{\\dot{\\alpha}}\\tilde{ \\omega}^{\\dot{\\alpha}}_{i}=0\\]"} -{"img_name": "25.png", "formula": "\\[\\tilde{T}^{i}=\\tilde{\\lambda}^{\\dot{\\alpha}}\\tilde{Q}^{i}_{\\dot{\\alpha}}\\]"} -{"img_name": "26.png", "formula": "\\[\\tilde{T}^{i}=\\tilde{\\lambda}^{\\dot{\\alpha}}\\tilde{q}^{i}_{\\dot{\\alpha}}\\]"} -{"img_name": "27.png", "formula": "\\[\\tilde{\\lambda}^{\\dot{\\alpha}}f^{*}A_{\\alpha\\dot{\\alpha}}=H^{-1}\\tilde{ \\lambda}^{\\dot{\\alpha}}\\partial_{\\alpha\\dot{\\alpha}}H\\]"} -{"img_name": "28.png", "formula": "\\[\\tilde{q}^{i}=\\partial_{\\tilde{\\xi}_{i}}+i\\xi^{i}\\partial_{s}\\]"} -{"img_name": "29.png", "formula": "\\[\\tilde{q}^{i}_{\\dot{\\alpha}}=\\frac{\\partial}{\\partial\\tilde{\\theta}^{\\dot{ \\alpha}}_{i}}+i\\theta^{i\\alpha}\\frac{\\partial}{\\partial x^{\\alpha\\dot{\\alpha}}}\\]"} -{"img_name": "30.png", "formula": "\\[f\\lrcorner L(z)=\\pi_{1}\\circ f(z,\\lambda,\\tilde{\\lambda})\\ \\forall z\\in L\\]"} -{"img_name": "31.png", "formula": "\\[q_{i\\alpha}=\\frac{\\partial}{\\partial\\theta^{i\\alpha}}+i\\tilde{\\theta}^{\\dot{ \\alpha}}_{i}\\frac{\\partial}{\\partial x^{\\alpha\\dot{\\alpha}}}\\]"} -{"img_name": "32.png", "formula": "\\[q_{i}=\\partial_{\\xi^{i}}+i\\tilde{\\xi}_{i}\\partial_{s}\\]"} -{"img_name": "33.png", "formula": "\\[v^{\\alpha\\dot{\\alpha}}=\\lambda^{\\alpha}\\tilde{\\lambda}^{\\dot{\\alpha}}\\]"} -{"img_name": "34.png", "formula": "\\[z^{A}=(x^{\\alpha\\dot{\\alpha}},\\theta^{i\\alpha},\\tilde{\\theta}^{\\dot{\\alpha}}_{ j})\\]"} diff --git a/src/models/ocr_model/train/dataset/loader.py b/src/models/ocr_model/train/dataset/loader.py deleted file mode 100644 index f782f36..0000000 --- a/src/models/ocr_model/train/dataset/loader.py +++ /dev/null @@ -1,50 +0,0 @@ -from PIL import Image -from pathlib import Path -import datasets -import json - -DIR_URL = Path('absolute/path/to/dataset/directory') -# e.g. 
DIR_URL = Path('/home/OleehyO/TeXTeller/src/models/ocr_model/train/dataset') - - -class LatexFormulas(datasets.GeneratorBasedBuilder): - BUILDER_CONFIGS = [] - - def _info(self): - return datasets.DatasetInfo( - features=datasets.Features({ - "image": datasets.Image(), - "latex_formula": datasets.Value("string") - }) - ) - - def _split_generators(self, dl_manager: datasets.DownloadManager): - dir_path = Path(dl_manager.download(str(DIR_URL))) - assert dir_path.is_dir() - - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, - gen_kwargs={ - 'dir_path': dir_path, - } - ) - ] - - def _generate_examples(self, dir_path: Path): - images_path = dir_path / 'images' - formulas_path = dir_path / 'formulas.jsonl' - - img2formula = {} - with formulas_path.open('r', encoding='utf-8') as f: - for line in f: - single_json = json.loads(line) - img2formula[single_json['img_name']] = single_json['formula'] - - for img_path in images_path.iterdir(): - if img_path.suffix not in ['.jpg', '.png']: - continue - yield str(img_path), { - "image": Image.open(img_path), - "latex_formula": img2formula[img_path.name] - } diff --git a/src/models/ocr_model/train/training_args.py b/src/models/ocr_model/train/training_args.py deleted file mode 100644 index 07334fa..0000000 --- a/src/models/ocr_model/train/training_args.py +++ /dev/null @@ -1,38 +0,0 @@ -CONFIG = { - "seed": 42, # Random seed for reproducibility - "use_cpu": False, # Whether to use CPU (it's easier to debug with CPU when starting to test the code) - "learning_rate": 5e-5, # Learning rate - "num_train_epochs": 10, # Total number of training epochs - "per_device_train_batch_size": 4, # Batch size per GPU for training - "per_device_eval_batch_size": 8, # Batch size per GPU for evaluation - - "output_dir": "train_result", # Output directory - "overwrite_output_dir": False, # If the output directory exists, do not delete its content - "report_to": ["tensorboard"], # Report logs to TensorBoard - - "save_strategy": "steps", # Strategy to save checkpoints - "save_steps": 500, # Interval of steps to save checkpoints, can be int or a float (0~1), when float it represents the ratio of total training steps (e.g., can set to 1.0 / 2000) - "save_total_limit": 5, # Maximum number of models to save. 
The oldest models will be deleted if this number is exceeded - - "logging_strategy": "steps", # Log every certain number of steps - "logging_steps": 500, # Number of steps between each log - "logging_nan_inf_filter": False, # Record logs for loss=nan or inf - - "optim": "adamw_torch", # Optimizer - "lr_scheduler_type": "cosine", # Learning rate scheduler - "warmup_ratio": 0.1, # Ratio of warmup steps in total training steps (e.g., for 1000 steps, the first 100 steps gradually increase lr from 0 to the set lr) - "max_grad_norm": 1.0, # For gradient clipping, ensure the norm of the gradients does not exceed 1.0 (default 1.0) - "fp16": False, # Whether to use 16-bit floating point for training (generally not recommended, as loss can easily explode) - "bf16": False, # Whether to use Brain Floating Point (bfloat16) for training (recommended if architecture supports it) - "gradient_accumulation_steps": 1, # Gradient accumulation steps, consider this parameter to achieve large batch size effects when batch size cannot be large - "jit_mode_eval": False, # Whether to use PyTorch jit trace during eval (can speed up the model, but the model must be static, otherwise will throw errors) - "torch_compile": False, # Whether to use torch.compile to compile the model (for better training and inference performance) - - "dataloader_pin_memory": True, # Can speed up data transfer between CPU and GPU - "dataloader_num_workers": 1, # Default is not to use multiprocessing for data loading, usually set to 4*number of GPUs used - - "evaluation_strategy": "steps", # Evaluation strategy, can be "steps" or "epoch" - "eval_steps": 500, # If evaluation_strategy="step" - - "remove_unused_columns": False, # Don't change this unless you really know what you are doing. -} diff --git a/src/models/utils/__init__.py b/src/models/utils/__init__.py deleted file mode 100644 index 775dc11..0000000 --- a/src/models/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mix_inference import mix_inference \ No newline at end of file diff --git a/src/rec_infer_from_crop_imgs.py b/src/rec_infer_from_crop_imgs.py deleted file mode 100644 index 89bef18..0000000 --- a/src/rec_infer_from_crop_imgs.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -import argparse -import cv2 as cv -from pathlib import Path -from models.ocr_model.utils.to_katex import to_katex -from models.ocr_model.utils.inference import inference as latex_inference -from models.ocr_model.model.TexTeller import TexTeller - - -if __name__ == '__main__': - os.chdir(Path(__file__).resolve().parent) - parser = argparse.ArgumentParser() - parser.add_argument( - '-img_dir', - type=str, - help='path to the input image', - default='./detect_results/subimages' - ) - parser.add_argument( - '-output_dir', - type=str, - help='path to the output dir', - default='./rec_results' - ) - parser.add_argument( - '--inference-mode', - type=str, - default='cpu', - help='Inference mode, select one of cpu, cuda, or mps' - ) - parser.add_argument( - '--num-beam', - type=int, - default=1, - help='number of beam search for decoding' - ) - - args = parser.parse_args() - - print('Loading model and tokenizer...') - latex_rec_model = TexTeller.from_pretrained() - tokenizer = TexTeller.get_tokenizer() - print('Model and tokenizer loaded.') - - # Create the output directory if it doesn't exist - os.makedirs(args.output_dir, exist_ok=True) - - # Loop through all images in the input directory - for filename in os.listdir(args.img_dir): - img_path = os.path.join(args.img_dir, filename) - img = cv.imread(img_path) 
- - if img is not None: - print(f'Inference for {filename}...') - res = latex_inference(latex_rec_model, tokenizer, [img], accelerator=args.inference_mode, num_beams=args.num_beam) - res = to_katex(res[0]) - - # Save the recognition result to a text file - output_file = os.path.join(args.output_dir, os.path.splitext(filename)[0] + '.txt') - with open(output_file, 'w') as f: - f.write(res) - - print(f'Result saved to {output_file}') - else: - print(f"Warning: Could not read image {img_path}. Skipping...") diff --git a/src/client_demo.py b/texteller/client_demo.py similarity index 100% rename from src/client_demo.py rename to texteller/client_demo.py diff --git a/src/infer_det.py b/texteller/infer_det.py similarity index 65% rename from src/infer_det.py rename to texteller/infer_det.py index 00baf9e..2250ae3 100644 --- a/src/infer_det.py +++ b/texteller/infer_det.py @@ -1,85 +1,96 @@ -import os -import argparse -import glob -import subprocess - -import onnxruntime -from pathlib import Path - -from models.det_model.inference import PredictConfig, predict_image - - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--infer_cfg", type=str, help="infer_cfg.yml", - default="./models/det_model/model/infer_cfg.yml") -parser.add_argument('--onnx_file', type=str, help="onnx model file path", - default="./models/det_model/model/rtdetr_r50vd_6x_coco.onnx") -parser.add_argument("--image_dir", type=str, default='./testImgs') -parser.add_argument("--image_file", type=str) -parser.add_argument("--imgsave_dir", type=str, default="./detect_results") -parser.add_argument('--use_gpu', action='store_true', help='Whether to use GPU for inference', default=True) - - -def get_test_images(infer_dir, infer_img): - """ - Get image path list in TEST mode - """ - assert infer_img is not None or infer_dir is not None, \ - "--image_file or --image_dir should be set" - assert infer_img is None or os.path.isfile(infer_img), \ - "{} is not a file".format(infer_img) - assert infer_dir is None or os.path.isdir(infer_dir), \ - "{} is not a directory".format(infer_dir) - - # infer_img has a higher priority - if infer_img and os.path.isfile(infer_img): - return [infer_img] - - images = set() - infer_dir = os.path.abspath(infer_dir) - assert os.path.isdir(infer_dir), \ - "infer_dir {} is not a directory".format(infer_dir) - exts = ['jpg', 'jpeg', 'png', 'bmp'] - exts += [ext.upper() for ext in exts] - for ext in exts: - images.update(glob.glob('{}/*.{}'.format(infer_dir, ext))) - images = list(images) - - assert len(images) > 0, "no image found in {}".format(infer_dir) - print("Found {} inference images in total.".format(len(images))) - - return images - -def download_file(url, filename): - print(f"Downloading {filename}...") - subprocess.run(["wget", "-q", "--show-progress", "-O", filename, url], check=True) - print("Download complete.") - -if __name__ == '__main__': - cur_path = os.getcwd() - script_dirpath = Path(__file__).resolve().parent - os.chdir(script_dirpath) - - FLAGS = parser.parse_args() - - if not os.path.exists(FLAGS.infer_cfg): - infer_cfg_url = "https://huggingface.co/TonyLee1256/texteller_det/resolve/main/infer_cfg.yml?download=true" - download_file(infer_cfg_url, FLAGS.infer_cfg) - - if not os.path.exists(FLAGS.onnx_file): - onnx_file_url = "https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco.onnx?download=true" - download_file(onnx_file_url, FLAGS.onnx_file) - - # load image list - img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - - if 
FLAGS.use_gpu: - predictor = onnxruntime.InferenceSession(FLAGS.onnx_file, providers=['CUDAExecutionProvider']) - else: - predictor = onnxruntime.InferenceSession(FLAGS.onnx_file, providers=['CPUExecutionProvider']) - # load infer config - infer_config = PredictConfig(FLAGS.infer_cfg) - - predict_image(FLAGS.imgsave_dir, infer_config, predictor, img_list) - - os.chdir(cur_path) +import os +import argparse +import glob +import subprocess + +import onnxruntime +from pathlib import Path + +from models.det_model.inference import PredictConfig, predict_image + + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--infer_cfg", type=str, help="infer_cfg.yml", default="./models/det_model/model/infer_cfg.yml" +) +parser.add_argument( + '--onnx_file', + type=str, + help="onnx model file path", + default="./models/det_model/model/rtdetr_r50vd_6x_coco.onnx", +) +parser.add_argument("--image_dir", type=str, default='./testImgs') +parser.add_argument("--image_file", type=str) +parser.add_argument("--imgsave_dir", type=str, default="./detect_results") +parser.add_argument( + '--use_gpu', action='store_true', help='Whether to use GPU for inference', default=True +) + + +def get_test_images(infer_dir, infer_img): + """ + Get image path list in TEST mode + """ + assert ( + infer_img is not None or infer_dir is not None + ), "--image_file or --image_dir should be set" + assert infer_img is None or os.path.isfile(infer_img), "{} is not a file".format(infer_img) + assert infer_dir is None or os.path.isdir(infer_dir), "{} is not a directory".format(infer_dir) + + # infer_img has a higher priority + if infer_img and os.path.isfile(infer_img): + return [infer_img] + + images = set() + infer_dir = os.path.abspath(infer_dir) + assert os.path.isdir(infer_dir), "infer_dir {} is not a directory".format(infer_dir) + exts = ['jpg', 'jpeg', 'png', 'bmp'] + exts += [ext.upper() for ext in exts] + for ext in exts: + images.update(glob.glob('{}/*.{}'.format(infer_dir, ext))) + images = list(images) + + assert len(images) > 0, "no image found in {}".format(infer_dir) + print("Found {} inference images in total.".format(len(images))) + + return images + + +def download_file(url, filename): + print(f"Downloading {filename}...") + subprocess.run(["wget", "-q", "--show-progress", "-O", filename, url], check=True) + print("Download complete.") + + +if __name__ == '__main__': + cur_path = os.getcwd() + script_dirpath = Path(__file__).resolve().parent + os.chdir(script_dirpath) + + FLAGS = parser.parse_args() + + if not os.path.exists(FLAGS.infer_cfg): + infer_cfg_url = "https://huggingface.co/TonyLee1256/texteller_det/resolve/main/infer_cfg.yml?download=true" + download_file(infer_cfg_url, FLAGS.infer_cfg) + + if not os.path.exists(FLAGS.onnx_file): + onnx_file_url = "https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco.onnx?download=true" + download_file(onnx_file_url, FLAGS.onnx_file) + + # load image list + img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) + + if FLAGS.use_gpu: + predictor = onnxruntime.InferenceSession( + FLAGS.onnx_file, providers=['CUDAExecutionProvider'] + ) + else: + predictor = onnxruntime.InferenceSession( + FLAGS.onnx_file, providers=['CPUExecutionProvider'] + ) + # load infer config + infer_config = PredictConfig(FLAGS.infer_cfg) + + predict_image(FLAGS.imgsave_dir, infer_config, predictor, img_list) + + os.chdir(cur_path) diff --git a/src/inference.py b/texteller/inference.py similarity index 74% rename from src/inference.py rename 
to texteller/inference.py index 07a0cae..f6cfe5b 100644 --- a/src/inference.py +++ b/texteller/inference.py @@ -18,32 +18,20 @@ from models.det_model.inference import PredictConfig if __name__ == '__main__': os.chdir(Path(__file__).resolve().parent) parser = argparse.ArgumentParser() + parser.add_argument('-img', type=str, required=True, help='path to the input image') parser.add_argument( - '-img', - type=str, - required=True, - help='path to the input image' - ) - parser.add_argument( - '--inference-mode', + '--inference-mode', type=str, default='cpu', - help='Inference mode, select one of cpu, cuda, or mps' + help='Inference mode, select one of cpu, cuda, or mps', ) parser.add_argument( - '--num-beam', - type=int, - default=1, - help='number of beam search for decoding' + '--num-beam', type=int, default=1, help='number of beam search for decoding' ) - parser.add_argument( - '-mix', - action='store_true', - help='use mix mode' - ) - + parser.add_argument('-mix', action='store_true', help='use mix mode') + args = parser.parse_args() - + # You can use your own checkpoint and tokenizer path. print('Loading model and tokenizer...') latex_rec_model = TexTeller.from_pretrained() @@ -63,8 +51,8 @@ if __name__ == '__main__': use_gpu = args.inference_mode == 'cuda' SIZE_LIMIT = 20 * 1024 * 1024 - det_model_dir = "./models/thrid_party/paddleocr/checkpoints/det/default_model.onnx" - rec_model_dir = "./models/thrid_party/paddleocr/checkpoints/rec/default_model.onnx" + det_model_dir = "./models/thrid_party/paddleocr/checkpoints/det/default_model.onnx" + rec_model_dir = "./models/thrid_party/paddleocr/checkpoints/rec/default_model.onnx" # The CPU inference of the detection model will be faster than the GPU inference (in onnxruntime) det_use_gpu = False rec_use_gpu = use_gpu and not (os.path.getsize(rec_model_dir) < SIZE_LIMIT) @@ -78,8 +66,16 @@ if __name__ == '__main__': detector = predict_det.TextDetector(paddleocr_args) paddleocr_args.use_gpu = rec_use_gpu recognizer = predict_rec.TextRecognizer(paddleocr_args) - + lang_ocr_models = [detector, recognizer] latex_rec_models = [latex_rec_model, tokenizer] - res = mix_inference(img_path, infer_config, latex_det_model, lang_ocr_models, latex_rec_models, args.inference_mode, args.num_beam) + res = mix_inference( + img_path, + infer_config, + latex_det_model, + lang_ocr_models, + latex_rec_models, + args.inference_mode, + args.num_beam, + ) print(res) diff --git a/texteller/models/__pycache__/globals.cpython-310.pyc b/texteller/models/__pycache__/globals.cpython-310.pyc new file mode 100644 index 0000000..48b23dd Binary files /dev/null and b/texteller/models/__pycache__/globals.cpython-310.pyc differ diff --git a/src/models/det_model/Bbox.py b/texteller/models/det_model/Bbox.py similarity index 90% rename from src/models/det_model/Bbox.py rename to texteller/models/det_model/Bbox.py index 9784541..53d5735 100644 --- a/src/models/det_model/Bbox.py +++ b/texteller/models/det_model/Bbox.py @@ -9,7 +9,7 @@ class Point: def __init__(self, x: int, y: int): self.x = int(x) self.y = int(y) - + def __repr__(self) -> str: return f"Point(x={self.x}, y={self.y})" @@ -28,30 +28,28 @@ class Bbox: @property def ul_point(self) -> Point: return self.p - + @property def ur_point(self) -> Point: return Point(self.p.x + self.w, self.p.y) - + @property def ll_point(self) -> Point: return Point(self.p.x, self.p.y + self.h) - + @property def lr_point(self) -> Point: return Point(self.p.x + self.w, self.p.y + self.h) - - + def same_row(self, other) -> bool: - if ( - (self.p.y >= 
other.p.y and self.ll_point.y <= other.ll_point.y) - or (self.p.y <= other.p.y and self.ll_point.y >= other.ll_point.y) + if (self.p.y >= other.p.y and self.ll_point.y <= other.ll_point.y) or ( + self.p.y <= other.p.y and self.ll_point.y >= other.ll_point.y ): return True if self.ll_point.y <= other.p.y or self.p.y >= other.ll_point.y: return False return 1.0 * abs(self.p.y - other.p.y) / max(self.h, other.h) < self.THREADHOLD - + def __lt__(self, other) -> bool: ''' from top to bottom, from left to right @@ -60,7 +58,7 @@ class Bbox: return self.p.y < other.p.y else: return self.p.x < other.p.x - + def __repr__(self) -> str: return f"Bbox(upper_left_point={self.p}, h={self.h}, w={self.w}), label={self.label}, confident={self.confidence}, content={self.content})" @@ -76,16 +74,16 @@ def draw_bboxes(img: Image.Image, bboxes: List[Bbox], name="annotated_image.png" top = bbox.p.y right = bbox.p.x + bbox.w bottom = bbox.p.y + bbox.h - + # Draw the rectangle on the image drawer.rectangle([left, top, right, bottom], outline="green", width=1) - + # Optionally, add text label if it exists if bbox.label: drawer.text((left, top), bbox.label, fill="blue") - + if bbox.content: drawer.text((left, bottom - 10), bbox.content[:10], fill="red") # Save the image with drawn rectangles - img.save(log_dir / name) \ No newline at end of file + img.save(log_dir / name) diff --git a/texteller/models/det_model/__pycache__/Bbox.cpython-310.pyc b/texteller/models/det_model/__pycache__/Bbox.cpython-310.pyc new file mode 100644 index 0000000..c9e0f25 Binary files /dev/null and b/texteller/models/det_model/__pycache__/Bbox.cpython-310.pyc differ diff --git a/texteller/models/det_model/__pycache__/inference.cpython-310.pyc b/texteller/models/det_model/__pycache__/inference.cpython-310.pyc new file mode 100644 index 0000000..58073df Binary files /dev/null and b/texteller/models/det_model/__pycache__/inference.cpython-310.pyc differ diff --git a/texteller/models/det_model/__pycache__/preprocess.cpython-310.pyc b/texteller/models/det_model/__pycache__/preprocess.cpython-310.pyc new file mode 100644 index 0000000..f8d3d37 Binary files /dev/null and b/texteller/models/det_model/__pycache__/preprocess.cpython-310.pyc differ diff --git a/src/models/det_model/inference.py b/texteller/models/det_model/inference.py similarity index 81% rename from src/models/det_model/inference.py rename to texteller/models/det_model/inference.py index 5e0dd2c..c866ae7 100644 --- a/src/models/det_model/inference.py +++ b/texteller/models/det_model/inference.py @@ -12,10 +12,28 @@ from .Bbox import Bbox # Global dictionary SUPPORT_MODELS = { - 'YOLO', 'PPYOLOE', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet', - 'S2ANet', 'JDE', 'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet', - 'TOOD', 'RetinaNet', 'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet', - 'DETR' + 'YOLO', + 'PPYOLOE', + 'RCNN', + 'SSD', + 'Face', + 'FCOS', + 'SOLOv2', + 'TTFNet', + 'S2ANet', + 'JDE', + 'FairMOT', + 'DeepSORT', + 'GFL', + 'PicoDet', + 'CenterNet', + 'TOOD', + 'RetinaNet', + 'StrongBaseline', + 'STGCN', + 'YOLOX', + 'HRNet', + 'DETR', } @@ -42,12 +60,12 @@ class PredictConfig(object): self.fpn_stride = yml_conf.get("fpn_stride", None) color_pool = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)] - self.colors = {label: color_pool[i % len(color_pool)] for i, label in enumerate(self.label_list)} + self.colors = { + label: color_pool[i % len(color_pool)] for i, label in enumerate(self.label_list) + } if self.arch == 'RCNN' and 
yml_conf.get('export_onnx', False): - print( - 'The RCNN export model is used for ONNX and it only supports batch_size = 1' - ) + print('The RCNN export model is used for ONNX and it only supports batch_size = 1') self.print_config() def check_model(self, yml_conf): @@ -58,8 +76,7 @@ class PredictConfig(object): for support_model in SUPPORT_MODELS: if support_model in yml_conf['arch']: return True - raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[ - 'arch'], SUPPORT_MODELS)) + raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf['arch'], SUPPORT_MODELS)) def print_config(self): print('----------- Model Configuration -----------') @@ -77,8 +94,15 @@ def draw_bbox(image, outputs, infer_config): label = infer_config.label_list[int(cls_id)] color = infer_config.colors[label] cv2.rectangle(image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, 2) - cv2.putText(image, "{}: {:.2f}".format(label, score), - (int(xmin), int(ymin - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + cv2.putText( + image, + "{}: {:.2f}".format(label, score), + (int(xmin), int(ymin - 5)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + color, + 2, + ) return image @@ -104,7 +128,7 @@ def predict_image(imgsave_dir, infer_config, predictor, img_list): inputs = transforms(img_path) inputs_name = [var.name for var in predictor.get_inputs()] - inputs = {k: inputs[k][None, ] for k in inputs_name} + inputs = {k: inputs[k][None,] for k in inputs_name} # Start timing start_time = time.time() @@ -119,7 +143,9 @@ else: total_time += inference_time num_images += 1 - print(f"ONNXRuntime predict time for {os.path.basename(img_path)}: {inference_time:.4f} seconds") + print( + f"ONNXRuntime predict time for {os.path.basename(img_path)}: {inference_time:.4f} seconds" + ) print("ONNXRuntime predict: ") if infer_config.arch in ["HRNet"]: @@ -128,8 +154,7 @@ bboxes = np.array(outputs[0]) for bbox in bboxes: if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold: - print(f"{int(bbox[0])} {bbox[1]} " - f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]}") + print(f"{int(bbox[0])} {bbox[1]} " f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]}") # Save the subimages (crop from the original image) subimg_counter = 1 @@ -137,7 +162,7 @@ cls_id, score, xmin, ymin, xmax, ymax = output if score > infer_config.draw_threshold: label = infer_config.label_list[int(cls_id)] - subimg = img[int(max(ymin, 0)):int(ymax), int(max(xmin, 0)):int(xmax)] + subimg = img[int(max(ymin, 0)) : int(ymax), int(max(xmin, 0)) : int(xmax)] if len(subimg) == 0: continue @@ -151,8 +176,14 @@ for output in np.array(outputs[0]): cls_id, score, xmin, ymin, xmax, ymax = output if score > infer_config.draw_threshold: - cv2.rectangle(img_with_mask, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 255, 255), -1) # mask with white - + cv2.rectangle( + img_with_mask, + (int(xmin), int(ymin)), + (int(xmax), int(ymax)), + (255, 255, 255), + -1, + ) # mask with white + img_with_bbox = draw_bbox(img, np.array(outputs[0]), infer_config) output_dir = imgsave_dir @@ -178,7 +209,7 @@ def predict(img_path: str, predictor, infer_config) -> List[Bbox]: transforms = Compose(infer_config.preprocess_infos) inputs = transforms(img_path) inputs_name = [var.name for var in predictor.get_inputs()] - inputs = {k: inputs[k][None, ] for k in 
inputs_name} + inputs = {k: inputs[k][None,] for k in inputs_name} outputs = predictor.run(output_names=None, input_feed=inputs)[0] res = [] diff --git a/src/models/det_model/model/infer_cfg.yml b/texteller/models/det_model/model/infer_cfg.yml similarity index 100% rename from src/models/det_model/model/infer_cfg.yml rename to texteller/models/det_model/model/infer_cfg.yml diff --git a/src/models/det_model/preprocess.py b/texteller/models/det_model/preprocess.py similarity index 86% rename from src/models/det_model/preprocess.py rename to texteller/models/det_model/preprocess.py index 6b72494..935a2ae 100644 --- a/src/models/det_model/preprocess.py +++ b/texteller/models/det_model/preprocess.py @@ -15,10 +15,8 @@ def decode_image(img_path): im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) img_info = { - "im_shape": np.array( - im.shape[:2], dtype=np.float32), - "scale_factor": np.array( - [1., 1.], dtype=np.float32) + "im_shape": np.array(im.shape[:2], dtype=np.float32), + "scale_factor": np.array([1.0, 1.0], dtype=np.float32), } return im, img_info @@ -51,16 +49,9 @@ class Resize(object): assert self.target_size[0] > 0 and self.target_size[1] > 0 im_channel = im.shape[2] im_scale_y, im_scale_x = self.generate_scale(im) - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') - im_info['scale_factor'] = np.array( - [im_scale_y, im_scale_x]).astype('float32') + im_info['scale_factor'] = np.array([im_scale_y, im_scale_x]).astype('float32') return im, im_info def generate_scale(self, im): @@ -134,7 +125,9 @@ class Permute(object): channel_first (bool): whether convert HWC to CHW """ - def __init__(self, ): + def __init__( + self, + ): super(Permute, self).__init__() def __call__(self, im, im_info): @@ -151,7 +144,7 @@ class Permute(object): class PadStride(object): - """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config + """padding image for model with FPN, instead PadBatch(pad_to_stride) in original config Args: stride (bool): model with FPN need image shape % stride == 0 """ @@ -198,18 +191,16 @@ class LetterBoxResize(object): ratio_h = float(height) / shape[0] ratio_w = float(width) / shape[1] ratio = min(ratio_h, ratio_w) - new_shape = (round(shape[1] * ratio), - round(shape[0] * ratio)) # [width, height] + new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # [width, height] padw = (width - new_shape[0]) / 2 padh = (height - new_shape[1]) / 2 top, bottom = round(padh - 0.1), round(padh + 0.1) left, right = round(padw - 0.1), round(padw + 0.1) - img = cv2.resize( - img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border + img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border img = cv2.copyMakeBorder( - img, top, bottom, left, right, cv2.BORDER_CONSTANT, - value=color) # padded rectangular + img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color + ) # padded rectangular return img, ratio, padw, padh def __call__(self, im, im_info): @@ -302,12 +293,7 @@ def _get_3rd_point(a, b): return third_pt -def get_affine_transform(center, - input_size, - rot, - output_size, - shift=(0., 0.), - inv=False): +def get_affine_transform(center, input_size, rot, output_size, shift=(0.0, 0.0), inv=False): """Get the affine transform matrix, given the 
center/scale/rot/output_size. Args: @@ -337,8 +323,8 @@ def get_affine_transform(center, dst_h = output_size[1] rot_rad = np.pi * rot / 180 - src_dir = rotate_point([0., src_w * -0.5], rot_rad) - dst_dir = np.array([0., dst_w * -0.5]) + src_dir = rotate_point([0.0, src_w * -0.5], rot_rad) + dst_dir = np.array([0.0, dst_w * -0.5]) src = np.zeros((3, 2), dtype=np.float32) src[0, :] = center + scale_tmp * shift @@ -359,16 +345,9 @@ def get_affine_transform(center, class WarpAffine(object): - """Warp affine the image - """ + """Warp affine the image""" - def __init__(self, - keep_res=False, - pad=31, - input_h=512, - input_w=512, - scale=0.4, - shift=0.1): + def __init__(self, keep_res=False, pad=31, input_h=512, input_w=512, scale=0.4, shift=0.1): self.keep_res = keep_res self.pad = pad self.input_h = input_h @@ -398,12 +377,11 @@ class WarpAffine(object): else: s = max(h, w) * 1.0 input_h, input_w = self.input_h, self.input_w - c = np.array([w / 2., h / 2.], dtype=np.float32) + c = np.array([w / 2.0, h / 2.0], dtype=np.float32) trans_input = get_affine_transform(c, s, 0, [input_w, input_h]) img = cv2.resize(img, (w, h)) - inp = cv2.warpAffine( - img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) + inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) return inp, im_info @@ -432,13 +410,17 @@ def get_warp_matrix(theta, size_input, size_dst, size_target): matrix[0, 0] = np.cos(theta) * scale_x matrix[0, 1] = -np.sin(theta) * scale_x matrix[0, 2] = scale_x * ( - -0.5 * size_input[0] * np.cos(theta) + 0.5 * size_input[1] * - np.sin(theta) + 0.5 * size_target[0]) + -0.5 * size_input[0] * np.cos(theta) + + 0.5 * size_input[1] * np.sin(theta) + + 0.5 * size_target[0] + ) matrix[1, 0] = np.sin(theta) * scale_y matrix[1, 1] = np.cos(theta) * scale_y matrix[1, 2] = scale_y * ( - -0.5 * size_input[0] * np.sin(theta) - 0.5 * size_input[1] * - np.cos(theta) + 0.5 * size_target[1]) + -0.5 * size_input[0] * np.sin(theta) + - 0.5 * size_input[1] * np.cos(theta) + + 0.5 * size_target[1] + ) return matrix @@ -462,22 +444,26 @@ class TopDownEvalAffine(object): def __call__(self, image, im_info): rot = 0 imshape = im_info['im_shape'][::-1] - center = im_info['center'] if 'center' in im_info else imshape / 2. 
+ center = im_info['center'] if 'center' in im_info else imshape / 2.0 scale = im_info['scale'] if 'scale' in im_info else imshape if self.use_udp: trans = get_warp_matrix( - rot, center * 2.0, - [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale) + rot, center * 2.0, [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale + ) image = cv2.warpAffine( image, - trans, (int(self.trainsize[0]), int(self.trainsize[1])), - flags=cv2.INTER_LINEAR) + trans, + (int(self.trainsize[0]), int(self.trainsize[1])), + flags=cv2.INTER_LINEAR, + ) else: trans = get_affine_transform(center, scale, rot, self.trainsize) image = cv2.warpAffine( image, - trans, (int(self.trainsize[0]), int(self.trainsize[1])), - flags=cv2.INTER_LINEAR) + trans, + (int(self.trainsize[0]), int(self.trainsize[1])), + flags=cv2.INTER_LINEAR, + ) return image, im_info diff --git a/src/models/globals.py b/texteller/models/globals.py similarity index 92% rename from src/models/globals.py rename to texteller/models/globals.py index 4d437a0..8754d67 100644 --- a/src/models/globals.py +++ b/texteller/models/globals.py @@ -1,6 +1,6 @@ # Formula image(grayscale) mean and variance IMAGE_MEAN = 0.9545467 -IMAGE_STD = 0.15394445 +IMAGE_STD = 0.15394445 # Vocabulary size for TexTeller VOCAB_SIZE = 15000 @@ -20,4 +20,4 @@ MIN_RESIZE_RATIO = 0.75 # Minimum height and width for input image for TexTeller MIN_HEIGHT = 12 -MIN_WIDTH = 30 +MIN_WIDTH = 30 diff --git a/src/models/ocr_model/model/TexTeller.py b/texteller/models/ocr_model/model/TexTeller.py similarity index 63% rename from src/models/ocr_model/model/TexTeller.py rename to texteller/models/ocr_model/model/TexTeller.py index 1f7e0ac..4f916cd 100644 --- a/src/models/ocr_model/model/TexTeller.py +++ b/texteller/models/ocr_model/model/TexTeller.py @@ -1,30 +1,24 @@ from pathlib import Path -from ...globals import ( - VOCAB_SIZE, - FIXED_IMG_SIZE, - IMG_CHANNELS, - MAX_TOKEN_SIZE -) +from ...globals import VOCAB_SIZE, FIXED_IMG_SIZE, IMG_CHANNELS, MAX_TOKEN_SIZE -from transformers import ( - RobertaTokenizerFast, - VisionEncoderDecoderModel, - VisionEncoderDecoderConfig -) +from transformers import RobertaTokenizerFast, VisionEncoderDecoderModel, VisionEncoderDecoderConfig class TexTeller(VisionEncoderDecoderModel): REPO_NAME = 'OleehyO/TexTeller' + def __init__(self): - config = VisionEncoderDecoderConfig.from_pretrained(Path(__file__).resolve().parent / "config.json") - config.encoder.image_size = FIXED_IMG_SIZE - config.encoder.num_channels = IMG_CHANNELS - config.decoder.vocab_size = VOCAB_SIZE + config = VisionEncoderDecoderConfig.from_pretrained( + Path(__file__).resolve().parent / "config.json" + ) + config.encoder.image_size = FIXED_IMG_SIZE + config.encoder.num_channels = IMG_CHANNELS + config.decoder.vocab_size = VOCAB_SIZE config.decoder.max_position_embeddings = MAX_TOKEN_SIZE super().__init__(config=config) - + @classmethod def from_pretrained(cls, model_path: str = None, use_onnx=False, onnx_provider=None): if model_path is None or model_path == 'default': @@ -32,8 +26,12 @@ class TexTeller(VisionEncoderDecoderModel): return VisionEncoderDecoderModel.from_pretrained(cls.REPO_NAME) else: from optimum.onnxruntime import ORTModelForVision2Seq + use_gpu = True if onnx_provider == 'cuda' else False - return ORTModelForVision2Seq.from_pretrained(cls.REPO_NAME, provider="CUDAExecutionProvider" if use_gpu else "CPUExecutionProvider") + return ORTModelForVision2Seq.from_pretrained( + cls.REPO_NAME, + provider="CUDAExecutionProvider" if use_gpu else "CPUExecutionProvider", + ) 
model_path = Path(model_path).resolve() return VisionEncoderDecoderModel.from_pretrained(str(model_path)) diff --git a/texteller/models/ocr_model/model/__pycache__/TexTeller.cpython-310.pyc b/texteller/models/ocr_model/model/__pycache__/TexTeller.cpython-310.pyc new file mode 100644 index 0000000..ece8c18 Binary files /dev/null and b/texteller/models/ocr_model/model/__pycache__/TexTeller.cpython-310.pyc differ diff --git a/src/models/ocr_model/model/config.json b/texteller/models/ocr_model/model/config.json similarity index 100% rename from src/models/ocr_model/model/config.json rename to texteller/models/ocr_model/model/config.json diff --git a/texteller/models/ocr_model/train/__pycache__/train.cpython-310.pyc b/texteller/models/ocr_model/train/__pycache__/train.cpython-310.pyc new file mode 100644 index 0000000..530caa5 Binary files /dev/null and b/texteller/models/ocr_model/train/__pycache__/train.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/train/__pycache__/training_args.cpython-310.pyc b/texteller/models/ocr_model/train/__pycache__/training_args.cpython-310.pyc new file mode 100644 index 0000000..224449c Binary files /dev/null and b/texteller/models/ocr_model/train/__pycache__/training_args.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_0.png b/texteller/models/ocr_model/train/augraphy_cache/image_0.png new file mode 100644 index 0000000..a149048 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_0.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_1.png b/texteller/models/ocr_model/train/augraphy_cache/image_1.png new file mode 100644 index 0000000..10a2184 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_1.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_10.png b/texteller/models/ocr_model/train/augraphy_cache/image_10.png new file mode 100644 index 0000000..70401c2 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_10.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_11.png b/texteller/models/ocr_model/train/augraphy_cache/image_11.png new file mode 100644 index 0000000..3acda0d Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_11.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_12.png b/texteller/models/ocr_model/train/augraphy_cache/image_12.png new file mode 100644 index 0000000..b03dfb7 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_12.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_13.png b/texteller/models/ocr_model/train/augraphy_cache/image_13.png new file mode 100644 index 0000000..64b7abb Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_13.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_14.png b/texteller/models/ocr_model/train/augraphy_cache/image_14.png new file mode 100644 index 0000000..281ad58 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_14.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_15.png b/texteller/models/ocr_model/train/augraphy_cache/image_15.png new file mode 100644 index 0000000..671e70c Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_15.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_16.png 
b/texteller/models/ocr_model/train/augraphy_cache/image_16.png new file mode 100644 index 0000000..0061a0b Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_16.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_17.png b/texteller/models/ocr_model/train/augraphy_cache/image_17.png new file mode 100644 index 0000000..321af30 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_17.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_18.png b/texteller/models/ocr_model/train/augraphy_cache/image_18.png new file mode 100644 index 0000000..e9eb26b Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_18.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_19.png b/texteller/models/ocr_model/train/augraphy_cache/image_19.png new file mode 100644 index 0000000..8f9ef59 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_19.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_2.png b/texteller/models/ocr_model/train/augraphy_cache/image_2.png new file mode 100644 index 0000000..b538696 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_2.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_20.png b/texteller/models/ocr_model/train/augraphy_cache/image_20.png new file mode 100644 index 0000000..db40eb2 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_20.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_21.png b/texteller/models/ocr_model/train/augraphy_cache/image_21.png new file mode 100644 index 0000000..cc9d586 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_21.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_22.png b/texteller/models/ocr_model/train/augraphy_cache/image_22.png new file mode 100644 index 0000000..220179c Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_22.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_23.png b/texteller/models/ocr_model/train/augraphy_cache/image_23.png new file mode 100644 index 0000000..b7be139 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_23.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_24.png b/texteller/models/ocr_model/train/augraphy_cache/image_24.png new file mode 100644 index 0000000..7476b76 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_24.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_25.png b/texteller/models/ocr_model/train/augraphy_cache/image_25.png new file mode 100644 index 0000000..77b9c45 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_25.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_26.png b/texteller/models/ocr_model/train/augraphy_cache/image_26.png new file mode 100644 index 0000000..e189b32 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_26.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_27.png b/texteller/models/ocr_model/train/augraphy_cache/image_27.png new file mode 100644 index 0000000..a1d4133 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_27.png differ diff --git 
a/texteller/models/ocr_model/train/augraphy_cache/image_28.png b/texteller/models/ocr_model/train/augraphy_cache/image_28.png new file mode 100644 index 0000000..8b9a8b4 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_28.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_29.png b/texteller/models/ocr_model/train/augraphy_cache/image_29.png new file mode 100644 index 0000000..cb50df4 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_29.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_3.png b/texteller/models/ocr_model/train/augraphy_cache/image_3.png new file mode 100644 index 0000000..2d375b7 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_3.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_4.png b/texteller/models/ocr_model/train/augraphy_cache/image_4.png new file mode 100644 index 0000000..9d53ce8 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_4.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_5.png b/texteller/models/ocr_model/train/augraphy_cache/image_5.png new file mode 100644 index 0000000..43257bd Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_5.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_6.png b/texteller/models/ocr_model/train/augraphy_cache/image_6.png new file mode 100644 index 0000000..dd1e098 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_6.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_7.png b/texteller/models/ocr_model/train/augraphy_cache/image_7.png new file mode 100644 index 0000000..7baf0f4 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_7.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_8.png b/texteller/models/ocr_model/train/augraphy_cache/image_8.png new file mode 100644 index 0000000..3d94283 Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_8.png differ diff --git a/texteller/models/ocr_model/train/augraphy_cache/image_9.png b/texteller/models/ocr_model/train/augraphy_cache/image_9.png new file mode 100644 index 0000000..b42491b Binary files /dev/null and b/texteller/models/ocr_model/train/augraphy_cache/image_9.png differ diff --git a/src/models/ocr_model/train/dataset/images/0.png b/texteller/models/ocr_model/train/dataset/train/0.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/0.png rename to texteller/models/ocr_model/train/dataset/train/0.png diff --git a/src/models/ocr_model/train/dataset/images/1.png b/texteller/models/ocr_model/train/dataset/train/1.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/1.png rename to texteller/models/ocr_model/train/dataset/train/1.png diff --git a/src/models/ocr_model/train/dataset/images/10.png b/texteller/models/ocr_model/train/dataset/train/10.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/10.png rename to texteller/models/ocr_model/train/dataset/train/10.png diff --git a/src/models/ocr_model/train/dataset/images/11.png b/texteller/models/ocr_model/train/dataset/train/11.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/11.png rename to texteller/models/ocr_model/train/dataset/train/11.png diff --git 
a/src/models/ocr_model/train/dataset/images/12.png b/texteller/models/ocr_model/train/dataset/train/12.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/12.png rename to texteller/models/ocr_model/train/dataset/train/12.png diff --git a/src/models/ocr_model/train/dataset/images/13.png b/texteller/models/ocr_model/train/dataset/train/13.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/13.png rename to texteller/models/ocr_model/train/dataset/train/13.png diff --git a/src/models/ocr_model/train/dataset/images/14.png b/texteller/models/ocr_model/train/dataset/train/14.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/14.png rename to texteller/models/ocr_model/train/dataset/train/14.png diff --git a/src/models/ocr_model/train/dataset/images/15.png b/texteller/models/ocr_model/train/dataset/train/15.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/15.png rename to texteller/models/ocr_model/train/dataset/train/15.png diff --git a/src/models/ocr_model/train/dataset/images/16.png b/texteller/models/ocr_model/train/dataset/train/16.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/16.png rename to texteller/models/ocr_model/train/dataset/train/16.png diff --git a/src/models/ocr_model/train/dataset/images/17.png b/texteller/models/ocr_model/train/dataset/train/17.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/17.png rename to texteller/models/ocr_model/train/dataset/train/17.png diff --git a/src/models/ocr_model/train/dataset/images/18.png b/texteller/models/ocr_model/train/dataset/train/18.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/18.png rename to texteller/models/ocr_model/train/dataset/train/18.png diff --git a/src/models/ocr_model/train/dataset/images/19.png b/texteller/models/ocr_model/train/dataset/train/19.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/19.png rename to texteller/models/ocr_model/train/dataset/train/19.png diff --git a/src/models/ocr_model/train/dataset/images/2.png b/texteller/models/ocr_model/train/dataset/train/2.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/2.png rename to texteller/models/ocr_model/train/dataset/train/2.png diff --git a/src/models/ocr_model/train/dataset/images/20.png b/texteller/models/ocr_model/train/dataset/train/20.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/20.png rename to texteller/models/ocr_model/train/dataset/train/20.png diff --git a/src/models/ocr_model/train/dataset/images/21.png b/texteller/models/ocr_model/train/dataset/train/21.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/21.png rename to texteller/models/ocr_model/train/dataset/train/21.png diff --git a/src/models/ocr_model/train/dataset/images/22.png b/texteller/models/ocr_model/train/dataset/train/22.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/22.png rename to texteller/models/ocr_model/train/dataset/train/22.png diff --git a/src/models/ocr_model/train/dataset/images/23.png b/texteller/models/ocr_model/train/dataset/train/23.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/23.png rename to texteller/models/ocr_model/train/dataset/train/23.png diff --git a/src/models/ocr_model/train/dataset/images/24.png b/texteller/models/ocr_model/train/dataset/train/24.png 
similarity index 100% rename from src/models/ocr_model/train/dataset/images/24.png rename to texteller/models/ocr_model/train/dataset/train/24.png diff --git a/src/models/ocr_model/train/dataset/images/25.png b/texteller/models/ocr_model/train/dataset/train/25.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/25.png rename to texteller/models/ocr_model/train/dataset/train/25.png diff --git a/src/models/ocr_model/train/dataset/images/26.png b/texteller/models/ocr_model/train/dataset/train/26.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/26.png rename to texteller/models/ocr_model/train/dataset/train/26.png diff --git a/src/models/ocr_model/train/dataset/images/27.png b/texteller/models/ocr_model/train/dataset/train/27.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/27.png rename to texteller/models/ocr_model/train/dataset/train/27.png diff --git a/src/models/ocr_model/train/dataset/images/28.png b/texteller/models/ocr_model/train/dataset/train/28.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/28.png rename to texteller/models/ocr_model/train/dataset/train/28.png diff --git a/src/models/ocr_model/train/dataset/images/29.png b/texteller/models/ocr_model/train/dataset/train/29.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/29.png rename to texteller/models/ocr_model/train/dataset/train/29.png diff --git a/src/models/ocr_model/train/dataset/images/3.png b/texteller/models/ocr_model/train/dataset/train/3.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/3.png rename to texteller/models/ocr_model/train/dataset/train/3.png diff --git a/src/models/ocr_model/train/dataset/images/30.png b/texteller/models/ocr_model/train/dataset/train/30.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/30.png rename to texteller/models/ocr_model/train/dataset/train/30.png diff --git a/src/models/ocr_model/train/dataset/images/31.png b/texteller/models/ocr_model/train/dataset/train/31.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/31.png rename to texteller/models/ocr_model/train/dataset/train/31.png diff --git a/src/models/ocr_model/train/dataset/images/32.png b/texteller/models/ocr_model/train/dataset/train/32.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/32.png rename to texteller/models/ocr_model/train/dataset/train/32.png diff --git a/src/models/ocr_model/train/dataset/images/33.png b/texteller/models/ocr_model/train/dataset/train/33.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/33.png rename to texteller/models/ocr_model/train/dataset/train/33.png diff --git a/src/models/ocr_model/train/dataset/images/34.png b/texteller/models/ocr_model/train/dataset/train/34.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/34.png rename to texteller/models/ocr_model/train/dataset/train/34.png diff --git a/src/models/ocr_model/train/dataset/images/4.png b/texteller/models/ocr_model/train/dataset/train/4.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/4.png rename to texteller/models/ocr_model/train/dataset/train/4.png diff --git a/src/models/ocr_model/train/dataset/images/5.png b/texteller/models/ocr_model/train/dataset/train/5.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/5.png rename to 
texteller/models/ocr_model/train/dataset/train/5.png diff --git a/src/models/ocr_model/train/dataset/images/6.png b/texteller/models/ocr_model/train/dataset/train/6.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/6.png rename to texteller/models/ocr_model/train/dataset/train/6.png diff --git a/src/models/ocr_model/train/dataset/images/7.png b/texteller/models/ocr_model/train/dataset/train/7.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/7.png rename to texteller/models/ocr_model/train/dataset/train/7.png diff --git a/src/models/ocr_model/train/dataset/images/8.png b/texteller/models/ocr_model/train/dataset/train/8.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/8.png rename to texteller/models/ocr_model/train/dataset/train/8.png diff --git a/src/models/ocr_model/train/dataset/images/9.png b/texteller/models/ocr_model/train/dataset/train/9.png similarity index 100% rename from src/models/ocr_model/train/dataset/images/9.png rename to texteller/models/ocr_model/train/dataset/train/9.png diff --git a/texteller/models/ocr_model/train/dataset/train/metadata.jsonl b/texteller/models/ocr_model/train/dataset/train/metadata.jsonl new file mode 100644 index 0000000..23279de --- /dev/null +++ b/texteller/models/ocr_model/train/dataset/train/metadata.jsonl @@ -0,0 +1,35 @@ +{"file_name": "0.png", "latex_formula": "\\[\\mathbb{C}^{4}\\stackrel{{\\pi_{1}}}{{\\longleftarrow}}\\mathcal{ F}\\stackrel{{\\pi_{2}}}{{\\rightarrow}}\\mathcal{PT},\\]"} +{"file_name": "1.png", "latex_formula": "\\[W^{*}_{Z}(x_{1},x_{2})=W_{f\\lrcorner Z}(y_{1},y_{2})=\\mathcal{P}\\exp\\left( \\int_{\\gamma}A_{\\mu}dx^{\\mu}\\right).\\]"} +{"file_name": "2.png", "latex_formula": "\\[G=W^{*}_{Z}(q,p)=\\tilde{H}H^{-1}\\]"} +{"file_name": "3.png", "latex_formula": "\\[H=W^{*}_{Z}(p,x),\\ \\ \\tilde{H}=W^{*}_{Z}(q,x).\\]"} +{"file_name": "4.png", "latex_formula": "\\[v\\cdot f^{*}A|_{x}=(f\\lrcorner Z)_{*}v\\cdot A|_{f\\lrcorner Z(x)},\\quad x\\in Z, \\ v\\in T_{x}Z.\\]"} +{"file_name": "5.png", "latex_formula": "\\[(f\\lrcorner Z)_{*}v\\cdot A|_{f\\lrcorner Z(x)}=v^{\\alpha\\dot{\\alpha}}\\Big{(} \\frac{\\partial y^{\\beta\\dot{\\beta}}}{\\partial x^{\\alpha\\dot{\\alpha}}}A_{\\beta \\dot{\\beta}}\\Big{)}\\Big{|}_{f\\lrcorner Z(x)},\\ x\\in Z,\\ v\\in T_{x}Z,\\]"} +{"file_name": "6.png", "latex_formula": "\\[\\{T_{i},T_{j}\\}=\\{\\tilde{T}^{i},\\tilde{T}^{j}\\}=0,\\ \\ \\{T_{i},\\tilde{T}^{j}\\}=2i \\delta^{j}_{i}D,\\]"} +{"file_name": "7.png", "latex_formula": "\\[(\\partial_{s},q_{i},\\tilde{q}^{k})\\rightarrow(D,M^{j}_{i}T_{j},\\tilde{M}^{k}_ {l}\\tilde{T}^{l}),\\]"} +{"file_name": "8.png", "latex_formula": "\\[M^{i}_{j}\\tilde{M}^{j}_{k}=\\delta^{i}_{k}.\\]"} +{"file_name": "9.png", "latex_formula": "\\[Q_{i\\alpha}=q_{i\\alpha}+\\omega_{i\\alpha},\\ \\tilde{Q}^{i}_{\\dot{\\alpha}}=q^{i}_{ \\dot{\\alpha}}+\\tilde{\\omega}^{i}_{\\dot{\\alpha}},\\ D_{\\alpha\\dot{\\alpha}}= \\partial_{\\alpha\\dot{\\alpha}}+A_{\\alpha\\dot{\\alpha}}.\\]"} +{"file_name": "10.png", "latex_formula": "\\[\\hat{f}(g,\\theta^{i\\alpha},\\tilde{\\theta}^{\\dot{\\alpha}}_{j})=(f(g),[V^{-1}]^ {\\alpha}_{\\beta}\\theta^{i\\beta},[\\tilde{V}^{-1}]^{\\dot{\\alpha}}_{\\dot{\\beta}} \\tilde{\\theta}^{\\dot{\\beta}}_{j}),\\ g\\in{\\cal G},\\]"} +{"file_name": "11.png", "latex_formula": "\\[v^{\\beta\\dot{\\beta}}V^{\\alpha}_{\\beta}\\tilde{V}^{\\dot{\\alpha}}_{\\dot{\\beta}} =((f\\lrcorner L_{0})_{*}v)^{\\alpha\\dot{\\alpha}},\\]"} +{"file_name": "12.png", 
"latex_formula": "\\[\\omega_{i\\alpha}=\\tilde{\\theta}^{\\dot{\\alpha}}_{i}h_{\\alpha\\dot{\\alpha}}(x^{ \\beta\\dot{\\beta}},\\tau^{\\beta\\dot{\\beta}}),\\ \\ \\tilde{\\omega}^{i}_{\\alpha}=\\theta^{i\\alpha}\\tilde{h}_{\\alpha\\dot{\\alpha}}(x^{ \\beta\\dot{\\beta}},\\tau^{\\beta\\dot{\\beta}}),\\]"} +{"file_name": "13.png", "latex_formula": "\\[\\begin{split}&\\lambda^{\\alpha}\\hat{f}^{*}\\omega_{i\\alpha}(z)= \\tilde{\\theta}^{\\dot{\\beta}}_{i}\\lambda^{\\alpha}\\left(V^{\\beta}_{\\alpha}h_{ \\beta\\dot{\\beta}}(x^{\\prime},\\tau^{\\prime})\\right),\\\\ &\\tilde{\\lambda}^{\\dot{\\alpha}}\\hat{f}^{*}\\tilde{\\omega}^{i}_{ \\dot{\\alpha}}(z)=\\theta^{i\\beta}\\tilde{\\lambda}^{\\dot{\\alpha}}\\left(\\tilde{V}^ {\\dot{\\beta}}_{\\dot{\\alpha}}\\tilde{h}_{\\beta\\dot{\\beta}}(x^{\\prime},\\tau^{ \\prime})\\right),\\end{split}\\]"} +{"file_name": "14.png", "latex_formula": "\\[A_{\\alpha\\dot{\\alpha}}=A_{\\alpha\\dot{\\alpha}}(x^{\\beta\\dot{\\beta}},\\tau^{ \\beta\\dot{\\beta}})\\]"} +{"file_name": "15.png", "latex_formula": "\\[D=\\lambda^{\\alpha}\\tilde{\\lambda}^{\\dot{\\alpha}}D_{\\alpha\\dot{\\alpha}}\\]"} +{"file_name": "16.png", "latex_formula": "\\[D=\\lambda^{\\alpha}\\tilde{\\lambda}^{\\dot{\\alpha}}\\partial_{\\alpha\\dot{\\alpha}}\\]"} +{"file_name": "17.png", "latex_formula": "\\[[v_{1}\\cdot D^{*},v_{2}\\cdot D^{*}]=0\\]"} +{"file_name": "18.png", "latex_formula": "\\[\\Phi_{A}=(\\omega_{i\\alpha},\\tilde{\\omega}^{i}_{\\dot{\\alpha}},A_{\\alpha\\dot{ \\alpha}})\\]"} +{"file_name": "19.png", "latex_formula": "\\[\\hat{f}:{\\cal F}^{6|4N}\\rightarrow{\\cal F}^{6|4N}\\]"} +{"file_name": "20.png", "latex_formula": "\\[\\sigma=(s,\\xi^{i},\\tilde{\\xi}_{j})\\in\\mathbb{C}^{1|2N}\\]"} +{"file_name": "21.png", "latex_formula": "\\[\\tau^{\\alpha\\dot{\\alpha}}(h_{\\alpha\\dot{\\alpha}}+\\tilde{h}_{\\alpha\\dot{\\alpha} })=0\\]"} +{"file_name": "22.png", "latex_formula": "\\[\\tau^{\\alpha\\dot{\\alpha}}\\rightarrow[V^{-1}]^{\\alpha}_{\\beta}[\\tilde{V}^{-1}]^{ \\dot{\\alpha}}_{\\dot{\\beta}}\\tau^{\\beta\\dot{\\beta}}\\]"} +{"file_name": "23.png", "latex_formula": "\\[\\tau^{\\beta\\dot{\\beta}}=\\sum_{i}\\theta^{i\\beta}\\tilde{\\theta}^{\\dot{\\beta}}_{i}\\]"} +{"file_name": "24.png", "latex_formula": "\\[\\theta^{i\\alpha}\\omega_{i\\alpha}+\\tilde{\\theta}^{i}_{\\dot{\\alpha}}\\tilde{ \\omega}^{\\dot{\\alpha}}_{i}=0\\]"} +{"file_name": "25.png", "latex_formula": "\\[\\tilde{T}^{i}=\\tilde{\\lambda}^{\\dot{\\alpha}}\\tilde{Q}^{i}_{\\dot{\\alpha}}\\]"} +{"file_name": "26.png", "latex_formula": "\\[\\tilde{T}^{i}=\\tilde{\\lambda}^{\\dot{\\alpha}}\\tilde{q}^{i}_{\\dot{\\alpha}}\\]"} +{"file_name": "27.png", "latex_formula": "\\[\\tilde{\\lambda}^{\\dot{\\alpha}}f^{*}A_{\\alpha\\dot{\\alpha}}=H^{-1}\\tilde{ \\lambda}^{\\dot{\\alpha}}\\partial_{\\alpha\\dot{\\alpha}}H\\]"} +{"file_name": "28.png", "latex_formula": "\\[\\tilde{q}^{i}=\\partial_{\\tilde{\\xi}_{i}}+i\\xi^{i}\\partial_{s}\\]"} +{"file_name": "29.png", "latex_formula": "\\[\\tilde{q}^{i}_{\\dot{\\alpha}}=\\frac{\\partial}{\\partial\\tilde{\\theta}^{\\dot{ \\alpha}}_{i}}+i\\theta^{i\\alpha}\\frac{\\partial}{\\partial x^{\\alpha\\dot{\\alpha}}}\\]"} +{"file_name": "30.png", "latex_formula": "\\[f\\lrcorner L(z)=\\pi_{1}\\circ f(z,\\lambda,\\tilde{\\lambda})\\ \\forall z\\in L\\]"} +{"file_name": "31.png", "latex_formula": "\\[q_{i\\alpha}=\\frac{\\partial}{\\partial\\theta^{i\\alpha}}+i\\tilde{\\theta}^{\\dot{ \\alpha}}_{i}\\frac{\\partial}{\\partial x^{\\alpha\\dot{\\alpha}}}\\]"} +{"file_name": "32.png", 
"latex_formula": "\\[q_{i}=\\partial_{\\xi^{i}}+i\\tilde{\\xi}_{i}\\partial_{s}\\]"} +{"file_name": "33.png", "latex_formula": "\\[v^{\\alpha\\dot{\\alpha}}=\\lambda^{\\alpha}\\tilde{\\lambda}^{\\dot{\\alpha}}\\]"} +{"file_name": "34.png", "latex_formula": "\\[z^{A}=(x^{\\alpha\\dot{\\alpha}},\\theta^{i\\alpha},\\tilde{\\theta}^{\\dot{\\alpha}}_{ j})\\]"} diff --git a/src/models/ocr_model/train/train.py b/texteller/models/ocr_model/train/train.py similarity index 71% rename from src/models/ocr_model/train/train.py rename to texteller/models/ocr_model/train/train.py index 9d37f44..80b58af 100644 --- a/src/models/ocr_model/train/train.py +++ b/texteller/models/ocr_model/train/train.py @@ -5,18 +5,24 @@ from pathlib import Path from datasets import load_dataset from transformers import ( - Trainer, - TrainingArguments, - Seq2SeqTrainer, - Seq2SeqTrainingArguments, - GenerationConfig + Trainer, + TrainingArguments, + Seq2SeqTrainer, + Seq2SeqTrainingArguments, + GenerationConfig, ) from .training_args import CONFIG from ..model.TexTeller import TexTeller -from ..utils.functional import tokenize_fn, collate_fn, img_train_transform, img_inf_transform, filter_fn +from ..utils.functional import ( + tokenize_fn, + collate_fn, + img_train_transform, + img_inf_transform, + filter_fn, +) from ..utils.metrics import bleu_metric -from ...globals import MAX_TOKEN_SIZE, MIN_WIDTH, MIN_HEIGHT +from ...globals import MAX_TOKEN_SIZE, MIN_WIDTH, MIN_HEIGHT def train(model, tokenizer, train_dataset, eval_dataset, collate_fn_with_tokenizer): @@ -24,11 +30,9 @@ def train(model, tokenizer, train_dataset, eval_dataset, collate_fn_with_tokeniz trainer = Trainer( model, training_args, - train_dataset=train_dataset, eval_dataset=eval_dataset, - - tokenizer=tokenizer, + tokenizer=tokenizer, data_collator=collate_fn_with_tokenizer, ) @@ -52,43 +56,44 @@ def evaluate(model, tokenizer, eval_dataset, collate_fn): trainer = Seq2SeqTrainer( model, seq2seq_config, - eval_dataset=eval_dataset, - tokenizer=tokenizer, + tokenizer=tokenizer, data_collator=collate_fn, - compute_metrics=partial(bleu_metric, tokenizer=tokenizer) + compute_metrics=partial(bleu_metric, tokenizer=tokenizer), ) eval_res = trainer.evaluate() print(eval_res) - + if __name__ == '__main__': script_dirpath = Path(__file__).resolve().parent os.chdir(script_dirpath) - dataset = load_dataset(str(Path('./dataset/loader.py').resolve()))['train'] - dataset = dataset.filter(lambda x: x['image'].height > MIN_HEIGHT and x['image'].width > MIN_WIDTH) + # dataset = load_dataset(str(Path('./dataset/loader.py').resolve()))['train'] + dataset = load_dataset("imagefolder", data_dir=str(script_dirpath / 'dataset'))['train'] + dataset = dataset.filter( + lambda x: x['image'].height > MIN_HEIGHT and x['image'].width > MIN_WIDTH + ) dataset = dataset.shuffle(seed=42) dataset = dataset.flatten_indices() tokenizer = TexTeller.get_tokenizer() # If you want use your own tokenizer, please modify the path to your tokenizer - #+tokenizer = TexTeller.get_tokenizer('/path/to/your/tokenizer') + # +tokenizer = TexTeller.get_tokenizer('/path/to/your/tokenizer') filter_fn_with_tokenizer = partial(filter_fn, tokenizer=tokenizer) - dataset = dataset.filter( - filter_fn_with_tokenizer, - num_proc=8 - ) + dataset = dataset.filter(filter_fn_with_tokenizer, num_proc=8) map_fn = partial(tokenize_fn, tokenizer=tokenizer) - tokenized_dataset = dataset.map(map_fn, batched=True, remove_columns=dataset.column_names, num_proc=8) + tokenized_dataset = dataset.map( + map_fn, batched=True, 
remove_columns=dataset.column_names, num_proc=8 + ) # Split dataset into train and eval, ratio 9:1 - split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42) + split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42) train_dataset, eval_dataset = split_dataset['train'], split_dataset['test'] train_dataset = train_dataset.with_transform(img_train_transform) - eval_dataset = eval_dataset.with_transform(img_inf_transform) + eval_dataset = eval_dataset.with_transform(img_inf_transform) collate_fn_with_tokenizer = partial(collate_fn, tokenizer=tokenizer) # Train from scratch @@ -96,14 +101,14 @@ if __name__ == '__main__': # or train from TexTeller pre-trained model: model = TexTeller.from_pretrained() # If you want to train from pre-trained model, please modify the path to your pre-trained checkpoint - #+e.g. - #+model = TexTeller.from_pretrained( - #+ '/path/to/your/model_checkpoint' - #+) + # +e.g. + # +model = TexTeller.from_pretrained( + # + '/path/to/your/model_checkpoint' + # +) enable_train = True enable_evaluate = False if enable_train: - train(model, tokenizer, train_dataset, eval_dataset, collate_fn_with_tokenizer) + train(model, tokenizer, train_dataset, eval_dataset, collate_fn_with_tokenizer) if enable_evaluate and len(eval_dataset) > 0: evaluate(model, tokenizer, eval_dataset, collate_fn_with_tokenizer) diff --git a/texteller/models/ocr_model/train/training_args.py b/texteller/models/ocr_model/train/training_args.py new file mode 100644 index 0000000..b377cab --- /dev/null +++ b/texteller/models/ocr_model/train/training_args.py @@ -0,0 +1,31 @@ +CONFIG = { + "seed": 42, # Random seed for reproducibility + "use_cpu": False, # Whether to use CPU (it's easier to debug with CPU when starting to test the code) + "learning_rate": 5e-5, # Learning rate + "num_train_epochs": 10, # Total number of training epochs + "per_device_train_batch_size": 4, # Batch size per GPU for training + "per_device_eval_batch_size": 8, # Batch size per GPU for evaluation + "output_dir": "train_result", # Output directory + "overwrite_output_dir": False, # If the output directory exists, do not delete its content + "report_to": ["tensorboard"], # Report logs to TensorBoard + "save_strategy": "steps", # Strategy to save checkpoints + "save_steps": 500, # Interval of steps to save checkpoints, can be int or a float (0~1), when float it represents the ratio of total training steps (e.g., can set to 1.0 / 2000) + "save_total_limit": 5, # Maximum number of models to save. 
The oldest models will be deleted if this number is exceeded + "logging_strategy": "steps", # Log every certain number of steps + "logging_steps": 500, # Number of steps between each log + "logging_nan_inf_filter": False, # Record logs for loss=nan or inf + "optim": "adamw_torch", # Optimizer + "lr_scheduler_type": "cosine", # Learning rate scheduler + "warmup_ratio": 0.1, # Ratio of warmup steps in total training steps (e.g., for 1000 steps, the first 100 steps gradually increase lr from 0 to the set lr) + "max_grad_norm": 1.0, # For gradient clipping, ensure the norm of the gradients does not exceed 1.0 (default 1.0) + "fp16": False, # Whether to use 16-bit floating point for training (generally not recommended, as loss can easily explode) + "bf16": False, # Whether to use Brain Floating Point (bfloat16) for training (recommended if architecture supports it) + "gradient_accumulation_steps": 1, # Gradient accumulation steps, consider this parameter to achieve large batch size effects when batch size cannot be large + "jit_mode_eval": False, # Whether to use PyTorch jit trace during eval (can speed up the model, but the model must be static, otherwise will throw errors) + "torch_compile": False, # Whether to use torch.compile to compile the model (for better training and inference performance) + "dataloader_pin_memory": True, # Can speed up data transfer between CPU and GPU + "dataloader_num_workers": 1, # Default is not to use multiprocessing for data loading, usually set to 4*number of GPUs used + "evaluation_strategy": "steps", # Evaluation strategy, can be "steps" or "epoch" + "eval_steps": 500, # If evaluation_strategy="step" + "remove_unused_columns": False, # Don't change this unless you really know what you are doing. +} diff --git a/texteller/models/ocr_model/utils/__pycache__/functional.cpython-310.pyc b/texteller/models/ocr_model/utils/__pycache__/functional.cpython-310.pyc new file mode 100644 index 0000000..ed9478f Binary files /dev/null and b/texteller/models/ocr_model/utils/__pycache__/functional.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/utils/__pycache__/helpers.cpython-310.pyc b/texteller/models/ocr_model/utils/__pycache__/helpers.cpython-310.pyc new file mode 100644 index 0000000..604eb34 Binary files /dev/null and b/texteller/models/ocr_model/utils/__pycache__/helpers.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/utils/__pycache__/inference.cpython-310.pyc b/texteller/models/ocr_model/utils/__pycache__/inference.cpython-310.pyc new file mode 100644 index 0000000..d90e698 Binary files /dev/null and b/texteller/models/ocr_model/utils/__pycache__/inference.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/utils/__pycache__/metrics.cpython-310.pyc b/texteller/models/ocr_model/utils/__pycache__/metrics.cpython-310.pyc new file mode 100644 index 0000000..b6c07dc Binary files /dev/null and b/texteller/models/ocr_model/utils/__pycache__/metrics.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/utils/__pycache__/ocr_aug.cpython-310.pyc b/texteller/models/ocr_model/utils/__pycache__/ocr_aug.cpython-310.pyc new file mode 100644 index 0000000..19d359b Binary files /dev/null and b/texteller/models/ocr_model/utils/__pycache__/ocr_aug.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/utils/__pycache__/to_katex.cpython-310.pyc b/texteller/models/ocr_model/utils/__pycache__/to_katex.cpython-310.pyc new file mode 100644 index 0000000..97aca70 Binary files /dev/null and 
b/texteller/models/ocr_model/utils/__pycache__/to_katex.cpython-310.pyc differ diff --git a/texteller/models/ocr_model/utils/__pycache__/transforms.cpython-310.pyc b/texteller/models/ocr_model/utils/__pycache__/transforms.cpython-310.pyc new file mode 100644 index 0000000..7dd1bdb Binary files /dev/null and b/texteller/models/ocr_model/utils/__pycache__/transforms.cpython-310.pyc differ diff --git a/src/models/ocr_model/utils/functional.py b/texteller/models/ocr_model/utils/functional.py similarity index 95% rename from src/models/ocr_model/utils/functional.py rename to texteller/models/ocr_model/utils/functional.py index 9cb19ab..aa3199e 100644 --- a/src/models/ocr_model/utils/functional.py +++ b/texteller/models/ocr_model/utils/functional.py @@ -26,7 +26,7 @@ def collate_fn(samples: List[Dict[str, Any]], tokenizer=None) -> Dict[str, List[ pixel_values = [dic.pop('pixel_values') for dic in samples] clm_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False) - + batch = clm_collator(samples) batch['pixel_values'] = pixel_values batch['decoder_input_ids'] = batch.pop('input_ids') @@ -54,6 +54,7 @@ def img_inf_transform(samples: Dict[str, List[Any]]) -> Dict[str, List[Any]]: def filter_fn(sample, tokenizer=None) -> bool: return ( - sample['image'].height > MIN_HEIGHT and sample['image'].width > MIN_WIDTH + sample['image'].height > MIN_HEIGHT + and sample['image'].width > MIN_WIDTH and len(tokenizer(sample['latex_formula'])['input_ids']) < MAX_TOKEN_SIZE - 10 ) diff --git a/src/models/ocr_model/utils/helpers.py b/texteller/models/ocr_model/utils/helpers.py similarity index 92% rename from src/models/ocr_model/utils/helpers.py rename to texteller/models/ocr_model/utils/helpers.py index d650556..50e8bd0 100644 --- a/src/models/ocr_model/utils/helpers.py +++ b/texteller/models/ocr_model/utils/helpers.py @@ -12,7 +12,7 @@ def convert2rgb(image_paths: List[str]) -> List[np.ndarray]: continue if image.dtype == np.uint16: print(f'Converting {path} to 8-bit, image may be lossy.') - image = cv2.convertScaleAbs(image, alpha=(255.0/65535.0)) + image = cv2.convertScaleAbs(image, alpha=(255.0 / 65535.0)) channels = 1 if len(image.shape) == 2 else image.shape[2] if channels == 4: diff --git a/src/models/ocr_model/utils/inference.py b/texteller/models/ocr_model/utils/inference.py similarity index 84% rename from src/models/ocr_model/utils/inference.py rename to texteller/models/ocr_model/utils/inference.py index 7d8e4d9..d07100b 100644 --- a/src/models/ocr_model/utils/inference.py +++ b/texteller/models/ocr_model/utils/inference.py @@ -11,12 +11,12 @@ from ...globals import MAX_TOKEN_SIZE def inference( - model: TexTeller, + model: TexTeller, tokenizer: RobertaTokenizerFast, - imgs: Union[List[str], List[np.ndarray]], + imgs: Union[List[str], List[np.ndarray]], accelerator: str = 'cpu', num_beams: int = 1, - max_tokens = None + max_tokens=None, ) -> List[str]: if imgs == []: return [] @@ -24,10 +24,10 @@ def inference( # not an ONNX session, so switch the model to eval mode model.eval() if isinstance(imgs[0], str): - imgs = convert2rgb(imgs) + imgs = convert2rgb(imgs) else: # already a numpy array (RGB format) assert isinstance(imgs[0], np.ndarray) - imgs = imgs + imgs = imgs imgs = inference_transform(imgs) pixel_values = torch.stack(imgs) @@ -44,6 +44,6 @@ eos_token_id=tokenizer.eos_token_id, bos_token_id=tokenizer.bos_token_id, ) - pred = model.generate(pixel_values, generation_config=generate_config) + pred = model.generate(pixel_values.to(model.device), generation_config=generate_config)
res = tokenizer.batch_decode(pred, skip_special_tokens=True) return res diff --git a/src/models/ocr_model/utils/metrics.py b/texteller/models/ocr_model/utils/metrics.py similarity index 84% rename from src/models/ocr_model/utils/metrics.py rename to texteller/models/ocr_model/utils/metrics.py index 1dd0702..13dc972 100644 --- a/src/models/ocr_model/utils/metrics.py +++ b/texteller/models/ocr_model/utils/metrics.py @@ -10,9 +10,11 @@ from transformers import EvalPrediction, RobertaTokenizer def bleu_metric(eval_preds: EvalPrediction, tokenizer: RobertaTokenizer) -> Dict: cur_dir = Path(os.getcwd()) os.chdir(Path(__file__).resolve().parent) - metric = evaluate.load('google_bleu') # Will download the metric from huggingface if not already downloaded + metric = evaluate.load( + 'google_bleu' + ) # Will download the metric from huggingface if not already downloaded os.chdir(cur_dir) - + logits, labels = eval_preds.predictions, eval_preds.label_ids preds = logits diff --git a/src/models/ocr_model/utils/ocr_aug.py b/texteller/models/ocr_model/utils/ocr_aug.py similarity index 94% rename from src/models/ocr_model/utils/ocr_aug.py rename to texteller/models/ocr_model/utils/ocr_aug.py index 5678c61..a232735 100644 --- a/src/models/ocr_model/utils/ocr_aug.py +++ b/texteller/models/ocr_model/utils/ocr_aug.py @@ -1,9 +1,9 @@ from augraphy import * import random + def ocr_augmentation_pipeline(): - pre_phase = [ - ] + pre_phase = [] ink_phase = [ InkColorSwap( @@ -16,7 +16,7 @@ def ocr_augmentation_pipeline(): ink_swap_min_area_range=(10, 20), ink_swap_max_area_range=(400, 500), # p=0.2 - p=0.4 + p=0.4, ), LinesDegradation( line_roi=(0.0, 0.0, 1.0, 1.0), @@ -29,9 +29,8 @@ def ocr_augmentation_pipeline(): line_replacement_probability=(0.4, 0.5), line_replacement_thickness=(1, 3), # p=0.2 - p=0.4 + p=0.4, ), - # ============================ OneOf( [ @@ -46,10 +45,9 @@ def ocr_augmentation_pipeline(): ), ], # p=0.2 - p=0.4 + p=0.4, ), # ============================ - # ============================ InkShifter( text_shift_scale_range=(18, 27), @@ -59,10 +57,9 @@ def ocr_augmentation_pipeline(): blur_sigma=0, noise_type="perlin", # p=0.2 - p=0.4 + p=0.4, ), # ============================ - ] paper_phase = [ @@ -72,14 +69,14 @@ def ocr_augmentation_pipeline(): texture_width_range=(300, 500), texture_height_range=(300, 500), # p=0.2 - p=0.4 + p=0.4, ), BrightnessTexturize( # tested texturize_range=(0.9, 0.99), deviation=0.03, # p=0.2 - p=0.4 - ) + p=0.4, + ), ] post_phase = [ @@ -90,9 +87,8 @@ def ocr_augmentation_pipeline(): color_shift_brightness_range=(0.9, 1.1), color_shift_gaussian_kernel_range=(3, 3), # p=0.2 - p=0.4 + p=0.4, ), - DirtyDrum( # tested line_width_range=(1, 6), line_concentration=random.uniform(0.05, 0.15), @@ -102,9 +98,8 @@ def ocr_augmentation_pipeline(): ksize=random.choice([(3, 3), (5, 5), (7, 7)]), sigmaX=0, # p=0.2 - p=0.4 + p=0.4, ), - # ===================================== OneOf( [ @@ -127,10 +122,9 @@ def ocr_augmentation_pipeline(): ), ], # p=0.2 - p=0.4 + p=0.4, ), # ===================================== - # ===================================== OneOf( [ @@ -142,7 +136,7 @@ def ocr_augmentation_pipeline(): ), ], # p=0.2 - p=0.4 + p=0.4, ), # ===================================== ] @@ -152,7 +146,7 @@ def ocr_augmentation_pipeline(): paper_phase=paper_phase, post_phase=post_phase, pre_phase=pre_phase, - log=False + log=False, ) return pipeline diff --git a/src/models/ocr_model/utils/to_katex.py b/texteller/models/ocr_model/utils/to_katex.py similarity index 93% rename from 
src/models/ocr_model/utils/to_katex.py rename to texteller/models/ocr_model/utils/to_katex.py index b6166dc..20518a7 100644 --- a/src/models/ocr_model/utils/to_katex.py +++ b/texteller/models/ocr_model/utils/to_katex.py @@ -5,9 +5,9 @@ def change(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, ne result = "" i = 0 n = len(input_str) - + while i < n: - if input_str[i:i+len(old_inst)] == old_inst: + if input_str[i : i + len(old_inst)] == old_inst: # check if the old_inst is followed by old_surr_l start = i + len(old_inst) else: @@ -33,12 +33,12 @@ def change(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, ne count += 1 escaped = False j += 1 - + if count == 0: assert j < n assert input_str[start] == old_surr_l assert input_str[j] == old_surr_r - inner_content = input_str[start + 1:j] + inner_content = input_str[start + 1 : j] # Replace the content with new pattern result += new_inst + new_surr_l + inner_content + new_surr_r i = j + 1 @@ -53,7 +53,7 @@ def change(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, ne else: result += input_str[i:start] i = start - + if old_inst != new_inst and (old_inst + old_surr_l) in result: return change(result, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r) else: @@ -68,12 +68,12 @@ def find_substring_positions(string, substring): def rm_dollar_surr(content): pattern = re.compile(r'\\[a-zA-Z]+\$.*?\$|\$.*?\$') matches = pattern.findall(content) - + for match in matches: if not re.match(r'\\[a-zA-Z]+', match): new_match = match.strip('$') content = content.replace(match, ' ' + new_match + ' ') - + return content @@ -81,7 +81,11 @@ def change_all(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l pos = find_substring_positions(input_str, old_inst + old_surr_l) res = list(input_str) for p in pos[::-1]: - res[p:] = list(change(''.join(res[p:]), old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r)) + res[p:] = list( + change( + ''.join(res[p:]), old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r + ) + ) res = ''.join(res) return res @@ -106,7 +110,6 @@ def to_katex(formula: str) -> str: res = change_all(res, r'\raisebox', r' ', r'{', r'}', r'', r' ') res = change_all(res, r'\vbox', r' ', r'{', r'}', r'', r' ') - origin_instructions = [ r'\Huge', r'\huge', @@ -116,9 +119,9 @@ def to_katex(formula: str) -> str: r'\normalsize', r'\small', r'\footnotesize', - r'\tiny' + r'\tiny', ] - for (old_ins, new_ins) in zip(origin_instructions, origin_instructions): + for old_ins, new_ins in zip(origin_instructions, origin_instructions): res = change_all(res, old_ins, new_ins, r'$', r'$', '{', '}') res = change_all(res, r'\boldmath ', r'\bm', r'{', r'}', r'{', r'}') res = change_all(res, r'\boldmath', r'\bm', r'{', r'}', r'{', r'}') @@ -127,7 +130,7 @@ def to_katex(formula: str) -> str: res = change_all(res, r'\scriptsize', r'\scriptsize', r'$', r'$', r'{', r'}') res = change_all(res, r'\emph', r'\textit', r'{', r'}', r'{', r'}') res = change_all(res, r'\emph ', r'\textit', r'{', r'}', r'{', r'}') - + origin_instructions = [ r'\left', r'\middle', @@ -147,7 +150,7 @@ def to_katex(formula: str) -> str: r'\bigr', r'\Bigr', r'\biggr', - r'\Biggr' + r'\Biggr', ] for origin_ins in origin_instructions: res = change_all(res, origin_ins, origin_ins, r'{', r'}', r'', r'') @@ -169,6 +172,7 @@ def to_katex(formula: str) -> str: texts = match.group(0) merged_content = ''.join(re.findall(r'\\text\{([^}]*)\}', texts)) return f'\\text{{{merged_content}}}' + res = 
re.sub(r'(\\text\{[^}]*\}\s*){2,}', merge_texts, res) res = res.replace(r'\bf ', '') diff --git a/src/models/ocr_model/utils/transforms.py b/texteller/models/ocr_model/utils/transforms.py similarity index 69% rename from src/models/ocr_model/utils/transforms.py rename to texteller/models/ocr_model/utils/transforms.py index 2a1a64a..7da2de0 100644 --- a/src/models/ocr_model/utils/transforms.py +++ b/texteller/models/ocr_model/utils/transforms.py @@ -11,31 +11,32 @@ from collections import Counter from ...globals import ( IMG_CHANNELS, FIXED_IMG_SIZE, - IMAGE_MEAN, IMAGE_STD, - MAX_RESIZE_RATIO, MIN_RESIZE_RATIO + IMAGE_MEAN, + IMAGE_STD, + MAX_RESIZE_RATIO, + MIN_RESIZE_RATIO, ) from .ocr_aug import ocr_augmentation_pipeline # train_pipeline = default_augraphy_pipeline(scan_only=True) train_pipeline = ocr_augmentation_pipeline() -general_transform_pipeline = v2.Compose([ - v2.ToImage(), - v2.ToDtype(torch.uint8, scale=True), # optional, most inputs are already uint8 at this point - v2.Grayscale(), - - v2.Resize( - size=FIXED_IMG_SIZE - 1, - interpolation=v2.InterpolationMode.BICUBIC, - max_size=FIXED_IMG_SIZE, - antialias=True - ), - - v2.ToDtype(torch.float32, scale=True), # Normalize expects float input - v2.Normalize(mean=[IMAGE_MEAN], std=[IMAGE_STD]), - - # v2.ToPILImage() -]) +general_transform_pipeline = v2.Compose( + [ + v2.ToImage(), + v2.ToDtype(torch.uint8, scale=True), # optional, most inputs are already uint8 at this point + v2.Grayscale(), + v2.Resize( + size=FIXED_IMG_SIZE - 1, + interpolation=v2.InterpolationMode.BICUBIC, + max_size=FIXED_IMG_SIZE, + antialias=True, + ), + v2.ToDtype(torch.float32, scale=True), # Normalize expects float input + v2.Normalize(mean=[IMAGE_MEAN], std=[IMAGE_STD]), + # v2.ToPILImage() + ] +) def trim_white_border(image: np.ndarray): @@ -45,11 +46,10 @@ def trim_white_border(image: np.ndarray): if image.dtype != np.uint8: raise ValueError(f"Image should be stored in uint8") - corners = [tuple(image[0, 0]), tuple(image[0, -1]), - tuple(image[-1, 0]), tuple(image[-1, -1])] + corners = [tuple(image[0, 0]), tuple(image[0, -1]), tuple(image[-1, 0]), tuple(image[-1, -1])] bg_color = Counter(corners).most_common(1)[0][0] bg_color_np = np.array(bg_color, dtype=np.uint8) - + h, w = image.shape[:2] bg = np.full((h, w, 3), bg_color_np, dtype=np.uint8) @@ -59,9 +59,9 @@ def trim_white_border(image: np.ndarray): threshold = 15 _, diff = cv2.threshold(mask, threshold, 255, cv2.THRESH_BINARY) - x, y, w, h = cv2.boundingRect(diff) + x, y, w, h = cv2.boundingRect(diff) - trimmed_image = image[y:y+h, x:x+w] + trimmed_image = image[y : y + h, x : x + w] return trimmed_image @@ -69,12 +69,12 @@ def trim_white_border(image: np.ndarray): def add_white_border(image: np.ndarray, max_size: int) -> np.ndarray: randi = [random.randint(0, max_size) for _ in range(4)] pad_height_size = randi[1] + randi[3] - pad_width_size = randi[0] + randi[2] - if (pad_height_size + image.shape[0] < 30): + pad_width_size = randi[0] + randi[2] + if pad_height_size + image.shape[0] < 30: compensate_height = int((30 - (pad_height_size + image.shape[0])) * 0.5) + 1 randi[1] += compensate_height randi[3] += compensate_height - if (pad_width_size + image.shape[1] < 30): + if pad_width_size + image.shape[1] < 30: compensate_width = int((30 - (pad_width_size + image.shape[1])) * 0.5) + 1 randi[0] += compensate_width randi[2] += compensate_width @@ -82,32 +82,29 @@ def add_white_border(image: np.ndarray, max_size: int) -> np.ndarray: torch.from_numpy(image).permute(2, 0, 1), padding=randi,
padding_mode='constant', - fill=(255, 255, 255) + fill=(255, 255, 255), ) def padding(images: List[torch.Tensor], required_size: int) -> List[torch.Tensor]: - images = [ + images = [ v2.functional.pad( - img, - padding=[0, 0, required_size - img.shape[2], required_size - img.shape[1]] + img, padding=[0, 0, required_size - img.shape[2], required_size - img.shape[1]] ) for img in images ] return images -def random_resize( - images: List[np.ndarray], - minr: float, - maxr: float -) -> List[np.ndarray]: +def random_resize(images: List[np.ndarray], minr: float, maxr: float) -> List[np.ndarray]: if len(images[0].shape) != 3 or images[0].shape[2] != 3: raise ValueError("Image is not in RGB format or channels are not in the third dimension") ratios = [random.uniform(minr, maxr) for _ in range(len(images))] return [ - cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)), interpolation=cv2.INTER_LANCZOS4) # anti-aliasing + cv2.resize( img, (int(img.shape[1] * r), int(img.shape[0] * r)), interpolation=cv2.INTER_LANCZOS4 ) # anti-aliasing for img, r in zip(images, ratios) ] @@ -133,7 +130,9 @@ def rotate(image: np.ndarray, min_angle: int, max_angle: int) -> np.ndarray: rotation_mat[1, 2] += (new_height / 2) - image_center[1] # Rotate the image with the specified border color (white in this case) - rotated_image = cv2.warpAffine(image, rotation_mat, (new_width, new_height), borderValue=(255, 255, 255)) + rotated_image = cv2.warpAffine( image, rotation_mat, (new_width, new_height), borderValue=(255, 255, 255) ) return rotated_image @@ -147,7 +146,7 @@ def ocr_aug(image: np.ndarray) -> np.ndarray: def train_transform(images: List[Image.Image]) -> List[torch.Tensor]: - assert IMG_CHANNELS == 1 , "Only support grayscale images for now" + assert IMG_CHANNELS == 1, "Only support grayscale images for now" images = [np.array(img.convert('RGB')) for img in images] # random resize first @@ -158,18 +157,20 @@ def train_transform(images: List[Image.Image]) -> List[torch.Tensor]: images = [ocr_aug(image) for image in images] # general transform pipeline - images = [general_transform_pipeline(image) for image in images] + images = [general_transform_pipeline(image) for image in images] # padding to fixed size images = padding(images, FIXED_IMG_SIZE) return images def inference_transform(images: List[Union[np.ndarray, Image.Image]]) -> List[torch.Tensor]: - assert IMG_CHANNELS == 1 , "Only support grayscale images for now" - images = [np.array(img.convert('RGB')) if isinstance(img, Image.Image) else img for img in images] + assert IMG_CHANNELS == 1, "Only support grayscale images for now" + images = [ + np.array(img.convert('RGB')) if isinstance(img, Image.Image) else img for img in images + ] images = [trim_white_border(image) for image in images] # general transform pipeline - images = [general_transform_pipeline(image) for image in images] # imgs: List[PIL.Image.Image] + images = [general_transform_pipeline(image) for image in images] # imgs: List[PIL.Image.Image] # padding to fixed size images = padding(images, FIXED_IMG_SIZE) diff --git a/src/models/thrid_party/paddleocr/checkpoints/det/default_model.onnx b/texteller/models/thrid_party/paddleocr/checkpoints/det/default_model.onnx similarity index 100% rename from src/models/thrid_party/paddleocr/checkpoints/det/default_model.onnx rename to texteller/models/thrid_party/paddleocr/checkpoints/det/default_model.onnx diff --git a/src/models/thrid_party/paddleocr/checkpoints/rec/default_model.onnx b/texteller/models/thrid_party/paddleocr/checkpoints/rec/default_model.onnx
similarity index 100% rename from src/models/thrid_party/paddleocr/checkpoints/rec/default_model.onnx rename to texteller/models/thrid_party/paddleocr/checkpoints/rec/default_model.onnx diff --git a/src/models/thrid_party/paddleocr/infer/CTCLabelDecode.py b/texteller/models/thrid_party/paddleocr/infer/CTCLabelDecode.py similarity index 96% rename from src/models/thrid_party/paddleocr/infer/CTCLabelDecode.py rename to texteller/models/thrid_party/paddleocr/infer/CTCLabelDecode.py index 9ee9d34..de9a275 100644 --- a/src/models/thrid_party/paddleocr/infer/CTCLabelDecode.py +++ b/texteller/models/thrid_party/paddleocr/infer/CTCLabelDecode.py @@ -1,8 +1,9 @@ -import re -import numpy as np import os +import re from pathlib import Path +import numpy as np + class BaseRecLabelDecode(object): """Convert between text-label and text-index""" @@ -102,7 +103,7 @@ class BaseRecLabelDecode(object): ): # grouping word with '-', such as 'state-of-the-art' c_state = "en&num" - if state == None: + if state is None: state = c_state if state != c_state: @@ -143,9 +144,7 @@ class BaseRecLabelDecode(object): for ignored_token in ignored_tokens: selection &= text_index[batch_idx] != ignored_token - char_list = [ - self.character[text_id] for text_id in text_index[batch_idx][selection] - ] + char_list = [self.character[text_id] for text_id in text_index[batch_idx][selection]] if text_prob is not None: conf_list = text_prob[batch_idx][selection] else: @@ -159,9 +158,7 @@ class BaseRecLabelDecode(object): text = self.pred_reverse(text) if return_word_box: - word_list, word_col_list, state_list = self.get_word_info( - text, selection - ) + word_list, word_col_list, state_list = self.get_word_info(text, selection) result_list.append( ( text, @@ -212,4 +209,4 @@ class CTCLabelDecode(BaseRecLabelDecode): def add_special_char(self, dict_character): dict_character = ["blank"] + dict_character - return dict_character \ No newline at end of file + return dict_character diff --git a/src/models/thrid_party/paddleocr/infer/DBPostProcess.py b/texteller/models/thrid_party/paddleocr/infer/DBPostProcess.py similarity index 92% rename from src/models/thrid_party/paddleocr/infer/DBPostProcess.py rename to texteller/models/thrid_party/paddleocr/infer/DBPostProcess.py index 84919e4..b8e407a 100644 --- a/src/models/thrid_party/paddleocr/infer/DBPostProcess.py +++ b/texteller/models/thrid_party/paddleocr/infer/DBPostProcess.py @@ -19,7 +19,7 @@ class DBPostProcess(object): use_dilation=False, score_mode="fast", box_type="quad", - **kwargs + **kwargs, ): self.thresh = thresh self.box_thresh = box_thresh @@ -76,9 +76,7 @@ class DBPostProcess(object): box = np.array(box) box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height - ) + box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height) boxes.append(box.tolist()) scores.append(score) return boxes, scores @@ -124,9 +122,7 @@ class DBPostProcess(object): box = np.array(box) box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height - ) + box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height) boxes.append(box.astype("int32")) scores.append(score) return np.array(boxes, dtype="int32"), scores @@ -215,15 +211,11 @@ class DBPostProcess(object): else: mask = segmentation[batch_index] if self.box_type == "poly": - boxes, scores = 
self.polygons_from_bitmap( - pred[batch_index], mask, src_w, src_h - ) + boxes, scores = self.polygons_from_bitmap(pred[batch_index], mask, src_w, src_h) elif self.box_type == "quad": - boxes, scores = self.boxes_from_bitmap( - pred[batch_index], mask, src_w, src_h - ) + boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, src_w, src_h) else: raise ValueError("box_type can only be one of ['quad', 'poly']") boxes_batch.append({"points": boxes}) - return boxes_batch \ No newline at end of file + return boxes_batch diff --git a/texteller/models/thrid_party/paddleocr/infer/__pycache__/CTCLabelDecode.cpython-310.pyc b/texteller/models/thrid_party/paddleocr/infer/__pycache__/CTCLabelDecode.cpython-310.pyc new file mode 100644 index 0000000..e8a121a Binary files /dev/null and b/texteller/models/thrid_party/paddleocr/infer/__pycache__/CTCLabelDecode.cpython-310.pyc differ diff --git a/texteller/models/thrid_party/paddleocr/infer/__pycache__/DBPostProcess.cpython-310.pyc b/texteller/models/thrid_party/paddleocr/infer/__pycache__/DBPostProcess.cpython-310.pyc new file mode 100644 index 0000000..39f3cd1 Binary files /dev/null and b/texteller/models/thrid_party/paddleocr/infer/__pycache__/DBPostProcess.cpython-310.pyc differ diff --git a/texteller/models/thrid_party/paddleocr/infer/__pycache__/operators.cpython-310.pyc b/texteller/models/thrid_party/paddleocr/infer/__pycache__/operators.cpython-310.pyc new file mode 100644 index 0000000..6819fbf Binary files /dev/null and b/texteller/models/thrid_party/paddleocr/infer/__pycache__/operators.cpython-310.pyc differ diff --git a/texteller/models/thrid_party/paddleocr/infer/__pycache__/predict_det.cpython-310.pyc b/texteller/models/thrid_party/paddleocr/infer/__pycache__/predict_det.cpython-310.pyc new file mode 100644 index 0000000..7ce39f5 Binary files /dev/null and b/texteller/models/thrid_party/paddleocr/infer/__pycache__/predict_det.cpython-310.pyc differ diff --git a/texteller/models/thrid_party/paddleocr/infer/__pycache__/predict_rec.cpython-310.pyc b/texteller/models/thrid_party/paddleocr/infer/__pycache__/predict_rec.cpython-310.pyc new file mode 100644 index 0000000..89b47c7 Binary files /dev/null and b/texteller/models/thrid_party/paddleocr/infer/__pycache__/predict_rec.cpython-310.pyc differ diff --git a/texteller/models/thrid_party/paddleocr/infer/__pycache__/utility.cpython-310.pyc b/texteller/models/thrid_party/paddleocr/infer/__pycache__/utility.cpython-310.pyc new file mode 100644 index 0000000..477417c Binary files /dev/null and b/texteller/models/thrid_party/paddleocr/infer/__pycache__/utility.cpython-310.pyc differ diff --git a/src/models/thrid_party/paddleocr/infer/operators.py b/texteller/models/thrid_party/paddleocr/infer/operators.py similarity index 99% rename from src/models/thrid_party/paddleocr/infer/operators.py rename to texteller/models/thrid_party/paddleocr/infer/operators.py index 5b1e284..e04a6d3 100644 --- a/src/models/thrid_party/paddleocr/infer/operators.py +++ b/texteller/models/thrid_party/paddleocr/infer/operators.py @@ -103,7 +103,7 @@ class DetResizeForTest(object): if int(resize_w) <= 0 or int(resize_h) <= 0: return None, (None, None) img = cv2.resize(img, (int(resize_w), int(resize_h))) - except: + except: # noqa: E722 print(img.shape, resize_w, resize_h) sys.exit(0) ratio_h = resize_h / float(h) @@ -183,4 +183,4 @@ class KeepKeys(object): data_list = [] for key in self.keep_keys: data_list.append(data[key]) - return data_list \ No newline at end of file + return data_list diff --git 
a/src/models/thrid_party/paddleocr/infer/ppocr_keys_v1.txt b/texteller/models/thrid_party/paddleocr/infer/ppocr_keys_v1.txt similarity index 99% rename from src/models/thrid_party/paddleocr/infer/ppocr_keys_v1.txt rename to texteller/models/thrid_party/paddleocr/infer/ppocr_keys_v1.txt index 84b885d..b75af21 100644 --- a/src/models/thrid_party/paddleocr/infer/ppocr_keys_v1.txt +++ b/texteller/models/thrid_party/paddleocr/infer/ppocr_keys_v1.txt @@ -6620,4 +6620,4 @@ j 緖 續 紹 -懮 \ No newline at end of file +懮 diff --git a/src/models/thrid_party/paddleocr/infer/predict_det.py b/texteller/models/thrid_party/paddleocr/infer/predict_det.py similarity index 87% rename from src/models/thrid_party/paddleocr/infer/predict_det.py rename to texteller/models/thrid_party/paddleocr/infer/predict_det.py index 3ffdfd7..284c673 100755 --- a/src/models/thrid_party/paddleocr/infer/predict_det.py +++ b/texteller/models/thrid_party/paddleocr/infer/predict_det.py @@ -20,17 +20,17 @@ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../.."))) os.environ["FLAGS_allocator_strategy"] = "auto_growth" +import sys +import time + import cv2 import numpy as np -import time -import sys # import tools.infer.utility as utility import utility -from utility import get_logger - from DBPostProcess import DBPostProcess from operators import DetResizeForTest, KeepKeys, NormalizeImage, ToCHWImage +from utility import get_logger def transform(data, ops=None): @@ -43,6 +43,7 @@ def transform(data, ops=None): return None return data + logger = get_logger() @@ -63,10 +64,17 @@ class TextDetector(object): postprocess_params["box_type"] = args.det_box_type self.preprocess_op = [ - DetResizeForTest(limit_side_len=args.det_limit_side_len, limit_type=args.det_limit_type), - NormalizeImage(std= [0.229, 0.224, 0.225], mean= [0.485, 0.456, 0.406], scale= 1./255., order= "hwc"), + DetResizeForTest( + limit_side_len=args.det_limit_side_len, limit_type=args.det_limit_type + ), + NormalizeImage( + std=[0.229, 0.224, 0.225], + mean=[0.485, 0.456, 0.406], + scale=1.0 / 255.0, + order="hwc", + ), ToCHWImage(), - KeepKeys(keep_keys= ["image", "shape"]) + KeepKeys(keep_keys=["image", "shape"]), ] self.postprocess_op = DBPostProcess(**postprocess_params) ( @@ -84,7 +92,6 @@ class TextDetector(object): elif img_h is not None and img_w is not None and img_h > 0 and img_w > 0: self.preprocess_op[0] = DetResizeForTest(image_shape=[img_h, img_w]) - def order_points_clockwise(self, pts): rect = np.zeros((4, 2), dtype="float32") s = pts.sum(axis=1) @@ -201,10 +208,7 @@ class TextDetector(object): MIN_BOUND_DISTANCE = 50 dt_boxes = np.zeros((0, 4, 2), dtype=np.float32) elapse = 0 - if ( - img.shape[0] / img.shape[1] > 2 - and img.shape[0] > self.args.det_limit_side_len - ): + if img.shape[0] / img.shape[1] > 2 and img.shape[0] > self.args.det_limit_side_len: start_h = 0 end_h = 0 while end_h <= img.shape[0]: @@ -217,30 +221,23 @@ class TextDetector(object): # To prevent text blocks from being cut off, roll back a certain buffer area. 
if ( len(sub_dt_boxes) == 0 - or img.shape[1] - max([x[-1][1] for x in sub_dt_boxes]) - > MIN_BOUND_DISTANCE + or img.shape[1] - max([x[-1][1] for x in sub_dt_boxes]) > MIN_BOUND_DISTANCE ): start_h = end_h else: sorted_indices = np.argsort(sub_dt_boxes[:, 2, 1]) sub_dt_boxes = sub_dt_boxes[sorted_indices] bottom_line = ( - 0 - if len(sub_dt_boxes) <= 1 - else int(np.max(sub_dt_boxes[:-1, 2, 1])) + 0 if len(sub_dt_boxes) <= 1 else int(np.max(sub_dt_boxes[:-1, 2, 1])) ) if bottom_line > 0: start_h += bottom_line - sub_dt_boxes = sub_dt_boxes[ - sub_dt_boxes[:, 2, 1] <= bottom_line - ] + sub_dt_boxes = sub_dt_boxes[sub_dt_boxes[:, 2, 1] <= bottom_line] else: start_h = end_h if len(sub_dt_boxes) > 0: if dt_boxes.shape[0] == 0: - dt_boxes = sub_dt_boxes + np.array( - [0, offset], dtype=np.float32 - ) + dt_boxes = sub_dt_boxes + np.array([0, offset], dtype=np.float32) else: dt_boxes = np.append( dt_boxes, @@ -248,10 +245,7 @@ class TextDetector(object): axis=0, ) elapse += sub_elapse - elif ( - img.shape[1] / img.shape[0] > 3 - and img.shape[1] > self.args.det_limit_side_len * 3 - ): + elif img.shape[1] / img.shape[0] > 3 and img.shape[1] > self.args.det_limit_side_len * 3: start_w = 0 end_w = 0 while end_w <= img.shape[1]: @@ -263,17 +257,14 @@ class TextDetector(object): offset = start_w if ( len(sub_dt_boxes) == 0 - or img.shape[0] - max([x[-1][0] for x in sub_dt_boxes]) - > MIN_BOUND_DISTANCE + or img.shape[0] - max([x[-1][0] for x in sub_dt_boxes]) > MIN_BOUND_DISTANCE ): start_w = end_w else: sorted_indices = np.argsort(sub_dt_boxes[:, 2, 0]) sub_dt_boxes = sub_dt_boxes[sorted_indices] right_line = ( - 0 - if len(sub_dt_boxes) <= 1 - else int(np.max(sub_dt_boxes[:-1, 1, 0])) + 0 if len(sub_dt_boxes) <= 1 else int(np.max(sub_dt_boxes[:-1, 1, 0])) ) if right_line > 0: start_w += right_line @@ -282,9 +273,7 @@ class TextDetector(object): start_w = end_w if len(sub_dt_boxes) > 0: if dt_boxes.shape[0] == 0: - dt_boxes = sub_dt_boxes + np.array( - [offset, 0], dtype=np.float32 - ) + dt_boxes = sub_dt_boxes + np.array([offset, 0], dtype=np.float32) else: dt_boxes = np.append( dt_boxes, @@ -295,4 +284,3 @@ class TextDetector(object): else: dt_boxes, elapse = self.predict(img) return dt_boxes, elapse - diff --git a/src/models/thrid_party/paddleocr/infer/predict_rec.py b/texteller/models/thrid_party/paddleocr/infer/predict_rec.py similarity index 96% rename from src/models/thrid_party/paddleocr/infer/predict_rec.py rename to texteller/models/thrid_party/paddleocr/infer/predict_rec.py index a2d4a47..603f64f 100755 --- a/src/models/thrid_party/paddleocr/infer/predict_rec.py +++ b/texteller/models/thrid_party/paddleocr/infer/predict_rec.py @@ -39,7 +39,9 @@ class TextRecognizer(object): self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] self.rec_batch_num = args.rec_batch_num self.rec_algorithm = args.rec_algorithm - self.postprocess_op = CTCLabelDecode(character_dict_path=args.rec_char_dict_path, use_space_char=args.use_space_char) + self.postprocess_op = CTCLabelDecode( + character_dict_path=args.rec_char_dict_path, use_space_char=args.use_space_char + ) ( self.predictor, self.input_tensor, @@ -143,13 +145,9 @@ class TextRecognizer(object): imgC, imgH, imgW = image_shape feature_dim = int((imgH / 8) * (imgW / 8)) - encoder_word_pos = ( - np.array(range(0, feature_dim)).reshape((feature_dim, 1)).astype("int64") - ) + encoder_word_pos = np.array(range(0, feature_dim)).reshape((feature_dim, 1)).astype("int64") gsrm_word_pos = ( - np.array(range(0, max_text_length)) - 
.reshape((max_text_length, 1)) - .astype("int64") + np.array(range(0, max_text_length)).reshape((max_text_length, 1)).astype("int64") ) gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) @@ -355,9 +353,7 @@ class TextRecognizer(object): max_wh_ratio = max(max_wh_ratio, wh_ratio) wh_ratio_list.append(wh_ratio) for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img( - img_list[indices[ino]], max_wh_ratio - ) + norm_img = self.resize_norm_img(img_list[indices[ino]], max_wh_ratio) norm_img = norm_img[np.newaxis, :] norm_img_batch.append(norm_img) norm_img_batch = np.concatenate(norm_img_batch) diff --git a/src/models/thrid_party/paddleocr/infer/utility.py b/texteller/models/thrid_party/paddleocr/infer/utility.py similarity index 94% rename from src/models/thrid_party/paddleocr/infer/utility.py rename to texteller/models/thrid_party/paddleocr/infer/utility.py index e92a77c..b2404d8 100644 --- a/src/models/thrid_party/paddleocr/infer/utility.py +++ b/texteller/models/thrid_party/paddleocr/infer/utility.py @@ -92,9 +92,7 @@ def init_args(): parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320") parser.add_argument("--rec_batch_num", type=int, default=6) parser.add_argument("--max_text_length", type=int, default=25) - parser.add_argument( - "--rec_char_dict_path", type=str, default="./ppocr_keys_v1.txt" - ) + parser.add_argument("--rec_char_dict_path", type=str, default="./ppocr_keys_v1.txt") parser.add_argument("--use_space_char", type=str2bool, default=True) parser.add_argument("--vis_font_path", type=str, default="./doc/fonts/simfang.ttf") parser.add_argument("--drop_score", type=float, default=0.5) @@ -107,9 +105,7 @@ def init_args(): # PGNet params parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5) - parser.add_argument( - "--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt" - ) + parser.add_argument("--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt") parser.add_argument("--e2e_pgnet_valid_set", type=str, default="totaltext") parser.add_argument("--e2e_pgnet_mode", type=str, default="fast") @@ -194,15 +190,12 @@ def create_predictor(args, mode, logger): if not os.path.exists(model_file_path): raise ValueError("not find model file path {}".format(model_file_path)) if args.use_gpu: - sess = ort.InferenceSession( - model_file_path, providers=["CUDAExecutionProvider"] - ) + sess = ort.InferenceSession(model_file_path, providers=["CUDAExecutionProvider"]) else: sess = ort.InferenceSession(model_file_path) return sess, sess.get_inputs()[0], None, None - def get_output_tensors(args, mode, predictor): output_names = predictor.get_output_names() output_tensors = [] @@ -333,12 +326,8 @@ def draw_ocr_box_txt( def draw_box_txt_fine(img_size, box, txt, font_path="./doc/fonts/simfang.ttf"): - box_height = int( - math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2) - ) - box_width = int( - math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2) - ) + box_height = int(math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)) + box_width = int(math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)) if box_height > 2 * box_width and box_height > 30: img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255)) @@ -354,9 +343,7 @@ def draw_box_txt_fine(img_size, box, txt, font_path="./doc/fonts/simfang.ttf"): font = create_font(txt, (box_width, box_height), font_path) draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font) -
pts1 = np.float32( - [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]] - ) + pts1 = np.float32([[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]) pts2 = np.array(box, dtype=np.float32) M = cv2.getPerspectiveTransform(pts1, pts2) @@ -411,9 +398,7 @@ def str_count(s): return s_len - math.ceil(en_dg_count / 2) -def text_visual( - texts, scores, img_h=400, img_w=600, threshold=0.0, font_path="./doc/simfang.ttf" -): +def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.0, font_path="./doc/simfang.ttf"): """ create new blank img and draw txt on it args: @@ -425,9 +410,7 @@ def text_visual( return(array): """ if scores is not None: - assert len(texts) == len( - scores - ), "The number of txts and corresponding scores must match" + assert len(texts) == len(scores), "The number of txts and corresponding scores must match" def create_blank_img(): blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 @@ -518,14 +501,10 @@ def get_rotate_crop_image(img, points): """ assert len(points) == 4, "shape of points must be 4*2" img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3]) - ) + max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3])) ) img_crop_height = int( - max( - np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]) - ) + max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2])) ) pts_std = np.float32( [ @@ -605,6 +584,8 @@ def get_image_file_list(img_file, infer_list=None): logger_initialized = {} + + @functools.lru_cache() def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG): """Initialize and get a logger by name. @@ -654,14 +635,10 @@ def get_rotate_crop_image(img, points): """ assert len(points) == 4, "shape of points must be 4*2" img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3]) - ) + max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3])) ) img_crop_height = int( - max( - np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]) - ) + max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2])) ) pts_std = np.float32( [ @@ -708,6 +685,5 @@ def get_minarea_rect_crop(img, points): return crop_img - if __name__ == "__main__": pass diff --git a/src/models/tokenizer/train.py b/texteller/models/tokenizer/train.py similarity index 89% rename from src/models/tokenizer/train.py rename to texteller/models/tokenizer/train.py index aa44521..80e5e0e 100644 --- a/src/models/tokenizer/train.py +++ b/texteller/models/tokenizer/train.py @@ -15,10 +15,9 @@ if __name__ == '__main__': dataset = load_dataset('../ocr_model/train/dataset/loader.py')['train'] new_tokenizer = tokenizer.train_new_from_iterator( - text_iterator=dataset['latex_formula'], - + text_iterator=dataset['latex_formula'], # If you want to use a different vocab size, **change VOCAB_SIZE from globals.py** - vocab_size=VOCAB_SIZE + vocab_size=VOCAB_SIZE, ) # Save the new tokenizer for later training and inference diff --git a/texteller/models/utils/__init__.py b/texteller/models/utils/__init__.py new file mode 100644 index 0000000..3597062 --- /dev/null +++ b/texteller/models/utils/__init__.py @@ -0,0 +1 @@ +from .mix_inference import mix_inference diff --git a/texteller/models/utils/__pycache__/__init__.cpython-310.pyc b/texteller/models/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..7b3278e 
Binary files /dev/null and b/texteller/models/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/texteller/models/utils/__pycache__/mix_inference.cpython-310.pyc b/texteller/models/utils/__pycache__/mix_inference.cpython-310.pyc new file mode 100644 index 0000000..cadc973 Binary files /dev/null and b/texteller/models/utils/__pycache__/mix_inference.cpython-310.pyc differ diff --git a/src/models/utils/mix_inference.py b/texteller/models/utils/mix_inference.py similarity index 91% rename from src/models/utils/mix_inference.py rename to texteller/models/utils/mix_inference.py index 9da3c85..0f8aa4f 100644 --- a/src/models/utils/mix_inference.py +++ b/texteller/models/utils/mix_inference.py @@ -20,12 +20,12 @@ MAXV = 999999999 def mask_img(img, bboxes: List[Bbox], bg_color: np.ndarray) -> np.ndarray: mask_img = img.copy() for bbox in bboxes: - mask_img[bbox.p.y:bbox.p.y + bbox.h, bbox.p.x:bbox.p.x + bbox.w] = bg_color + mask_img[bbox.p.y : bbox.p.y + bbox.h, bbox.p.x : bbox.p.x + bbox.w] = bg_color return mask_img def bbox_merge(sorted_bboxes: List[Bbox]) -> List[Bbox]: - if (len(sorted_bboxes) == 0): + if len(sorted_bboxes) == 0: return [] bboxes = sorted_bboxes.copy() guard = Bbox(MAXV, bboxes[-1].p.y, -1, -1, label="guard") @@ -61,7 +61,7 @@ def split_conflict(ocr_bboxes: List[Bbox], latex_bboxes: List[Bbox]) -> List[Bbo candidate = heapq.heappop(bboxes) curr = heapq.heappop(bboxes) idx = 0 - while (len(bboxes) > 0): + while len(bboxes) > 0: idx += 1 assert candidate.p.x <= curr.p.x or not candidate.same_row(curr) @@ -85,7 +85,7 @@ def split_conflict(ocr_bboxes: List[Bbox], latex_bboxes: List[Bbox]) -> List[Bbo curr.p.x = candidate.ur_point.x heapq.heappush(bboxes, curr) curr = heapq.heappop(bboxes) - + elif candidate.ur_point.x >= curr.ur_point.x: assert not (candidate.label != "text" and curr.label != "text") @@ -100,8 +100,8 @@ def split_conflict(ocr_bboxes: List[Bbox], latex_bboxes: List[Bbox]) -> List[Bbo candidate.ur_point.x - curr.ur_point.x, label="text", confidence=candidate.confidence, - content=None - ) + content=None, + ), ) candidate.w = curr.p.x - candidate.p.x res.append(candidate) @@ -128,7 +128,7 @@ def slice_from_image(img: np.ndarray, ocr_bboxes: List[Bbox]) -> List[np.ndarray for bbox in ocr_bboxes: x, y = int(bbox.p.x), int(bbox.p.y) w, h = int(bbox.w), int(bbox.h) - sliced_img = img[y:y+h, x:x+w] + sliced_img = img[y : y + h, x : x + w] sliced_imgs.append(sliced_img) return sliced_imgs @@ -137,20 +137,17 @@ def mix_inference( img_path: str, infer_config, latex_det_model, - lang_ocr_models, - latex_rec_models, accelerator="cpu", - num_beams=1 + num_beams=1, ) -> str: ''' Take a mixed image of formulas and text, and output a str (in Markdown syntax) ''' global img img = cv2.imread(img_path) - corners = [tuple(img[0, 0]), tuple(img[0, -1]), - tuple(img[-1, 0]), tuple(img[-1, -1])] + corners = [tuple(img[0, 0]), tuple(img[0, -1]), tuple(img[-1, 0]), tuple(img[-1, -1])] bg_color = np.array(Counter(corners).most_common(1)[0][0]) start_time = time.time() @@ -172,10 +169,13 @@ def mix_inference( print(f"ocr_det_model time: {end_time - start_time:.2f}s") ocr_bboxes = [ Bbox( - p[0][0], p[0][1], p[3][1]-p[0][1], p[1][0]-p[0][0], + p[0][0], + p[0][1], + p[3][1] - p[0][1], + p[1][0] - p[0][0], label="text", confidence=None, - content=None + content=None, ) for p in det_prediction ] @@ -198,12 +198,14 @@ def mix_inference( assert len(rec_predictions) == len(ocr_bboxes) for content, bbox in zip(rec_predictions, ocr_bboxes): bbox.content = content[0] - - latex_imgs =[] + +
@@ -137,20 +137,17 @@ def mix_inference(
     img_path: str,
     infer_config,
     latex_det_model,
-    lang_ocr_models,
-    latex_rec_models,
     accelerator="cpu",
-    num_beams=1
+    num_beams=1,
 ) -> str:
     '''
     Input a mixed image of formula text and output str (in markdown syntax)
     '''
     global img
     img = cv2.imread(img_path)
-    corners = [tuple(img[0, 0]), tuple(img[0, -1]),
-               tuple(img[-1, 0]), tuple(img[-1, -1])]
+    corners = [tuple(img[0, 0]), tuple(img[0, -1]), tuple(img[-1, 0]), tuple(img[-1, -1])]
     bg_color = np.array(Counter(corners).most_common(1)[0][0])
 
     start_time = time.time()
@@ -172,10 +169,13 @@ def mix_inference(
     print(f"ocr_det_model time: {end_time - start_time:.2f}s")
     ocr_bboxes = [
         Bbox(
-            p[0][0], p[0][1], p[3][1]-p[0][1], p[1][0]-p[0][0],
+            p[0][0],
+            p[0][1],
+            p[3][1] - p[0][1],
+            p[1][0] - p[0][0],
             label="text",
             confidence=None,
-            content=None
+            content=None,
         )
         for p in det_prediction
     ]
@@ -198,12 +198,14 @@ def mix_inference(
     assert len(rec_predictions) == len(ocr_bboxes)
     for content, bbox in zip(rec_predictions, ocr_bboxes):
         bbox.content = content[0]
-
-    latex_imgs =[]
+
+    latex_imgs = []
     for bbox in latex_bboxes:
-        latex_imgs.append(img[bbox.p.y:bbox.p.y + bbox.h, bbox.p.x:bbox.p.x + bbox.w])
+        latex_imgs.append(img[bbox.p.y : bbox.p.y + bbox.h, bbox.p.x : bbox.p.x + bbox.w])
 
     start_time = time.time()
-    latex_rec_res = latex_rec_predict(*latex_rec_models, latex_imgs, accelerator, num_beams, max_tokens=800)
+    latex_rec_res = latex_rec_predict(
+        *latex_rec_models, latex_imgs, accelerator, num_beams, max_tokens=800
+    )
     end_time = time.time()
     print(f"latex_rec_model time: {end_time - start_time:.2f}s")
 
@@ -214,7 +216,6 @@ def mix_inference(
         elif bbox.label == "isolated":
             bbox.content = '\n\n' + r"$$" + bbox.content + r"$$" + '\n\n'
 
-
     bboxes = sorted(ocr_bboxes + latex_bboxes)
     if bboxes == []:
         return ""
@@ -223,11 +224,7 @@ def mix_inference(
     prev = Bbox(bboxes[0].p.x, bboxes[0].p.y, -1, -1, label="guard")
     for curr in bboxes:
         # Add the formula number back to the isolated formula
-        if (
-            prev.label == "isolated"
-            and curr.label == "text"
-            and prev.same_row(curr)
-        ):
+        if prev.label == "isolated" and curr.label == "text" and prev.same_row(curr):
            curr.content = curr.content.strip()
             if curr.content.startswith('(') and curr.content.endswith(')'):
                 curr.content = curr.content[1:-1]
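Note: the tail of `mix_inference` sorts text and formula boxes into reading order and folds equation numbers like `(3.1)` back into the preceding display formula. A stripped-down sketch of that assembly step follows, using a hypothetical lightweight box type; the real `Bbox` ordering, `same_row` tolerance, and the exact way the repo re-attaches the number (here rendered as a KaTeX `\tag`, which is an assumption) live in the code above.

```python
from dataclasses import dataclass

@dataclass
class Box:  # hypothetical stand-in for the repo's Bbox
    x: float
    y: float
    label: str      # "text", "embedded", or "isolated"
    content: str

    def same_row(self, other: "Box", tol: float = 10.0) -> bool:
        return abs(self.y - other.y) < tol

def assemble(boxes: list[Box]) -> str:
    # Sort top-to-bottom, then left-to-right: a simple reading order
    boxes = sorted(boxes, key=lambda b: (b.y, b.x))
    parts: list[str] = []
    prev = None
    for curr in boxes:
        if curr.label == "isolated":
            curr.content = "\n\n$$" + curr.content + "$$\n\n"
        # Fold an "(eq-number)" text box back into the preceding display formula
        if prev and prev.label == "isolated" and curr.label == "text" and prev.same_row(curr):
            num = curr.content.strip().strip("()")
            # parts[-1] ends with "$$\n\n"; splice a \tag in before the closing $$
            parts[-1] = parts[-1][: -len("$$\n\n")] + rf" \tag{{{num}}}$$" + "\n\n"
            prev = curr
            continue
        parts.append(curr.content)
        prev = curr
    return " ".join(parts)

# e.g. assemble([Box(0, 0, "isolated", "E=mc^2"), Box(50, 0, "text", "(3.1)")])
```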
diff --git a/src/server.py b/texteller/server.py
similarity index 77%
rename from src/server.py
rename to texteller/server.py
index e6c0320..e17db20 100644
--- a/src/server.py
+++ b/texteller/server.py
@@ -23,12 +23,8 @@ LIBPATH = Path(sys.executable).parent.parent / 'lib' / ('python' + PYTHON_VERSIO
 CUDNNPATH = LIBPATH / 'nvidia' / 'cudnn' / 'lib'
 
 parser = argparse.ArgumentParser()
-parser.add_argument(
-    '-ckpt', '--checkpoint_dir', type=str
-)
-parser.add_argument(
-    '-tknz', '--tokenizer_dir', type=str
-)
+parser.add_argument('-ckpt', '--checkpoint_dir', type=str)
+parser.add_argument('-tknz', '--tokenizer_dir', type=str)
 parser.add_argument('-port', '--server_port', type=int, default=8000)
 parser.add_argument('--num_replicas', type=int, default=1)
 parser.add_argument('--ncpu_per_replica', type=float, default=1.0)
@@ -41,66 +37,67 @@ parser.add_argument('-onnx', action='store_true', help='using onnx runtime')
 args = parser.parse_args()
 if args.ngpu_per_replica > 0 and not args.inference_mode == 'cuda':
     raise ValueError("--inference-mode must be cuda or mps if ngpu_per_replica > 0")
-
+
 
 @serve.deployment(
-    num_replicas=args.num_replicas,
+    num_replicas=args.num_replicas,
     ray_actor_options={
-        "num_cpus": args.ncpu_per_replica,
-        "num_gpus": args.ngpu_per_replica * 1.0 / 2
-    }
+        "num_cpus": args.ncpu_per_replica,
+        "num_gpus": args.ngpu_per_replica * 1.0 / 2,
+    },
 )
 class TexTellerRecServer:
     def __init__(
-        self,
-        checkpoint_path: str,
-        tokenizer_path: str,
+        self,
+        checkpoint_path: str,
+        tokenizer_path: str,
         inf_mode: str = 'cpu',
         use_onnx: bool = False,
-        num_beams: int = 1
+        num_beams: int = 1,
     ) -> None:
-        self.model = TexTeller.from_pretrained(checkpoint_path, use_onnx=use_onnx, onnx_provider=inf_mode)
+        self.model = TexTeller.from_pretrained(
+            checkpoint_path, use_onnx=use_onnx, onnx_provider=inf_mode
+        )
         self.tokenizer = TexTeller.get_tokenizer(tokenizer_path)
         self.inf_mode = inf_mode
         self.num_beams = num_beams
 
         if not use_onnx:
             self.model = self.model.to(inf_mode) if inf_mode != 'cpu' else self.model
-
+
     def predict(self, image_nparray) -> str:
-        return to_katex(rec_inference(
-            self.model, self.tokenizer, [image_nparray],
-            accelerator=self.inf_mode, num_beams=self.num_beams
-        )[0])
+        return to_katex(
+            rec_inference(
+                self.model,
+                self.tokenizer,
+                [image_nparray],
+                accelerator=self.inf_mode,
+                num_beams=self.num_beams,
+            )[0]
+        )
+
 
 @serve.deployment(
-    num_replicas=args.num_replicas,
+    num_replicas=args.num_replicas,
     ray_actor_options={
-        "num_cpus": args.ncpu_per_replica,
+        "num_cpus": args.ncpu_per_replica,
         "num_gpus": args.ngpu_per_replica * 1.0 / 2,
-        "runtime_env": {
-            "env_vars": {
-                "LD_LIBRARY_PATH": f"{str(CUDNNPATH)}/:$LD_LIBRARY_PATH"
-            }
-        }
+        "runtime_env": {"env_vars": {"LD_LIBRARY_PATH": f"{str(CUDNNPATH)}/:$LD_LIBRARY_PATH"}},
     },
 )
 class TexTellerDetServer:
-    def __init__(
-        self,
-        inf_mode='cpu'
-    ):
+    def __init__(self, inf_mode='cpu'):
         self.infer_config = PredictConfig("./models/det_model/model/infer_cfg.yml")
         self.latex_det_model = InferenceSession(
-            "./models/det_model/model/rtdetr_r50vd_6x_coco.onnx",
-            providers=['CUDAExecutionProvider'] if inf_mode == 'cuda' else ['CPUExecutionProvider']
+            "./models/det_model/model/rtdetr_r50vd_6x_coco.onnx",
+            providers=['CUDAExecutionProvider'] if inf_mode == 'cuda' else ['CPUExecutionProvider'],
         )
 
     async def predict(self, image_nparray) -> str:
         with tempfile.TemporaryDirectory() as temp_dir:
             img_path = f"{temp_dir}/temp_image.jpg"
             cv2.imwrite(img_path, image_nparray)
-
+
             latex_bboxes = det_inference(img_path, self.latex_det_model, self.infer_config)
             return latex_bboxes
 
@@ -110,10 +107,10 @@ class Ingress:
     def __init__(self, det_server: DeploymentHandle, rec_server: DeploymentHandle) -> None:
         self.det_server = det_server
         self.texteller_server = rec_server
-
+
     async def __call__(self, request: Request) -> str:
         request_path = request.url.path
-        form = await request.form()
+        form = await request.form()
         img_rb = await form['img'].read()
         img_nparray = np.frombuffer(img_rb, np.uint8)
 
@@ -121,7 +118,7 @@ class Ingress:
         img_nparray = cv2.cvtColor(img_nparray, cv2.COLOR_BGR2RGB)
 
         if request_path.startswith("/fdet"):
-            if self.det_server == None:
+            if self.det_server is None:
                 return "[ERROR] rtdetr_r50vd_6x_coco.onnx not found."
             pred = await self.det_server.predict.remote(img_nparray)
             return pred
@@ -140,18 +137,19 @@ if __name__ == '__main__':
     serve.start(http_options={"host": "0.0.0.0", "port": args.server_port})
 
     rec_server = TexTellerRecServer.bind(
-        ckpt_dir, tknz_dir,
+        ckpt_dir,
+        tknz_dir,
         inf_mode=args.inference_mode,
         use_onnx=args.onnx,
-        num_beams=args.num_beams
+        num_beams=args.num_beams,
     )
     det_server = None
     if Path('./models/det_model/model/rtdetr_r50vd_6x_coco.onnx').exists():
         det_server = TexTellerDetServer.bind(args.inference_mode)
 
     ingress = Ingress.bind(det_server, rec_server)
-    # ingress_handle = serve.run(ingress, route_prefix="/predict")
-    ingress_handle = serve.run(ingress, route_prefix="/")
+    # ingress_handle = serve.run(ingress, route_prefix="/predict")
+    ingress_handle = serve.run(ingress, route_prefix="/")
 
     while True:
         time.sleep(1)
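Note: the server rename does not change the wire protocol; `Ingress` still reads the upload from the multipart form field named `img`. A minimal client sketch is below; the host, port, and the exact route names are read off this file but should be treated as assumptions (only the `/fdet` prefix check is visible in the hunk above).

```python
# Minimal client sketch for texteller/server.py (routes are assumptions)
import requests

with open("formula.png", "rb") as f:
    # the Ingress handler reads the upload from the form field named 'img'
    resp = requests.post("http://localhost:8000/predict", files={"img": f})
print(resp.text)  # KaTeX-ready LaTeX from TexTellerRecServer

# Formula *detection* goes through the /fdet route instead
with open("page.png", "rb") as f:
    resp = requests.post("http://localhost:8000/fdet", files={"img": f})
```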
diff --git a/src/start_web.bat b/texteller/start_web.bat
similarity index 100%
rename from src/start_web.bat
rename to texteller/start_web.bat
diff --git a/src/start_web.sh b/texteller/start_web.sh
similarity index 100%
rename from src/start_web.sh
rename to texteller/start_web.sh
diff --git a/src/train_config.yaml b/texteller/train_config.yaml
similarity index 100%
rename from src/train_config.yaml
rename to texteller/train_config.yaml
diff --git a/src/web.py b/texteller/web.py
similarity index 83%
rename from src/web.py
rename to texteller/web.py
index 7e47a8a..7a497d9 100644
--- a/src/web.py
+++ b/texteller/web.py
@@ -20,10 +20,7 @@ from models.ocr_model.utils.inference import inference as latex_recognition
 from models.ocr_model.utils.to_katex import to_katex
 
 
-st.set_page_config(
-    page_title="TexTeller",
-    page_icon="🧮"
-)
+st.set_page_config(page_title="TexTeller", page_icon="🧮")
 
 html_string = '''
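Note: the web.py hunk above cuts off mid-file. For orientation, the Streamlit entry point reduces to the page-config call plus an upload-and-recognize loop; below is a hedged sketch under assumed names (`latex_recognition` and `to_katex` are the imports shown above, everything else is illustrative, including the placeholder result).

```python
# Sketch of the web demo's core loop; model loading and result wiring are assumptions
import streamlit as st

st.set_page_config(page_title="TexTeller", page_icon="🧮")

uploaded = st.file_uploader("Upload a formula image", type=["png", "jpg", "jpeg"])
if uploaded is not None:
    st.image(uploaded)
    # In the real app: run latex_recognition(...) and clean up with to_katex(...)
    katex_str = r"\frac{a}{b}"  # placeholder result
    st.latex(katex_str)
    st.code(katex_str, language="latex")
```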