diff --git a/_modules/index.html b/_modules/index.html deleted file mode 100644 index 7c3e83a..0000000 --- a/_modules/index.html +++ /dev/null @@ -1,386 +0,0 @@ - - - - - - -
- - -
-from typing import List
-
-from onnxruntime import InferenceSession
-
-from texteller.types import Bbox
-
-from .preprocess import Compose
-
# Settings for the ONNX formula detector. ``latex_detect`` passes the
# "preprocess" steps to ``Compose`` and turns the class id of each detection
# into a name through "label_list".
_config = dict(
    mode="paddle",
    draw_threshold=0.5,
    metric="COCO",
    use_dynamic_shape=False,
    arch="DETR",
    min_subgraph_size=3,
    preprocess=[
        dict(interp=2, keep_ratio=False, target_size=[1600, 1600], type="Resize"),
        dict(
            mean=[0.0, 0.0, 0.0],
            norm_type="none",
            std=[1.0, 1.0, 1.0],
            type="NormalizeImage",
        ),
        dict(type="Permute"),
    ],
    label_list=["isolated", "embedding"],
)
-
-
-
-[docs]
def latex_detect(img_path: str, predictor: InferenceSession) -> List[Bbox]:
    """
    Detect LaTeX formulas in an image and classify them as isolated or embedded.

    The image is passed through the preprocessing steps listed in
    ``_config["preprocess"]`` and fed to the ONNX session. Every row of the
    first model output is read as ``(class_id, score, xmin, ymin, xmax, ymax)``.
    Rows whose score does not exceed ``_config["draw_threshold"]`` are dropped.

    The model identifies two types of LaTeX formulas:
        - 'isolated': Standalone LaTeX formulas (typically displayed equations)
        - 'embedding': Inline LaTeX formulas embedded within text

    Args:
        img_path: Path to the input image file
        predictor: ONNX InferenceSession model for LaTeX detection

    Returns:
        List of Bbox objects representing the detected LaTeX formulas with their
        positions, classifications, and confidence scores. Negative ``xmin`` /
        ``ymin`` values are clamped to 0.

    Example:
        >>> from texteller.api import load_latexdet_model, latex_detect
        >>> model = load_latexdet_model()
        >>> bboxes = latex_detect("path/to/image.png", model)
    """
    transforms = Compose(_config["preprocess"])
    inputs = transforms(img_path)
    # Index with None to add a leading axis to every tensor the session expects.
    feed = {var.name: inputs[var.name][None,] for var in predictor.get_inputs()}

    labels = _config["label_list"]
    # Read the threshold from the config instead of repeating the literal.
    threshold = _config["draw_threshold"]

    detections = predictor.run(output_names=None, input_feed=feed)[0]
    res = []
    for det in detections:
        score = det[1]
        # Check the score first so rejected rows never reach the label lookup.
        if score > threshold:
            cls_name = labels[int(det[0])]
            xmin = int(max(det[2], 0))
            ymin = int(max(det[3], 0))
            xmax = int(det[4])
            ymax = int(det[5])
            res.append(Bbox(xmin, ymin, ymax - ymin, xmax - xmin, cls_name, score))

    return res
-
-
-import re
-import time
-from collections import Counter
-from typing import Literal
-
-import cv2
-import numpy as np
-import torch
-from onnxruntime import InferenceSession
-from optimum.onnxruntime import ORTModelForVision2Seq
-from transformers import GenerationConfig, RobertaTokenizerFast
-
-from texteller.constants import MAX_TOKEN_SIZE
-from texteller.logger import get_logger
-from texteller.paddleocr import predict_det, predict_rec
-from texteller.types import Bbox, TexTellerModel
-from texteller.utils import (
- bbox_merge,
- get_device,
- mask_img,
- readimgs,
- remove_style,
- slice_from_image,
- split_conflict,
- transform,
- add_newlines,
-)
-
-from .detection import latex_detect
-from .format import format_latex
-from .katex import to_katex
-
-_logger = get_logger()
-
-
-
-[docs]
def img2latex(
    model: TexTellerModel,
    tokenizer: RobertaTokenizerFast,
    images: list[str] | list[np.ndarray],
    device: torch.device | None = None,
    out_format: Literal["latex", "katex"] = "latex",
    keep_style: bool = False,
    max_tokens: int = MAX_TOKEN_SIZE,
    num_beams: int = 1,
    no_repeat_ngram_size: int = 0,
) -> list[str]:
    """
    Convert images to LaTeX or KaTeX formatted strings.

    The decoded model output is post-processed in this order: ``to_katex``
    (only when ``out_format == "katex"``), ``remove_style`` (unless
    ``keep_style`` is set), ``format_latex`` and finally ``add_newlines``.

    Args:
        model: The TexTeller or ORTModelForVision2Seq model instance
        tokenizer: The tokenizer for the model
        images: Non-empty list of image paths or numpy arrays (RGB format)
        device: The torch device to use (defaults to ``get_device()``). A
            non-ONNX model is moved to this device; an ONNX model stays on its
            own device and a warning is logged on mismatch.
        out_format: Output format, either "latex" or "katex"
        keep_style: Whether to keep the style of the LaTeX
        max_tokens: Maximum number of tokens to generate
        num_beams: Number of beams for beam search
        no_repeat_ngram_size: Size of n-grams to prevent repetition

    Returns:
        List of LaTeX or KaTeX strings corresponding to each input image

    Raises:
        AssertionError: If ``images`` is not a non-empty list, or its first
            element is neither a ``str`` nor a ``np.ndarray``.

    Example:
        >>> import torch
        >>> from texteller import load_model, load_tokenizer, img2latex
        >>>
        >>> model = load_model(model_dir=None, use_onnx=False)
        >>> tokenizer = load_tokenizer(tokenizer_dir=None)
        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        >>>
        >>> res = img2latex(model, tokenizer, ["path/to/image.png"], device=device, out_format="katex")
    """
    assert isinstance(images, list)
    assert len(images) > 0

    if device is None:
        device = get_device()

    if device.type != model.device.type:
        if isinstance(model, ORTModelForVision2Seq):
            # The ONNX model is not moved; inference runs on the model's own device.
            _logger.warning(
                f"Onnxruntime device mismatch: detected {str(device)} but model is on {str(model.device)}, using {str(model.device)} instead"
            )
        else:
            model = model.to(device=device)

    if isinstance(images[0], str):
        images = readimgs(images)
    else:  # already numpy array(rgb format)
        assert isinstance(images[0], np.ndarray)

    images = transform(images)
    pixel_values = torch.stack(images)

    generate_config = GenerationConfig(
        max_new_tokens=max_tokens,
        num_beams=num_beams,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        bos_token_id=tokenizer.bos_token_id,
        no_repeat_ngram_size=no_repeat_ngram_size,
    )
    pred = model.generate(
        pixel_values.to(model.device),
        generation_config=generate_config,
    )

    res = tokenizer.batch_decode(pred, skip_special_tokens=True)

    if out_format == "katex":
        res = [to_katex(r) for r in res]

    if not keep_style:
        res = [remove_style(r) for r in res]

    res = [format_latex(r) for r in res]
    res = [add_newlines(r) for r in res]
    return res
-
-
-
-
-[docs]
def paragraph2md(
    img_path: str,
    latexdet_model: InferenceSession,
    textdet_model: predict_det.TextDetector,
    textrec_model: predict_rec.TextRecognizer,
    latexrec_model: TexTellerModel,
    tokenizer: RobertaTokenizerFast,
    device: torch.device | None = None,
    num_beams: int = 1,
) -> str:
    """
    Convert an image containing both text and mathematical formulas to markdown format.

    This function processes a mixed-content image by:
        1. Detecting mathematical formulas using a latex detection model
        2. Masking detected formula areas and detecting text regions using OCR
        3. Recognizing text in the detected regions
        4. Converting formula regions to LaTeX using the latex recognition model
        5. Combining all detected elements into a properly formatted markdown string

    Text that follows an isolated formula on the same row is folded into that
    formula as a ``\\tag{...}`` (surrounding parentheses are removed first).

    Args:
        img_path: Path to the input image containing text and formulas
        latexdet_model: ONNX InferenceSession for LaTeX formula detection
        textdet_model: OCR text detector model
        textrec_model: OCR text recognition model
        latexrec_model: TexTeller model for LaTeX formula recognition
        tokenizer: Tokenizer for the LaTeX recognition model
        device: The torch device to use (defaults to available GPU or CPU)
        num_beams: Number of beams for beam search during LaTeX generation

    Returns:
        Markdown formatted string containing the recognized text and formulas.
        Empty string when neither text nor formulas are found.

    Raises:
        ValueError: If the image at ``img_path`` cannot be read.

    Example:
        >>> from texteller import load_latexdet_model, load_textdet_model, load_textrec_model, load_tokenizer, paragraph2md
        >>>
        >>> # Load all required models
        >>> latexdet_model = load_latexdet_model()
        >>> textdet_model = load_textdet_model()
        >>> textrec_model = load_textrec_model()
        >>> latexrec_model = load_model()
        >>> tokenizer = load_tokenizer()
        >>>
        >>> # Convert image to markdown
        >>> markdown_text = paragraph2md(
        ...     img_path="path/to/mixed_content_image.jpg",
        ...     latexdet_model=latexdet_model,
        ...     textdet_model=textdet_model,
        ...     textrec_model=textrec_model,
        ...     latexrec_model=latexrec_model,
        ...     tokenizer=tokenizer,
        ... )
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for a missing or undecodable file; fail with
        # a clear message instead of a TypeError on the indexing below.
        raise ValueError(f"Could not read image: {img_path}")

    # Take the most frequent corner pixel as the background color for masking.
    corners = [tuple(img[0, 0]), tuple(img[0, -1]), tuple(img[-1, 0]), tuple(img[-1, -1])]
    bg_color = np.array(Counter(corners).most_common(1)[0][0])

    start_time = time.time()
    latex_bboxes = latex_detect(img_path, latexdet_model)
    end_time = time.time()
    _logger.info(f"latex_det_model time: {end_time - start_time:.2f}s")
    latex_bboxes = sorted(latex_bboxes)
    latex_bboxes = bbox_merge(latex_bboxes)
    masked_img = mask_img(img, latex_bboxes, bg_color)

    start_time = time.time()
    det_prediction, _ = textdet_model(masked_img)
    end_time = time.time()
    _logger.info(f"ocr_det_model time: {end_time - start_time:.2f}s")
    # Each prediction is indexed as four corner points; build (x, y, h, w) boxes
    # from corners 0, 1 and 3.
    ocr_bboxes = [
        Bbox(
            p[0][0],
            p[0][1],
            p[3][1] - p[0][1],
            p[1][0] - p[0][0],
            label="text",
            confidence=None,
            content=None,
        )
        for p in det_prediction
    ]

    ocr_bboxes = sorted(ocr_bboxes)
    ocr_bboxes = bbox_merge(ocr_bboxes)
    ocr_bboxes = split_conflict(ocr_bboxes, latex_bboxes)
    ocr_bboxes = [bbox for bbox in ocr_bboxes if bbox.label == "text"]

    sliced_imgs: list[np.ndarray] = slice_from_image(img, ocr_bboxes)
    start_time = time.time()
    rec_predictions, _ = textrec_model(sliced_imgs)
    end_time = time.time()
    _logger.info(f"ocr_rec_model time: {end_time - start_time:.2f}s")

    assert len(rec_predictions) == len(ocr_bboxes)
    for content, bbox in zip(rec_predictions, ocr_bboxes):
        bbox.content = content[0]

    # NOTE(review): these crops come from cv2.imread (BGR channel order) while
    # img2latex documents RGB arrays -- confirm whether a conversion is needed.
    latex_imgs = [
        img[bbox.p.y : bbox.p.y + bbox.h, bbox.p.x : bbox.p.x + bbox.w] for bbox in latex_bboxes
    ]
    latex_rec_res = []
    if latex_imgs:
        # img2latex asserts on an empty list, so only call it when at least
        # one formula was detected (text-only images are valid input).
        start_time = time.time()
        latex_rec_res = img2latex(
            model=latexrec_model,
            tokenizer=tokenizer,
            images=latex_imgs,
            num_beams=num_beams,
            out_format="katex",
            device=device,
            keep_style=False,
        )
        end_time = time.time()
        _logger.info(f"latex_rec_model time: {end_time - start_time:.2f}s")

    for bbox, content in zip(latex_bboxes, latex_rec_res):
        if bbox.label == "embedding":
            bbox.content = " $" + content + "$ "
        elif bbox.label == "isolated":
            bbox.content = "\n\n" + r"$$" + content + r"$$" + "\n\n"

    bboxes = sorted(ocr_bboxes + latex_bboxes)
    if not bboxes:
        return ""

    md = ""
    # Sentinel placed at the first box's position so the loop always has a `prev`.
    prev = Bbox(bboxes[0].p.x, bboxes[0].p.y, -1, -1, label="guard")
    for curr in bboxes:
        # Add the formula number back to the isolated formula
        if prev.label == "isolated" and curr.label == "text" and prev.same_row(curr):
            curr.content = curr.content.strip()
            if curr.content.startswith("(") and curr.content.endswith(")"):
                curr.content = curr.content[1:-1]

            # md currently ends with the 4-character tail "$$\n\n" of the
            # isolated formula; the tag is spliced in just before that tail.
            if re.search(r"\\tag\{.*\}$", md[:-4]) is not None:
                # in case of multiple tag
                md = md[:-5] + f", {curr.content}" + "}" + md[-4:]
            else:
                md = md[:-4] + f"\\tag{{{curr.content}}}" + md[-4:]
            # `prev` deliberately stays on the formula so further tags can follow.
            continue

        if not prev.same_row(curr):
            md += " "

        if curr.label == "embedding":
            # remove the bold effect from inline formulas
            curr.content = remove_style(curr.content)

            # change split environment into aligned
            curr.content = curr.content.replace(r"\begin{split}", r"\begin{aligned}")
            curr.content = curr.content.replace(r"\end{split}", r"\end{aligned}")

            # remove extra spaces (keeping only one)
            curr.content = re.sub(r" +", " ", curr.content)
            assert curr.content.startswith("$") and curr.content.endswith("$")
            curr.content = " $" + curr.content.strip("$") + "$ "
        md += curr.content
        prev = curr

    return md.strip()
-
-
-import re
-
-from ..utils.latex import change_all
-from .format import format_latex
-
-
def _rm_dollar_surr(content):
    """
    Replace bare ``$...$`` spans with their content padded by one space per side.

    Spans written directly after a command (``\\cmd$...$``) are left untouched.
    The substitution is position-based, so a bare ``$x$`` is rewritten without
    also altering an identical ``$x$`` that belongs to a protected ``\\cmd$x$``.

    Args:
        content: Formula text to process

    Returns:
        The text with bare dollar-delimited spans unwrapped
    """
    pattern = re.compile(r"\\[a-zA-Z]+\$.*?\$|\$.*?\$")

    def _unwrap(match):
        span = match.group(0)
        # A match either starts with "$" (bare span) or with "\cmd$" (protected).
        if not span.startswith("$"):
            return span
        return " " + span.strip("$") + " "

    return pattern.sub(_unwrap, content)
-
-
-
-[docs]
def to_katex(formula: str) -> str:
    """
    Convert LaTeX formula to KaTeX-compatible format.

    A fixed sequence of rewrites is applied to the input: box-like wrappers
    are unwrapped, size and bold commands are normalised, ``\\[...\\]`` is
    turned into a trailing ``\\newline``, spacing commands are collapsed,
    adjacent ``\\text{...}`` groups are merged, bare ``$...$`` spans are
    unwrapped, and the result is passed through ``format_latex``.

    Args:
        formula: LaTeX formula string to convert

    Returns:
        KaTeX-compatible formula string
    """
    text = formula

    # remove mbox surrounding
    for command in (r"\mbox ", r"\mbox"):
        text = change_all(text, command, r" ", r"{", r"}", r"", r"")

    # remove hbox surrounding
    text = re.sub(r"\\hbox to ?-? ?\d+\.\d+(pt)?\{", r"\\hbox{", text)
    text = change_all(text, r"\hbox", r" ", r"{", r"}", r"", r" ")

    # remove raise surrounding
    text = re.sub(r"\\raise ?-? ?\d+\.\d+(pt)?", r" ", text)

    # remove makebox
    text = re.sub(r"\\makebox ?\[\d+\.\d+(pt)?\]\{", r"\\makebox{", text)
    text = change_all(text, r"\makebox", r" ", r"{", r"}", r"", r" ")

    # remove vbox surrounding, scalebox surrounding
    text = re.sub(r"\\raisebox\{-? ?\d+\.\d+(pt)?\}\{", r"\\raisebox{", text)
    text = re.sub(r"\\scalebox\{-? ?\d+\.\d+(pt)?\}\{", r"\\scalebox{", text)
    for command in (r"\scalebox", r"\raisebox", r"\vbox"):
        text = change_all(text, command, r" ", r"{", r"}", r"", r" ")

    # size commands keep their name; "$...$" around the argument becomes "{...}"
    size_commands = (
        r"\Huge",
        r"\huge",
        r"\LARGE",
        r"\Large",
        r"\large",
        r"\normalsize",
        r"\small",
        r"\footnotesize",
        r"\tiny",
    )
    for command in size_commands:
        text = change_all(text, command, command, r"$", r"$", "{", "}")

    # (old command, new command, old opener, old closer); the new argument is
    # always wrapped in braces. The order matters and mirrors the pipeline.
    renames = (
        (r"\mathbf", r"\bm", r"{", r"}"),
        (r"\boldmath ", r"\bm", r"{", r"}"),
        (r"\boldmath", r"\bm", r"{", r"}"),
        (r"\boldmath ", r"\bm", r"$", r"$"),
        (r"\boldmath", r"\bm", r"$", r"$"),
        (r"\scriptsize", r"\scriptsize", r"$", r"$"),
        (r"\emph", r"\textit", r"{", r"}"),
        (r"\emph ", r"\textit", r"{", r"}"),
    )
    for old_cmd, new_cmd, opener, closer in renames:
        text = change_all(text, old_cmd, new_cmd, opener, closer, r"{", r"}")

    # remove bold command
    text = change_all(text, r"\bm", r" ", r"{", r"}", r"", r"")

    # delimiter sizing commands: keep the command, drop braces around its argument
    delimiter_commands = (
        r"\left",
        r"\middle",
        r"\right",
        r"\big",
        r"\Big",
        r"\bigg",
        r"\Bigg",
        r"\bigl",
        r"\Bigl",
        r"\biggl",
        r"\Biggl",
        r"\bigm",
        r"\Bigm",
        r"\biggm",
        r"\Biggm",
        r"\bigr",
        r"\Bigr",
        r"\biggr",
        r"\Biggr",
    )
    for command in delimiter_commands:
        text = change_all(text, command, command, r"{", r"}", r"", r"")

    text = re.sub(r"\\\[(.*?)\\\]", r"\1\\newline", text)

    # a trailing \newline (8 characters) is dropped
    if text.endswith(r"\newline"):
        text = text[:-8]

    # remove multiple spaces
    for spacing in (r"(\\,){1,}", r"(\\!){1,}", r"(\\;){1,}", r"(\\:){1,}"):
        text = re.sub(spacing, " ", text)
    text = re.sub(r"\\vspace\{.*?}", "", text)

    # merge consecutive text
    def join_text_groups(match):
        pieces = re.findall(r"\\text\{([^}]*)\}", match.group(0))
        joined = "".join(pieces)
        return f"\\text{{{joined}}}"

    text = re.sub(r"(\\text\{[^}]*\}\s*){2,}", join_text_groups, text)

    text = text.replace(r"\bf ", "")
    text = _rm_dollar_surr(text)

    # remove extra spaces (keeping only one)
    text = re.sub(r" +", " ", text)

    # format latex
    return format_latex(text.strip())
-
-
-from pathlib import Path
-
-import wget
-from onnxruntime import InferenceSession
-from transformers import RobertaTokenizerFast
-
-from texteller.constants import LATEX_DET_MODEL_URL, TEXT_DET_MODEL_URL, TEXT_REC_MODEL_URL
-from texteller.globals import Globals
-from texteller.logger import get_logger
-from texteller.models import TexTeller
-from texteller.paddleocr import predict_det, predict_rec
-from texteller.paddleocr.utility import parse_args
-from texteller.utils import cuda_available, mkdir, resolve_path
-from texteller.types import TexTellerModel
-
-_logger = get_logger(__name__)
-
-
-
-[docs]
def load_model(model_dir: str | None = None, use_onnx: bool = False) -> TexTellerModel:
    """
    Load the TexTeller model used for LaTeX recognition.

    Thin wrapper around ``TexTeller.from_pretrained``; both arguments are
    forwarded unchanged.

    Args:
        model_dir: Directory containing the model files. If None, uses the default model.
        use_onnx: Whether to load the ONNX version of the model for faster inference.
            Requires the 'optimum' package and ONNX Runtime.

    Returns:
        Loaded TexTeller model instance

    Example:
        >>> from texteller import load_model
        >>>
        >>> model = load_model(use_onnx=True)
    """
    model = TexTeller.from_pretrained(model_dir, use_onnx=use_onnx)
    return model
-
-
-
-
-[docs]
def load_tokenizer(tokenizer_dir: str | None = None) -> RobertaTokenizerFast:
    """
    Load the tokenizer that pairs with the TexTeller model.

    Thin wrapper around ``TexTeller.get_tokenizer``.

    Args:
        tokenizer_dir: Directory containing the tokenizer files. If None, uses the default tokenizer.

    Returns:
        RobertaTokenizerFast instance

    Example:
        >>> from texteller import load_tokenizer
        >>>
        >>> tokenizer = load_tokenizer()
    """
    tokenizer = TexTeller.get_tokenizer(tokenizer_dir)
    return tokenizer
-
-
-
-
-[docs]
def load_latexdet_model() -> InferenceSession:
    """
    Load the LaTeX detection model.

    The model file for ``LATEX_DET_MODEL_URL`` is fetched through
    ``_maybe_download`` and opened as an ONNX ``InferenceSession``. The CUDA
    execution provider is requested when ``cuda_available()`` is true,
    otherwise the CPU provider.

    Returns:
        ONNX InferenceSession for LaTeX detection

    Example:
        >>> from texteller import load_latexdet_model
        >>>
        >>> detector = load_latexdet_model()
    """
    model_file = _maybe_download(LATEX_DET_MODEL_URL)
    model_path = resolve_path(model_file)
    if cuda_available():
        provider = "CUDAExecutionProvider"
    else:
        provider = "CPUExecutionProvider"
    return InferenceSession(model_path, providers=[provider])
-
-
-
-
-[docs]
def load_textrec_model() -> predict_rec.TextRecognizer:
    """
    Load the text recognition model.

    The model file for ``TEXT_REC_MODEL_URL`` is fetched through
    ``_maybe_download``. A PaddleOCR argument object from ``parse_args()`` is
    then configured for ONNX inference with that file, and GPU use follows
    ``cuda_available()``.

    Returns:
        PaddleOCR TextRecognizer instance

    Example:
        >>> from texteller import load_textrec_model
        >>>
        >>> text_recognizer = load_textrec_model()
    """
    model_file = _maybe_download(TEXT_REC_MODEL_URL)

    args = parse_args()
    args.use_onnx = True
    args.rec_model_dir = resolve_path(model_file)
    args.use_gpu = cuda_available()

    return predict_rec.TextRecognizer(args)
-
-
-
-
-[docs]
def load_textdet_model() -> predict_det.TextDetector:
    """
    Load the text detection model.

    The model file for ``TEXT_DET_MODEL_URL`` is fetched through
    ``_maybe_download``. A PaddleOCR argument object from ``parse_args()`` is
    then configured for ONNX inference with that file, and GPU use follows
    ``cuda_available()``.

    Returns:
        PaddleOCR TextDetector instance

    Example:
        >>> from texteller import load_textdet_model
        >>>
        >>> text_detector = load_textdet_model()
    """
    model_file = _maybe_download(TEXT_DET_MODEL_URL)

    args = parse_args()
    args.use_onnx = True
    args.det_model_dir = resolve_path(model_file)
    args.use_gpu = cuda_available()

    return predict_det.TextDetector(args)
-
-
-
def _maybe_download(url: str, dirpath: str | None = None, force: bool = False) -> Path:
    """
    Download a file if it doesn't already exist.

    The local file name is the last component of the URL *path*, so a query
    string or fragment in ``url`` does not end up in the file name.

    Args:
        url: URL to download from
        dirpath: Directory to save the file in. If None, uses the default cache
            directory (``Globals().cache_dir``). The directory is created if needed.
        force: Whether to force download even if the file already exists

    Returns:
        Path to the downloaded file
    """
    from urllib.parse import urlparse

    if dirpath is None:
        dirpath = Globals().cache_dir
    # Ensure the target directory exists for default and caller-supplied paths alike.
    mkdir(dirpath)

    fname = Path(urlparse(url).path).name
    fpath = Path(dirpath) / fname
    if force or not fpath.exists():
        _logger.info(f"Downloading {fname} from {url} to {fpath}")
        # NOTE(review): confirm that wget.download overwrites an existing
        # target when force=True rather than writing to a renamed file.
        wget.download(url, resolve_path(fpath))

    return fpath
-Convert images to LaTeX or KaTeX formatted strings.
-model – The TexTeller or ORTModelForVision2Seq model instance
tokenizer – The tokenizer for the model
images – List of image paths or numpy arrays (RGB format)
device – The torch device to use (defaults to available GPU or CPU)
out_format – Output format, either “latex” or “katex”
keep_style – Whether to keep the style of the LaTeX
max_tokens – Maximum number of tokens to generate
num_beams – Number of beams for beam search
no_repeat_ngram_size – Size of n-grams to prevent repetition
List of LaTeX or KaTeX strings corresponding to each input image
-Example
->>> import torch
->>> from texteller import load_model, load_tokenizer, img2latex
->>>
->>> model = load_model(model_dir=None, use_onnx=False)
->>> tokenizer = load_tokenizer(tokenizer_dir=None)
->>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
->>>
->>> res = img2latex(model, tokenizer, ["path/to/image.png"], device=device, out_format="katex")
-Convert an image containing both text and mathematical formulas to markdown format.
-This function processes a mixed-content image by:
-1. Detecting mathematical formulas using a latex detection model
-2. Masking detected formula areas and detecting text regions using OCR
-3. Recognizing text in the detected regions
-4. Converting formula regions to LaTeX using the latex recognition model
-5. Combining all detected elements into a properly formatted markdown string
-img_path – Path to the input image containing text and formulas
latexdet_model – ONNX InferenceSession for LaTeX formula detection
textdet_model – OCR text detector model
textrec_model – OCR text recognition model
latexrec_model – TexTeller model for LaTeX formula recognition
tokenizer – Tokenizer for the LaTeX recognition model
device – The torch device to use (defaults to available GPU or CPU)
num_beams – Number of beams for beam search during LaTeX generation
Markdown formatted string containing the recognized text and formulas
-Example
->>> from texteller import load_latexdet_model, load_textdet_model, load_textrec_model, load_tokenizer, paragraph2md
->>>
->>> # Load all required models
->>> latexdet_model = load_latexdet_model()
->>> textdet_model = load_textdet_model()
->>> textrec_model = load_textrec_model()
->>> latexrec_model = load_model()
->>> tokenizer = load_tokenizer()
->>>
->>> # Convert image to markdown
->>> markdown_text = paragraph2md(
-... img_path="path/to/mixed_content_image.jpg",
-... latexdet_model=latexdet_model,
-... textdet_model=textdet_model,
-... textrec_model=textrec_model,
-... latexrec_model=latexrec_model,
-... tokenizer=tokenizer,
-... )
-Detect LaTeX formulas in an image and classify them as isolated or embedded.
-This function uses an ONNX model to detect LaTeX formulas in images. The model identifies two types of LaTeX formulas:
-- ‘isolated’: Standalone LaTeX formulas (typically displayed equations)
-- ‘embedding’: Inline LaTeX formulas embedded within text
-img_path – Path to the input image file
predictor – ONNX InferenceSession model for LaTeX detection
List of Bbox objects representing the detected LaTeX formulas with their positions, classifications, and confidence scores
-Example
->>> from texteller.api import load_latexdet_model, latex_detect
->>> model = load_latexdet_model()
->>> bboxes = latex_detect("path/to/image.png", model)
-Load the TexTeller model for LaTeX recognition.
-This function loads the main TexTeller model, which is responsible for converting images to LaTeX. It can load either the standard PyTorch model or the optimized ONNX version.
-model_dir – Directory containing the model files. If None, uses the default model.
use_onnx – Whether to load the ONNX version of the model for faster inference. Requires the ‘optimum’ package and ONNX Runtime.
Loaded TexTeller model instance
-Example
->>> from texteller import load_model
->>>
->>> model = load_model(use_onnx=True)
-Load the tokenizer for the TexTeller model.
-This function loads the tokenizer used by the TexTeller model for encoding and decoding LaTeX sequences.
-tokenizer_dir – Directory containing the tokenizer files. If None, uses the default tokenizer.
-RobertaTokenizerFast instance
-Example
->>> from texteller import load_tokenizer
->>>
->>> tokenizer = load_tokenizer()
-Load the LaTeX detection model.
-This function loads the model responsible for detecting LaTeX formulas in images. The model is implemented as an ONNX InferenceSession for optimal performance.
-ONNX InferenceSession for LaTeX detection
-Example
->>> from texteller import load_latexdet_model
->>>
->>> detector = load_latexdet_model()
-Load the text detection model.
-This function loads the model responsible for detecting text regions in images. It’s based on PaddleOCR’s text detection model.
-PaddleOCR TextDetector instance
-Example
->>> from texteller import load_textdet_model
->>>
->>> text_detector = load_textdet_model()
-Load the text recognition model.
-This function loads the model responsible for recognizing regular text in images. It’s based on PaddleOCR’s text recognition model.
-PaddleOCR TextRecognizer instance
-Example
->>> from texteller import load_textrec_model
->>>
->>> text_recognizer = load_textrec_model()
-Convert LaTeX formula to KaTeX-compatible format.
-This function processes a LaTeX formula string and converts it to a format that is compatible with KaTeX rendering. It removes unsupported commands and structures, simplifies LaTeX environments, and optimizes the formula for web display.
-formula – LaTeX formula string to convert
-KaTeX-compatible formula string
-| - |
| - | - |
| - |
| - |