[docs] Add comprehensive function documentation

2025-04-21 02:34:56 +00:00
parent 2c9ce6b6c1
commit 789006894c
4 changed files with 169 additions and 4 deletions
--- a/texteller/api/load.py
+++ b/texteller/api/load.py
@@ -17,14 +17,65 @@ _logger = get_logger(__name__)


 def load_model(model_dir: str | None = None, use_onnx: bool = False) -> TexTellerModel:
+    """
+    Load the TexTeller model for LaTeX recognition.
+
+    This function loads the main TexTeller model, which is responsible for
+    converting images to LaTeX. It can load either the standard PyTorch model
+    or the optimized ONNX version.
+
+    Args:
+        model_dir: Directory containing the model files. If None, uses the default model.
+        use_onnx: Whether to load the ONNX version of the model for faster inference.
+                  Requires the 'optimum' package and ONNX Runtime.
+
+    Returns:
+        Loaded TexTeller model instance
+
+    Example:
+        >>> from texteller import load_model
+        >>>
+        >>> model = load_model(use_onnx=True)
+    """
    return TexTeller.from_pretrained(model_dir, use_onnx=use_onnx)


 def load_tokenizer(tokenizer_dir: str | None = None) -> RobertaTokenizerFast:
+    """
+    Load the tokenizer for the TexTeller model.
+
+    This function loads the tokenizer used by the TexTeller model for
+    encoding and decoding LaTeX sequences.
+
+    Args:
+        tokenizer_dir: Directory containing the tokenizer files. If None, uses the default tokenizer.
+
+    Returns:
+        RobertaTokenizerFast instance
+
+    Example:
+        >>> from texteller import load_tokenizer
+        >>>
+        >>> tokenizer = load_tokenizer()
+    """
    return TexTeller.get_tokenizer(tokenizer_dir)


 def load_latexdet_model() -> InferenceSession:
+    """
+    Load the LaTeX detection model.
+
+    This function loads the model responsible for detecting LaTeX formulas in images.
+    The model file is downloaded if needed and loaded as an ONNX InferenceSession.
+
+    Returns:
+        ONNX InferenceSession for LaTeX detection
+
+    Example:
+        >>> from texteller import load_latexdet_model
+        >>>
+        >>> detector = load_latexdet_model()
+    """
    fpath = _maybe_download(LATEX_DET_MODEL_URL)
    return InferenceSession(
        resolve_path(fpath),
@@ -33,6 +84,20 @@ def load_latexdet_model() -> InferenceSession:


 def load_textrec_model() -> predict_rec.TextRecognizer:
+    """
+    Load the text recognition model.
+
+    This function loads the model responsible for recognizing regular text in images.
+    It's based on PaddleOCR's text recognition model.
+
+    Returns:
+        PaddleOCR TextRecognizer instance
+
+    Example:
+        >>> from texteller import load_textrec_model
+        >>>
+        >>> text_recognizer = load_textrec_model()
+    """
    fpath = _maybe_download(TEXT_REC_MODEL_URL)
    paddleocr_args = parse_args()
    paddleocr_args.use_onnx = True
@@ -43,6 +108,20 @@ def load_textrec_model() -> predict_rec.TextRecognizer:


 def load_textdet_model() -> predict_det.TextDetector:
+    """
+    Load the text detection model.
+
+    This function loads the model responsible for detecting text regions in images.
+    It's based on PaddleOCR's text detection model.
+
+    Returns:
+        PaddleOCR TextDetector instance
+
+    Example:
+        >>> from texteller import load_textdet_model
+        >>>
+        >>> text_detector = load_textdet_model()
+    """
    fpath = _maybe_download(TEXT_DET_MODEL_URL)
    paddleocr_args = parse_args()
    paddleocr_args.use_onnx = True
@@ -53,6 +132,17 @@ def load_textdet_model() -> predict_det.TextDetector:


 def _maybe_download(url: str, dirpath: str | None = None, force: bool = False) -> Path:
+    """
+    Download a file if it doesn't already exist.
+
+    Args:
+        url: URL to download from
+        dirpath: Directory to save the file in. If None, uses the default cache directory.
+        force: Whether to force download even if the file already exists
+
+    Returns:
+        Path to the file on disk (newly downloaded or already present)
+    """
    if dirpath is None:
        dirpath = Globals().cache_dir
    mkdir(dirpath)