70 lines
2.2 KiB
Python
70 lines
2.2 KiB
Python
from typing import List
|
|
|
|
from onnxruntime import InferenceSession
|
|
|
|
from texteller.types import Bbox
|
|
|
|
from .preprocess import Compose
|
|
|
|
_config = {
|
|
"mode": "paddle",
|
|
"draw_threshold": 0.5,
|
|
"metric": "COCO",
|
|
"use_dynamic_shape": False,
|
|
"arch": "DETR",
|
|
"min_subgraph_size": 3,
|
|
"preprocess": [
|
|
{"interp": 2, "keep_ratio": False, "target_size": [1600, 1600], "type": "Resize"},
|
|
{
|
|
"mean": [0.0, 0.0, 0.0],
|
|
"norm_type": "none",
|
|
"std": [1.0, 1.0, 1.0],
|
|
"type": "NormalizeImage",
|
|
},
|
|
{"type": "Permute"},
|
|
],
|
|
"label_list": ["isolated", "embedding"],
|
|
}
|
|
|
|
|
|
def latex_detect(img_path: str, predictor: InferenceSession) -> List[Bbox]:
    """
    Detect LaTeX formulas in an image and classify them as isolated or embedded.

    This function uses an ONNX model to detect LaTeX formulas in images. The model
    identifies two types of LaTeX formulas:
    - 'isolated': Standalone LaTeX formulas (typically displayed equations)
    - 'embedding': Inline LaTeX formulas embedded within text

    Args:
        img_path: Path to the input image file
        predictor: ONNX InferenceSession model for LaTeX detection

    Returns:
        List of Bbox objects representing the detected LaTeX formulas with their
        positions, classifications, and confidence scores. Only detections whose
        score is strictly greater than ``_config["draw_threshold"]`` and whose
        class index is a valid position in ``_config["label_list"]`` are
        returned.

    Example:
        >>> from texteller.api import load_latexdet_model, latex_detect
        >>> model = load_latexdet_model()
        >>> bboxes = latex_detect("path/to/image.png", model)
    """
    transforms = Compose(_config["preprocess"])
    inputs = transforms(img_path)
    inputs_name = [var.name for var in predictor.get_inputs()]
    # Feed only the entries the model declares as inputs, each with a new
    # leading axis added by the [None,] index.
    inputs = {k: inputs[k][None,] for k in inputs_name}

    # Only the first model output is consumed. Each row is read below as
    # [class_id, score, xmin, ymin, xmax, ymax].
    outputs = predictor.run(output_names=None, input_feed=inputs)[0]

    labels = _config["label_list"]
    # Single source of truth for the cutoff instead of a duplicated literal.
    threshold = _config["draw_threshold"]

    res = []
    for output in outputs:
        score = output[1]
        # Filter on score before touching the label table so that rows which
        # are discarded anyway can never raise on an unexpected class id.
        if not score > threshold:
            continue

        cls_id = int(output[0])
        # A negative id would silently wrap around to the last label and an
        # id past the end would raise IndexError; neither is a usable result.
        # NOTE(review): presumably the model marks padding rows with a
        # negative class id -- confirm against the exported model.
        if not 0 <= cls_id < len(labels):
            continue

        # The top-left corner is clamped to the image origin.
        xmin = int(max(output[2], 0))
        ymin = int(max(output[3], 0))
        xmax = int(output[4])
        ymax = int(output[5])
        # Bbox receives the height before the width here.
        res.append(Bbox(xmin, ymin, ymax - ymin, xmax - xmin, labels[cls_id], score))

    return res
|