# texteller/api/detection/detect.py

from typing import List

from onnxruntime import InferenceSession

from texteller.types import Bbox

from .preprocess import Compose

_config = {
    "mode": "paddle",
    "draw_threshold": 0.5,
    "metric": "COCO",
    "use_dynamic_shape": False,
    "arch": "DETR",
    "min_subgraph_size": 3,
    "preprocess": [
        {"interp": 2, "keep_ratio": False, "target_size": [1600, 1600], "type": "Resize"},
        {
            "mean": [0.0, 0.0, 0.0],
            "norm_type": "none",
            "std": [1.0, 1.0, 1.0],
            "type": "NormalizeImage",
        },
        {"type": "Permute"},
    ],
    "label_list": ["isolated", "embedding"],
}


def latex_detect(img_path: str, predictor: InferenceSession) -> List[Bbox]:
    """Detect formula regions in an image with an ONNX detection model.

    The image is run through the transforms listed in
    ``_config["preprocess"]``, fed to ``predictor``, and every detection whose
    score is strictly greater than ``_config["draw_threshold"]`` is kept.

    Args:
        img_path: Path of the image to analyse; passed unchanged to the
            preprocessing pipeline.
        predictor: ONNX Runtime session holding the detection model.

    Returns:
        One ``Bbox`` per kept detection, in the order the model produced
        them. Each box is built from the top-left corner (clamped to be
        non-negative), then height and width, then the class name and score.

    Raises:
        KeyError: If the preprocessing result lacks a tensor for one of the
            model's declared inputs.
        IndexError: If a kept detection carries a class id past the end of
            ``_config["label_list"]``.
    """
    transforms = Compose(_config["preprocess"])
    sample = transforms(img_path)
    # Feed exactly the inputs the model declares. ``[None,]`` prepends an axis
    # (presumably the batch axis of numpy arrays -- confirm against Compose).
    feed = {var.name: sample[var.name][None,] for var in predictor.get_inputs()}

    # Only the first model output is used; each row is read as
    # (class id, score, xmin, ymin, xmax, ymax).
    detections = predictor.run(output_names=None, input_feed=feed)[0]

    labels = _config["label_list"]
    # Read the cut-off from the config instead of repeating the literal, so the
    # two cannot drift apart.
    threshold = _config["draw_threshold"]

    res = []
    for det in detections:
        score = det[1]
        # Filter before the label lookup so that discarded rows can never
        # raise on an unexpected class id. Written as ``not (a > b)`` so NaN
        # scores are dropped, exactly as with a plain ``score > threshold``.
        if not (score > threshold):
            continue

        # NOTE(review): a negative class id would index ``labels`` from the
        # end instead of failing -- confirm the model never emits one.
        cls_name = labels[int(det[0])]
        xmin = int(max(det[2], 0))
        ymin = int(max(det[3], 0))
        xmax = int(det[4])
        ymax = int(det[5])
        res.append(Bbox(xmin, ymin, ymax - ymin, xmax - xmin, cls_name, score))

    return res
[refactor] Init 2025-04-16 14:23:02 +00:00			`from typing import List`

			`from onnxruntime import InferenceSession`

			`from texteller.types import Bbox`

			`from .preprocess import Compose`

			`_config = {`
			`"mode": "paddle",`
			`"draw_threshold": 0.5,`
			`"metric": "COCO",`
			`"use_dynamic_shape": False,`
			`"arch": "DETR",`
			`"min_subgraph_size": 3,`
			`"preprocess": [`
			`{"interp": 2, "keep_ratio": False, "target_size": [1600, 1600], "type": "Resize"},`
			`{`
			`"mean": [0.0, 0.0, 0.0],`
			`"norm_type": "none",`
			`"std": [1.0, 1.0, 1.0],`
			`"type": "NormalizeImage",`
			`},`
			`{"type": "Permute"},`
			`],`
			`"label_list": ["isolated", "embedding"],`
			`}`


			`def latex_detect(img_path: str, predictor: InferenceSession) -> List[Bbox]:`
			`transforms = Compose(_config["preprocess"])`
			`inputs = transforms(img_path)`
			`inputs_name = [var.name for var in predictor.get_inputs()]`
			`inputs = {k: inputs[k][None,] for k in inputs_name}`

			`outputs = predictor.run(output_names=None, input_feed=inputs)[0]`
			`res = []`
			`for output in outputs:`
			`cls_name = _config["label_list"][int(output[0])]`
			`score = output[1]`
			`xmin = int(max(output[2], 0))`
			`ymin = int(max(output[3], 0))`
			`xmax = int(output[4])`
			`ymax = int(output[5])`
			`if score > 0.5:`
			`res.append(Bbox(xmin, ymin, ymax - ymin, xmax - xmin, cls_name, score))`

			`return res`