Files
TexTeller/_modules/texteller/api/detection/detect.html

454 lines
21 KiB
HTML

<!DOCTYPE html>
<html lang="en" data-content_root="../../../../" >
<head>
  <meta charset="utf-8" />
  <!-- The only viewport declaration for this page. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>texteller.api.detection.detect &#8212; TexTeller documentation</title>
  <!-- Copies the saved "mode" and "theme" values from localStorage onto <html> before the stylesheets below are requested. -->
  <script data-cfasync="false">
    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
  </script>
  <!-- Loaded before other Sphinx assets -->
  <link href="../../../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
  <link href="../../../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
  <link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
  <link href="../../../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
  <!-- Font preloads carry crossorigin, which font fetches require for the preload to be reused. -->
  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
  <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
  <link rel="stylesheet" type="text/css" href="../../../../_static/styles/sphinx-book-theme.css?v=eba8b062" />
  <link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
  <link rel="stylesheet" type="text/css" href="../../../../_static/sphinx-design.min.css?v=95c83b7e" />
  <!-- Pre-loaded scripts that we'll load fully later -->
  <link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
  <link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
  <script src="../../../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
  <script src="../../../../_static/documentation_options.js?v=9eb32ce0"></script>
  <script src="../../../../_static/doctools.js?v=9bcbadda"></script>
  <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
  <script src="../../../../_static/clipboard.min.js?v=a7894cd8"></script>
  <script src="../../../../_static/copybutton.js?v=f281be69"></script>
  <script src="../../../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
  <script src="../../../../_static/design-tabs.js?v=f930bc37"></script>
  <!-- Third-party script: pinned with a Subresource Integrity hash and fetched anonymously. -->
  <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
  <!-- Must run after documentation_options.js, which is expected to define DOCUMENTATION_OPTIONS. -->
  <script>DOCUMENTATION_OPTIONS.pagename = '_modules/texteller/api/detection/detect';</script>
  <link rel="index" title="Index" href="../../../../genindex.html" />
  <link rel="search" title="Search" href="../../../../search.html" />
  <meta name="docsearch:language" content="en"/>
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<!-- Skip link: points at the #main-content landmark. -->
<div class="skip-link d-print-none" id="pst-skip-link"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<!-- Back-to-top control. -->
<button class="btn rounded-pill" id="pst-back-to-top" type="button">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<!-- Sidebar state: each checkbox is paired with an overlay label through matching for/id values. -->
<input class="sidebar-toggle" id="pst-primary-sidebar-checkbox" type="checkbox">
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
<input class="sidebar-toggle" id="pst-secondary-sidebar-checkbox" type="checkbox">
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
<!-- Search overlay: a GET form that sends the query as "q" to search.html. -->
<div class="search-button__wrapper">
  <div class="search-button__overlay"></div>
  <div class="search-button__search-container">
    <form class="bd-search d-flex align-items-center" action="../../../../search.html" method="get">
      <i class="fa-solid fa-magnifying-glass"></i>
      <!-- No visible label; the accessible name comes from aria-label. -->
      <input class="form-control" id="search-input" name="q" type="search"
             placeholder="Search..." aria-label="Search..."
             autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false">
      <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
    </form>
  </div>
</div>
<!-- Version-warning banner placeholder; both wrapper and aside carry the d-none class in this markup. -->
<div class="pst-async-banner-revealer d-none">
  <aside class="d-none d-print-none" id="bd-header-version-warning" aria-label="Version warning"></aside>
</div>
<!-- Top navbar header; it has no child elements on this page. -->
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<!-- Primary sidebar: brand logo, search button and the main documentation navigation. -->
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<!-- Logo links to the documentation index. The "only-light" image is static markup; the "only-dark" copy is injected with document.write. -->
<a class="navbar-brand logo" href="../../../../index.html">
<img src="../../../../_static/logo.svg" class="logo__image only-light" alt="TexTeller documentation - Home"/>
<script>document.write(`<img src="../../../../_static/logo.svg" class="logo__image only-dark" alt="TexTeller documentation - Home"/>`);</script>
</a></div>
<div class="sidebar-primary-item">
<!-- Search button is emitted by script, so it is absent from the DOM when scripts do not run. -->
<script>
document.write(`
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script></div>
<!-- Main navigation landmark (aria-label "Main") with a single top-level entry. -->
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
<div class="bd-toc-item navbar-nav active">
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../../../../api.html">API Reference</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<!-- NOTE(review): empty container, presumably filled by the hosting platform at runtime; confirm. -->
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="sbt-scroll-pixel-helper"></div>
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<!-- Primary sidebar toggle. It is icon-only, so it carries an explicit accessible name and an explicit button type. -->
<div class="header-article-item"><button type="button" class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" aria-label="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="fa-solid fa-bars" aria-hidden="true"></span>
</button></div>
</div>
<div class="header-article-items__end">
<div class="header-article-item">
<div class="article-header-buttons">
<!-- Source-repository dropdown: a link to the GitHub repository and a link to a pre-filled new-issue form. -->
<div class="dropdown dropdown-source-buttons">
  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
    <i class="fab fa-github"></i>
  </button>
  <ul class="dropdown-menu">
    <!-- Both links open a new tab; rel="noopener" keeps the opened page from reaching window.opener. -->
    <li><a href="https://github.com/OleehyO/TexTeller" target="_blank" rel="noopener"
           class="btn btn-sm btn-source-repository-button dropdown-item"
           title="Source repository"
           data-bs-placement="left" data-bs-toggle="tooltip"
        >
        <span class="btn__icon-container">
          <i class="fab fa-github"></i>
        </span>
        <span class="btn__text-container">Repository</span>
      </a>
    </li>
    <!-- The query-string separator is written as &amp; so the attribute value is unambiguous markup. -->
    <li><a href="https://github.com/OleehyO/TexTeller/issues/new?title=Issue%20on%20page%20%2F_modules/texteller/api/detection/detect.html&amp;body=Your%20issue%20content%20here." target="_blank" rel="noopener"
           class="btn btn-sm btn-source-issues-button dropdown-item"
           title="Open an issue"
           data-bs-placement="left" data-bs-toggle="tooltip"
        >
        <span class="btn__icon-container">
          <i class="fas fa-lightbulb"></i>
        </span>
        <span class="btn__text-container">Open issue</span>
      </a>
    </li>
  </ul>
</div>
<!-- Fullscreen toggle. toggleFullScreen() is not defined in this file; it is expected to come from one of the loaded scripts. -->
<!-- Icon-only control: aria-label supplies the accessible name, type="button" makes the non-submit intent explicit. -->
<button type="button" onclick="toggleFullScreen()"
        class="btn btn-sm btn-fullscreen-button"
        title="Fullscreen mode"
        aria-label="Fullscreen mode"
        data-bs-placement="bottom" data-bs-toggle="tooltip"
>
  <span class="btn__icon-container">
    <i class="fas fa-expand"></i>
  </span>
</button>
<!-- Theme switcher: written with document.write, so the button exists only when scripts run. It holds one icon each for the "light", "dark" and "auto" data-mode values. -->
<script>
document.write(`
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
</button>
`);
</script>
<!-- Header search button: also script-generated; icon-only, named through aria-label. -->
<script>
document.write(`
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
</button>
`);
</script>
</div></div>
</div>
</div>
</div>
<!-- Print-only container (class "onlyprint") holding a heading and a table-of-contents placeholder. -->
<div id="jb-print-docs-body" class="onlyprint">
<!-- NOTE(review): this heading is empty on this page, and the article below has its own h1; confirm whether a title was meant to be rendered here. -->
<h1></h1>
<!-- Table of contents -->
<div id="print-main-content">
<div id="jb-print-toc">
</div>
</div>
</div>
<!-- Empty container with id "searchbox"; nothing in this file populates it. -->
<div id="searchbox"></div>
<!-- Highlighted listing of the module source. Whitespace inside <pre> is rendered verbatim, so the Python indentation below is part of the content. -->
<article class="bd-article">
<h1>Source code for texteller.api.detection.detect</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">List</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">onnxruntime</span><span class="w"> </span><span class="kn">import</span> <span class="n">InferenceSession</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">texteller.types</span><span class="w"> </span><span class="kn">import</span> <span class="n">Bbox</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">.preprocess</span><span class="w"> </span><span class="kn">import</span> <span class="n">Compose</span>
<span class="n">_config</span> <span class="o">=</span> <span class="p">{</span>
    <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;paddle&quot;</span><span class="p">,</span>
    <span class="s2">&quot;draw_threshold&quot;</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span>
    <span class="s2">&quot;metric&quot;</span><span class="p">:</span> <span class="s2">&quot;COCO&quot;</span><span class="p">,</span>
    <span class="s2">&quot;use_dynamic_shape&quot;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
    <span class="s2">&quot;arch&quot;</span><span class="p">:</span> <span class="s2">&quot;DETR&quot;</span><span class="p">,</span>
    <span class="s2">&quot;min_subgraph_size&quot;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span>
    <span class="s2">&quot;preprocess&quot;</span><span class="p">:</span> <span class="p">[</span>
        <span class="p">{</span><span class="s2">&quot;interp&quot;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;keep_ratio&quot;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s2">&quot;target_size&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1600</span><span class="p">,</span> <span class="mi">1600</span><span class="p">],</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;Resize&quot;</span><span class="p">},</span>
        <span class="p">{</span>
            <span class="s2">&quot;mean&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">],</span>
            <span class="s2">&quot;norm_type&quot;</span><span class="p">:</span> <span class="s2">&quot;none&quot;</span><span class="p">,</span>
            <span class="s2">&quot;std&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">],</span>
            <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;NormalizeImage&quot;</span><span class="p">,</span>
        <span class="p">},</span>
        <span class="p">{</span><span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;Permute&quot;</span><span class="p">},</span>
    <span class="p">],</span>
    <span class="s2">&quot;label_list&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;isolated&quot;</span><span class="p">,</span> <span class="s2">&quot;embedding&quot;</span><span class="p">],</span>
<span class="p">}</span>
<div class="viewcode-block" id="latex_detect">
<a class="viewcode-back" href="../../../../api.html#texteller.api.detection.latex_detect">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">latex_detect</span><span class="p">(</span><span class="n">img_path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">predictor</span><span class="p">:</span> <span class="n">InferenceSession</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">Bbox</span><span class="p">]:</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Detect LaTeX formulas in an image and classify them as isolated or embedded.</span>
<span class="sd">    This function uses an ONNX model to detect LaTeX formulas in images. The model</span>
<span class="sd">    identifies two types of LaTeX formulas:</span>
<span class="sd">    - &#39;isolated&#39;: Standalone LaTeX formulas (typically displayed equations)</span>
<span class="sd">    - &#39;embedding&#39;: Inline LaTeX formulas embedded within text</span>
<span class="sd">    Args:</span>
<span class="sd">        img_path: Path to the input image file</span>
<span class="sd">        predictor: ONNX InferenceSession model for LaTeX detection</span>
<span class="sd">    Returns:</span>
<span class="sd">        List of Bbox objects representing the detected LaTeX formulas with their</span>
<span class="sd">        positions, classifications, and confidence scores</span>
<span class="sd">    Example:</span>
<span class="sd">        &gt;&gt;&gt; from texteller.api import load_latexdet_model, latex_detect</span>
<span class="sd">        &gt;&gt;&gt; model = load_latexdet_model()</span>
<span class="sd">        &gt;&gt;&gt; bboxes = latex_detect(&quot;path/to/image.png&quot;, model)</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">transforms</span> <span class="o">=</span> <span class="n">Compose</span><span class="p">(</span><span class="n">_config</span><span class="p">[</span><span class="s2">&quot;preprocess&quot;</span><span class="p">])</span>
    <span class="n">inputs</span> <span class="o">=</span> <span class="n">transforms</span><span class="p">(</span><span class="n">img_path</span><span class="p">)</span>
    <span class="n">inputs_name</span> <span class="o">=</span> <span class="p">[</span><span class="n">var</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">var</span> <span class="ow">in</span> <span class="n">predictor</span><span class="o">.</span><span class="n">get_inputs</span><span class="p">()]</span>
    <span class="n">inputs</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">inputs</span><span class="p">[</span><span class="n">k</span><span class="p">][</span><span class="kc">None</span><span class="p">,]</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">inputs_name</span><span class="p">}</span>
    <span class="n">outputs</span> <span class="o">=</span> <span class="n">predictor</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">output_names</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">input_feed</span><span class="o">=</span><span class="n">inputs</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
    <span class="n">res</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="k">for</span> <span class="n">output</span> <span class="ow">in</span> <span class="n">outputs</span><span class="p">:</span>
        <span class="n">cls_name</span> <span class="o">=</span> <span class="n">_config</span><span class="p">[</span><span class="s2">&quot;label_list&quot;</span><span class="p">][</span><span class="nb">int</span><span class="p">(</span><span class="n">output</span><span class="p">[</span><span class="mi">0</span><span class="p">])]</span>
        <span class="n">score</span> <span class="o">=</span> <span class="n">output</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
        <span class="n">xmin</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="nb">max</span><span class="p">(</span><span class="n">output</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="mi">0</span><span class="p">))</span>
        <span class="n">ymin</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="nb">max</span><span class="p">(</span><span class="n">output</span><span class="p">[</span><span class="mi">3</span><span class="p">],</span> <span class="mi">0</span><span class="p">))</span>
        <span class="n">xmax</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">output</span><span class="p">[</span><span class="mi">4</span><span class="p">])</span>
        <span class="n">ymax</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">output</span><span class="p">[</span><span class="mi">5</span><span class="p">])</span>
        <span class="k">if</span> <span class="n">score</span> <span class="o">&gt;</span> <span class="mf">0.5</span><span class="p">:</span>
            <span class="n">res</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Bbox</span><span class="p">(</span><span class="n">xmin</span><span class="p">,</span> <span class="n">ymin</span><span class="p">,</span> <span class="n">ymax</span> <span class="o">-</span> <span class="n">ymin</span><span class="p">,</span> <span class="n">xmax</span> <span class="o">-</span> <span class="n">xmin</span><span class="p">,</span> <span class="n">cls_name</span><span class="p">,</span> <span class="n">score</span><span class="p">))</span>
    <span class="k">return</span> <span class="n">res</span></div>
</pre></div>
</article>
<!-- Previous/next navigation area; it holds no links on this page. -->
<footer class="prev-next-footer d-print-none">
  <div class="prev-next-area">
  </div>
</footer>
</div>
</div>
<!-- Content footer: author credit and copyright notice, followed by two footer items with no content. -->
<footer class="bd-footer-content">
  <div class="bd-footer-content__inner container">
    <div class="footer-item">
      <p class="component-author">
        By TexTeller Team
      </p>
    </div>
    <div class="footer-item">
      <p class="copyright">
        © Copyright 2025, TexTeller Team.
        <br>
      </p>
    </div>
    <div class="footer-item">
    </div>
    <div class="footer-item">
    </div>
  </div>
</footer>
</main>
</div>
</div>
<!-- Theme scripts placed at the end of <body>, after the page content, so parsing of that content is not held up by them. Both URLs match the script preloads declared in <head>. -->
<script src="../../../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
<script src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
<!-- Page-level footer; it has no child elements on this page. -->
<footer class="bd-footer">
</footer>
</body>
</html>