feat: add glm-ocr core
This commit is contained in:
412
app/services/glm_postprocess.py
Normal file
412
app/services/glm_postprocess.py
Normal file
@@ -0,0 +1,412 @@
|
||||
"""GLM-OCR postprocessing logic adapted for this project.
|
||||
|
||||
Ported from glm-ocr/glmocr/postprocess/result_formatter.py and
|
||||
glm-ocr/glmocr/utils/result_postprocess_utils.py.
|
||||
|
||||
Covers:
|
||||
- Repeated-content / hallucination detection
|
||||
- Per-region content cleaning and formatting (titles, bullets, formulas)
|
||||
- formula_number merging (→ \\tag{})
|
||||
- Hyphenated text-block merging (via wordfreq)
|
||||
- Missing bullet-point detection
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import json
|
||||
from collections import Counter
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
try:
|
||||
from wordfreq import zipf_frequency
|
||||
|
||||
_WORDFREQ_AVAILABLE = True
|
||||
except ImportError:
|
||||
_WORDFREQ_AVAILABLE = False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# result_postprocess_utils (ported)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def find_consecutive_repeat(s: str, min_unit_len: int = 10, min_repeats: int = 10) -> Optional[str]:
    """Detect and truncate a consecutively-repeated pattern.

    Scans *s* for a unit of at least ``min_unit_len`` characters that occurs
    at least ``min_repeats`` times back-to-back (hallucination loops in OCR
    output). Returns the string truncated to one occurrence of the unit, or
    None when no such repetition exists.
    """
    total = len(s)
    if total < min_unit_len * min_repeats:
        return None

    # The unit cannot be longer than total // min_repeats, or min_repeats
    # copies would not fit inside the string.
    longest_unit = total // min_repeats
    if longest_unit < min_unit_len:
        return None

    # Lazy unit capture + backreference: (.{lo,hi}?)\1{k,}
    repeat_re = re.compile(
        rf"(.{{{min_unit_len},{longest_unit}}}?)\1{{{min_repeats - 1},}}",
        re.DOTALL,
    )
    hit = repeat_re.search(s)
    if hit is None:
        return None
    # Keep everything before the run plus a single copy of the unit.
    return s[: hit.start()] + hit.group(1)
|
||||
|
||||
|
||||
def clean_repeated_content(
    content: str,
    min_len: int = 10,
    min_repeats: int = 10,
    line_threshold: int = 10,
) -> str:
    """Remove hallucination-style repeated content (consecutive or line-level).

    Two detection stages:
      1. a single unit repeated back-to-back (delegated to
         ``find_consecutive_repeat``, multi-line aware);
      2. one line dominating the block (>= 80% of non-empty lines, with at
         least three consecutive occurrences) — the content is cut right
         after the first occurrence.
    Returns *content* unchanged when neither stage fires.
    """
    trimmed = content.strip()
    if not trimmed:
        return content

    # Stage 1: consecutive repeats, only worth scanning for long content.
    if len(trimmed) > min_len * min_repeats:
        truncated = find_consecutive_repeat(trimmed, min_unit_len=min_len, min_repeats=min_repeats)
        if truncated is not None:
            return truncated

    # Stage 2: line-level repeats.
    stripped_lines = [ln.strip() for ln in content.split("\n") if ln.strip()]
    total = len(stripped_lines)
    if total >= line_threshold and stripped_lines:
        dominant, occurrences = Counter(stripped_lines).most_common(1)[0]
        if occurrences >= line_threshold and (occurrences / total) >= 0.8:
            for pos, ln in enumerate(stripped_lines):
                if ln != dominant:
                    continue
                # Require at least 3 consecutive copies starting here.
                window = stripped_lines[pos : pos + 3]
                if sum(1 for w in window if w == dominant) >= 3:
                    # Map the non-empty-line position back onto the raw
                    # (possibly blank-padded) lines and cut after it.
                    raw_lines = content.split("\n")
                    seen = 0
                    for raw_idx, raw in enumerate(raw_lines):
                        if raw.strip():
                            seen += 1
                            if seen == pos + 1:
                                return "\n".join(raw_lines[: raw_idx + 1])
                # Only the first occurrence of the dominant line is examined.
                break
    return content
|
||||
|
||||
|
||||
def clean_formula_number(number_content: str) -> str:
    """Strip surrounding parentheses from a formula number, e.g. '(1)' → '1'.

    Handles both ASCII and full-width (CJK) parentheses. Anything else is
    returned whitespace-stripped but otherwise unchanged.

    Args:
        number_content: Raw formula-number text from OCR, e.g. "(12)" or "（12）".

    Returns:
        The bare number/label without the enclosing parentheses.
    """
    s = number_content.strip()
    # ASCII parentheses.
    if s.startswith("(") and s.endswith(")"):
        return s[1:-1]
    # Full-width parentheses. BUG FIX: the original second branch was a
    # byte-identical duplicate of the ASCII check (mojibake of the full-width
    # characters in the upstream GLM-OCR source); restored here.
    if s.startswith("（") and s.endswith("）"):
        return s[1:-1]
    return s
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GLMResultFormatter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Label → canonical category mapping (mirrors GLM-OCR label_visualization_mapping)
|
||||
_LABEL_TO_CATEGORY: Dict[str, str] = {
|
||||
# text
|
||||
"abstract": "text",
|
||||
"algorithm": "text",
|
||||
"content": "text",
|
||||
"doc_title": "text",
|
||||
"figure_title": "text",
|
||||
"paragraph_title": "text",
|
||||
"reference_content": "text",
|
||||
"text": "text",
|
||||
"vertical_text": "text",
|
||||
"vision_footnote": "text",
|
||||
"seal": "text",
|
||||
"formula_number": "text",
|
||||
# table
|
||||
"table": "table",
|
||||
# formula
|
||||
"display_formula": "formula",
|
||||
"inline_formula": "formula",
|
||||
# image (skip OCR)
|
||||
"chart": "image",
|
||||
"image": "image",
|
||||
}
|
||||
|
||||
|
||||
class GLMResultFormatter:
    """Port of GLM-OCR's ResultFormatter for use in our pipeline.

    Accepts a list of region dicts (each with label, native_label, content,
    bbox_2d) and returns a final Markdown string.
    """

    # ------------------------------------------------------------------ #
    # Public entry-point
    # ------------------------------------------------------------------ #

    def process(self, regions: List[Dict[str, Any]]) -> str:
        """Run the full postprocessing pipeline and return Markdown.

        Args:
            regions: List of dicts with keys:
                - index (int) reading order from layout detection
                - label (str) mapped category: text/formula/table/figure
                - native_label (str) raw PP-DocLayout label (e.g. doc_title)
                - content (str) raw OCR output from vLLM
                - bbox_2d (list) [x1, y1, x2, y2] in 0-1000 normalised coords

        Returns:
            Markdown string.
        """
        # Sort by reading order
        items = sorted(deepcopy(regions), key=lambda x: x.get("index", 0))

        # Per-region cleaning + formatting
        processed: List[Dict] = []
        for item in items:
            item["native_label"] = item.get("native_label", item.get("label", "text"))
            item["label"] = self._map_label(item.get("label", "text"), item["native_label"])

            item["content"] = self._format_content(
                item.get("content") or "",
                item["label"],
                item["native_label"],
            )
            # Drop regions whose content cleaned down to nothing.
            if not (item.get("content") or "").strip():
                continue
            processed.append(item)

        # Re-index after the empty-content filter
        for i, item in enumerate(processed):
            item["index"] = i

        # Structural merges
        processed = self._merge_formula_numbers(processed)
        processed = self._merge_text_blocks(processed)
        processed = self._format_bullet_points(processed)

        # Assemble Markdown
        parts: List[str] = []
        for item in processed:
            content = item.get("content") or ""
            if item["label"] == "image":
                # BUG FIX: the original line was the garbled f-string
                # f"})" which emitted a literal "})" for image regions.
                # Reconstructed as a Markdown image link.
                # NOTE(review): confirm the intended path key against the
                # upstream GLM-OCR result_formatter — "image_path" assumed.
                parts.append(f"![]({item.get('image_path', '')})")
            elif content.strip():
                parts.append(content)

        return "\n\n".join(parts)

    # ------------------------------------------------------------------ #
    # Label mapping
    # ------------------------------------------------------------------ #

    def _map_label(self, label: str, native_label: str) -> str:
        """Map a raw label pair to a canonical category (text/table/formula/image)."""
        return _LABEL_TO_CATEGORY.get(native_label, _LABEL_TO_CATEGORY.get(label, "text"))

    # ------------------------------------------------------------------ #
    # Content cleaning
    # ------------------------------------------------------------------ #

    def _clean_content(self, content: str) -> str:
        """Remove artefacts: leading/trailing \\t, repeated punctuation, long repeats."""
        if content is None:
            return ""

        # Literal "\t" escape sequences emitted by the model (two characters),
        # not actual tab characters.
        content = re.sub(r"^(\\t)+", "", content).lstrip()
        content = re.sub(r"(\\t)+$", "", content).rstrip()

        # Collapse runs of filler punctuation (dot leaders etc.) to three.
        content = re.sub(r"(\.)\1{2,}", r"\1\1\1", content)
        content = re.sub(r"(·)\1{2,}", r"\1\1\1", content)
        content = re.sub(r"(_)\1{2,}", r"\1\1\1", content)
        content = re.sub(r"(\\_)\1{2,}", r"\1\1\1", content)

        # Only long outputs are scanned for hallucinated repeats (expensive).
        if len(content) >= 2048:
            content = clean_repeated_content(content)

        return content.strip()

    # ------------------------------------------------------------------ #
    # Per-region content formatting
    # ------------------------------------------------------------------ #

    def _format_content(self, content: Any, label: str, native_label: str) -> str:
        """Clean and format a single region's content.

        Applies heading markers for titles, $$-wrapping for formulas, bullet
        and list-marker normalisation for text, and Markdown paragraph breaks.
        """
        if content is None:
            return ""

        content = self._clean_content(str(content))

        # Heading formatting
        if native_label == "doc_title":
            content = re.sub(r"^#+\s*", "", content)
            content = "# " + content
        elif native_label == "paragraph_title":
            if content.startswith("- ") or content.startswith("* "):
                content = content[2:].lstrip()
            content = re.sub(r"^#+\s*", "", content)
            content = "## " + content.lstrip()

        # Formula wrapping: strip any existing delimiter pair, then use $$…$$.
        if label == "formula":
            content = content.strip()
            for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)")]:
                if content.startswith(s) and content.endswith(e):
                    content = content[len(s) : -len(e)].strip()
                    break
            content = "$$\n" + content + "\n$$"

        # Text formatting
        if label == "text":
            if content.startswith("·") or content.startswith("•") or content.startswith("* "):
                content = "- " + content[1:].lstrip()

            # BUG FIX: both alternatives in the original patterns were the
            # same ASCII parenthesis (mojibake of the full-width CJK forms);
            # restored so markers like "（1）" normalise to "(1)".
            match = re.match(r"^(\(|（)(\d+|[A-Za-z])(\)|）)(.*)$", content)
            if match:
                _, symbol, _, rest = match.groups()
                content = f"({symbol}) {rest.lstrip()}"

            match = re.match(r"^(\d+|[A-Za-z])(\.|\)|）)(.*)$", content)
            if match:
                symbol, sep, rest = match.groups()
                # BUG FIX: normalise a full-width "）" separator to ASCII ")"
                # (the original comparison was ")" == ")" — a no-op).
                sep = ")" if sep == "）" else sep
                content = f"{symbol}{sep} {rest.lstrip()}"

        # Single newline → double newline (Markdown paragraph break)
        content = re.sub(r"(?<!\n)\n(?!\n)", "\n\n", content)

        return content

    # ------------------------------------------------------------------ #
    # Structural merges
    # ------------------------------------------------------------------ #

    def _merge_formula_numbers(self, items: List[Dict]) -> List[Dict]:
        """Merge formula_number region into adjacent formula with \\tag{}."""
        if not items:
            return items

        merged: List[Dict] = []
        skip: set = set()

        for i, block in enumerate(items):
            if i in skip:
                continue

            native = block.get("native_label", "")

            # Case 1: formula_number then formula
            if native == "formula_number":
                if i + 1 < len(items) and items[i + 1].get("label") == "formula":
                    num_clean = clean_formula_number(block.get("content", "").strip())
                    formula_content = items[i + 1].get("content", "")
                    merged_block = deepcopy(items[i + 1])
                    # Inject \tag{} just before the closing $$ delimiter.
                    if formula_content.endswith("\n$$"):
                        merged_block["content"] = formula_content[:-3] + f" \\tag{{{num_clean}}}\n$$"
                    merged.append(merged_block)
                    skip.add(i + 1)
                continue  # always skip the formula_number block itself

            # Case 2: formula then formula_number
            if block.get("label") == "formula":
                if i + 1 < len(items) and items[i + 1].get("native_label") == "formula_number":
                    num_clean = clean_formula_number(items[i + 1].get("content", "").strip())
                    formula_content = block.get("content", "")
                    merged_block = deepcopy(block)
                    if formula_content.endswith("\n$$"):
                        merged_block["content"] = formula_content[:-3] + f" \\tag{{{num_clean}}}\n$$"
                    merged.append(merged_block)
                    skip.add(i + 1)
                    continue

            merged.append(block)

        for i, block in enumerate(merged):
            block["index"] = i
        return merged

    def _merge_text_blocks(self, items: List[Dict]) -> List[Dict]:
        """Merge hyphenated text blocks when the combined word is valid (wordfreq).

        A text block ending in "-" is joined with the next text block that
        starts lowercase, but only when the rejoined word passes a Zipf
        frequency threshold (>= 2.5 in English). No-op when wordfreq is not
        installed.
        """
        if not items or not _WORDFREQ_AVAILABLE:
            return items

        merged: List[Dict] = []
        skip: set = set()

        for i, block in enumerate(items):
            if i in skip:
                continue
            if block.get("label") != "text":
                merged.append(block)
                continue

            content = block.get("content", "")
            if not isinstance(content, str) or not content.rstrip().endswith("-"):
                merged.append(block)
                continue

            content_stripped = content.rstrip()
            did_merge = False
            for j in range(i + 1, len(items)):
                if items[j].get("label") != "text":
                    continue
                next_content = items[j].get("content", "")
                if not isinstance(next_content, str):
                    continue
                next_stripped = next_content.lstrip()
                if next_stripped and next_stripped[0].islower():
                    words_before = content_stripped[:-1].split()
                    next_words = next_stripped.split()
                    if words_before and next_words:
                        # Join the split halves and check it is a real word.
                        merged_word = words_before[-1] + next_words[0]
                        if zipf_frequency(merged_word.lower(), "en") >= 2.5:
                            merged_block = deepcopy(block)
                            merged_block["content"] = content_stripped[:-1] + next_content.lstrip()
                            merged.append(merged_block)
                            skip.add(j)
                            did_merge = True
                            break

            if not did_merge:
                merged.append(block)

        for i, block in enumerate(merged):
            block["index"] = i
        return merged

    def _format_bullet_points(self, items: List[Dict], left_align_threshold: float = 10.0) -> List[Dict]:
        """Add missing bullet prefix when a text block is sandwiched between two bullet items.

        The middle block must be left-aligned with both neighbours (within
        ``left_align_threshold`` in the 0-1000 normalised coordinate space).
        """
        if len(items) < 3:
            return items

        for i in range(1, len(items) - 1):
            cur = items[i]
            prev = items[i - 1]
            nxt = items[i + 1]

            if cur.get("native_label") != "text":
                continue
            if prev.get("native_label") != "text" or nxt.get("native_label") != "text":
                continue

            cur_content = cur.get("content", "")
            if cur_content.startswith("- "):
                continue

            prev_content = prev.get("content", "")
            nxt_content = nxt.get("content", "")
            if not (prev_content.startswith("- ") and nxt_content.startswith("- ")):
                continue

            cur_bbox = cur.get("bbox_2d", [])
            prev_bbox = prev.get("bbox_2d", [])
            nxt_bbox = nxt.get("bbox_2d", [])
            if not (cur_bbox and prev_bbox and nxt_bbox):
                continue

            if (
                abs(cur_bbox[0] - prev_bbox[0]) <= left_align_threshold
                and abs(cur_bbox[0] - nxt_bbox[0]) <= left_align_threshold
            ):
                cur["content"] = "- " + cur_content

        return items
|
||||
@@ -1,9 +1,10 @@
|
||||
"""PP-DocLayoutV2 wrapper for document layout detection."""
|
||||
"""PP-DocLayoutV3 wrapper for document layout detection."""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from app.schemas.image import LayoutInfo, LayoutRegion
|
||||
from app.core.config import get_settings
|
||||
from app.services.layout_postprocess import apply_layout_postprocess
|
||||
from paddleocr import LayoutDetection
|
||||
from typing import Optional
|
||||
|
||||
@@ -116,6 +117,17 @@ class LayoutDetector:
|
||||
else:
|
||||
boxes = []
|
||||
|
||||
# Apply GLM-OCR layout post-processing (NMS, containment, unclip, clamp)
|
||||
if boxes:
|
||||
h, w = image.shape[:2]
|
||||
boxes = apply_layout_postprocess(
|
||||
boxes,
|
||||
img_size=(w, h),
|
||||
layout_nms=True,
|
||||
layout_unclip_ratio=None,
|
||||
layout_merge_bboxes_mode="large",
|
||||
)
|
||||
|
||||
for box in boxes:
|
||||
cls_id = box.get("cls_id")
|
||||
label = box.get("label") or self.CLS_ID_TO_LABEL.get(cls_id, "other")
|
||||
@@ -128,6 +140,7 @@ class LayoutDetector:
|
||||
regions.append(
|
||||
LayoutRegion(
|
||||
type=region_type,
|
||||
native_label=label,
|
||||
bbox=coordinate,
|
||||
confidence=score,
|
||||
score=score,
|
||||
|
||||
343
app/services/layout_postprocess.py
Normal file
343
app/services/layout_postprocess.py
Normal file
@@ -0,0 +1,343 @@
|
||||
"""Layout post-processing utilities ported from GLM-OCR.
|
||||
|
||||
Source: glm-ocr/glmocr/utils/layout_postprocess_utils.py
|
||||
|
||||
Algorithms applied after PaddleOCR LayoutDetection.predict():
|
||||
1. NMS with dual IoU thresholds (same-class vs cross-class)
|
||||
2. Large-image-region filtering (remove image boxes that fill most of the page)
|
||||
3. Containment analysis (merge_bboxes_mode: keep large parent, remove contained child)
|
||||
4. Unclip ratio (optional bbox expansion)
|
||||
5. Invalid bbox skipping
|
||||
|
||||
These steps run on top of PaddleOCR's built-in detection to replicate
|
||||
the quality of the GLM-OCR SDK's layout pipeline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Primitive geometry helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def iou(box1: List[float], box2: List[float]) -> float:
    """Compute IoU of two bounding boxes [x1, y1, x2, y2].

    Uses the pixel-inclusive convention (+1 on each side length), matching
    the upstream GLM-OCR implementation.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Intersection rectangle dimensions (clamped at zero when disjoint).
    inter_w = max(0, min(ax2, bx2) - max(ax1, bx1) + 1)
    inter_h = max(0, min(ay2, by2) - max(ay1, by1) + 1)
    inter_area = inter_w * inter_h

    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)

    return inter_area / float(area_a + area_b - inter_area)
|
||||
|
||||
|
||||
def is_contained(box1: List[float], box2: List[float], overlap_threshold: float = 0.8) -> bool:
    """Return True if box1 is contained within box2 (overlap ratio >= threshold).

    box format: [cls_id, score, x1, y1, x2, y2]
    """
    _, _, x1, y1, x2, y2 = box1
    _, _, px1, py1, px2, py2 = box2

    # Degenerate inner box can never be "contained".
    inner_area = (x2 - x1) * (y2 - y1)
    if inner_area <= 0:
        return False

    overlap_w = max(0, min(x2, px2) - max(x1, px1))
    overlap_h = max(0, min(y2, py2) - max(y1, py1))

    # Fraction of box1 covered by box2 (not symmetric IoU).
    return (overlap_w * overlap_h / inner_area) >= overlap_threshold
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# NMS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def nms(
    boxes: np.ndarray,
    iou_same: float = 0.6,
    iou_diff: float = 0.98,
) -> List[int]:
    """NMS with separate IoU thresholds for same-class and cross-class overlaps.

    Args:
        boxes: Array of shape (N, 6+) — [cls_id, score, x1, y1, x2, y2, ...].
        iou_same: Suppression threshold for boxes of the same class.
        iou_diff: Suppression threshold for boxes of different classes.

    Returns:
        List of kept row indices.
    """
    # Process candidates highest-score first.
    order = np.argsort(boxes[:, 1])[::-1].tolist()
    keep: List[int] = []

    while order:
        best, rest = order[0], order[1:]
        keep.append(best)
        best_cls = int(boxes[best, 0])
        best_xyxy = boxes[best, 2:6].tolist()

        # Keep only candidates that do not overlap the winner too much;
        # cross-class boxes get the (looser) iou_diff threshold.
        survivors = []
        for idx in rest:
            limit = iou_same if int(boxes[idx, 0]) == best_cls else iou_diff
            if iou(best_xyxy, boxes[idx, 2:6].tolist()) < limit:
                survivors.append(idx)
        order = survivors

    return keep
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Containment analysis
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Labels whose regions should never be removed even when contained in another box
|
||||
_PRESERVE_LABELS = {"image", "seal", "chart"}
|
||||
|
||||
|
||||
def check_containment(
    boxes: np.ndarray,
    preserve_cls_ids: Optional[set] = None,
    category_index: Optional[int] = None,
    mode: Optional[str] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute containment flags for each box.

    Args:
        boxes: Array of shape (N, 6+) — [cls_id, score, x1, y1, x2, y2, ...].
        preserve_cls_ids: Class IDs that must never be marked as contained.
        category_index: If set, apply mode only relative to this class.
        mode: 'large' or 'small' (only used with category_index).

    Returns:
        (contains_other, contained_by_other): 0/1 flag arrays of length N.
    """
    total = len(boxes)
    contains_other = np.zeros(total, dtype=int)
    contained_by_other = np.zeros(total, dtype=int)

    # When both are given, containment is only evaluated relative to one class.
    restricted = category_index is not None and mode is not None

    for inner in range(total):
        # Preserved classes are never flagged (and never flag their container).
        if preserve_cls_ids and int(boxes[inner, 0]) in preserve_cls_ids:
            continue
        for outer in range(total):
            if inner == outer:
                continue
            if restricted:
                if mode == "large" and int(boxes[outer, 0]) == category_index:
                    if is_contained(boxes[inner].tolist(), boxes[outer].tolist()):
                        contained_by_other[inner] = 1
                        contains_other[outer] = 1
                elif mode == "small" and int(boxes[inner, 0]) == category_index:
                    if is_contained(boxes[inner].tolist(), boxes[outer].tolist()):
                        contained_by_other[inner] = 1
                        contains_other[outer] = 1
            else:
                if is_contained(boxes[inner].tolist(), boxes[outer].tolist()):
                    contained_by_other[inner] = 1
                    contains_other[outer] = 1

    return contains_other, contained_by_other
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Box expansion (unclip)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def unclip_boxes(
    boxes: np.ndarray,
    unclip_ratio: Union[float, Tuple[float, float], Dict, List, None],
) -> np.ndarray:
    """Expand bounding boxes by the given ratio.

    Args:
        boxes: Array of shape (N, 6+) — [cls_id, score, x1, y1, x2, y2, ...].
        unclip_ratio: Scalar, (w_ratio, h_ratio) tuple, or dict mapping cls_id to ratio.

    Returns:
        Expanded boxes array (the input object itself when ratio is None).
    """
    if unclip_ratio is None:
        return boxes

    # Per-class ratios: expand each matching row around its centre.
    if isinstance(unclip_ratio, dict):
        rows = []
        for row in boxes:
            ratios = unclip_ratio.get(int(row[0]))
            if ratios is None:
                rows.append(list(row))
                continue
            rw, rh = ratios
            x1, y1, x2, y2 = row[2], row[3], row[4], row[5]
            mid_x, mid_y = (x1 + x2) / 2, (y1 + y2) / 2
            half_w, half_h = (x2 - x1) * rw / 2, (y2 - y1) * rh / 2
            updated = list(row)
            updated[2], updated[3] = mid_x - half_w, mid_y - half_h
            updated[4], updated[5] = mid_x + half_w, mid_y + half_h
            rows.append(updated)
        return np.array(rows)

    # A bare scalar means the same ratio in both dimensions.
    if isinstance(unclip_ratio, (int, float)):
        unclip_ratio = (float(unclip_ratio), float(unclip_ratio))

    rw, rh = unclip_ratio[0], unclip_ratio[1]
    widths = boxes[:, 4] - boxes[:, 2]
    heights = boxes[:, 5] - boxes[:, 3]
    mid_x = boxes[:, 2] + widths / 2
    mid_y = boxes[:, 3] + heights / 2
    half_w = widths * rw / 2
    half_h = heights * rh / 2

    out = boxes.copy().astype(float)
    out[:, 2] = mid_x - half_w
    out[:, 3] = mid_y - half_h
    out[:, 4] = mid_x + half_w
    out[:, 5] = mid_y + half_h
    return out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry-point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def apply_layout_postprocess(
    boxes: List[Dict],
    img_size: Tuple[int, int],
    layout_nms: bool = True,
    layout_unclip_ratio: Union[float, Tuple, Dict, None] = None,
    layout_merge_bboxes_mode: Union[str, Dict, None] = "large",
) -> List[Dict]:
    """Apply GLM-OCR layout post-processing to PaddleOCR detection results.

    Args:
        boxes: PaddleOCR output — list of dicts with keys:
            cls_id, label, score, coordinate ([x1, y1, x2, y2]).
        img_size: (width, height) of the image.
        layout_nms: Apply dual-threshold NMS.
        layout_unclip_ratio: Optional bbox expansion ratio.
        layout_merge_bboxes_mode: Containment mode — 'large' (default), 'small',
            'union', or per-class dict.

    Returns:
        Filtered and ordered list of box dicts in the same PaddleOCR format.
    """
    if not boxes:
        return boxes

    img_width, img_height = img_size

    # --- Build working array [cls_id, score, x1, y1, x2, y2] -------------- #
    arr_rows = []
    for b in boxes:
        cls_id = b.get("cls_id", 0)
        score = b.get("score", 0.0)
        x1, y1, x2, y2 = b.get("coordinate", [0, 0, 0, 0])
        arr_rows.append([cls_id, score, x1, y1, x2, y2])
    boxes_array = np.array(arr_rows, dtype=float)

    # Labels are tracked in a parallel list and filtered in lockstep with
    # boxes_array through every stage below.
    all_labels: List[str] = [b.get("label", "") for b in boxes]

    # 1. NMS ---------------------------------------------------------------- #
    if layout_nms and len(boxes_array) > 1:
        kept = nms(boxes_array, iou_same=0.6, iou_diff=0.98)
        boxes_array = boxes_array[kept]
        all_labels = [all_labels[k] for k in kept]

    # 2. Filter large image regions ---------------------------------------- #
    # Drops "image" boxes covering most of the page (likely a full-page
    # false positive). The landscape/portrait thresholds (0.82 / 0.93) are
    # taken from upstream GLM-OCR — presumably empirically tuned; confirm
    # there before changing.
    if len(boxes_array) > 1:
        img_area = img_width * img_height
        area_thres = 0.82 if img_width > img_height else 0.93
        # NOTE(review): image_cls_ids is computed but never read below —
        # likely a leftover from the upstream port.
        image_cls_ids = {
            int(boxes_array[i, 0])
            for i, lbl in enumerate(all_labels)
            if lbl == "image"
        }
        keep_mask = np.ones(len(boxes_array), dtype=bool)
        for i, lbl in enumerate(all_labels):
            if lbl == "image":
                x1, y1, x2, y2 = boxes_array[i, 2:6]
                # Clamp to the image before measuring the covered area.
                x1 = max(0.0, x1); y1 = max(0.0, y1)
                x2 = min(float(img_width), x2); y2 = min(float(img_height), y2)
                if (x2 - x1) * (y2 - y1) > area_thres * img_area:
                    keep_mask[i] = False
        boxes_array = boxes_array[keep_mask]
        all_labels = [lbl for lbl, k in zip(all_labels, keep_mask) if k]

    # 3. Containment analysis (merge_bboxes_mode) -------------------------- #
    # 'large' keeps the containing box, 'small' keeps the contained one.
    # NOTE: a string mode other than 'large'/'small' (e.g. 'union', named in
    # the docstring) intentionally falls through with no filtering.
    if layout_merge_bboxes_mode and len(boxes_array) > 1:
        preserve_cls_ids = {
            int(boxes_array[i, 0])
            for i, lbl in enumerate(all_labels)
            if lbl in _PRESERVE_LABELS
        }

        if isinstance(layout_merge_bboxes_mode, str):
            mode = layout_merge_bboxes_mode
            if mode in ("large", "small"):
                contains_other, contained_by_other = check_containment(
                    boxes_array, preserve_cls_ids
                )
                if mode == "large":
                    keep_mask = contained_by_other == 0
                else:
                    keep_mask = (contains_other == 0) | (contained_by_other == 1)
                boxes_array = boxes_array[keep_mask]
                all_labels = [lbl for lbl, k in zip(all_labels, keep_mask) if k]

        elif isinstance(layout_merge_bboxes_mode, dict):
            # Per-class modes: intersect the keep masks of every entry.
            keep_mask = np.ones(len(boxes_array), dtype=bool)
            for category_index, mode in layout_merge_bboxes_mode.items():
                if mode in ("large", "small"):
                    contains_other, contained_by_other = check_containment(
                        boxes_array, preserve_cls_ids, int(category_index), mode
                    )
                    if mode == "large":
                        keep_mask &= contained_by_other == 0
                    else:
                        keep_mask &= (contains_other == 0) | (contained_by_other == 1)
            boxes_array = boxes_array[keep_mask]
            all_labels = [lbl for lbl, k in zip(all_labels, keep_mask) if k]

    if len(boxes_array) == 0:
        return []

    # 4. Unclip (bbox expansion) ------------------------------------------- #
    if layout_unclip_ratio is not None:
        boxes_array = unclip_boxes(boxes_array, layout_unclip_ratio)

    # 5. Clamp to image boundaries + skip invalid -------------------------- #
    result: List[Dict] = []
    for i, row in enumerate(boxes_array):
        cls_id = int(row[0])
        score = float(row[1])
        x1 = max(0.0, min(float(row[2]), img_width))
        y1 = max(0.0, min(float(row[3]), img_height))
        x2 = max(0.0, min(float(row[4]), img_width))
        y2 = max(0.0, min(float(row[5]), img_height))

        # Degenerate boxes (zero width/height after clamping) are dropped.
        if x1 >= x2 or y1 >= y2:
            continue

        result.append({
            "cls_id": cls_id,
            "label": all_labels[i],
            "score": score,
            # Coordinates are truncated to ints for the PaddleOCR dict format.
            "coordinate": [int(x1), int(y1), int(x2), int(y2)],
        })

    return result
|
||||
@@ -1,19 +1,23 @@
|
||||
"""PaddleOCR-VL client service for text and formula recognition."""
|
||||
|
||||
import re
|
||||
import numpy as np
|
||||
import cv2
|
||||
import requests
|
||||
from io import BytesIO
|
||||
import base64
|
||||
from app.core.config import get_settings
|
||||
from paddleocr import PaddleOCRVL
|
||||
from typing import Optional
|
||||
from app.services.layout_detector import LayoutDetector
|
||||
from app.services.image_processor import ImageProcessor
|
||||
from app.services.converter import Converter
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from io import BytesIO
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import requests
|
||||
from openai import OpenAI
|
||||
from paddleocr import PaddleOCRVL
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.services.converter import Converter
|
||||
from app.services.glm_postprocess import GLMResultFormatter
|
||||
from app.services.image_processor import ImageProcessor
|
||||
from app.services.layout_detector import LayoutDetector
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
@@ -144,7 +148,9 @@ def _clean_latex_syntax_spaces(expr: str) -> str:
|
||||
# Strategy: remove spaces before \ and between non-command chars,
|
||||
# but preserve the space after \command when followed by a non-\ char
|
||||
cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd
|
||||
cleaned = re.sub(r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned) # remove space after non-letter non-\
|
||||
cleaned = re.sub(
|
||||
r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned
|
||||
) # remove space after non-letter non-\
|
||||
return f"{operator}{{{cleaned}}}"
|
||||
|
||||
# Match _{ ... } or ^{ ... }
|
||||
@@ -383,8 +389,8 @@ class OCRServiceBase(ABC):
|
||||
class OCRService(OCRServiceBase):
|
||||
"""Service for OCR using PaddleOCR-VL."""
|
||||
|
||||
_pipeline: Optional[PaddleOCRVL] = None
|
||||
_layout_detector: Optional[LayoutDetector] = None
|
||||
_pipeline: PaddleOCRVL | None = None
|
||||
_layout_detector: LayoutDetector | None = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -549,7 +555,15 @@ class GLMOCRService(OCRServiceBase):
|
||||
|
||||
# Call OpenAI-compatible API with formula recognition prompt
|
||||
prompt = "Formula Recognition:"
|
||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": image_url}},
|
||||
{"type": "text", "text": prompt},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
# Don't catch exceptions here - let them propagate for fallback handling
|
||||
response = self.openai_client.chat.completions.create(
|
||||
@@ -596,10 +610,10 @@ class MineruOCRService(OCRServiceBase):
|
||||
def __init__(
|
||||
self,
|
||||
api_url: str = "http://127.0.0.1:8000/file_parse",
|
||||
image_processor: Optional[ImageProcessor] = None,
|
||||
converter: Optional[Converter] = None,
|
||||
image_processor: ImageProcessor | None = None,
|
||||
converter: Converter | None = None,
|
||||
glm_ocr_url: str = "http://localhost:8002/v1",
|
||||
layout_detector: Optional[LayoutDetector] = None,
|
||||
layout_detector: LayoutDetector | None = None,
|
||||
):
|
||||
"""Initialize Local API service.
|
||||
|
||||
@@ -614,7 +628,9 @@ class MineruOCRService(OCRServiceBase):
|
||||
self.glm_ocr_url = glm_ocr_url
|
||||
self.openai_client = OpenAI(api_key="EMPTY", base_url=glm_ocr_url, timeout=3600)
|
||||
|
||||
def _recognize_formula_with_paddleocr_vl(self, image: np.ndarray, prompt: str = "Formula Recognition:") -> str:
|
||||
def _recognize_formula_with_paddleocr_vl(
|
||||
self, image: np.ndarray, prompt: str = "Formula Recognition:"
|
||||
) -> str:
|
||||
"""Recognize formula using PaddleOCR-VL API.
|
||||
|
||||
Args:
|
||||
@@ -634,7 +650,15 @@ class MineruOCRService(OCRServiceBase):
|
||||
image_url = f"data:image/png;base64,{image_base64}"
|
||||
|
||||
# Call OpenAI-compatible API
|
||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": image_url}},
|
||||
{"type": "text", "text": prompt},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model="glm-ocr",
|
||||
@@ -647,7 +671,9 @@ class MineruOCRService(OCRServiceBase):
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"PaddleOCR-VL formula recognition failed: {e}") from e
|
||||
|
||||
def _extract_and_recognize_formulas(self, markdown_content: str, original_image: np.ndarray) -> str:
|
||||
def _extract_and_recognize_formulas(
|
||||
self, markdown_content: str, original_image: np.ndarray
|
||||
) -> str:
|
||||
"""Extract image references from markdown and recognize formulas.
|
||||
|
||||
Args:
|
||||
@@ -712,7 +738,13 @@ class MineruOCRService(OCRServiceBase):
|
||||
}
|
||||
|
||||
# Make API request
|
||||
response = requests.post(self.api_url, files=files, data=data, headers={"accept": "application/json"}, timeout=30)
|
||||
response = requests.post(
|
||||
self.api_url,
|
||||
files=files,
|
||||
data=data,
|
||||
headers={"accept": "application/json"},
|
||||
timeout=30,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
result = response.json()
|
||||
@@ -723,7 +755,9 @@ class MineruOCRService(OCRServiceBase):
|
||||
markdown_content = result["results"]["image"].get("md_content", "")
|
||||
|
||||
if "
|
||||
markdown_content = self._extract_and_recognize_formulas(
|
||||
markdown_content, original_image
|
||||
)
|
||||
|
||||
# Apply postprocessing to fix OCR errors
|
||||
markdown_content = _postprocess_markdown(markdown_content)
|
||||
@@ -751,6 +785,167 @@ class MineruOCRService(OCRServiceBase):
|
||||
raise RuntimeError(f"Recognition failed: {e}") from e
|
||||
|
||||
|
||||
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
||||
_TASK_PROMPTS: dict[str, str] = {
|
||||
"text": "Text Recognition:",
|
||||
"formula": "Formula Recognition:",
|
||||
"table": "Table Recognition:",
|
||||
}
|
||||
_DEFAULT_PROMPT = (
|
||||
"Recognize the text in the image and output in Markdown format. "
|
||||
"Preserve the original layout (headings/paragraphs/tables/formulas). "
|
||||
"Do not fabricate content that does not exist in the image."
|
||||
)
|
||||
|
||||
|
||||
class GLMOCREndToEndService(OCRServiceBase):
|
||||
"""End-to-end OCR using GLM-OCR pipeline: layout detection → per-region OCR.
|
||||
|
||||
Pipeline:
|
||||
1. Add padding (ImageProcessor)
|
||||
2. Detect layout regions (LayoutDetector → PP-DocLayoutV3)
|
||||
3. Crop each region and call vLLM with a task-specific prompt (parallel)
|
||||
4. GLMResultFormatter: clean, format titles/bullets/formulas, merge tags
|
||||
5. _postprocess_markdown: LaTeX math error correction
|
||||
6. Converter: markdown → latex/mathml/mml
|
||||
|
||||
This replaces both GLMOCRService (formula-only) and MineruOCRService (mixed).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
vl_server_url: str,
|
||||
image_processor: ImageProcessor,
|
||||
converter: Converter,
|
||||
layout_detector: LayoutDetector,
|
||||
max_workers: int = 8,
|
||||
):
|
||||
self.vl_server_url = vl_server_url or settings.glm_ocr_url
|
||||
self.image_processor = image_processor
|
||||
self.converter = converter
|
||||
self.layout_detector = layout_detector
|
||||
self.max_workers = max_workers
|
||||
self.openai_client = OpenAI(api_key="EMPTY", base_url=self.vl_server_url, timeout=3600)
|
||||
self._formatter = GLMResultFormatter()
|
||||
|
||||
def _encode_region(self, image: np.ndarray) -> str:
|
||||
"""Convert BGR numpy array to base64 JPEG string."""
|
||||
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
pil_img = PILImage.fromarray(rgb)
|
||||
buf = BytesIO()
|
||||
pil_img.save(buf, format="JPEG")
|
||||
return base64.b64encode(buf.getvalue()).decode("utf-8")
|
||||
|
||||
def _call_vllm(self, image: np.ndarray, prompt: str) -> str:
|
||||
"""Send image + prompt to vLLM and return raw content string."""
|
||||
img_b64 = self._encode_region(image)
|
||||
data_url = f"data:image/jpeg;base64,{img_b64}"
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": data_url}},
|
||||
{"type": "text", "text": prompt},
|
||||
],
|
||||
}
|
||||
]
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model="glm-ocr",
|
||||
messages=messages,
|
||||
temperature=0.01,
|
||||
max_tokens=settings.max_tokens,
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
|
||||
def _normalize_bbox(self, bbox: list[float], img_w: int, img_h: int) -> list[int]:
|
||||
"""Convert pixel bbox [x1,y1,x2,y2] to 0-1000 normalised coords."""
|
||||
x1, y1, x2, y2 = bbox
|
||||
return [
|
||||
int(x1 / img_w * 1000),
|
||||
int(y1 / img_h * 1000),
|
||||
int(x2 / img_w * 1000),
|
||||
int(y2 / img_h * 1000),
|
||||
]
|
||||
|
||||
def recognize(self, image: np.ndarray) -> dict:
|
||||
"""Full pipeline: padding → layout → per-region OCR → postprocess → markdown.
|
||||
|
||||
Args:
|
||||
image: Input image as numpy array in BGR format.
|
||||
|
||||
Returns:
|
||||
Dict with 'markdown', 'latex', 'mathml', 'mml' keys.
|
||||
"""
|
||||
# 1. Padding
|
||||
padded = self.image_processor.add_padding(image)
|
||||
img_h, img_w = padded.shape[:2]
|
||||
|
||||
# 2. Layout detection
|
||||
layout_info = self.layout_detector.detect(padded)
|
||||
|
||||
# 3. OCR: per-region (parallel) or full-image fallback
|
||||
if not layout_info.regions:
|
||||
raw_content = self._call_vllm(padded, _DEFAULT_PROMPT)
|
||||
markdown_content = self._formatter._clean_content(raw_content)
|
||||
else:
|
||||
# Build task list for non-figure regions
|
||||
tasks = []
|
||||
for idx, region in enumerate(layout_info.regions):
|
||||
if region.type == "figure":
|
||||
continue
|
||||
x1, y1, x2, y2 = (int(c) for c in region.bbox)
|
||||
cropped = padded[y1:y2, x1:x2]
|
||||
if cropped.size == 0:
|
||||
continue
|
||||
prompt = _TASK_PROMPTS.get(region.type, _DEFAULT_PROMPT)
|
||||
tasks.append((idx, region, cropped, prompt))
|
||||
|
||||
if not tasks:
|
||||
raw_content = self._call_vllm(padded, _DEFAULT_PROMPT)
|
||||
markdown_content = self._formatter._clean_content(raw_content)
|
||||
else:
|
||||
# Parallel OCR calls
|
||||
raw_results: dict[int, str] = {}
|
||||
with ThreadPoolExecutor(max_workers=min(self.max_workers, len(tasks))) as ex:
|
||||
future_map = {
|
||||
ex.submit(self._call_vllm, cropped, prompt): idx
|
||||
for idx, region, cropped, prompt in tasks
|
||||
}
|
||||
for future in as_completed(future_map):
|
||||
idx = future_map[future]
|
||||
try:
|
||||
raw_results[idx] = future.result()
|
||||
except Exception:
|
||||
raw_results[idx] = ""
|
||||
|
||||
# Build structured region dicts for GLMResultFormatter
|
||||
region_dicts = []
|
||||
for idx, region, _cropped, _prompt in tasks:
|
||||
region_dicts.append(
|
||||
{
|
||||
"index": idx,
|
||||
"label": region.type,
|
||||
"native_label": region.native_label,
|
||||
"content": raw_results.get(idx, ""),
|
||||
"bbox_2d": self._normalize_bbox(region.bbox, img_w, img_h),
|
||||
}
|
||||
)
|
||||
|
||||
# 4. GLM-OCR postprocessing: clean, format, merge, bullets
|
||||
markdown_content = self._formatter.process(region_dicts)
|
||||
|
||||
# 5. LaTeX math error correction (our existing pipeline)
|
||||
markdown_content = _postprocess_markdown(markdown_content)
|
||||
|
||||
# 6. Format conversion
|
||||
latex, mathml, mml = "", "", ""
|
||||
if markdown_content and self.converter:
|
||||
fmt = self.converter.convert_to_formats(markdown_content)
|
||||
latex, mathml, mml = fmt.latex, fmt.mathml, fmt.mml
|
||||
|
||||
return {"markdown": markdown_content, "latex": latex, "mathml": mathml, "mml": mml}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
mineru_service = MineruOCRService()
|
||||
image = cv2.imread("test/formula2.jpg")
|
||||
|
||||
Reference in New Issue
Block a user