fix: single dollar symbol
This commit is contained in:
@@ -50,9 +50,7 @@ class Settings(BaseSettings):
|
|||||||
max_tokens: int = 4096
|
max_tokens: int = 4096
|
||||||
|
|
||||||
# Model Paths
|
# Model Paths
|
||||||
pp_doclayout_model_dir: str | None = (
|
pp_doclayout_model_dir: str | None = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
||||||
"/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Image Processing
|
# Image Processing
|
||||||
max_image_size_mb: int = 10
|
max_image_size_mb: int = 10
|
||||||
|
|||||||
@@ -265,7 +265,7 @@ class GLMResultFormatter:
|
|||||||
# Formula wrapping
|
# Formula wrapping
|
||||||
if label == "formula":
|
if label == "formula":
|
||||||
content = content.strip()
|
content = content.strip()
|
||||||
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)")]:
|
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)"), ("$", "$")]:
|
||||||
if content.startswith(s):
|
if content.startswith(s):
|
||||||
content = content[len(s) :].strip()
|
content = content[len(s) :].strip()
|
||||||
if content.endswith(e):
|
if content.endswith(e):
|
||||||
|
|||||||
@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
|
|
||||||
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
||||||
_TASK_PROMPTS: dict[str, str] = {
|
_TASK_PROMPTS: dict[str, str] = {
|
||||||
"text": "Text Recognition. If the content is a formula, please ouput display latex code, else output text",
|
"text": "Text Recognition. If the content is a formula, please output display latex code, else output text",
|
||||||
"formula": "Formula Recognition:",
|
"formula": "Formula Recognition:",
|
||||||
"table": "Table Recognition:",
|
"table": "Table Recognition:",
|
||||||
}
|
}
|
||||||
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput display latex code, else output text"
|
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please output display latex code, else output text"
|
||||||
|
|
||||||
|
|
||||||
class GLMOCREndToEndService(OCRServiceBase):
|
class GLMOCREndToEndService(OCRServiceBase):
|
||||||
@@ -868,7 +868,8 @@ class GLMOCREndToEndService(OCRServiceBase):
|
|||||||
"""
|
"""
|
||||||
# 1. Layout detection
|
# 1. Layout detection
|
||||||
img_h, img_w = image.shape[:2]
|
img_h, img_w = image.shape[:2]
|
||||||
layout_info = self.layout_detector.detect(image)
|
padded_image = self.image_processor.add_padding(image)
|
||||||
|
layout_info = self.layout_detector.detect(padded_image)
|
||||||
|
|
||||||
# Sort regions in reading order: top-to-bottom, left-to-right
|
# Sort regions in reading order: top-to-bottom, left-to-right
|
||||||
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
|
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
|
||||||
@@ -890,7 +891,7 @@ class GLMOCREndToEndService(OCRServiceBase):
|
|||||||
if region.type == "figure":
|
if region.type == "figure":
|
||||||
continue
|
continue
|
||||||
x1, y1, x2, y2 = (int(c) for c in region.bbox)
|
x1, y1, x2, y2 = (int(c) for c in region.bbox)
|
||||||
cropped = image[y1:y2, x1:x2]
|
cropped = padded_image[y1:y2, x1:x2]
|
||||||
if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
|
if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Skipping region idx=%d (label=%s): crop too small %s",
|
"Skipping region idx=%d (label=%s): crop too small %s",
|
||||||
|
|||||||
Reference in New Issue
Block a user