fix: single dollar symbol

2026-03-12 23:20:14 +08:00
parent ff82021467
commit aee1a1bf3b
3 changed files with 7 additions and 8 deletions
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -50,9 +50,7 @@ class Settings(BaseSettings):
    max_tokens: int = 4096

    # Model Paths
-    pp_doclayout_model_dir: str | None = (
-        "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
-    )
+    pp_doclayout_model_dir: str | None = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"

    # Image Processing
    max_image_size_mb: int = 10
--- a/app/services/glm_postprocess.py
+++ b/app/services/glm_postprocess.py
@@ -265,7 +265,7 @@ class GLMResultFormatter:
        # Formula wrapping
        if label == "formula":
            content = content.strip()
-            for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)")]:
+            for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)"), ("$", "$")]:
                if content.startswith(s):
                    content = content[len(s) :].strip()
                    if content.endswith(e):
--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):

 # Task-specific prompts (from GLM-OCR SDK config.yaml)
 _TASK_PROMPTS: dict[str, str] = {
-    "text": "Text Recognition. If the content is a formula, please ouput display latex code, else output text",
+    "text": "Text Recognition. If the content is a formula, please output display latex code, else output text",
    "formula": "Formula Recognition:",
    "table": "Table Recognition:",
 }
-_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput display latex code, else output text"
+_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please output display latex code, else output text"


 class GLMOCREndToEndService(OCRServiceBase):
@@ -868,7 +868,8 @@ class GLMOCREndToEndService(OCRServiceBase):
        """
        # 1. Layout detection
        img_h, img_w = image.shape[:2]
-        layout_info = self.layout_detector.detect(image)
+        padded_image = self.image_processor.add_padding(image)
+        layout_info = self.layout_detector.detect(padded_image)

        # Sort regions in reading order: top-to-bottom, left-to-right
        layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
@@ -890,7 +891,7 @@ class GLMOCREndToEndService(OCRServiceBase):
                if region.type == "figure":
                    continue
                x1, y1, x2, y2 = (int(c) for c in region.bbox)
-                cropped = image[y1:y2, x1:x2]
+                cropped = padded_image[y1:y2, x1:x2]
                if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
                    logger.warning(
                        "Skipping region idx=%d (label=%s): crop too small %s",