optimize: formula is recognized as text

2026-03-12 22:30:27 +08:00
parent 11e9ed780d
commit ff82021467
2 changed files with 4 additions and 4 deletions
--- a/app/services/layout_detector.py
+++ b/app/services/layout_detector.py
@@ -148,7 +148,7 @@ class LayoutDetector:
                )
            )

-        mixed_recognition = any(region.type == "text" and region.score > 0.3 for region in regions)
+        mixed_recognition = any(region.type == "text" and region.score > 0.85 for region in regions)

        return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)

--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):

 # Task-specific prompts (from GLM-OCR SDK config.yaml)
 _TASK_PROMPTS: dict[str, str] = {
-    "text": "Text Recognition. If the content is a formula, please ouput latex code, else output text",
+    "text": "Text Recognition. If the content is a formula, please ouput display latex code, else output text",
    "formula": "Formula Recognition:",
    "table": "Table Recognition:",
 }
-_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text"
+_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput display latex code, else output text"


 class GLMOCREndToEndService(OCRServiceBase):
@@ -874,7 +874,7 @@ class GLMOCREndToEndService(OCRServiceBase):
        layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))

        # 3. OCR: per-region (parallel) or full-image fallback
-        if not layout_info.regions:
+        if not layout_info.regions or (len(layout_info.regions) == 1 and not layout_info.MixedRecognition):
            # No layout detected → assume it's a formula, use formula recognition
            logger.info("No layout regions detected, treating image as formula")
            raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])