fix: re-recognize formulas that MinerU returns as image elements

2026-02-09 22:18:30 +08:00
parent 5504bbbf1e
commit 7799e39298
5 changed files with 308 additions and 3 deletions
--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -143,7 +143,7 @@ def _clean_latex_syntax_spaces(expr: str) -> str:
            # Remove spaces everywhere else (e.g., x \in -> x\in is fine)
            # Strategy: remove spaces before \ and between non-command chars,
            # but preserve the space after \command when followed by a non-\ char
-            cleaned = re.sub(r"\s+(?=\\)", "", content)       # remove space before \cmd
+            cleaned = re.sub(r"\s+(?=\\)", "", content)  # remove space before \cmd
            cleaned = re.sub(r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned)  # remove space after non-letter non-\
        return f"{operator}{{{cleaned}}}"

@@ -532,7 +532,7 @@ class GLMOCRService(OCRServiceBase):

        Returns:
            Dict with 'latex', 'markdown', 'mathml', 'mml' keys.
-        
+
        Raises:
            RuntimeError: If recognition fails (preserves original exception for fallback handling).
        """
@@ -637,7 +637,7 @@ class MineruOCRService(OCRServiceBase):
            messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]

            response = self.openai_client.chat.completions.create(
-                model="PaddleOCR-VL-0.9B",
+                model="glm-ocr",
                messages=messages,
                temperature=0.0,
            )
@@ -714,6 +714,9 @@ class MineruOCRService(OCRServiceBase):
            if "results" in result and "image" in result["results"]:
                markdown_content = result["results"]["image"].get("md_content", "")

+            if "![](images/" in markdown_content:
+                markdown_content = self._extract_and_recognize_formulas(markdown_content, image)
+
            # Apply postprocessing to fix OCR errors
            markdown_content = _postprocess_markdown(markdown_content)