diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 7b928ea..47b65d9 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -527,7 +527,7 @@ class MineruOCRService(OCRServiceBase): messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}] response = self.openai_client.chat.completions.create( - model="PaddleOCR-VL-0.9B", # Use exact model name from vLLM server + model="PaddleOCR-VL-0.9B", messages=messages, temperature=0.0, ) @@ -555,9 +555,9 @@ class MineruOCRService(OCRServiceBase): formula_text = self._recognize_formula_with_paddleocr_vl(original_image) - if formula_text.startswith("\[") or formula_text.startswith("\("): - formula_text = formula_text.replace("\[", "$$").replace("\(", "$$") - formula_text = formula_text.replace("\]", "$$").replace("\)", "$$") + if formula_text.startswith(r"\[") or formula_text.startswith(r"\("): + formula_text = formula_text.replace(r"\[", "$$").replace(r"\(", "$$") + formula_text = formula_text.replace(r"\]", "$$").replace(r"\)", "$$") else: formula_text = f"$${formula_text}$$" @@ -614,15 +614,10 @@ class MineruOCRService(OCRServiceBase): if "results" in result and "image" in result["results"]: markdown_content = result["results"]["image"].get("md_content", "") - print(f"[DEBUG] Markdown content from Mineru: {markdown_content[:200]}...") - # Check if markdown contains formula image references if "![](images/" in markdown_content: - print(f"[DEBUG] Detected image reference, calling PaddleOCR-VL...") # Use PaddleOCR-VL to recognize the formula markdown_content = self._extract_and_recognize_formulas(markdown_content, image) - else: - print(f"[DEBUG] No image reference found in markdown") # Apply postprocessing to fix OCR errors markdown_content = _postprocess_markdown(markdown_content)