fix: rm space (use raw strings for LaTeX delimiters, remove [DEBUG] prints and a stale inline comment)

This commit is contained in:
liuyuanchuang
2026-02-05 21:50:12 +08:00
parent eb68843e2c
commit a3ca04856f

View File

@@ -527,7 +527,7 @@ class MineruOCRService(OCRServiceBase):
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}] messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
response = self.openai_client.chat.completions.create( response = self.openai_client.chat.completions.create(
model="PaddleOCR-VL-0.9B", # Use exact model name from vLLM server model="PaddleOCR-VL-0.9B",
messages=messages, messages=messages,
temperature=0.0, temperature=0.0,
) )
@@ -555,9 +555,9 @@ class MineruOCRService(OCRServiceBase):
formula_text = self._recognize_formula_with_paddleocr_vl(original_image) formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
if formula_text.startswith("\[") or formula_text.startswith("\("): if formula_text.startswith(r"\[") or formula_text.startswith(r"\("):
formula_text = formula_text.replace("\[", "$$").replace("\(", "$$") formula_text = formula_text.replace(r"\[", "$$").replace(r"\(", "$$")
formula_text = formula_text.replace("\]", "$$").replace("\)", "$$") formula_text = formula_text.replace(r"\]", "$$").replace(r"\)", "$$")
else: else:
formula_text = f"$${formula_text}$$" formula_text = f"$${formula_text}$$"
@@ -614,15 +614,10 @@ class MineruOCRService(OCRServiceBase):
if "results" in result and "image" in result["results"]: if "results" in result and "image" in result["results"]:
markdown_content = result["results"]["image"].get("md_content", "") markdown_content = result["results"]["image"].get("md_content", "")
print(f"[DEBUG] Markdown content from Mineru: {markdown_content[:200]}...")
# Check if markdown contains formula image references # Check if markdown contains formula image references
if "![](images/" in markdown_content: if "![](images/" in markdown_content:
print(f"[DEBUG] Detected image reference, calling PaddleOCR-VL...")
# Use PaddleOCR-VL to recognize the formula # Use PaddleOCR-VL to recognize the formula
markdown_content = self._extract_and_recognize_formulas(markdown_content, image) markdown_content = self._extract_and_recognize_formulas(markdown_content, image)
else:
print(f"[DEBUG] No image reference found in markdown")
# Apply postprocessing to fix OCR errors # Apply postprocessing to fix OCR errors
markdown_content = _postprocess_markdown(markdown_content) markdown_content = _postprocess_markdown(markdown_content)