fix: rm space
This commit is contained in:
@@ -527,7 +527,7 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||||
|
|
||||||
response = self.openai_client.chat.completions.create(
|
response = self.openai_client.chat.completions.create(
|
||||||
model="PaddleOCR-VL-0.9B", # Use exact model name from vLLM server
|
model="PaddleOCR-VL-0.9B",
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@@ -555,9 +555,9 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
|
|
||||||
formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
|
formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
|
||||||
|
|
||||||
if formula_text.startswith("\[") or formula_text.startswith("\("):
|
if formula_text.startswith(r"\[") or formula_text.startswith(r"\("):
|
||||||
formula_text = formula_text.replace("\[", "$$").replace("\(", "$$")
|
formula_text = formula_text.replace(r"\[", "$$").replace(r"\(", "$$")
|
||||||
formula_text = formula_text.replace("\]", "$$").replace("\)", "$$")
|
formula_text = formula_text.replace(r"\]", "$$").replace(r"\)", "$$")
|
||||||
else:
|
else:
|
||||||
formula_text = f"$${formula_text}$$"
|
formula_text = f"$${formula_text}$$"
|
||||||
|
|
||||||
@@ -614,15 +614,10 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
if "results" in result and "image" in result["results"]:
|
if "results" in result and "image" in result["results"]:
|
||||||
markdown_content = result["results"]["image"].get("md_content", "")
|
markdown_content = result["results"]["image"].get("md_content", "")
|
||||||
|
|
||||||
print(f"[DEBUG] Markdown content from Mineru: {markdown_content[:200]}...")
|
|
||||||
|
|
||||||
# Check if markdown contains formula image references
|
# Check if markdown contains formula image references
|
||||||
if "
|
|
||||||
# Use PaddleOCR-VL to recognize the formula
|
# Use PaddleOCR-VL to recognize the formula
|
||||||
markdown_content = self._extract_and_recognize_formulas(markdown_content, image)
|
markdown_content = self._extract_and_recognize_formulas(markdown_content, image)
|
||||||
else:
|
|
||||||
print(f"[DEBUG] No image reference found in markdown")
|
|
||||||
|
|
||||||
# Apply postprocessing to fix OCR errors
|
# Apply postprocessing to fix OCR errors
|
||||||
markdown_content = _postprocess_markdown(markdown_content)
|
markdown_content = _postprocess_markdown(markdown_content)
|
||||||
|
|||||||
Reference in New Issue
Block a user