fix: image as element
This commit is contained in:
@@ -143,7 +143,7 @@ def _clean_latex_syntax_spaces(expr: str) -> str:
|
||||
# Remove spaces everywhere else (e.g., x \in -> x\in is fine)
|
||||
# Strategy: remove spaces before \ and between non-command chars,
|
||||
# but preserve the space after \command when followed by a non-\ char
|
||||
cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd
|
||||
cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd
|
||||
cleaned = re.sub(r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned) # remove space after non-letter non-\
|
||||
return f"{operator}{{{cleaned}}}"
|
||||
|
||||
@@ -532,7 +532,7 @@ class GLMOCRService(OCRServiceBase):
|
||||
|
||||
Returns:
|
||||
Dict with 'latex', 'markdown', 'mathml', 'mml' keys.
|
||||
|
||||
|
||||
Raises:
|
||||
RuntimeError: If recognition fails (preserves original exception for fallback handling).
|
||||
"""
|
||||
@@ -637,7 +637,7 @@ class MineruOCRService(OCRServiceBase):
|
||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model="PaddleOCR-VL-0.9B",
|
||||
model="glm-ocr",
|
||||
messages=messages,
|
||||
temperature=0.0,
|
||||
)
|
||||
@@ -714,6 +714,9 @@ class MineruOCRService(OCRServiceBase):
|
||||
if "results" in result and "image" in result["results"]:
|
||||
markdown_content = result["results"]["image"].get("md_content", "")
|
||||
|
||||
if "
|
||||
|
||||
# Apply postprocessing to fix OCR errors
|
||||
markdown_content = _postprocess_markdown(markdown_content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user