feat: optimize padding and formula fallback
This commit is contained in:
@@ -890,8 +890,14 @@ class GLMOCREndToEndService(OCRServiceBase):
|
||||
|
||||
# 3. OCR: per-region (parallel), or full-image formula recognition when no layout regions are detected
|
||||
if not layout_info.regions:
|
||||
raw_content = self._call_vllm(padded, _DEFAULT_PROMPT)
|
||||
markdown_content = self._formatter._clean_content(raw_content)
|
||||
# No layout detected → assume it's a formula, use formula recognition
|
||||
logger.info("No layout regions detected, treating image as formula")
|
||||
raw_content = self._call_vllm(padded, _TASK_PROMPTS["formula"])
|
||||
# Format as a Markdown display formula: wrap in $$ ... $$ unless already delimited
|
||||
formatted_content = raw_content.strip()
|
||||
if not (formatted_content.startswith("$$") and formatted_content.endswith("$$")):
|
||||
formatted_content = f"$$\n{formatted_content}\n$$"
|
||||
markdown_content = formatted_content
|
||||
else:
|
||||
# Build task list for non-figure regions
|
||||
tasks = []
|
||||
|
||||
Reference in New Issue
Block a user