feat: optimize padding and formula fallback
This commit is contained in:
@@ -104,7 +104,8 @@ class ImageProcessor:
|
|||||||
"""Add whitespace padding around the image.
|
"""Add whitespace padding around the image.
|
||||||
|
|
||||||
Adds padding equal to padding_ratio * max(height, width) on each side.
|
Adds padding equal to padding_ratio * max(height, width) on each side.
|
||||||
This expands the image by approximately 30% total (15% on each side).
|
For small images (height < 80 or width < 500), a fixed padding_ratio of 0.2 is used instead of self.padding_ratio.
|
||||||
|
This expands the image by approximately 30% total (15% on each side) for normal images.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
image: Input image as numpy array in BGR format.
|
image: Input image as numpy array in BGR format.
|
||||||
@@ -113,7 +114,9 @@ class ImageProcessor:
|
|||||||
Padded image as numpy array.
|
Padded image as numpy array.
|
||||||
"""
|
"""
|
||||||
height, width = image.shape[:2]
|
height, width = image.shape[:2]
|
||||||
padding = int(max(height, width) * self.padding_ratio)
|
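# Use a fixed padding ratio of 0.2 for small images to preserve detail (NOTE(review): 0.2 is larger, not smaller, than the 15% per side the docstring describes — confirm intent)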
|
||||||
|
padding_ratio = 0.2 if height < 80 or width < 500 else self.padding_ratio
|
||||||
|
padding = int(max(height, width) * padding_ratio)
|
||||||
|
|
||||||
# Add white padding on all sides
|
# Add white padding on all sides
|
||||||
padded_image = cv2.copyMakeBorder(
|
padded_image = cv2.copyMakeBorder(
|
||||||
|
|||||||
@@ -890,8 +890,14 @@ class GLMOCREndToEndService(OCRServiceBase):
|
|||||||
|
|
||||||
# 3. OCR: per-region (parallel) or full-image fallback
|
# 3. OCR: per-region (parallel) or full-image fallback
|
||||||
if not layout_info.regions:
|
if not layout_info.regions:
|
||||||
raw_content = self._call_vllm(padded, _DEFAULT_PROMPT)
|
# No layout detected → assume it's a formula, use formula recognition
|
||||||
markdown_content = self._formatter._clean_content(raw_content)
|
logger.info("No layout regions detected, treating image as formula")
|
||||||
|
raw_content = self._call_vllm(padded, _TASK_PROMPTS["formula"])
|
||||||
|
# Format as display formula markdown
|
||||||
|
formatted_content = raw_content.strip()
|
||||||
|
if not (formatted_content.startswith("$$") and formatted_content.endswith("$$")):
|
||||||
|
formatted_content = f"$$\n{formatted_content}\n$$"
|
||||||
|
markdown_content = formatted_content
|
||||||
else:
|
else:
|
||||||
# Build task list for non-figure regions
|
# Build task list for non-figure regions
|
||||||
tasks = []
|
tasks = []
|
||||||
|
|||||||
Reference in New Issue
Block a user