feat: optimize padding and formula fallback

2026-03-10 09:54:54 +08:00
parent cff14904bf
commit f8173f7c0a
2 changed files with 13 additions and 4 deletions
--- a/app/services/image_processor.py
+++ b/app/services/image_processor.py
@@ -104,7 +104,8 @@ class ImageProcessor:
        """Add whitespace padding around the image.

        Adds padding equal to padding_ratio * max(height, width) on each side.
-        This expands the image by approximately 30% total (15% on each side).
+        For small images (height < 80 or width < 500), a fixed padding_ratio of 0.2 is used instead.
+        This expands the image by approximately 30% total (15% on each side) for normal images.

        Args:
            image: Input image as numpy array in BGR format.
@@ -113,7 +114,9 @@ class ImageProcessor:
            Padded image as numpy array.
        """
        height, width = image.shape[:2]
-        padding = int(max(height, width) * self.padding_ratio)
+        # Small images (height < 80 or width < 500) get a fixed 0.2 ratio instead of self.padding_ratio, to preserve detail
+        padding_ratio = 0.2 if height < 80 or width < 500 else self.padding_ratio
+        padding = int(max(height, width) * padding_ratio)

        # Add white padding on all sides
        padded_image = cv2.copyMakeBorder(
--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -890,8 +890,14 @@ class GLMOCREndToEndService(OCRServiceBase):

        # 3. OCR: per-region (parallel) or full-image fallback
        if not layout_info.regions:
-            raw_content = self._call_vllm(padded, _DEFAULT_PROMPT)
-            markdown_content = self._formatter._clean_content(raw_content)
+            # No layout detected → assume it's a formula, use formula recognition
+            logger.info("No layout regions detected, treating image as formula")
+            raw_content = self._call_vllm(padded, _TASK_PROMPTS["formula"])
+            # Format as display formula markdown
+            formatted_content = raw_content.strip()
+            if not (formatted_content.startswith("$$") and formatted_content.endswith("$$")):
+                formatted_content = f"$$\n{formatted_content}\n$$"
+            markdown_content = formatted_content
        else:
            # Build task list for non-figure regions
            tasks = []