From f8173f7c0a0d2701ac2fd1aaeae466bb05dfbafb Mon Sep 17 00:00:00 2001 From: liuyuanchuang Date: Tue, 10 Mar 2026 09:54:54 +0800 Subject: [PATCH] feat: optimize padding and formula fallback --- app/services/image_processor.py | 7 +++++-- app/services/ocr_service.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/app/services/image_processor.py b/app/services/image_processor.py index b57dff6..baf6f8a 100644 --- a/app/services/image_processor.py +++ b/app/services/image_processor.py @@ -104,7 +104,8 @@ class ImageProcessor: """Add whitespace padding around the image. Adds padding equal to padding_ratio * max(height, width) on each side. - This expands the image by approximately 30% total (15% on each side). + For small images (height < 80 or width < 500), uses reduced padding_ratio 0.2. + This expands the image by approximately 30% total (15% on each side) for normal images. Args: image: Input image as numpy array in BGR format. @@ -113,7 +114,9 @@ class ImageProcessor: Padded image as numpy array. """ height, width = image.shape[:2] - padding = int(max(height, width) * self.padding_ratio) + # Use smaller padding ratio for small images to preserve detail + padding_ratio = 0.2 if height < 80 or width < 500 else self.padding_ratio + padding = int(max(height, width) * padding_ratio) # Add white padding on all sides padded_image = cv2.copyMakeBorder( diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 321a483..3ccfb53 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -890,8 +890,14 @@ class GLMOCREndToEndService(OCRServiceBase): # 3. OCR: per-region (parallel) or full-image fallback if not layout_info.regions: - raw_content = self._call_vllm(padded, _DEFAULT_PROMPT) - markdown_content = self._formatter._clean_content(raw_content) + # No layout detected → assume it's a formula, use formula recognition + logger.info("No layout regions detected, treating image as formula") + raw_content = self._call_vllm(padded, _TASK_PROMPTS["formula"]) + # Format as display formula markdown + formatted_content = raw_content.strip() + if not (formatted_content.startswith("$$") and formatted_content.endswith("$$")): + formatted_content = f"$$\n{formatted_content}\n$$" + markdown_content = formatted_content else: # Build task list for non-figure regions tasks = []