diff --git a/app/services/image_processor.py b/app/services/image_processor.py index b57dff6..baf6f8a 100644 --- a/app/services/image_processor.py +++ b/app/services/image_processor.py @@ -104,7 +104,8 @@ class ImageProcessor: """Add whitespace padding around the image. Adds padding equal to padding_ratio * max(height, width) on each side. - This expands the image by approximately 30% total (15% on each side). + For small images (height < 80 or width < 500), a fixed padding_ratio of 0.2 (20% on each side) is used instead. + This expands the image by approximately 30% total (15% on each side) for normal images. Args: image: Input image as numpy array in BGR format. @@ -113,7 +114,9 @@ class ImageProcessor: Padded image as numpy array. """ height, width = image.shape[:2] - padding = int(max(height, width) * self.padding_ratio) + # Use a fixed 0.2 padding ratio for small images to preserve detail (NOTE(review): larger than the ~0.15 default the docstring implies; confirm intent) + padding_ratio = 0.2 if height < 80 or width < 500 else self.padding_ratio + padding = int(max(height, width) * padding_ratio) # Add white padding on all sides padded_image = cv2.copyMakeBorder( diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 321a483..3ccfb53 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -890,8 +890,14 @@ class GLMOCREndToEndService(OCRServiceBase): # 3. 
OCR: per-region (parallel) or full-image fallback if not layout_info.regions: - raw_content = self._call_vllm(padded, _DEFAULT_PROMPT) - markdown_content = self._formatter._clean_content(raw_content) + # No layout detected → assume it's a formula, use formula recognition + logger.info("No layout regions detected, treating image as formula") + raw_content = self._call_vllm(padded, _TASK_PROMPTS["formula"]) + # Format as display formula markdown + formatted_content = raw_content.strip() + if not (formatted_content.startswith("$$") and formatted_content.endswith("$$")): + formatted_content = f"$$\n{formatted_content}\n$$" + markdown_content = formatted_content else: # Build task list for non-figure regions tasks = []