feat: add padding

2026-02-07 16:53:09 +08:00
parent d86107976a
commit f514f98142
3 changed files with 50 additions and 21 deletions
--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -650,26 +650,16 @@ class MineruOCRService(OCRServiceBase):

        return formula_text

-    def recognize(self, image: np.ndarray) -> dict:
+    def recognize(self, image_bytes: BytesIO) -> dict:
        """Recognize content using local file_parse API.

        Args:
-            image: Input image as numpy array in BGR format.
+            image_bytes: Input image as a BytesIO object (already encoded as PNG).

        Returns:
            Dict with 'markdown', 'latex', 'mathml' keys.
        """
        try:
-            if self.image_processor and settings.is_padding:
-                image = self.image_processor.add_padding(image)
-
-            # Convert numpy array to image bytes
-            success, encoded_image = cv2.imencode(".png", image)
-            if not success:
-                raise RuntimeError("Failed to encode image")
-
-            image_bytes = BytesIO(encoded_image.tobytes())
-
            # Prepare multipart form data
            files = {"files": ("image.png", image_bytes, "image/png")}

@@ -731,5 +721,11 @@ if __name__ == "__main__":
    mineru_service = MineruOCRService()
    image = cv2.imread("test/formula2.jpg")
    image_numpy = np.array(image)
-    ocr_result = mineru_service.recognize(image_numpy)
+    # Encode the image to PNG bytes (as done in the API layer)
+    success, encoded_image = cv2.imencode(".png", image_numpy)
+    if not success:
+        raise RuntimeError("Failed to encode image")
+    image_bytes = BytesIO(encoded_image.tobytes())
+    image_bytes.seek(0)
+    ocr_result = mineru_service.recognize(image_bytes)
    print(ocr_result)