feat: add padding

This commit is contained in:
liuyuanchuang
2026-02-07 16:53:09 +08:00
parent d86107976a
commit f514f98142
3 changed files with 50 additions and 21 deletions

View File

@@ -650,26 +650,16 @@ class MineruOCRService(OCRServiceBase):
return formula_text
def recognize(self, image: np.ndarray) -> dict:
def recognize(self, image_bytes: BytesIO) -> dict:
"""Recognize content using local file_parse API.
Args:
image: Input image as numpy array in BGR format.
image_bytes: Input image as BytesIO object (already encoded as PNG).
Returns:
Dict with 'markdown', 'latex', 'mathml' keys.
"""
try:
if self.image_processor and settings.is_padding:
image = self.image_processor.add_padding(image)
# Convert numpy array to image bytes
success, encoded_image = cv2.imencode(".png", image)
if not success:
raise RuntimeError("Failed to encode image")
image_bytes = BytesIO(encoded_image.tobytes())
# Prepare multipart form data
files = {"files": ("image.png", image_bytes, "image/png")}
@@ -731,5 +721,11 @@ if __name__ == "__main__":
mineru_service = MineruOCRService()
image = cv2.imread("test/formula2.jpg")
image_numpy = np.array(image)
ocr_result = mineru_service.recognize(image_numpy)
# Encode image to bytes (as done in API layer)
success, encoded_image = cv2.imencode(".png", image_numpy)
if not success:
raise RuntimeError("Failed to encode image")
image_bytes = BytesIO(encoded_image.tobytes())
image_bytes.seek(0)
ocr_result = mineru_service.recognize(image_bytes)
print(ocr_result)