Files
doc_processer/app/services/image_processor.py

139 lines
4.1 KiB
Python
Raw Normal View History

2025-12-29 17:34:58 +08:00
"""Image preprocessing service using OpenCV."""
import base64
import io
from urllib.request import urlopen
import cv2
import numpy as np
from PIL import Image
from app.core.config import get_settings
# Application settings, fetched once when this module is imported.
# ImageProcessor reads ``image_padding_ratio`` from it as the default ratio.
settings = get_settings()
class ImageProcessor:
"""Service for image preprocessing operations."""
def __init__(self, padding_ratio: float | None = None):
"""Initialize with padding ratio.
Args:
padding_ratio: Ratio for padding on each side (default from settings).
0.15 means 15% padding on each side = 30% total expansion.
"""
self.padding_ratio = padding_ratio or settings.image_padding_ratio
def load_image_from_url(self, url: str) -> np.ndarray:
"""Load image from URL.
Args:
url: Image URL to fetch.
Returns:
Image as numpy array in BGR format.
Raises:
ValueError: If image cannot be loaded from URL.
"""
try:
with urlopen(url, timeout=30) as response:
image_data = response.read()
image = Image.open(io.BytesIO(image_data))
return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
except Exception as e:
raise ValueError(f"Failed to load image from URL: {e}") from e
def load_image_from_base64(self, base64_str: str) -> np.ndarray:
"""Load image from base64 string.
Args:
base64_str: Base64-encoded image data.
Returns:
Image as numpy array in BGR format.
Raises:
ValueError: If image cannot be decoded.
"""
try:
# Handle data URL format
if "," in base64_str:
base64_str = base64_str.split(",", 1)[1]
image_data = base64.b64decode(base64_str)
image = Image.open(io.BytesIO(image_data))
return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
except Exception as e:
raise ValueError(f"Failed to decode base64 image: {e}") from e
def add_padding(self, image: np.ndarray) -> np.ndarray:
"""Add whitespace padding around the image.
Adds padding equal to padding_ratio * max(height, width) on each side.
This expands the image by approximately 30% total (15% on each side).
Args:
image: Input image as numpy array in BGR format.
Returns:
Padded image as numpy array.
"""
height, width = image.shape[:2]
padding = int(max(height, width) * self.padding_ratio)
# Add white padding on all sides
padded_image = cv2.copyMakeBorder(
image,
top=padding,
bottom=padding,
left=padding,
right=padding,
borderType=cv2.BORDER_CONSTANT,
value=[255, 255, 255], # White
)
return padded_image
def preprocess(self, image_url: str | None, image_base64: str | None) -> np.ndarray:
"""Load and preprocess image with padding.
Args:
image_url: URL to fetch image from (optional).
image_base64: Base64-encoded image (optional).
Returns:
Preprocessed image with padding.
Raises:
ValueError: If neither input is provided or loading fails.
"""
if image_url:
image = self.load_image_from_url(image_url)
elif image_base64:
image = self.load_image_from_base64(image_base64)
else:
raise ValueError("Either image_url or image_base64 must be provided")
2025-12-31 17:38:32 +08:00
return image
2025-12-29 17:34:58 +08:00
def image_to_base64(self, image: np.ndarray, format: str = "PNG") -> str:
"""Convert numpy image to base64 string.
Args:
image: Image as numpy array in BGR format.
format: Output format (PNG, JPEG).
Returns:
Base64-encoded image string.
"""
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(image_rgb)
buffer = io.BytesIO()
pil_image.save(buffer, format=format)
buffer.seek(0)
return base64.b64encode(buffer.getvalue()).decode("utf-8")