Files
doc_processer/app/services/image_processor.py
liuyuanchuang 874fd383cc init repo
2025-12-29 17:34:58 +08:00

140 lines
4.2 KiB
Python

"""Image preprocessing service using OpenCV."""
import base64
import io
from urllib.request import urlopen
import cv2
import numpy as np
from PIL import Image
from app.core.config import get_settings
# Module-level settings object, fetched once when this module is imported.
# ImageProcessor reads `image_padding_ratio` from it as the default padding.
settings = get_settings()
class ImageProcessor:
"""Service for image preprocessing operations."""
def __init__(self, padding_ratio: float | None = None):
"""Initialize with padding ratio.
Args:
padding_ratio: Ratio for padding on each side (default from settings).
0.15 means 15% padding on each side = 30% total expansion.
"""
self.padding_ratio = padding_ratio or settings.image_padding_ratio
def load_image_from_url(self, url: str) -> np.ndarray:
"""Load image from URL.
Args:
url: Image URL to fetch.
Returns:
Image as numpy array in BGR format.
Raises:
ValueError: If image cannot be loaded from URL.
"""
try:
with urlopen(url, timeout=30) as response:
image_data = response.read()
image = Image.open(io.BytesIO(image_data))
return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
except Exception as e:
raise ValueError(f"Failed to load image from URL: {e}") from e
def load_image_from_base64(self, base64_str: str) -> np.ndarray:
"""Load image from base64 string.
Args:
base64_str: Base64-encoded image data.
Returns:
Image as numpy array in BGR format.
Raises:
ValueError: If image cannot be decoded.
"""
try:
# Handle data URL format
if "," in base64_str:
base64_str = base64_str.split(",", 1)[1]
image_data = base64.b64decode(base64_str)
image = Image.open(io.BytesIO(image_data))
return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
except Exception as e:
raise ValueError(f"Failed to decode base64 image: {e}") from e
def add_padding(self, image: np.ndarray) -> np.ndarray:
"""Add whitespace padding around the image.
Adds padding equal to padding_ratio * max(height, width) on each side.
This expands the image by approximately 30% total (15% on each side).
Args:
image: Input image as numpy array in BGR format.
Returns:
Padded image as numpy array.
"""
height, width = image.shape[:2]
padding = int(max(height, width) * self.padding_ratio)
# Add white padding on all sides
padded_image = cv2.copyMakeBorder(
image,
top=padding,
bottom=padding,
left=padding,
right=padding,
borderType=cv2.BORDER_CONSTANT,
value=[255, 255, 255], # White
)
return padded_image
def preprocess(self, image_url: str | None, image_base64: str | None) -> np.ndarray:
"""Load and preprocess image with padding.
Args:
image_url: URL to fetch image from (optional).
image_base64: Base64-encoded image (optional).
Returns:
Preprocessed image with padding.
Raises:
ValueError: If neither input is provided or loading fails.
"""
if image_url:
image = self.load_image_from_url(image_url)
elif image_base64:
image = self.load_image_from_base64(image_base64)
else:
raise ValueError("Either image_url or image_base64 must be provided")
return self.add_padding(image)
def image_to_base64(self, image: np.ndarray, format: str = "PNG") -> str:
"""Convert numpy image to base64 string.
Args:
image: Image as numpy array in BGR format.
format: Output format (PNG, JPEG).
Returns:
Base64-encoded image string.
"""
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(image_rgb)
buffer = io.BytesIO()
pil_image.save(buffer, format=format)
buffer.seek(0)
return base64.b64encode(buffer.getvalue()).decode("utf-8")