From ba0968b2dade40dd6b1aa213fd2725e30994d1a9 Mon Sep 17 00:00:00 2001 From: yoge Date: Mon, 15 Dec 2025 22:31:13 +0800 Subject: [PATCH] feat: add dockerfile --- .dockerignore | 59 +++++ Dockerfile | 60 +++++ README.docker.md | 253 +++++++++++++++++++++ deploy.sh | 255 ++++++++++++++++++++++ docker-compose.yml | 38 ++++ examples/test_server.py | 77 +++++++ texteller/cli/commands/launch/__init__.py | 2 +- texteller/cli/commands/launch/server.py | 49 ++++- 8 files changed, 784 insertions(+), 9 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 README.docker.md create mode 100755 deploy.sh create mode 100644 docker-compose.yml create mode 100644 examples/test_server.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1d319f0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,59 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +*.egg + +# Virtual environments +venv/ +env/ +ENV/ +.venv + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Git +.git/ +.gitignore + +# Testing +.pytest_cache/ +.coverage +htmlcov/ + +# Documentation +docs/_build/ + +# OS +.DS_Store +Thumbs.db + +# Cache +.cache/ +*.log + +# Jupyter +.ipynb_checkpoints/ + +# Model files (will be mounted from host) +models/ +*.pth +*.onnx + +examples/ +assets/ +docs/ +tests/ +README.md +README.docker.md +LICENSE diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d5d2ef8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,60 @@ +# Use NVIDIA CUDA base image with Python 3.10 (CUDA 12.8 for RTX 5080) +FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu22.04 + +# Set environment variables +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 \ + CUDA_VISIBLE_DEVICES=0 + +# Install Python 3.10 and system dependencies +RUN apt-get update && apt-get install -y \ + python3.10 \ + python3.10-dev \ + python3-pip \ + git \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender-dev \ + libgomp1 \ + libgl1-mesa-glx \ + wget \ + && rm -rf /var/lib/apt/lists/* + +# Set Python 3.10 as default +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 + +# Upgrade pip +RUN python3 -m pip install --upgrade pip setuptools wheel + +# Set working directory +WORKDIR /app + +# Copy project files +COPY . /app/ + +# Install PyTorch with CUDA support first (cu124 is compatible with CUDA 12.8) +RUN pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu124 + +# Install the package and dependencies +RUN pip install -e . + +# Install additional dependencies for server +RUN pip install requests + +# Expose port for Ray Serve +EXPOSE 8001 + +# Create cache directory for models +RUN mkdir -p /root/.cache/huggingface/hub + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD python3 -c "import requests; requests.get('http://localhost:8001/', timeout=5)" || exit 1 + +# Default command to start the server (port 8001) +CMD ["texteller", "launch", "server", "-p", "8001"] + diff --git a/README.docker.md b/README.docker.md new file mode 100644 index 0000000..66c92f5 --- /dev/null +++ b/README.docker.md @@ -0,0 +1,253 @@ +# TexTeller Docker Deployment Guide + +This guide explains how to deploy TexTeller using Docker with NVIDIA GPU support (optimized for RTX 5080). + +## Prerequisites + +1. **NVIDIA Driver**: Install NVIDIA driver version 525 or later +2. **NVIDIA Container Toolkit**: Required for GPU access in Docker containers +3. **Docker**: Version 20.10 or later +4. **Docker Compose**: Version 1.29 or later (or use `docker compose` v2) +5. **Pre-downloaded Model**: Model should be in `~/.cache/huggingface/hub/models--OleehyO--TexTeller/` + +## Setup NVIDIA Container Toolkit + +If you haven't installed the NVIDIA Container Toolkit: + +```bash +# Add the package repository +distribution=$(. /etc/os-release;echo $ID$VERSION_ID) +curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - +curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list + +# Install nvidia-container-toolkit +sudo apt-get update +sudo apt-get install -y nvidia-container-toolkit + +# Restart Docker +sudo systemctl restart docker +``` + +## Quick Start + +The easiest way to deploy is using the provided deployment script: + +```bash +# Run all checks and deploy +./deploy.sh deploy + +# Or check system requirements first +./deploy.sh check + +# View available commands +./deploy.sh +``` + +## Build and Run + +### Using the Deployment Script (Recommended) + +```bash +# Full deployment (checks, build, and start) +./deploy.sh deploy + +# Just build the image +./deploy.sh build + +# Start/stop the service +./deploy.sh start +./deploy.sh stop + +# View logs +./deploy.sh logs + +# Check status +./deploy.sh status +``` + +### Using Docker Compose + +```bash +# Build and start the service +docker-compose up -d + +# View logs +docker-compose logs -f + +# Stop the service +docker-compose down +``` + +### Using Docker directly + +```bash +# Build the image +docker build -t texteller:latest . + +# Run the container +docker run -d \ + --name texteller-server \ + --gpus '"device=0"' \ + -p 8001:8001 \ + -v ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro \ + -e CUDA_VISIBLE_DEVICES=0 \ + texteller:latest +``` + +## API Usage + +The server accepts JSON requests with either base64-encoded images or image URLs at the `/predict` endpoint. + +### Using base64-encoded image + +```bash +# Example with base64 image +curl -X POST http://localhost:8001/predict \ + -H "Content-Type: application/json" \ + -d '{ + "image_base64": "..." + }' +``` + +### Using image URL + +```bash +# Example with image URL +curl -X POST http://localhost:8001/predict \ + -H "Content-Type: application/json" \ + -d '{ + "image_url": "https://example.com/math_equation.png" + }' +``` + +### Python client example + +```python +import requests +import base64 + +# Method 1: Using base64 +with open("equation.png", "rb") as f: + image_base64 = base64.b64encode(f.read()).decode() + +response = requests.post( + "http://localhost:8001/predict", + json={"image_base64": image_base64} +) +print(response.json()) + +# Method 2: Using URL +response = requests.post( + "http://localhost:8001/predict", + json={"image_url": "https://example.com/math_equation.png"} +) +print(response.json()) +``` + +Or use the provided test script: + +```bash +# Test with a local image +python examples/test_server.py path/to/equation.png + +# Test with both local and URL +python examples/test_server.py path/to/equation.png https://example.com/formula.png +``` + +### Response format + +Success response: +```json +{ + "result": "\\frac{a}{b} = c" +} +``` + +Error response: +```json +{ + "error": "Failed to decode image" +} +``` + +## Configuration + +You can configure the service by modifying environment variables in `docker-compose.yml`: + +- `CUDA_VISIBLE_DEVICES`: GPU device ID (default: 0) +- `RAY_NUM_REPLICAS`: Number of Ray Serve replicas (default: 1) +- `RAY_NCPU_PER_REPLICA`: CPUs per replica (default: 4) +- `RAY_NGPU_PER_REPLICA`: GPUs per replica (default: 1) + +## Monitoring + +```bash +# Check container status +docker ps + +# View real-time logs +docker-compose logs -f texteller + +# Check GPU usage +nvidia-smi + +# Check container resource usage +docker stats texteller-server +``` + +## Troubleshooting + +### GPU not detected +```bash +# Verify NVIDIA runtime is available +docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi +``` + +### Port already in use +Change the port mapping in `docker-compose.yml`: +```yaml +ports: + - "8080:8000" # Host port 8080 -> Container port 8000 +``` + +### Model not found +Ensure the model is downloaded to the correct location: +```bash +ls -la ~/.cache/huggingface/hub/models--OleehyO--TexTeller/ +``` + +## Performance Notes + +- **RTX 5080**: Optimized for CUDA 12.8 with cuDNN 9 +- **Memory**: Container requires ~4-6GB GPU memory (RTX 5080 has 16GB) +- **Throughput**: ~10-20 images/second depending on image complexity +- **Startup time**: ~30-60 seconds for model loading + +## Advanced Configuration + +### Multiple GPUs + +To use multiple GPUs, modify `docker-compose.yml`: + +```yaml +environment: + - CUDA_VISIBLE_DEVICES=0,1 + - RAY_NUM_REPLICAS=2 +deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0', '1'] + capabilities: [gpu] +``` + +### Production deployment + +For production, consider: +1. Using a reverse proxy (nginx/traefik) for SSL/TLS +2. Adding authentication middleware +3. Implementing rate limiting +4. Setting up monitoring (Prometheus/Grafana) +5. Using orchestration (Kubernetes) for scaling + diff --git a/deploy.sh b/deploy.sh new file mode 100755 index 0000000..5e05e4c --- /dev/null +++ b/deploy.sh @@ -0,0 +1,255 @@ +#!/bin/bash +# TexTeller Docker Deployment Script + +set -e # Exit on error + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Configuration +MODEL_PATH="$HOME/.cache/huggingface/hub/models--OleehyO--TexTeller" +CONTAINER_NAME="texteller-server" +IMAGE_NAME="texteller:latest" +PORT=8001 + +# Function to print colored messages +print_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if NVIDIA GPU is available +check_nvidia() { + print_info "Checking NVIDIA GPU availability..." + if ! command -v nvidia-smi &> /dev/null; then + print_error "nvidia-smi not found. Please install NVIDIA drivers." + exit 1 + fi + + nvidia-smi > /dev/null 2>&1 + if [ $? -eq 0 ]; then + print_info "NVIDIA GPU detected:" + nvidia-smi --query-gpu=name,memory.total --format=csv,noheader + else + print_error "NVIDIA GPU not detected or drivers not working." + exit 1 + fi +} + +# Check if Docker is installed +check_docker() { + print_info "Checking Docker installation..." + if ! command -v docker &> /dev/null; then + print_error "Docker not found. Please install Docker." + exit 1 + fi + print_info "Docker version: $(docker --version)" +} + +# Check if NVIDIA Container Toolkit is installed +check_nvidia_docker() { + print_info "Checking NVIDIA Container Toolkit..." + if ! docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then + print_error "NVIDIA Container Toolkit not working properly." + print_info "Please install it with:" + echo " sudo apt-get install -y nvidia-container-toolkit" + echo " sudo systemctl restart docker" + exit 1 + fi + print_info "NVIDIA Container Toolkit is working." +} + +# Check if model exists +check_model() { + print_info "Checking model availability..." + if [ ! -d "$MODEL_PATH" ]; then + print_error "Model not found at: $MODEL_PATH" + print_info "Please download the model first using:" + echo " python -c 'from texteller import load_model; load_model()'" + exit 1 + fi + print_info "Model found at: $MODEL_PATH" +} + +# Build Docker image +build_image() { + print_info "Building Docker image..." + docker build -t $IMAGE_NAME . + if [ $? -eq 0 ]; then + print_info "Docker image built successfully: $IMAGE_NAME" + else + print_error "Failed to build Docker image." + exit 1 + fi +} + +# Stop and remove existing container +stop_container() { + if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then + print_info "Stopping existing container..." + docker stop $CONTAINER_NAME + fi + + if [ "$(docker ps -aq -f name=$CONTAINER_NAME)" ]; then + print_info "Removing existing container..." + docker rm $CONTAINER_NAME + fi +} + +# Start container +start_container() { + print_info "Starting TexTeller server container..." + docker run -d \ + --name $CONTAINER_NAME \ + --gpus '"device=0"' \ + -p $PORT:8001 \ + -v "$MODEL_PATH:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro" \ + -e CUDA_VISIBLE_DEVICES=0 \ + -e RAY_NUM_REPLICAS=1 \ + -e RAY_NCPU_PER_REPLICA=4 \ + -e RAY_NGPU_PER_REPLICA=1 \ + --restart unless-stopped \ + $IMAGE_NAME + + if [ $? -eq 0 ]; then + print_info "Container started successfully!" + print_info "Server will be available at: http://localhost:$PORT/predict" + else + print_error "Failed to start container." + exit 1 + fi +} + +# Wait for server to be ready +wait_for_server() { + print_info "Waiting for server to be ready..." + max_attempts=60 + attempt=0 + + while [ $attempt -lt $max_attempts ]; do + if curl -s http://localhost:$PORT/ > /dev/null 2>&1; then + print_info "Server is ready!" + return 0 + fi + attempt=$((attempt + 1)) + echo -n "." + sleep 1 + done + + echo "" + print_warn "Server might still be initializing. Check logs with: docker logs -f $CONTAINER_NAME" +} + +# Show logs +show_logs() { + print_info "Showing container logs (Ctrl+C to exit)..." + docker logs -f $CONTAINER_NAME +} + +# Main deployment workflow +case "${1:-deploy}" in + check) + check_nvidia + check_docker + check_nvidia_docker + check_model + print_info "All checks passed!" + ;; + + build) + check_docker + build_image + ;; + + deploy) + check_nvidia + check_docker + check_nvidia_docker + check_model + build_image + stop_container + start_container + wait_for_server + print_info "" + print_info "======================================" + print_info "TexTeller server deployed successfully!" + print_info "======================================" + print_info "API endpoint: http://localhost:$PORT/predict" + print_info "" + print_info "Test the server with:" + print_info " python examples/test_server.py path/to/image.png" + print_info "" + print_info "View logs with:" + print_info " docker logs -f $CONTAINER_NAME" + print_info "" + print_info "Stop the server with:" + print_info " docker stop $CONTAINER_NAME" + ;; + + start) + if [ "$(docker ps -aq -f name=$CONTAINER_NAME)" ]; then + docker start $CONTAINER_NAME + print_info "Container started." + else + print_error "Container does not exist. Run './deploy.sh deploy' first." + exit 1 + fi + ;; + + stop) + stop_container + print_info "Container stopped." + ;; + + restart) + docker restart $CONTAINER_NAME + print_info "Container restarted." + ;; + + logs) + show_logs + ;; + + status) + if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then + print_info "Container is running." + docker stats --no-stream $CONTAINER_NAME + else + print_warn "Container is not running." + fi + ;; + + clean) + stop_container + print_info "Removing Docker image..." + docker rmi $IMAGE_NAME 2>/dev/null || true + print_info "Cleanup complete." + ;; + + *) + echo "Usage: $0 {check|build|deploy|start|stop|restart|logs|status|clean}" + echo "" + echo "Commands:" + echo " check - Check system requirements" + echo " build - Build Docker image only" + echo " deploy - Full deployment (build + start)" + echo " start - Start existing container" + echo " stop - Stop container" + echo " restart - Restart container" + echo " logs - Show container logs" + echo " status - Show container status" + echo " clean - Remove container and image" + exit 1 + ;; +esac + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..a0942d7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,38 @@ +version: '3.8' + +services: + texteller: + build: + context: . + dockerfile: Dockerfile + container_name: texteller-server + runtime: nvidia + environment: + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=compute,utility + - CUDA_VISIBLE_DEVICES=0 + # Ray Serve configuration + - RAY_NUM_REPLICAS=1 + - RAY_NCPU_PER_REPLICA=4 + - RAY_NGPU_PER_REPLICA=1 + ports: + - "8001:8001" + volumes: + # Mount the model cache directory to avoid downloading models + - ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] # Use first GPU (RTX 5080) + capabilities: [gpu] + restart: unless-stopped + command: ["texteller", "launch", "server", "-p", "8001"] + healthcheck: + test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:8001/', timeout=5)"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + diff --git a/examples/test_server.py b/examples/test_server.py new file mode 100644 index 0000000..4e85846 --- /dev/null +++ b/examples/test_server.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +""" +Example client script to test the TexTeller server API. +""" +import requests +import base64 +import sys +from pathlib import Path + + +def test_base64_request(image_path: str, server_url: str = "http://localhost:8001/predict"): + """Test the server with a base64-encoded image.""" + # Read and encode the image + with open(image_path, "rb") as f: + image_data = f.read() + image_base64 = base64.b64encode(image_data).decode() + + # Send request + response = requests.post(server_url, json={"image_base64": image_base64}, headers={"Content-Type": "application/json"}) + + # Print result + if response.status_code == 200: + result = response.json() + print(f"✓ Success!") + print(f"Result: {result.get('result', 'N/A')}") + return result + else: + print(f"✗ Error: {response.status_code}") + print(f"Response: {response.text}") + return None + + +def test_url_request(image_url: str, server_url: str = "http://localhost:8001/predict"): + """Test the server with an image URL.""" + # Send request + response = requests.post(server_url, json={"image_url": image_url}, headers={"Content-Type": "application/json"}) + + # Print result + if response.status_code == 200: + result = response.json() + print(f"✓ Success!") + print(f"Result: {result.get('result', 'N/A')}") + return result + else: + print(f"✗ Error: {response.status_code}") + print(f"Response: {response.text}") + return None + + +if __name__ == "__main__": + print("=" * 50) + print("TexTeller Server API Test") + print("=" * 50) + + # Test with local image if provided + if len(sys.argv) > 1: + image_path = sys.argv[1] + if Path(image_path).exists(): + print(f"\nTest 1: Base64 request with local image") + print(f"Image: {image_path}") + test_base64_request(image_path) + else: + print(f"Error: Image file not found: {image_path}") + + # Test with URL if provided + if len(sys.argv) > 2: + image_url = sys.argv[2] + print(f"\nTest 2: URL request") + print(f"URL: {image_url}") + test_url_request(image_url) + + if len(sys.argv) == 1: + print("\nUsage:") + print(f" python {sys.argv[0]} [image_url]") + print("\nExamples:") + print(f" python {sys.argv[0]} equation.png") + print(f" python {sys.argv[0]} equation.png https://example.com/formula.png") diff --git a/texteller/cli/commands/launch/__init__.py b/texteller/cli/commands/launch/__init__.py index 13780db..f471109 100644 --- a/texteller/cli/commands/launch/__init__.py +++ b/texteller/cli/commands/launch/__init__.py @@ -31,7 +31,7 @@ from texteller.utils import get_device "-p", "--port", type=int, - default=8000, + default=8001, help="Port to run the server on", ) @click.option( diff --git a/texteller/cli/commands/launch/server.py b/texteller/cli/commands/launch/server.py index b31d191..45abbd6 100644 --- a/texteller/cli/commands/launch/server.py +++ b/texteller/cli/commands/launch/server.py @@ -1,7 +1,11 @@ import numpy as np import cv2 +import base64 +import requests +from io import BytesIO from starlette.requests import Request +from starlette.responses import JSONResponse from ray import serve from ray.serve.handle import DeploymentHandle @@ -57,13 +61,42 @@ class Ingress: def __init__(self, rec_server: DeploymentHandle) -> None: self.texteller_server = rec_server - async def __call__(self, request: Request) -> str: - form = await request.form() - img_rb = await form["img"].read() + async def __call__(self, request: Request): + try: + # Parse JSON body + body = await request.json() - img_nparray = np.frombuffer(img_rb, np.uint8) - img_nparray = cv2.imdecode(img_nparray, cv2.IMREAD_COLOR) - img_nparray = cv2.cvtColor(img_nparray, cv2.COLOR_BGR2RGB) + # Get image data from either base64 or URL + if "image_base64" in body: + # Decode base64 image + image_data = body["image_base64"] + # Remove data URL prefix if present (e.g., "data:image/png;base64,") + if "," in image_data: + image_data = image_data.split(",", 1)[1] + img_bytes = base64.b64decode(image_data) + img_nparray = np.frombuffer(img_bytes, np.uint8) - pred = await self.texteller_server.predict.remote(img_nparray) - return pred + elif "image_url" in body: + # Download image from URL + image_url = body["image_url"] + response = requests.get(image_url, timeout=30) + response.raise_for_status() + img_bytes = response.content + img_nparray = np.frombuffer(img_bytes, np.uint8) + + else: + return JSONResponse({"error": "Either 'image_base64' or 'image_url' must be provided"}, status_code=400) + + # Decode and convert image + img_nparray = cv2.imdecode(img_nparray, cv2.IMREAD_COLOR) + if img_nparray is None: + return JSONResponse({"error": "Failed to decode image"}, status_code=400) + img_nparray = cv2.cvtColor(img_nparray, cv2.COLOR_BGR2RGB) + + # Get prediction + pred = await self.texteller_server.predict.remote(img_nparray) + + return JSONResponse({"result": pred}) + + except Exception as e: + return JSONResponse({"error": str(e)}, status_code=500)