feat: add dockerfile
.dockerignore (Normal file, 59 lines)
@@ -0,0 +1,59 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
dist/
build/
*.egg

# Virtual environments
venv/
env/
ENV/
.venv

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~

# Git
.git/
.gitignore

# Testing
.pytest_cache/
.coverage
htmlcov/

# Documentation
docs/_build/

# OS
.DS_Store
Thumbs.db

# Cache
.cache/
*.log

# Jupyter
.ipynb_checkpoints/

# Model files (will be mounted from host)
models/
*.pth
*.onnx

examples/
assets/
docs/
tests/
README.md
README.docker.md
LICENSE
Dockerfile (Normal file, 60 lines)
@@ -0,0 +1,60 @@
# Use NVIDIA CUDA base image with Python 3.10 (CUDA 12.8 for RTX 5080)
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu22.04

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    CUDA_VISIBLE_DEVICES=0

# Install Python 3.10 and system dependencies
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3.10-dev \
    python3-pip \
    git \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    libgl1-mesa-glx \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.10 as default
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
    update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1

# Upgrade pip
RUN python3 -m pip install --upgrade pip setuptools wheel

# Set working directory
WORKDIR /app

# Copy project files
COPY . /app/

# Install PyTorch with CUDA support first (cu124 is compatible with CUDA 12.8)
RUN pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu124

# Install the package and dependencies
RUN pip install -e .

# Install additional dependencies for the server
RUN pip install requests

# Expose port for Ray Serve
EXPOSE 8001

# Create cache directory for models
RUN mkdir -p /root/.cache/huggingface/hub

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python3 -c "import requests; requests.get('http://localhost:8001/', timeout=5)" || exit 1

# Default command to start the server (port 8001)
CMD ["texteller", "launch", "server", "-p", "8001"]
README.docker.md (Normal file, 253 lines)
@@ -0,0 +1,253 @@
# TexTeller Docker Deployment Guide

This guide explains how to deploy TexTeller using Docker with NVIDIA GPU support (optimized for an RTX 5080).

## Prerequisites

1. **NVIDIA Driver**: version 525 or later
2. **NVIDIA Container Toolkit**: required for GPU access inside Docker containers
3. **Docker**: version 20.10 or later
4. **Docker Compose**: version 1.29 or later (or the `docker compose` v2 plugin)
5. **Pre-downloaded model**: the TexTeller model should already be present in `~/.cache/huggingface/hub/models--OleehyO--TexTeller/`
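
A quick way to verify the first three prerequisites from a shell (these are standard commands; the versions reported will of course vary):

```bash
# Driver and GPU visibility
nvidia-smi --query-gpu=name,driver_version --format=csv,noheader

# Docker and Compose versions
docker --version
docker compose version || docker-compose --version

# Model cache (should contain the downloaded TexTeller snapshot)
ls ~/.cache/huggingface/hub/models--OleehyO--TexTeller/
```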

## Setup NVIDIA Container Toolkit

If you haven't installed the NVIDIA Container Toolkit:

```bash
# Add the package repository
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list

# Install nvidia-container-toolkit
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit

# Restart Docker
sudo systemctl restart docker
```
## Quick Start

The easiest way to deploy is using the provided deployment script:

```bash
# Run all checks and deploy
./deploy.sh deploy

# Or check system requirements first
./deploy.sh check

# View available commands
./deploy.sh
```
## Build and Run

### Using the Deployment Script (Recommended)

```bash
# Full deployment (checks, build, and start)
./deploy.sh deploy

# Just build the image
./deploy.sh build

# Start/stop the service
./deploy.sh start
./deploy.sh stop

# View logs
./deploy.sh logs

# Check status
./deploy.sh status
```

### Using Docker Compose

```bash
# Build and start the service
docker-compose up -d

# View logs
docker-compose logs -f

# Stop the service
docker-compose down
```

### Using Docker directly

```bash
# Build the image
docker build -t texteller:latest .

# Run the container
docker run -d \
  --name texteller-server \
  --gpus '"device=0"' \
  -p 8001:8001 \
  -v ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro \
  -e CUDA_VISIBLE_DEVICES=0 \
  texteller:latest
```
## API Usage

The server accepts JSON requests at the `/predict` endpoint, with either a base64-encoded image or an image URL.

### Using a base64-encoded image

```bash
# Example with a base64 image
curl -X POST http://localhost:8001/predict \
  -H "Content-Type: application/json" \
  -d '{
    "image_base64": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA..."
  }'
```

### Using an image URL

```bash
# Example with an image URL
curl -X POST http://localhost:8001/predict \
  -H "Content-Type: application/json" \
  -d '{
    "image_url": "https://example.com/math_equation.png"
  }'
```

### Python client example

```python
import requests
import base64

# Method 1: Using base64
with open("equation.png", "rb") as f:
    image_base64 = base64.b64encode(f.read()).decode()

response = requests.post(
    "http://localhost:8001/predict",
    json={"image_base64": image_base64}
)
print(response.json())

# Method 2: Using URL
response = requests.post(
    "http://localhost:8001/predict",
    json={"image_url": "https://example.com/math_equation.png"}
)
print(response.json())
```

Or use the provided test script:

```bash
# Test with a local image
python examples/test_server.py path/to/equation.png

# Test with both a local image and a URL
python examples/test_server.py path/to/equation.png https://example.com/formula.png
```

### Response format

Success response:
```json
{
  "result": "\\frac{a}{b} = c"
}
```

Error response:
```json
{
  "error": "Failed to decode image"
}
```
## Configuration

You can configure the service by modifying environment variables in `docker-compose.yml`:

- `CUDA_VISIBLE_DEVICES`: GPU device ID (default: 0)
- `RAY_NUM_REPLICAS`: number of Ray Serve replicas (default: 1)
- `RAY_NCPU_PER_REPLICA`: CPUs per replica (default: 4)
- `RAY_NGPU_PER_REPLICA`: GPUs per replica (default: 1)
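
If you start the container with `docker run` instead of Compose, the same variables can be passed with `-e`; the values below simply mirror the defaults used by `deploy.sh`:

```bash
# Override Ray Serve settings without Compose (defaults shown)
docker run -d \
  --name texteller-server \
  --gpus '"device=0"' \
  -p 8001:8001 \
  -v ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro \
  -e CUDA_VISIBLE_DEVICES=0 \
  -e RAY_NUM_REPLICAS=1 \
  -e RAY_NCPU_PER_REPLICA=4 \
  -e RAY_NGPU_PER_REPLICA=1 \
  texteller:latest
```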
## Monitoring

```bash
# Check container status
docker ps

# View real-time logs
docker-compose logs -f texteller

# Check GPU usage
nvidia-smi

# Check container resource usage
docker stats texteller-server
```
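
The container's built-in health check is just a plain GET against the server root (see the `HEALTHCHECK` in the Dockerfile); the same probe can be run from the host, which is what `deploy.sh` does while waiting for startup:

```bash
# Any HTTP response (even an error body) means the server process is up
curl -s -o /dev/null http://localhost:8001/ && echo "server is up" || echo "server not ready"
```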
## Troubleshooting

### GPU not detected
```bash
# Verify the NVIDIA runtime is available
docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
```

### Port already in use
Change the port mapping in `docker-compose.yml`:
```yaml
ports:
  - "8080:8001"  # Host port 8080 -> container port 8001
```

### Model not found
Ensure the model is downloaded to the correct location:
```bash
ls -la ~/.cache/huggingface/hub/models--OleehyO--TexTeller/
```
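
If the directory is missing, download the model first with the same command `deploy.sh` prints in that situation (this assumes the `texteller` package exposes `load_model`, as the script does):

```bash
python -c 'from texteller import load_model; load_model()'
```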

## Performance Notes

- **RTX 5080**: optimized for CUDA 12.8 with cuDNN 9
- **Memory**: the container needs roughly 4-6 GB of GPU memory (the RTX 5080 has 16 GB)
- **Throughput**: roughly 10-20 images/second, depending on image complexity
- **Startup time**: about 30-60 seconds for model loading
## Advanced Configuration

### Multiple GPUs

To use multiple GPUs, modify `docker-compose.yml`:

```yaml
environment:
  - CUDA_VISIBLE_DEVICES=0,1
  - RAY_NUM_REPLICAS=2
deploy:
  resources:
    reservations:
      devices:
        - driver: nvidia
          device_ids: ['0', '1']
          capabilities: [gpu]
```

### Production deployment

For production, consider:
1. Using a reverse proxy (nginx/traefik) for SSL/TLS
2. Adding authentication middleware
3. Implementing rate limiting
4. Setting up monitoring (Prometheus/Grafana)
5. Using orchestration (Kubernetes) for scaling
deploy.sh (Executable file, 255 lines)
@@ -0,0 +1,255 @@
#!/bin/bash
# TexTeller Docker Deployment Script

set -e  # Exit on error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'  # No Color

# Configuration
MODEL_PATH="$HOME/.cache/huggingface/hub/models--OleehyO--TexTeller"
CONTAINER_NAME="texteller-server"
IMAGE_NAME="texteller:latest"
PORT=8001

# Functions to print colored messages
print_info() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

print_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check if NVIDIA GPU is available
check_nvidia() {
    print_info "Checking NVIDIA GPU availability..."
    if ! command -v nvidia-smi &> /dev/null; then
        print_error "nvidia-smi not found. Please install NVIDIA drivers."
        exit 1
    fi

    # Run nvidia-smi inside the condition so `set -e` does not abort before we can report the error
    if nvidia-smi > /dev/null 2>&1; then
        print_info "NVIDIA GPU detected:"
        nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
    else
        print_error "NVIDIA GPU not detected or drivers not working."
        exit 1
    fi
}

# Check if Docker is installed
check_docker() {
    print_info "Checking Docker installation..."
    if ! command -v docker &> /dev/null; then
        print_error "Docker not found. Please install Docker."
        exit 1
    fi
    print_info "Docker version: $(docker --version)"
}

# Check if NVIDIA Container Toolkit is installed
check_nvidia_docker() {
    print_info "Checking NVIDIA Container Toolkit..."
    if ! docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
        print_error "NVIDIA Container Toolkit not working properly."
        print_info "Please install it with:"
        echo "  sudo apt-get install -y nvidia-container-toolkit"
        echo "  sudo systemctl restart docker"
        exit 1
    fi
    print_info "NVIDIA Container Toolkit is working."
}

# Check if model exists
check_model() {
    print_info "Checking model availability..."
    if [ ! -d "$MODEL_PATH" ]; then
        print_error "Model not found at: $MODEL_PATH"
        print_info "Please download the model first using:"
        echo "  python -c 'from texteller import load_model; load_model()'"
        exit 1
    fi
    print_info "Model found at: $MODEL_PATH"
}

# Build Docker image
build_image() {
    print_info "Building Docker image..."
    docker build -t $IMAGE_NAME .
    if [ $? -eq 0 ]; then
        print_info "Docker image built successfully: $IMAGE_NAME"
    else
        print_error "Failed to build Docker image."
        exit 1
    fi
}

# Stop and remove existing container
stop_container() {
    if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then
        print_info "Stopping existing container..."
        docker stop $CONTAINER_NAME
    fi

    if [ "$(docker ps -aq -f name=$CONTAINER_NAME)" ]; then
        print_info "Removing existing container..."
        docker rm $CONTAINER_NAME
    fi
}

# Start container
start_container() {
    print_info "Starting TexTeller server container..."
    docker run -d \
        --name $CONTAINER_NAME \
        --gpus '"device=0"' \
        -p $PORT:8001 \
        -v "$MODEL_PATH:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro" \
        -e CUDA_VISIBLE_DEVICES=0 \
        -e RAY_NUM_REPLICAS=1 \
        -e RAY_NCPU_PER_REPLICA=4 \
        -e RAY_NGPU_PER_REPLICA=1 \
        --restart unless-stopped \
        $IMAGE_NAME

    if [ $? -eq 0 ]; then
        print_info "Container started successfully!"
        print_info "Server will be available at: http://localhost:$PORT/predict"
    else
        print_error "Failed to start container."
        exit 1
    fi
}

# Wait for server to be ready
wait_for_server() {
    print_info "Waiting for server to be ready..."
    max_attempts=60
    attempt=0

    while [ $attempt -lt $max_attempts ]; do
        if curl -s http://localhost:$PORT/ > /dev/null 2>&1; then
            print_info "Server is ready!"
            return 0
        fi
        attempt=$((attempt + 1))
        echo -n "."
        sleep 1
    done

    echo ""
    print_warn "Server might still be initializing. Check logs with: docker logs -f $CONTAINER_NAME"
}

# Show logs
show_logs() {
    print_info "Showing container logs (Ctrl+C to exit)..."
    docker logs -f $CONTAINER_NAME
}

# Main deployment workflow
case "${1:-deploy}" in
    check)
        check_nvidia
        check_docker
        check_nvidia_docker
        check_model
        print_info "All checks passed!"
        ;;

    build)
        check_docker
        build_image
        ;;

    deploy)
        check_nvidia
        check_docker
        check_nvidia_docker
        check_model
        build_image
        stop_container
        start_container
        wait_for_server
        print_info ""
        print_info "======================================"
        print_info "TexTeller server deployed successfully!"
        print_info "======================================"
        print_info "API endpoint: http://localhost:$PORT/predict"
        print_info ""
        print_info "Test the server with:"
        print_info "  python examples/test_server.py path/to/image.png"
        print_info ""
        print_info "View logs with:"
        print_info "  docker logs -f $CONTAINER_NAME"
        print_info ""
        print_info "Stop the server with:"
        print_info "  docker stop $CONTAINER_NAME"
        ;;

    start)
        if [ "$(docker ps -aq -f name=$CONTAINER_NAME)" ]; then
            docker start $CONTAINER_NAME
            print_info "Container started."
        else
            print_error "Container does not exist. Run './deploy.sh deploy' first."
            exit 1
        fi
        ;;

    stop)
        stop_container
        print_info "Container stopped."
        ;;

    restart)
        docker restart $CONTAINER_NAME
        print_info "Container restarted."
        ;;

    logs)
        show_logs
        ;;

    status)
        if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then
            print_info "Container is running."
            docker stats --no-stream $CONTAINER_NAME
        else
            print_warn "Container is not running."
        fi
        ;;

    clean)
        stop_container
        print_info "Removing Docker image..."
        docker rmi $IMAGE_NAME 2>/dev/null || true
        print_info "Cleanup complete."
        ;;

    *)
        echo "Usage: $0 {check|build|deploy|start|stop|restart|logs|status|clean}"
        echo ""
        echo "Commands:"
        echo "  check   - Check system requirements"
        echo "  build   - Build Docker image only"
        echo "  deploy  - Full deployment (build + start)"
        echo "  start   - Start existing container"
        echo "  stop    - Stop container"
        echo "  restart - Restart container"
        echo "  logs    - Show container logs"
        echo "  status  - Show container status"
        echo "  clean   - Remove container and image"
        exit 1
        ;;
esac
docker-compose.yml (Normal file, 38 lines)
@@ -0,0 +1,38 @@
version: '3.8'

services:
  texteller:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: texteller-server
    runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - CUDA_VISIBLE_DEVICES=0
      # Ray Serve configuration
      - RAY_NUM_REPLICAS=1
      - RAY_NCPU_PER_REPLICA=4
      - RAY_NGPU_PER_REPLICA=1
    ports:
      - "8001:8001"
    volumes:
      # Mount the model cache directory to avoid downloading models
      - ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']  # Use first GPU (RTX 5080)
              capabilities: [gpu]
    restart: unless-stopped
    command: ["texteller", "launch", "server", "-p", "8001"]
    healthcheck:
      test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:8001/', timeout=5)"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
examples/test_server.py (Normal file, 77 lines)
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""
Example client script to test the TexTeller server API.
"""
import requests
import base64
import sys
from pathlib import Path


def test_base64_request(image_path: str, server_url: str = "http://localhost:8001/predict"):
    """Test the server with a base64-encoded image."""
    # Read and encode the image
    with open(image_path, "rb") as f:
        image_data = f.read()
    image_base64 = base64.b64encode(image_data).decode()

    # Send request
    response = requests.post(server_url, json={"image_base64": image_base64}, headers={"Content-Type": "application/json"})

    # Print result
    if response.status_code == 200:
        result = response.json()
        print("✓ Success!")
        print(f"Result: {result.get('result', 'N/A')}")
        return result
    else:
        print(f"✗ Error: {response.status_code}")
        print(f"Response: {response.text}")
        return None


def test_url_request(image_url: str, server_url: str = "http://localhost:8001/predict"):
    """Test the server with an image URL."""
    # Send request
    response = requests.post(server_url, json={"image_url": image_url}, headers={"Content-Type": "application/json"})

    # Print result
    if response.status_code == 200:
        result = response.json()
        print("✓ Success!")
        print(f"Result: {result.get('result', 'N/A')}")
        return result
    else:
        print(f"✗ Error: {response.status_code}")
        print(f"Response: {response.text}")
        return None


if __name__ == "__main__":
    print("=" * 50)
    print("TexTeller Server API Test")
    print("=" * 50)

    # Test with local image if provided
    if len(sys.argv) > 1:
        image_path = sys.argv[1]
        if Path(image_path).exists():
            print("\nTest 1: Base64 request with local image")
            print(f"Image: {image_path}")
            test_base64_request(image_path)
        else:
            print(f"Error: Image file not found: {image_path}")

    # Test with URL if provided
    if len(sys.argv) > 2:
        image_url = sys.argv[2]
        print("\nTest 2: URL request")
        print(f"URL: {image_url}")
        test_url_request(image_url)

    if len(sys.argv) == 1:
        print("\nUsage:")
        print(f"  python {sys.argv[0]} <image_path> [image_url]")
        print("\nExamples:")
        print(f"  python {sys.argv[0]} equation.png")
        print(f"  python {sys.argv[0]} equation.png https://example.com/formula.png")
@@ -31,7 +31,7 @@ from texteller.utils import get_device
     "-p",
     "--port",
     type=int,
-    default=8000,
+    default=8001,
     help="Port to run the server on",
 )
 @click.option(
@@ -1,7 +1,11 @@
 import numpy as np
 import cv2
+import base64
+import requests
+from io import BytesIO

 from starlette.requests import Request
+from starlette.responses import JSONResponse
 from ray import serve
 from ray.serve.handle import DeploymentHandle
@@ -57,13 +61,42 @@ class Ingress:
     def __init__(self, rec_server: DeploymentHandle) -> None:
         self.texteller_server = rec_server

-    async def __call__(self, request: Request) -> str:
-        form = await request.form()
-        img_rb = await form["img"].read()
-
-        img_nparray = np.frombuffer(img_rb, np.uint8)
-        img_nparray = cv2.imdecode(img_nparray, cv2.IMREAD_COLOR)
-        img_nparray = cv2.cvtColor(img_nparray, cv2.COLOR_BGR2RGB)
-        pred = await self.texteller_server.predict.remote(img_nparray)
-        return pred
+    async def __call__(self, request: Request):
+        try:
+            # Parse JSON body
+            body = await request.json()
+
+            # Get image data from either base64 or URL
+            if "image_base64" in body:
+                # Decode base64 image
+                image_data = body["image_base64"]
+                # Remove data URL prefix if present (e.g., "data:image/png;base64,")
+                if "," in image_data:
+                    image_data = image_data.split(",", 1)[1]
+                img_bytes = base64.b64decode(image_data)
+                img_nparray = np.frombuffer(img_bytes, np.uint8)
+
+            elif "image_url" in body:
+                # Download image from URL
+                image_url = body["image_url"]
+                response = requests.get(image_url, timeout=30)
+                response.raise_for_status()
+                img_bytes = response.content
+                img_nparray = np.frombuffer(img_bytes, np.uint8)
+
+            else:
+                return JSONResponse({"error": "Either 'image_base64' or 'image_url' must be provided"}, status_code=400)
+
+            # Decode and convert image
+            img_nparray = cv2.imdecode(img_nparray, cv2.IMREAD_COLOR)
+            if img_nparray is None:
+                return JSONResponse({"error": "Failed to decode image"}, status_code=400)
+            img_nparray = cv2.cvtColor(img_nparray, cv2.COLOR_BGR2RGB)
+
+            # Get prediction
+            pred = await self.texteller_server.predict.remote(img_nparray)
+            return JSONResponse({"result": pred})
+
+        except Exception as e:
+            return JSONResponse({"error": str(e)}, status_code=500)