# syntax=docker/dockerfile:1
# Use NVIDIA CUDA base image with Python 3.10 (CUDA 12.8 for RTX 5080)
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu22.04

# Runtime environment only. DEBIAN_FRONTEND is intentionally NOT set here:
# persisting it via ENV leaks a build-time knob into every running container;
# it is passed inline to the apt-get layer below instead.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    CUDA_VISIBLE_DEVICES=0

# Install Python 3.10 and the shared libraries the package needs at runtime
# (libgl1/libglib/libsm/libxext/libxrender are typical OpenCV dependencies).
# --no-install-recommends keeps the layer lean; the apt list cache is removed
# in the same layer so it never persists in the image.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        git \
        libgl1-mesa-glx \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        python3.10 \
        python3.10-dev \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Make Python 3.10 the default `python3` and `python`
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
    update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1

# Upgrade the packaging toolchain before installing anything with pip
RUN python3 -m pip install --upgrade pip setuptools wheel

# Set working directory
WORKDIR /app

# Install PyTorch with CUDA support BEFORE copying the project source so the
# large, rarely-changing torch layer stays cached across source edits.
# (cu124 wheels are compatible with the CUDA 12.8 runtime in the base image.)
RUN pip install torch==2.6.0 torchvision==0.21.0 \
    --index-url https://download.pytorch.org/whl/cu124

# Copy project files, then install the package (editable) and the `requests`
# dependency used by the health check, in a single layer.
COPY . /app/
RUN pip install -e . && \
    pip install requests

# Port used by Ray Serve (documentation only; publish with -p/-P at run time)
EXPOSE 8001

# Pre-create the Hugging Face model cache directory.
# NOTE(review): the image runs as root because the model cache lives under
# /root/.cache — consider a dedicated non-root user plus HF_HOME; verify the
# texteller CLI honors HF_HOME before changing this.
RUN mkdir -p /root/.cache/huggingface/hub

# Health check. Model weights may take well over 5s to load, so give a
# generous start period before failed probes count against the container.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python3 -c "import requests; requests.get('http://localhost:8001/', timeout=5)" || exit 1

# Default command to start the server (port 8001); exec form so the server
# runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["texteller", "launch", "server", "-p", "8001"]