---
# Docker Compose definition for the TexTeller inference server (GPU-backed,
# served via Ray Serve on port 8001).
#
# NOTE(review): the top-level `version` key is obsolete and ignored by
# Compose V2; kept for compatibility with older docker-compose binaries.
version: '3.8'

services:
  texteller:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: texteller-server
    # Legacy GPU selector (nvidia-container-runtime). Redundant with the
    # `deploy.resources` GPU reservation below, but harmless — keeping both
    # covers old and new Docker GPU plumbing.
    runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - CUDA_VISIBLE_DEVICES=0
      # Ray Serve configuration
      - RAY_NUM_REPLICAS=1
      - RAY_NCPU_PER_REPLICA=4
      - RAY_NGPU_PER_REPLICA=1
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of `a:b` port pairs.
      - "8001:8001"
    volumes:
      # Mount the model cache directory to avoid downloading models
      - ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']  # Use first GPU (RTX 5080)
              capabilities: [gpu]
    restart: unless-stopped
    command: ["texteller", "launch", "server", "-p", "8001"]
    healthcheck:
      # NOTE(review): this probe requires the third-party `requests` package
      # inside the image — TODO confirm it is installed, or switch to stdlib
      # urllib.request (beware: urlopen raises on HTTP >= 400, requests.get
      # does not, so the two are not drop-in equivalent).
      test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:8001/', timeout=5)"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s