# docker-compose.yml

# Compose definition for the TexTeller GPU inference server.
#
# The top-level `version` key is intentionally omitted: the Compose
# Specification treats it as informational only, and Docker Compose v2 prints
# an "attribute `version` is obsolete" warning whenever it is present.

services:
  texteller:
    # Image is built from the Dockerfile that sits next to this file.
    build:
      context: .
      dockerfile: Dockerfile
    container_name: texteller-server
    # Start the container with the `nvidia` OCI runtime; that runtime has to be
    # registered with the Docker daemon on the host.
    runtime: nvidia
    environment:
      # GPU exposure settings read by the NVIDIA container runtime.
      # NOTE(review): `all` here is broader than the single GPU selected by
      # CUDA_VISIBLE_DEVICES and by `device_ids` below — confirm it is intended.
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - CUDA_VISIBLE_DEVICES=0
      # Ray Serve configuration
      # (presumably replica count and per-replica CPU/GPU allotment, going by
      # the variable names — verify against the server's startup code)
      - RAY_NUM_REPLICAS=1
      - RAY_NCPU_PER_REPLICA=4
      - RAY_NGPU_PER_REPLICA=1
    ports:
      # host:container — same port the server is told to listen on in `command`.
      - "8001:8001"
    volumes:
      # Mount the model cache directory to avoid downloading models
      # (read-only, so the container cannot modify the host cache).
      - ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']  # Use first GPU (RTX 5080)
              capabilities: [gpu]
    restart: unless-stopped
    command: ["texteller", "launch", "server", "-p", "8001"]
    healthcheck:
      # Probes the server's root URL from inside the container.
      # NOTE(review): `requests.get` only raises on connection errors/timeouts,
      # not on 4xx/5xx responses, so this check passes as soon as the port
      # answers at all. It also assumes `requests` is installed in the image —
      # confirm.
      test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:8001/', timeout=5)"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during the first 60s do not count towards `retries`.
      start_period: 60s
feat: add dockerfile 2025-12-15 22:31:13 +08:00			`version: '3.8'`

			`services:`
			`texteller:`
			`build:`
			`context: .`
			`dockerfile: Dockerfile`
			`container_name: texteller-server`
			`runtime: nvidia`
			`environment:`
			`- NVIDIA_VISIBLE_DEVICES=all`
			`- NVIDIA_DRIVER_CAPABILITIES=compute,utility`
			`- CUDA_VISIBLE_DEVICES=0`
			`# Ray Serve configuration`
			`- RAY_NUM_REPLICAS=1`
			`- RAY_NCPU_PER_REPLICA=4`
			`- RAY_NGPU_PER_REPLICA=1`
			`ports:`
			`- "8001:8001"`
			`volumes:`
			`# Mount the model cache directory to avoid downloading models`
			`- ~/.cache/huggingface/hub/models--OleehyO--TexTeller:/root/.cache/huggingface/hub/models--OleehyO--TexTeller:ro`
			`deploy:`
			`resources:`
			`reservations:`
			`devices:`
			`- driver: nvidia`
			`device_ids: ['0'] # Use first GPU (RTX 5080)`
			`capabilities: [gpu]`
			`restart: unless-stopped`
			`command: ["texteller", "launch", "server", "-p", "8001"]`
			`healthcheck:`
			`test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:8001/', timeout=5)"]`
			`interval: 30s`
			`timeout: 10s`
			`retries: 3`
			`start_period: 60s`