#!/bin/bash
# TexTeller Docker Deployment Script
set -e # Exit on error
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Configuration
MODEL_PATH="$HOME/.cache/huggingface/hub/models--OleehyO--TexTeller"
CONTAINER_NAME="texteller-server"
IMAGE_NAME="texteller:latest"
PORT=8001
# Colored logging helpers. printf '%b' interprets the backslash escape
# sequences in the color variables, matching `echo -e` behavior.
print_info() {
  printf '%b\n' "${GREEN}[INFO]${NC} $1"
}

print_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

print_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Verify NVIDIA drivers are installed and at least one GPU responds.
# Exits 1 (with a message on stdout) when nvidia-smi is missing or fails.
check_nvidia() {
  print_info "Checking NVIDIA GPU availability..."
  if ! command -v nvidia-smi > /dev/null 2>&1; then
    print_error "nvidia-smi not found. Please install NVIDIA drivers."
    exit 1
  fi
  # Test the command inside the `if` itself: with `set -e`, running
  # `nvidia-smi` bare and checking `$?` afterwards would never reach the
  # check — the script would exit on the failing command first.
  if nvidia-smi > /dev/null 2>&1; then
    print_info "NVIDIA GPU detected:"
    nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
  else
    print_error "NVIDIA GPU not detected or drivers not working."
    exit 1
  fi
}
# Ensure the docker CLI is available; report its version, else exit 1.
check_docker() {
  print_info "Checking Docker installation..."
  if command -v docker > /dev/null 2>&1; then
    print_info "Docker version: $(docker --version)"
  else
    print_error "Docker not found. Please install Docker."
    exit 1
  fi
}
# Verify the NVIDIA Container Toolkit works by running nvidia-smi inside a
# throwaway CUDA base container. Exits 1 with install instructions on failure.
# (Scraped blame-timestamp lines that had been pasted into this function were
# removed — they were not valid shell.)
check_nvidia_docker() {
  print_info "Checking NVIDIA Container Toolkit..."
  if ! docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &> /dev/null; then
    print_error "NVIDIA Container Toolkit not working properly."
    print_info "Please install it with:"
    echo " sudo apt-get install -y nvidia-container-toolkit"
    echo " sudo systemctl restart docker"
    exit 1
  fi
  print_info "NVIDIA Container Toolkit is working."
}
# Confirm the TexTeller model weights exist in the local HF cache
# ($MODEL_PATH); print download instructions and exit 1 otherwise.
check_model() {
  print_info "Checking model availability..."
  if [[ -d "$MODEL_PATH" ]]; then
    print_info "Model found at: $MODEL_PATH"
  else
    print_error "Model not found at: $MODEL_PATH"
    print_info "Please download the model first using:"
    echo " python -c 'from texteller import load_model; load_model()'"
    exit 1
  fi
}
# Build the Docker image from the current directory's Dockerfile.
# Exits 1 if the build fails. The success/failure branch is driven by the
# `if docker build ...` form directly: with `set -e` active, the original
# `docker build; if [ $? -eq 0 ]` pattern could never reach its else branch —
# the script would abort on the failing build first.
build_image() {
  print_info "Building Docker image..."
  if docker build -t "$IMAGE_NAME" .; then
    print_info "Docker image built successfully: $IMAGE_NAME"
  else
    print_error "Failed to build Docker image."
    exit 1
  fi
}
# Stop the named container if it is running, then remove it if it exists
# in any state. Safe to call when no such container is present.
stop_container() {
  # Running container?
  if [[ -n "$(docker ps -q -f "name=$CONTAINER_NAME")" ]]; then
    print_info "Stopping existing container..."
    docker stop "$CONTAINER_NAME"
  fi
  # Container present in any state (including exited)?
  if [[ -n "$(docker ps -aq -f "name=$CONTAINER_NAME")" ]]; then
    print_info "Removing existing container..."
    docker rm "$CONTAINER_NAME"
  fi
}
# Launch the TexTeller server container in detached mode on GPU 0, with the
# host HF cache mounted read-only and offline mode forced so the server never
# hits the network for weights. Exits 1 if `docker run` fails.
# (Scraped blame-timestamp lines that had been pasted between the
# continuation lines — breaking the `docker run` command — were removed.)
start_container() {
  print_info "Starting TexTeller server container..."
  # Drive the branch from `if docker run ...`: under `set -e`, checking $?
  # after a bare failing command would never execute.
  if docker run -d \
    --name "$CONTAINER_NAME" \
    --gpus '"device=0"' \
    -p "$PORT:8001" \
    --shm-size=2g \
    -v "$HOME/.cache/huggingface:/root/.cache/huggingface:ro" \
    -e CUDA_VISIBLE_DEVICES=0 \
    -e HF_HOME=/root/.cache/huggingface \
    -e HF_HUB_OFFLINE=1 \
    -e TRANSFORMERS_OFFLINE=1 \
    -e RAY_NUM_REPLICAS=1 \
    -e RAY_NCPU_PER_REPLICA=4 \
    -e RAY_NGPU_PER_REPLICA=1 \
    --restart unless-stopped \
    "$IMAGE_NAME"; then
    print_info "Container started successfully!"
    print_info "Server will be available at: http://localhost:$PORT/predict"
  else
    print_error "Failed to start container."
    exit 1
  fi
}
# Poll the server root endpoint once per second for up to 60 seconds.
# Returns 0 as soon as it answers; otherwise falls through with a warning
# (the container may simply still be loading the model).
wait_for_server() {
  print_info "Waiting for server to be ready..."
  local try
  for (( try = 0; try < 60; try++ )); do
    if curl -s "http://localhost:$PORT/" > /dev/null 2>&1; then
      print_info "Server is ready!"
      return 0
    fi
    printf '.'
    sleep 1
  done
  echo ""
  print_warn "Server might still be initializing. Check logs with: docker logs -f $CONTAINER_NAME"
}
# Stream the container's logs to the terminal until interrupted.
# Quoted expansion fixes SC2086 (word splitting on unquoted variable).
show_logs() {
  print_info "Showing container logs (Ctrl+C to exit)..."
  docker logs -f "$CONTAINER_NAME"
}
# Main command dispatch. The subcommand defaults to `deploy` when no
# argument is given. Variable expansions passed to docker are quoted
# (SC2086); behavior is otherwise unchanged.
case "${1:-deploy}" in
  check)
    # Validate every prerequisite without touching the container.
    check_nvidia
    check_docker
    check_nvidia_docker
    check_model
    print_info "All checks passed!"
    ;;
  build)
    check_docker
    build_image
    ;;
  deploy)
    # Full pipeline: verify environment, rebuild, replace, start, wait.
    check_nvidia
    check_docker
    check_nvidia_docker
    check_model
    build_image
    stop_container
    start_container
    wait_for_server
    print_info ""
    print_info "======================================"
    print_info "TexTeller server deployed successfully!"
    print_info "======================================"
    print_info "API endpoint: http://localhost:$PORT/predict"
    print_info ""
    print_info "Test the server with:"
    print_info " python examples/test_server.py path/to/image.png"
    print_info ""
    print_info "View logs with:"
    print_info " docker logs -f $CONTAINER_NAME"
    print_info ""
    print_info "Stop the server with:"
    print_info " docker stop $CONTAINER_NAME"
    ;;
  start)
    # Only start a container that already exists; `deploy` creates it.
    if [ "$(docker ps -aq -f "name=$CONTAINER_NAME")" ]; then
      docker start "$CONTAINER_NAME"
      print_info "Container started."
    else
      print_error "Container does not exist. Run './deploy.sh deploy' first."
      exit 1
    fi
    ;;
  stop)
    stop_container
    print_info "Container stopped."
    ;;
  restart)
    docker restart "$CONTAINER_NAME"
    print_info "Container restarted."
    ;;
  logs)
    show_logs
    ;;
  status)
    if [ "$(docker ps -q -f "name=$CONTAINER_NAME")" ]; then
      print_info "Container is running."
      docker stats --no-stream "$CONTAINER_NAME"
    else
      print_warn "Container is not running."
    fi
    ;;
  clean)
    # Best-effort image removal: ignore "image not found" errors.
    stop_container
    print_info "Removing Docker image..."
    docker rmi "$IMAGE_NAME" 2>/dev/null || true
    print_info "Cleanup complete."
    ;;
  *)
    echo "Usage: $0 {check|build|deploy|start|stop|restart|logs|status|clean}"
    echo ""
    echo "Commands:"
    echo " check - Check system requirements"
    echo " build - Build Docker image only"
    echo " deploy - Full deployment (build + start)"
    echo " start - Start existing container"
    echo " stop - Stop container"
    echo " restart - Restart container"
    echo " logs - Show container logs"
    echo " status - Show container status"
    echo " clean - Remove container and image"
    exit 1
    ;;
esac