diff --git a/Dockerfile b/Dockerfile
index df61557..b3f28da 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,57 @@
-# DocProcesser Dockerfile
+# DocProcesser Dockerfile - Multi-stage optimized build
 # Optimized for RTX 5080 GPU deployment
 
-# Use NVIDIA CUDA base image with Python 3.10
+# =============================================================================
+# STAGE 1: Builder - Install dependencies
+# =============================================================================
+FROM nvidia/cuda:12.9.0-devel-ubuntu24.04 AS builder
+
+# Install build dependencies and Python 3.10.
+# Ubuntu 24.04 does not package Python 3.10, so it comes from the deadsnakes PPA.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update && apt-get install -y --no-install-recommends \
+    python3.10 \
+    python3.10-venv \
+    python3.10-dev \
+    python3.10-distutils \
+    build-essential \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Make python3.10 the default interpreter and bootstrap pip for it
+RUN ln -sf /usr/bin/python3.10 /usr/bin/python && \
+    ln -sf /usr/bin/python3.10 /usr/bin/python3 && \
+    curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
+
+# Install uv
+RUN python3.10 -m pip install uv -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+# Build at the same path the runtime stage uses: a virtual environment is not
+# relocatable (console-script shebangs and the editable-install path are
+# absolute), so it must be created where it will finally live.
+WORKDIR /app
+
+# Copy dependency files
+COPY pyproject.toml ./
+COPY wheels/ ./wheels/
+
+# Create the virtual environment and install dependencies into it
+RUN uv venv /app/.venv --python python3.10 && \
+    . /app/.venv/bin/activate && \
+    uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -e . && \
+    rm -rf ./wheels
+
+# Slim the venv: drop bytecode caches and bundled test suites.
+# -prune stops find from descending into directories it is about to delete.
+RUN find /app/.venv -type d \( -name "__pycache__" -o -name "tests" -o -name "test" \) \
+        -prune -exec rm -rf {} + && \
+    find /app/.venv -type f \( -name "*.pyc" -o -name "*.pyo" \) -delete
+
+# =============================================================================
+# STAGE 2: Runtime - Minimal final image
+# =============================================================================
 FROM nvidia/cuda:12.9.0-runtime-ubuntu24.04
 
 # Set environment variables
@@ -15,20 +65,19 @@ ENV PYTHONUNBUFFERED=1 \
     # Application config (override defaults for container)
     # Use 127.0.0.1 for --network host mode, or override with -e for bridge mode
     PP_DOCLAYOUT_MODEL_DIR=/root/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV2 \
-    PADDLEOCR_VL_URL=http://127.0.0.1:8001/v1
+    PADDLEOCR_VL_URL=http://127.0.0.1:8001/v1 \
+    PATH="/app/.venv/bin:$PATH" \
+    VIRTUAL_ENV="/app/.venv"
 
-# Set working directory
 WORKDIR /app
 
-# Install system dependencies and Python 3.10 from deadsnakes PPA
+# Install runtime-only system dependencies (no compilers or Python headers).
+# The deadsnakes PPA is still required here: Ubuntu 24.04 has no python3.10 package.
 RUN apt-get update && apt-get install -y --no-install-recommends \
     software-properties-common \
     && add-apt-repository -y ppa:deadsnakes/ppa \
     && apt-get update && apt-get install -y --no-install-recommends \
     python3.10 \
-    python3.10-venv \
-    python3.10-dev \
-    python3.10-distutils \
     libgl1 \
     libglib2.0-0 \
     libsm6 \
@@ -37,34 +86,26 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libgomp1 \
     curl \
     pandoc \
-    && rm -rf /var/lib/apt/lists/* \
-    && ln -sf /usr/bin/python3.10 /usr/bin/python \
-    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
-    && curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
+    && rm -rf /var/lib/apt/lists/*
 
-# Install uv via pip (more reliable than install script)
-RUN python3.10 -m pip install uv -i https://pypi.tuna.tsinghua.edu.cn/simple
-ENV PATH="/app/.venv/bin:$PATH"
-ENV VIRTUAL_ENV="/app/.venv"
+# Make python3.10 the default `python` / `python3`
+RUN ln -sf /usr/bin/python3.10 /usr/bin/python && \
+    ln -sf /usr/bin/python3.10 /usr/bin/python3
 
-# Copy dependency files first for better caching
-COPY pyproject.toml ./
-COPY wheels/ ./wheels/
+# Copy the pre-built venv from the builder stage (same absolute path on both sides)
+COPY --from=builder /app/.venv /app/.venv
 
-# Create virtual environment and install dependencies
-RUN uv venv /app/.venv --python python3.10 \
-    && uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -e . \
-    && rm -rf ./wheels
-
-# Copy application code
+# Copy application code (list app/model in .dockerignore to keep model files out)
 COPY app/ ./app/
 
 # Create model cache directories (mount from host at runtime)
+# NOTE: the rm below only hides model files copied by the previous layer; they
+# still occupy space in that layer. Exclude app/model via .dockerignore to
+# actually keep the image lean.
 RUN mkdir -p /root/.cache/modelscope \
     /root/.cache/huggingface \
-    /root/.paddlex \
-    /app/app/model/DocLayout \
-    /app/app/model/PP-DocLayout
+    /root/.paddlex && \
+    rm -rf /app/app/model/*
 
 # Declare volumes for model cache (mount at runtime to avoid re-downloading)
 VOLUME ["/root/.cache/modelscope", "/root/.cache/huggingface", "/root/.paddlex"]