2026-03-10 11:33:50 +08:00
|
|
|
# DocProcesser Dockerfile - production optimized
# Ultra-lean multi-stage build for PPDocLayoutV3
# Final image: ~3GB (down from 17GB)
|
2025-12-29 17:34:58 +08:00
|
|
|
|
2026-03-10 10:41:32 +08:00
|
|
|
# =============================================================================
|
2026-03-10 11:33:50 +08:00
|
|
|
# STAGE 1: Builder
|
2026-03-10 10:41:32 +08:00
|
|
|
# =============================================================================
|
|
|
|
|
# Builder base: CUDA devel image on Ubuntu 22.04.
# Ubuntu 24.04 ships Python 3.12 and has no python3.10* packages in its
# archives, so the python3.10 install in this stage cannot resolve there.
# 22.04 provides python3.10 natively.
FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 AS builder
|
|
|
|
|
|
2026-03-10 11:33:50 +08:00
|
|
|
# Build-time packages: Python 3.10 (interpreter, headers, venv support),
# a C/C++ toolchain, and curl for fetching the pip bootstrap script.
# The apt lists are removed in the same layer that created them.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        python3.10-venv \
 && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
|
# Setup Python: make `python` resolve to 3.10 and bootstrap pip.
# The bootstrap script is downloaded to a file instead of being piped into
# the interpreter: the default /bin/sh has no pipefail, so a failed download
# in a `curl | python` pipeline would go unnoticed. `-f` makes curl fail on
# HTTP errors instead of saving an error page.
RUN ln -sf /usr/bin/python3.10 /usr/bin/python && \
    curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py && \
    python /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py
|
2026-03-10 10:41:32 +08:00
|
|
|
|
|
|
|
|
# Install the uv package manager from the Tsinghua PyPI mirror.
# NOTE(review): uv is unpinned, so the version changes between builds —
# consider pinning for reproducibility.
RUN pip install --index-url https://pypi.tuna.tsinghua.edu.cn/simple uv
|
2026-03-10 10:41:32 +08:00
|
|
|
|
|
|
|
|
WORKDIR /build

# Copy dependency manifest first so the dependency layer stays cached until
# pyproject.toml changes.
COPY pyproject.toml ./

# Optionally copy pre-downloaded wheels into /build/wheels/.
# COPY is not a shell command: redirections and `|| true` are parsed as extra
# source paths and break the build. The `wheel[s]` glob matches only a
# directory named "wheels"; with BuildKit (the default builder) a wildcard
# source that matches nothing is skipped instead of failing, which makes the
# directory optional.
COPY wheel[s] ./wheels/
|
2026-03-10 10:41:32 +08:00
|
|
|
|
2026-03-10 11:33:50 +08:00
|
|
|
# Build venv.
# The environment is created here at /build/venv but is copied to a different
# path in the final stage. A regular venv writes its absolute path into the
# shebang of every console script (e.g. bin/uvicorn), which would point at a
# non-existent /build/venv/bin/python after the copy. `--relocatable` makes uv
# generate path-independent entry points.
# The target environment is passed with `--python` rather than by sourcing
# bin/activate, so the step does not depend on the activation script working
# under /bin/sh.
# NOTE(review): `-e .` is an editable install of a project whose sources are
# not copied into this stage (only pyproject.toml is) — confirm this is only
# used to pull in the dependencies.
RUN uv venv --relocatable --python python3.10 /build/venv && \
    uv pip install --python /build/venv/bin/python \
        -i https://pypi.tuna.tsinghua.edu.cn/simple -e . && \
    rm -rf ./wheels
|
|
|
|
|
|
2026-03-10 11:33:50 +08:00
|
|
|
# Size optimization: strip unneeded symbols from every *.so in the venv
# (~300-800MB saved). Best effort: a strip failure does not fail the build.
RUN find /build/venv -name '*.so' -exec strip --strip-unneeded '{}' + || true

# Drop the C++ headers bundled with paddle (~22MB saved).
RUN rm -fr /build/venv/lib/python*/site-packages/paddle/include
|
|
|
|
|
|
|
|
|
|
# Clean Python cache and build artifacts from the venv before it is copied
# into the runtime stage:
#   - __pycache__, tests and test directories
#   - stray *.pyc / *.pyo files
#   - pip / setuptools, the venv's include/ and share/ trees, and /root/.cache
# `-prune` stops find from descending into a directory it is about to delete,
# so the removals no longer emit "No such file or directory" errors and there
# is no need to swallow failures with `2>/dev/null || true` (which, mixed into
# an `&&` chain, also hid failures of the unrelated -delete steps).
# `rm -rf` already ignores paths that do not exist.
RUN find /build/venv -type d \( -name "__pycache__" -o -name "tests" -o -name "test" \) \
        -prune -exec rm -rf {} + && \
    find /build/venv -type f \( -name "*.pyc" -o -name "*.pyo" \) -delete && \
    rm -rf /build/venv/lib/*/site-packages/pip* \
           /build/venv/lib/*/site-packages/setuptools* \
           /build/venv/include \
           /build/venv/share \
           /root/.cache
|
2026-03-10 10:41:32 +08:00
|
|
|
|
|
|
|
|
# =============================================================================
|
2026-03-10 11:33:50 +08:00
|
|
|
# STAGE 2: Runtime - CUDA base (~400MB, not ~3.4GB from runtime)
|
2026-03-10 10:41:32 +08:00
|
|
|
# =============================================================================
|
2026-03-10 11:33:50 +08:00
|
|
|
# Runtime base: minimal CUDA "base" image on Ubuntu 22.04.
# Ubuntu 24.04 ships Python 3.12 and has no python3.10 package, so the
# python3.10 install in this stage cannot resolve there. 22.04 provides
# python3.10 natively.
FROM nvidia/cuda:12.9.0-base-ubuntu22.04 AS runtime
|
2025-12-29 17:34:58 +08:00
|
|
|
|
|
|
|
|
# Python / pip behavior inside the container.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Model cache locations and the PaddleOCR-VL endpoint.
# NOTE(review): the model directory names PP-DocLayoutV2 while the file header
# mentions PPDocLayoutV3 — confirm which model version is intended.
ENV MODELSCOPE_CACHE=/root/.cache/modelscope \
    HF_HOME=/root/.cache/huggingface \
    PP_DOCLAYOUT_MODEL_DIR=/root/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV2 \
    PADDLEOCR_VL_URL=http://127.0.0.1:8001/v1

# Put the virtual environment first on PATH so its executables win.
ENV VIRTUAL_ENV="/app/.venv" \
    PATH="/app/.venv/bin:$PATH"

WORKDIR /app
|
|
|
|
|
|
2026-03-10 11:33:50 +08:00
|
|
|
# Runtime-only packages (no compilers or headers): the Python 3.10
# interpreter, shared libraries (libgl1, libglib2.0-0, libgomp1), curl
# (used by the HEALTHCHECK) and pandoc. Finally make `python` resolve to 3.10.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        pandoc \
        python3.10 \
 && rm -rf /var/lib/apt/lists/* \
 && ln -sf /usr/bin/python3.10 /usr/bin/python
|
2025-12-29 17:34:58 +08:00
|
|
|
|
2026-03-10 11:33:50 +08:00
|
|
|
# Bring in the trimmed virtual environment produced by the builder stage.
COPY --from=builder /build/venv/ /app/.venv/

# Application source goes last: it changes most often, so earlier layers
# stay cached.
COPY app/ /app/app/
|
|
|
|
|
|
2026-03-10 11:33:50 +08:00
|
|
|
# Model files are not shipped in the image; they are expected to be mounted
# at runtime. Empty anything under app/model and create the mount points.
# NOTE(review): files deleted here still occupy space in the earlier
# `COPY app/` layer — excluding app/model via .dockerignore would actually
# keep them out of the image.
RUN rm -rf /app/app/model/* && \
    mkdir -p /root/.cache/modelscope /root/.cache/huggingface /root/.paddlex
|
2025-12-31 17:38:32 +08:00
|
|
|
|
2025-12-29 17:34:58 +08:00
|
|
|
# Port served by uvicorn (documentation only; publish it with -p or --network host).
EXPOSE 8053

# Liveness probe: GET /health on the service port; any curl failure marks the
# check as failed.
HEALTHCHECK --start-period=5s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:8053/health || exit 1

# Exec form so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8053", "--workers", "1"]
|
|
|
|
|
|
2025-12-31 17:38:32 +08:00
|
|
|
# =============================================================================
# Usage: mount the local model caches so nothing is downloaded at startup.
#
# Option 1: host network (simplest; the container can reach services on the
# host's localhost, which matches the default PADDLEOCR_VL_URL)
#   docker run --gpus all --network host \
#     -v /home/yoge/.paddlex:/root/.paddlex:ro \
#     -v /home/yoge/.cache/modelscope:/root/.cache/modelscope:ro \
#     -v /home/yoge/.cache/huggingface:/root/.cache/huggingface:ro \
#     doc_processer:latest
#
# Option 2: bridge network with host.docker.internal (Linux needs --add-host).
# In bridge mode 127.0.0.1 is the container itself, so if the PaddleOCR-VL
# service runs on the host, override PADDLEOCR_VL_URL to point at it.
#   docker run --gpus all -p 8053:8053 \
#     --add-host=host.docker.internal:host-gateway \
#     -e PADDLEOCR_VL_URL=http://host.docker.internal:8001/v1 \
#     -v /home/yoge/.paddlex:/root/.paddlex:ro \
#     -v /home/yoge/.cache/modelscope:/root/.cache/modelscope:ro \
#     -v /home/yoge/.cache/huggingface:/root/.cache/huggingface:ro \
#     doc_processer:latest
# =============================================================================
|