# syntax=docker/dockerfile:1
# DocProcesser Dockerfile
# Optimized for RTX 5080 GPU deployment

# Use NVIDIA CUDA base image (Python 3.10 is installed from deadsnakes below;
# Ubuntu 24.04 ships 3.12, which the app does not target)
FROM nvidia/cuda:12.8.0-runtime-ubuntu24.04

# Fail pipelines on the first failing command — required so the
# `curl | python` bootstrap below cannot silently succeed on a bad download
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    # Model cache directories - mount these at runtime
    MODELSCOPE_CACHE=/root/.cache/modelscope \
    HF_HOME=/root/.cache/huggingface \
    # Application config (override defaults for container)
    # Use 127.0.0.1 for --network host mode, or override with -e for bridge mode
    PP_DOCLAYOUT_MODEL_DIR=/root/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV2 \
    PADDLEOCR_VL_URL=http://127.0.0.1:8000/v1

# Set working directory
WORKDIR /app

# Install system dependencies and Python 3.10 from deadsnakes PPA.
# update+install stay in one layer (stale-cache bug otherwise); apt lists are
# removed in the same layer that created them.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && apt-get install -y --no-install-recommends \
        software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        pandoc \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        python3.10-venv \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    # Bootstrap pip for the deadsnakes interpreter (it ships without pip).
    # -f makes curl fail on HTTP errors instead of piping an error page into python.
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3.10

# Install uv via pip (more reliable than install script)
RUN python3.10 -m pip install uv -i https://pypi.tuna.tsinghua.edu.cn/simple

# Put the (yet-to-be-created) virtualenv first on PATH so subsequent RUN steps
# and the runtime CMD resolve python/uvicorn from /app/.venv
ENV VIRTUAL_ENV="/app/.venv" \
    PATH="/app/.venv/bin:$PATH"

# Copy dependency manifest first for better layer caching
COPY pyproject.toml ./

# Create virtual environment and install dependencies.
# The local wheels/ directory is bind-mounted for the duration of this RUN
# instead of COPY'd: a COPY layer would keep the wheels in the image even
# after `rm -rf ./wheels`, bloating it for nothing.
RUN --mount=type=bind,source=wheels,target=/app/wheels \
    uv venv /app/.venv --python python3.10 \
    && uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -e .

# Copy application code
COPY app/ ./app/

# Create model cache directories (mount from host at runtime)
RUN mkdir -p /root/.cache/modelscope \
    /root/.cache/huggingface \
    /root/.paddlex \
    /app/app/model/DocLayout \
    /app/app/model/PP-DocLayout

# Declare volumes for model cache (mount at runtime to avoid re-downloading).
# Declared after the mkdir so the (empty) directories exist in the image.
VOLUME ["/root/.cache/modelscope", "/root/.cache/huggingface", "/root/.paddlex"]

# NOTE(review): image runs as root because the model caches and documented
# host mounts live under /root — consider a dedicated non-root user with
# relocated cache dirs if the deployment allows it.

# Expose port (documentation only; publish with -p, or use --network host)
EXPOSE 8053

# Health check (cheap, side-effect-free probe of the app's own endpoint)
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -fsS http://localhost:8053/health || exit 1

# Run the application (exec form: uvicorn is PID 1 and receives SIGTERM)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8053", "--workers", "1"]

# =============================================================================
# Usage: Mount local model cache to avoid downloading
#
# Option 1: Use host network (simplest, can access localhost services)
#   docker run --gpus all --network host \
#     -v /home/yoge/.paddlex:/root/.paddlex:ro \
#     -v /home/yoge/.cache/modelscope:/root/.cache/modelscope:ro \
#     -v /home/yoge/.cache/huggingface:/root/.cache/huggingface:ro \
#     doc_processer:latest
#
# Option 2: Use bridge network with host.docker.internal (Linux needs --add-host)
#   docker run --gpus all -p 8053:8053 \
#     --add-host=host.docker.internal:host-gateway \
#     -v /home/yoge/.paddlex:/root/.paddlex:ro \
#     -v /home/yoge/.cache/modelscope:/root/.cache/modelscope:ro \
#     -v /home/yoge/.cache/huggingface:/root/.cache/huggingface:ro \
#     doc_processer:latest
# =============================================================================