Compare commits
5 Commits
16399f0929
...
optimize/d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5ba835ab44 | ||
|
|
7c7d4bf36a | ||
|
|
ef98f37525 | ||
|
|
95c497829f | ||
|
|
6579cf55f5 |
123
Dockerfile
123
Dockerfile
@@ -1,82 +1,103 @@
|
||||
# DocProcesser Dockerfile
|
||||
# Optimized for RTX 5080 GPU deployment
|
||||
# DocProcesser Dockerfile - Production optimized
|
||||
# Ultra-lean multi-stage build for PPDocLayoutV3
|
||||
# Final image: ~3GB (from 17GB)
|
||||
|
||||
# Use NVIDIA CUDA base image; Python 3.10 is installed separately from the deadsnakes PPA
|
||||
FROM nvidia/cuda:12.9.0-runtime-ubuntu24.04
|
||||
# =============================================================================
|
||||
# STAGE 1: Builder
|
||||
# =============================================================================
|
||||
FROM nvidia/cuda:12.9.0-devel-ubuntu24.04 AS builder
|
||||
|
||||
# Install build dependencies (deadsnakes PPA required for python3.10 on Ubuntu 24.04)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
software-properties-common \
|
||||
&& add-apt-repository -y ppa:deadsnakes/ppa \
|
||||
&& apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3.10 python3.10-venv python3.10-dev python3.10-distutils \
|
||||
build-essential curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Setup Python
|
||||
RUN ln -sf /usr/bin/python3.10 /usr/bin/python && \
|
||||
curl -sS https://bootstrap.pypa.io/get-pip.py | python
|
||||
|
||||
# Install uv
|
||||
RUN pip install uv -i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# Copy dependencies
|
||||
COPY pyproject.toml ./
|
||||
COPY wheels/ ./wheels/
|
||||
|
||||
# Build venv
|
||||
RUN uv venv /build/venv --python python3.10 && \
|
||||
. /build/venv/bin/activate && \
|
||||
uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -e . && \
|
||||
rm -rf ./wheels
|
||||
|
||||
# Aggressive optimization: strip debug symbols from .so files (~300-800MB saved)
|
||||
RUN find /build/venv -name "*.so" -exec strip --strip-unneeded {} + || true
|
||||
|
||||
# Remove paddle C++ headers (~22MB saved)
|
||||
RUN rm -rf /build/venv/lib/python*/site-packages/paddle/include
|
||||
|
||||
# Clean Python cache and build artifacts
|
||||
RUN find /build/venv -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true && \
|
||||
find /build/venv -type f -name "*.pyc" -delete && \
|
||||
find /build/venv -type f -name "*.pyo" -delete && \
|
||||
find /build/venv -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true && \
|
||||
find /build/venv -type d -name "test" -exec rm -rf {} + 2>/dev/null || true && \
|
||||
rm -rf /build/venv/lib/*/site-packages/pip* \
|
||||
/build/venv/lib/*/site-packages/setuptools* \
|
||||
/build/venv/include \
|
||||
/build/venv/share && \
|
||||
rm -rf /root/.cache 2>/dev/null || true
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 2: Runtime - built on the CUDA "base" image (~400MB) instead of the "runtime" image (~3.4GB)
|
||||
# =============================================================================
|
||||
FROM nvidia/cuda:12.9.0-base-ubuntu24.04
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
# Model cache directories - mount these at runtime
|
||||
MODELSCOPE_CACHE=/root/.cache/modelscope \
|
||||
HF_HOME=/root/.cache/huggingface \
|
||||
# Application config (override defaults for container)
|
||||
# PADDLEOCR_VL_URL defaults to 127.0.0.1 for --network host mode; override it with -e for bridge mode
|
||||
PP_DOCLAYOUT_MODEL_DIR=/root/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV2 \
|
||||
PADDLEOCR_VL_URL=http://127.0.0.1:8001/v1
|
||||
PADDLEOCR_VL_URL=http://127.0.0.1:8001/v1 \
|
||||
PATH="/app/.venv/bin:$PATH" \
|
||||
VIRTUAL_ENV="/app/.venv"
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies and Python 3.10 from deadsnakes PPA
|
||||
# Minimal runtime dependencies (deadsnakes PPA required for python3.10 on Ubuntu 24.04)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
software-properties-common \
|
||||
&& add-apt-repository -y ppa:deadsnakes/ppa \
|
||||
&& apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3.10 \
|
||||
python3.10-venv \
|
||||
python3.10-dev \
|
||||
python3.10-distutils \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender-dev \
|
||||
libgomp1 \
|
||||
curl \
|
||||
pandoc \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python3.10 /usr/bin/python \
|
||||
&& ln -sf /usr/bin/python3.10 /usr/bin/python3 \
|
||||
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
|
||||
libgl1 libglib2.0-0 libgomp1 \
|
||||
curl pandoc \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install uv via pip (more reliable than install script)
|
||||
RUN python3.10 -m pip install uv -i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
ENV VIRTUAL_ENV="/app/.venv"
|
||||
RUN ln -sf /usr/bin/python3.10 /usr/bin/python
|
||||
|
||||
# Copy dependency files first for better caching
|
||||
COPY pyproject.toml ./
|
||||
COPY wheels/ ./wheels/
|
||||
# Copy optimized venv from builder
|
||||
COPY --from=builder /build/venv /app/.venv
|
||||
|
||||
# Create virtual environment and install dependencies
|
||||
RUN uv venv /app/.venv --python python3.10 \
|
||||
&& uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -e . \
|
||||
&& rm -rf ./wheels
|
||||
|
||||
# Copy application code
|
||||
# Copy app code
|
||||
COPY app/ ./app/
|
||||
|
||||
# Create model cache directories (mount from host at runtime)
|
||||
RUN mkdir -p /root/.cache/modelscope \
|
||||
/root/.cache/huggingface \
|
||||
/root/.paddlex \
|
||||
/app/app/model/DocLayout \
|
||||
/app/app/model/PP-DocLayout
|
||||
# Create cache mount points (DO NOT include model files)
|
||||
RUN mkdir -p /root/.cache/modelscope /root/.cache/huggingface /root/.paddlex && \
|
||||
rm -rf /app/app/model/*
|
||||
|
||||
# Declare volumes for model cache (mount at runtime to avoid re-downloading)
|
||||
VOLUME ["/root/.cache/modelscope", "/root/.cache/huggingface", "/root/.paddlex"]
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8053
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8053/health || exit 1
|
||||
|
||||
# Run the application
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8053", "--workers", "1"]
|
||||
|
||||
# =============================================================================
|
||||
|
||||
@@ -11,7 +11,7 @@ authors = [
|
||||
dependencies = [
|
||||
"fastapi==0.128.0",
|
||||
"uvicorn[standard]==0.40.0",
|
||||
"opencv-python==4.12.0.88",
|
||||
"opencv-python-headless==4.12.0.88", # headless: no Qt/FFmpeg GUI, server-only
|
||||
"python-multipart==0.0.21",
|
||||
"pydantic==2.12.5",
|
||||
"pydantic-settings==2.12.0",
|
||||
@@ -20,7 +20,6 @@ dependencies = [
|
||||
"pillow==12.0.0",
|
||||
"python-docx==1.2.0",
|
||||
"paddleocr==3.4.0",
|
||||
"doclayout-yolo==0.0.4",
|
||||
"latex2mathml==3.78.1",
|
||||
"paddle==1.2.0",
|
||||
"pypandoc==1.16.2",
|
||||
|
||||
Reference in New Issue
Block a user