Files
doc_processer/pyproject.toml
liuyuanchuang ef98f37525 feat: aggressive image optimization for PPDocLayoutV3 only
- Remove doclayout-yolo (~4.8GB, torch/torchvision/triton)
- Replace opencv-python with opencv-python-headless (~200MB)
- Strip debug symbols from .so files (~300-800MB)
- Remove paddle C++ headers (~22MB)
- Use cuda:base instead of runtime (~3GB savings)
- Simplify dependencies: remove doc-parser extras
- Clean venv aggressively: no pip, setuptools, include/, share/

Expected size reduction:
  Before: 17GB
  After:  ~3GB (82% reduction)

Breakdown:
  - CUDA base: 0.4GB
  - Paddle: 0.7GB
  - PaddleOCR: 0.8GB
  - OpenCV-headless: 0.2GB
  - Other deps: 0.6GB
  Total: ~2.7-3GB

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-03-10 11:33:50 +08:00

63 lines
1.3 KiB
TOML

[project]
name = "doc-processer"
version = "0.1.0"
description = "Document processing API - Image to LaTeX/Markdown/MathML and Markdown to DOCX"
requires-python = ">=3.10"
license = { text = "MIT" }
authors = [
    { name = "YogeLiu" },
]
# Runtime requirements (PEP 508 strings), one per line, alphabetical.
# NOTE(review): openai, paddlepaddle, safetensors and wordfreq carry no version
# constraint while every other entry is pinned, so installs are not
# reproducible — consider pinning them.
dependencies = [
    "fastapi==0.128.0",
    "httpx==0.28.1",
    "latex2mathml==3.78.1",
    "lxml>=5.0.0",
    "numpy==2.2.6",
    "openai",
    # headless: no Qt/FFmpeg GUI, server-only
    "opencv-python-headless==4.12.0.88",
    # NOTE(review): the PyPI distribution named `paddle` appears to be a
    # different project from PaddlePaddle (`paddlepaddle`, listed below), and
    # both presumably provide a top-level `paddle` module — confirm this pin
    # is intentional.
    "paddle==1.2.0",
    # Single entry carrying both the version pin and the extra (previously
    # split across two separate `paddleocr` requirements).
    # NOTE(review): confirm the `doc-parser` extra is still required.
    "paddleocr[doc-parser]==3.4.0",
    "paddlepaddle",
    "pillow==12.0.0",
    "pydantic==2.12.5",
    "pydantic-settings==2.12.0",
    "pypandoc==1.16.2",
    "python-docx==1.2.0",
    "python-multipart==0.0.21",
    "safetensors",
    "uvicorn[standard]==0.40.0",
    "wordfreq",
]
# [tool.uv.sources]
# paddlepaddle = { path = "wheels/paddlepaddle-3.4.0.dev20251224-cp310-cp310-linux_x86_64.whl" }
[project.optional-dependencies]
# Test and lint tooling, installed only when the `dev` extra is requested.
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    "ruff>=0.8.0",
]
# PEP 517 build configuration: the project is built with Hatchling.
[build-system]
# Packages that must be installed in the build environment.
requires = ["hatchling"]
# Entry point the build frontend calls to produce sdists and wheels.
build-backend = "hatchling.build"
# Wheel contents: the import package lives in `app/`, which does not match the
# project name, so it is listed explicitly.
[tool.hatch.build.targets.wheel]
packages = ["app"]
[tool.ruff]
# Keep in step with the lowest interpreter allowed by `requires-python`
# (">=3.10"). A newer target would let the pyupgrade ("UP") rules rewrite code
# to constructs that do not exist on Python 3.10.
target-version = "py310"
line-length = 100
[tool.ruff.lint]
# Rule families that are switched on.
select = [
    "E",   # pycodestyle errors
    "F",   # Pyflakes
    "I",   # isort (import ordering)
    "UP",  # pyupgrade
]
# Rules switched off even though their family is selected.
ignore = [
    "E501",  # line-too-long
]
[tool.pytest.ini_options]
# Directory searched for tests when pytest is run without arguments.
testpaths = ["tests"]
# pytest-asyncio setting: in "auto" mode async test functions are run
# without needing an explicit asyncio marker.
asyncio_mode = "auto"