init repo

This commit is contained in:
liuyuanchuang
2025-12-29 17:34:58 +08:00
commit 874fd383cc
36 changed files with 2641 additions and 0 deletions

0
app/core/__init__.py Normal file
View File

52
app/core/config.py Normal file
View File

@@ -0,0 +1,52 @@
"""Application configuration using Pydantic Settings."""
from functools import lru_cache
from pathlib import Path
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Typed application configuration.

    Values are read by pydantic-settings from the process environment and
    from a UTF-8 encoded ``.env`` file; environment variable names are
    matched case-insensitively. Each field's default below applies when no
    override is supplied.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    # --- API ---
    api_prefix: str = "/doc_process/v1"
    debug: bool = False

    # --- PaddleOCR-VL endpoint ---
    paddleocr_vl_url: str = "http://localhost:8080/v1"

    # --- Model locations (kept as strings; see the Path properties below) ---
    doclayout_model_path: str = "app/model/DocLayout"
    pp_doclayout_model_dir: str = "app/model/PP-DocLayout"

    # --- Image processing ---
    max_image_size_mb: int = 10
    # 15% padding on each side, i.e. 30% total expansion.
    image_padding_ratio: float = 0.15

    # --- Server ---
    host: str = "0.0.0.0"
    port: int = 8053

    @property
    def doclayout_model_file(self) -> Path:
        """Return ``doclayout_model_path`` wrapped in a ``Path``."""
        model_file = Path(self.doclayout_model_path)
        return model_file

    @property
    def pp_doclayout_dir(self) -> Path:
        """Return ``pp_doclayout_model_dir`` wrapped in a ``Path``."""
        model_dir = Path(self.pp_doclayout_model_dir)
        return model_dir
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` object.

    The function takes no arguments and is wrapped in ``lru_cache``, so
    ``Settings()`` is constructed on the first call and that same instance
    is returned on later calls.
    """
    settings = Settings()
    return settings

42
app/core/dependencies.py Normal file
View File

@@ -0,0 +1,42 @@
"""Application dependencies."""
from app.services.image_processor import ImageProcessor
from app.services.layout_detector import LayoutDetector
from app.services.ocr_service import OCRService
from app.services.docx_converter import DocxConverter
# Module-level layout detector shared by the whole process.
# Starts as None; init_layout_detector() assigns it and
# get_layout_detector() raises RuntimeError while it is still None.
_layout_detector: LayoutDetector | None = None
def init_layout_detector(model_path: str) -> None:
    """Create the global layout detector and load its model.

    Intended to be called during application startup.

    The detector is built and loaded in a local variable first and only
    then stored in the module-level ``_layout_detector``. If construction
    or ``load_model()`` raises, the exception propagates and the global is
    left untouched, so a failed call never leaves a detector that did not
    load (and never replaces a previously working one).

    Args:
        model_path: Passed to ``LayoutDetector`` as ``model_path``.
    """
    global _layout_detector
    detector = LayoutDetector(model_path=model_path)
    detector.load_model()
    # Publish only after the model loaded successfully.
    _layout_detector = detector
def get_layout_detector() -> LayoutDetector:
    """Return the module-level layout detector.

    Raises:
        RuntimeError: If the module-level detector is still ``None``, i.e.
            ``init_layout_detector()`` has not assigned it yet.
    """
    detector = _layout_detector
    if detector is not None:
        return detector
    raise RuntimeError("Layout detector not initialized. Call init_layout_detector() first.")
def get_image_processor() -> ImageProcessor:
    """Build and return a new ``ImageProcessor`` on every call."""
    processor = ImageProcessor()
    return processor
def get_ocr_service() -> OCRService:
    """Build and return a new ``OCRService`` on every call."""
    service = OCRService()
    return service
def get_docx_converter() -> DocxConverter:
    """Build and return a new ``DocxConverter`` on every call."""
    converter = DocxConverter()
    return converter