Files
doc_processer/app/core/dependencies.py

59 lines
1.7 KiB
Python
Raw Permalink Normal View History

2025-12-29 17:34:58 +08:00
"""Application dependencies."""
from app.services.image_processor import ImageProcessor
from app.services.layout_detector import LayoutDetector
2026-01-05 17:30:54 +08:00
from app.services.ocr_service import OCRService, MineruOCRService
2025-12-31 17:38:32 +08:00
from app.services.converter import Converter
from app.core.config import get_settings
2025-12-29 17:34:58 +08:00
# Global instances (initialized on startup).
# Holds the shared LayoutDetector; remains None until init_layout_detector()
# assigns it, and get_layout_detector() raises RuntimeError while it is None.
_layout_detector: LayoutDetector | None = None
2025-12-31 17:38:32 +08:00
def init_layout_detector() -> None:
    """Create the shared layout detector and store it at module level.

    Meant to be invoked during application startup. Each call builds a new
    ``LayoutDetector`` and rebinds the module-level ``_layout_detector`` to it.
    """
    global _layout_detector
    detector = LayoutDetector()
    _layout_detector = detector
2025-12-29 17:34:58 +08:00
def get_layout_detector() -> LayoutDetector:
    """Return the shared layout detector stored at module level.

    Raises:
        RuntimeError: If the module-level ``_layout_detector`` is still
            ``None``, i.e. ``init_layout_detector()`` has not been called.
    """
    detector = _layout_detector
    if detector is not None:
        return detector
    raise RuntimeError("Layout detector not initialized. Call init_layout_detector() first.")
def get_image_processor() -> ImageProcessor:
    """Build and return a new ``ImageProcessor``; nothing is cached."""
    processor = ImageProcessor()
    return processor
def get_ocr_service() -> OCRService:
    """Assemble an ``OCRService`` from the application's other providers.

    The server URL is read from ``get_settings().paddleocr_vl_url``; the
    layout detector, image processor and converter come from their
    respective ``get_*`` functions in this module.

    Raises:
        RuntimeError: Propagated from ``get_layout_detector()`` when the
            layout detector has not been initialized.
    """
    # Resolve collaborators in the same order the constructor receives them.
    server_url = get_settings().paddleocr_vl_url
    detector = get_layout_detector()
    processor = get_image_processor()
    docx_converter = get_converter()
    return OCRService(
        vl_server_url=server_url,
        layout_detector=detector,
        image_processor=processor,
        converter=docx_converter,
    )
2025-12-29 17:34:58 +08:00
2025-12-31 17:38:32 +08:00
def get_converter() -> Converter:
    """Build and return a new ``Converter`` (the DOCX converter)."""
    converter = Converter()
    return converter
2025-12-29 17:34:58 +08:00
2026-01-05 17:30:54 +08:00
def get_mineru_ocr_service() -> MineruOCRService:
    """Assemble a ``MineruOCRService`` instance.

    The API endpoint is taken from the ``miner_ocr_api_url`` attribute of the
    settings object; when the settings object has no such attribute, the
    local default ``http://127.0.0.1:8000/file_parse`` is used instead.
    """
    endpoint = getattr(
        get_settings(),
        'miner_ocr_api_url',
        'http://127.0.0.1:8000/file_parse',
    )
    # Build collaborators in constructor-argument order: converter first.
    docx_converter = get_converter()
    processor = get_image_processor()
    return MineruOCRService(
        api_url=endpoint,
        converter=docx_converter,
        image_processor=processor,
    )