Files
doc_processer/app/core/config.py

81 lines
2.2 KiB
Python
Raw Normal View History

2025-12-29 17:34:58 +08:00
"""Application configuration using Pydantic Settings."""
from functools import lru_cache
from pathlib import Path
2025-12-29 20:02:07 +08:00
import torch
2026-03-09 16:51:06 +08:00
from pydantic_settings import BaseSettings, SettingsConfigDict
2025-12-29 17:34:58 +08:00
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values can also be supplied through a UTF-8 ``.env`` file; setting names
    are matched case-insensitively. The OCR service URLs are derived from
    ``base_host`` and are exposed as read-only properties.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    # API Settings
    api_prefix: str = "/doc_process/v1"
    debug: bool = False

    # Base Host Settings (can be overridden via .env file)
    # Default: 127.0.0.1 (production)
    # Dev: Set BASE_HOST=100.115.184.74 in .env file
    base_host: str = "127.0.0.1"

    # PaddleOCR-VL Settings
    @property
    def paddleocr_vl_url(self) -> str:
        """Get PaddleOCR-VL URL (port 8001, path ``/v1``) based on base_host."""
        return f"http://{self.base_host}:8001/v1"

    # MinerOCR Settings
    @property
    def miner_ocr_api_url(self) -> str:
        """Get MinerOCR API URL (port 8000, path ``/file_parse``) based on base_host."""
        return f"http://{self.base_host}:8000/file_parse"

    # GLM OCR Settings
    @property
    def glm_ocr_url(self) -> str:
        """Get GLM OCR URL (port 8002, path ``/v1``) based on base_host."""
        return f"http://{self.base_host}:8002/v1"

    # Padding settings
    # NOTE(review): `padding_ratio` here and `image_padding_ratio` below share
    # the same default (0.1); which consumer reads which is not visible in this
    # file -- confirm both are still needed.
    is_padding: bool = True
    padding_ratio: float = 0.1

    max_tokens: int = 4096

    # Model Paths
    pp_doclayout_model_dir: str | None = (
        "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
    )

    # Image Processing
    max_image_size_mb: int = 10
    image_padding_ratio: float = 0.1  # 10% on each side = 20% total expansion

    # Compute device: first CUDA device when available, otherwise CPU.
    # The default is evaluated once, when this class body is executed.
    # NOTE(review): torch.device is not a pydantic-native type; presumably a
    # plain-string DEVICE value from the environment would not be coerced --
    # confirm before overriding this via env/.env.
    device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Server Settings
    host: str = "0.0.0.0"
    port: int = 8053

    # Logging Settings
    log_dir: str | None = None  # Defaults to /app/logs in container or ./logs locally
    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL

    @property
    def pp_doclayout_dir(self) -> Path:
        """Get the PP-DocLayout model directory path.

        Returns:
            ``pp_doclayout_model_dir`` wrapped in a ``Path``.

        Raises:
            ValueError: If ``pp_doclayout_model_dir`` is ``None``. The field is
                declared ``str | None``, so this case is reported explicitly
                instead of surfacing as an opaque ``TypeError`` from ``Path``.
        """
        if self.pp_doclayout_model_dir is None:
            raise ValueError(
                "pp_doclayout_model_dir is not configured; set "
                "PP_DOCLAYOUT_MODEL_DIR to the PP-DocLayout model directory."
            )
        return Path(self.pp_doclayout_model_dir)
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` object, building it on the first call.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    instance created by the first call is handed back on every later call.
    """
    settings = Settings()
    return settings