app/core/config.py

"""Application configuration using Pydantic Settings."""

from functools import lru_cache
from pathlib import Path

import torch
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values are read from the process environment and from a ``.env`` file
    (UTF-8); variable names are matched case-insensitively. Every field below
    carries a default that is used when no override is supplied.

    The ``*_url`` properties are derived from ``base_host`` on each access, so
    they are not settings fields themselves.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    # API Settings
    api_prefix: str = "/doc_process/v1"
    debug: bool = False

    # Base Host Settings (can be overridden via .env file)
    # Default: 127.0.0.1 (production)
    # Dev: Set BASE_HOST=100.115.184.74 in .env file
    base_host: str = "127.0.0.1"

    # PaddleOCR-VL Settings
    @property
    def paddleocr_vl_url(self) -> str:
        """Get the PaddleOCR-VL URL: ``base_host`` on port 8001, path ``/v1``."""
        return f"http://{self.base_host}:8001/v1"

    # MinerOCR Settings
    @property
    def miner_ocr_api_url(self) -> str:
        """Get the MinerOCR API URL: ``base_host`` on port 8000, path ``/file_parse``."""
        return f"http://{self.base_host}:8000/file_parse"

    # GLM OCR Settings
    @property
    def glm_ocr_url(self) -> str:
        """Get the GLM OCR URL: ``base_host`` on port 8002, path ``/v1``."""
        return f"http://{self.base_host}:8002/v1"

    # Padding: on/off switch and ratio.
    # NOTE(review): `padding_ratio` and `image_padding_ratio` (below) share the
    # same default — confirm which one each caller reads.
    is_padding: bool = True
    padding_ratio: float = 0.1

    max_tokens: int = 4096

    # Model Paths
    # May be None; `pp_doclayout_dir` raises ValueError in that case.
    pp_doclayout_model_dir: str | None = (
        "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
    )

    # Image Processing
    max_image_size_mb: int = 10
    image_padding_ratio: float = 0.1  # 10% on each side = 20% total expansion

    # The default is computed once, when this class body is executed (module
    # import): first CUDA device if CUDA is available, otherwise CPU.
    device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Server Settings
    host: str = "0.0.0.0"
    port: int = 8053

    # Logging Settings
    log_dir: str | None = None  # Defaults to /app/logs in container or ./logs locally
    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL

    @property
    def pp_doclayout_dir(self) -> Path:
        """Get the PP-DocLayout model directory as a ``Path``.

        Returns:
            ``pp_doclayout_model_dir`` wrapped in ``pathlib.Path``.

        Raises:
            ValueError: If ``pp_doclayout_model_dir`` is ``None``.
        """
        model_dir = self.pp_doclayout_model_dir
        if model_dir is None:
            # Path(None) would fail with an opaque TypeError; name the setting
            # so the misconfiguration is obvious.
            raise ValueError(
                "pp_doclayout_model_dir is not set; cannot resolve the "
                "PP-DocLayout model directory"
            )
        return Path(model_dir)


@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance.

    The function takes no arguments, so ``lru_cache`` stores a single result:
    ``Settings()`` is constructed on the first call and the same object is
    returned by every later call.
    """
    settings = Settings()
    return settings
init repo 2025-12-29 17:34:58 +08:00			`"""Application configuration using Pydantic Settings."""`

			`from functools import lru_cache`
			`from pathlib import Path`

fix: add package 2025-12-29 20:02:07 +08:00			`import torch`
feat add glm-ocr core 2026-03-09 16:51:06 +08:00			`from pydantic_settings import BaseSettings, SettingsConfigDict`
init repo 2025-12-29 17:34:58 +08:00

			`class Settings(BaseSettings):`
			`"""Application settings loaded from environment variables."""`

			`model_config = SettingsConfigDict(`
			`env_file=".env",`
			`env_file_encoding="utf-8",`
			`case_sensitive=False,`
			`)`

			`# API Settings`
			`api_prefix: str = "/doc_process/v1"`
			`debug: bool = False`

feat: add padding 2026-02-07 16:53:09 +08:00			`# Base Host Settings (can be overridden via .env file)`
			`# Default: 127.0.0.1 (production)`
			`# Dev: Set BASE_HOST=100.115.184.74 in .env file`
			`base_host: str = "127.0.0.1"`

init repo 2025-12-29 17:34:58 +08:00			`# PaddleOCR-VL Settings`
feat: add padding 2026-02-07 16:53:09 +08:00			`@property`
			`def paddleocr_vl_url(self) -> str:`
			`"""Get PaddleOCR-VL URL based on base_host."""`
			`return f"http://{self.base_host}:8001/v1"`
feat: optimize the format convert 2026-02-04 12:00:06 +08:00
feat: add mineru model 2026-01-05 17:30:54 +08:00			`# MinerOCR Settings`
feat: add padding 2026-02-07 16:53:09 +08:00			`@property`
			`def miner_ocr_api_url(self) -> str:`
			`"""Get MinerOCR API URL based on base_host."""`
			`return f"http://{self.base_host}:8000/file_parse"`
init repo 2025-12-29 17:34:58 +08:00
feat: add glm ocr 2026-02-06 15:06:50 +08:00			`# GLM OCR Settings`
feat: add padding 2026-02-07 16:53:09 +08:00			`@property`
			`def glm_ocr_url(self) -> str:`
			`"""Get GLM OCR URL based on base_host."""`
			`return f"http://{self.base_host}:8002/v1"`
feat: add glm ocr 2026-02-06 15:06:50 +08:00
fix: get setting param 2026-02-07 09:11:43 +08:00			`# padding ratio`
feat: use padding mode 2026-02-26 17:01:23 +08:00			`is_padding: bool = True`
feat: no padding image 2026-02-25 09:52:45 +08:00			`padding_ratio: float = 0.1`
fix: get setting param 2026-02-07 09:11:43 +08:00
feat add glm-ocr core 2026-03-09 16:51:06 +08:00			`max_tokens: int = 4096`

init repo 2025-12-29 17:34:58 +08:00			`# Model Paths`
feat add glm-ocr core 2026-03-09 16:51:06 +08:00			`pp_doclayout_model_dir: str \| None = (`
			`"/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"`
			`)`
init repo 2025-12-29 17:34:58 +08:00
			`# Image Processing`
			`max_image_size_mb: int = 10`
feat: no padding image 2026-02-25 09:52:45 +08:00			`image_padding_ratio: float = 0.1 # 10% on each side = 20% total expansion`
init repo 2025-12-29 17:34:58 +08:00
feat add glm-ocr core 2026-03-09 16:51:06 +08:00			`device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")`
fix: add package 2025-12-29 20:02:07 +08:00
init repo 2025-12-29 17:34:58 +08:00			`# Server Settings`
			`host: str = "0.0.0.0"`
			`port: int = 8053`

feat: add log 2026-02-07 09:26:45 +08:00			`# Logging Settings`
feat add glm-ocr core 2026-03-09 16:51:06 +08:00			`log_dir: str \| None = None # Defaults to /app/logs in container or ./logs locally`
feat: add log 2026-02-07 09:26:45 +08:00			`log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL`

init repo 2025-12-29 17:34:58 +08:00			`@property`
			`def pp_doclayout_dir(self) -> Path:`
			`"""Get the PP-DocLayout model directory path."""`
			`return Path(self.pp_doclayout_model_dir)`


			`@lru_cache`
			`def get_settings() -> Settings:`
			`"""Get cached settings instance."""`
			`return Settings()`