"""Logging configuration with rotation by day and size.""" import logging import logging.handlers from contextvars import ContextVar from pathlib import Path from typing import Any from app.core.config import get_settings # Context variable to hold the current request_id across async boundaries request_id_ctx: ContextVar[str] = ContextVar("request_id", default="-") class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler): """File handler that rotates by both time (daily) and size (100MB).""" def __init__( self, filename: str, when: str = "midnight", interval: int = 1, backupCount: int = 30, maxBytes: int = 100 * 1024 * 1024, # 100MB encoding: str | None = None, delay: bool = False, utc: bool = False, atTime: Any | None = None, ): """Initialize handler with both time and size rotation. Args: filename: Log file path when: When to rotate (e.g., 'midnight', 'H', 'M') interval: Rotation interval backupCount: Number of backup files to keep maxBytes: Maximum file size before rotation (in bytes) encoding: File encoding delay: Delay file opening until first emit utc: Use UTC time atTime: Time to rotate (for 'midnight' rotation) """ super().__init__( filename=filename, when=when, interval=interval, backupCount=backupCount, encoding=encoding, delay=delay, utc=utc, atTime=atTime, ) self.maxBytes = maxBytes def shouldRollover(self, record): """Check if rollover should occur based on time or size.""" # Check time-based rotation first if super().shouldRollover(record): return True # Check size-based rotation if self.stream is None: self.stream = self._open() if self.maxBytes > 0: msg = f"{self.format(record)}\n" self.stream.seek(0, 2) # Seek to end if self.stream.tell() + len(msg) >= self.maxBytes: return True return False def setup_logging(log_dir: str | None = None) -> logging.Logger: """Setup application logging with rotation by day and size. Args: log_dir: Directory for log files. Defaults to /app/logs in container or ./logs locally. Returns: Configured logger instance. """ settings = get_settings() # Determine log directory if log_dir is None: log_dir = Path("/app/logs") if Path("/app/logs").exists() else Path("./logs") else: log_dir = Path(log_dir) # Create log directory if it doesn't exist log_dir.mkdir(parents=True, exist_ok=True) # Create logger logger = logging.getLogger("doc_processer") logger.setLevel(logging.DEBUG if settings.debug else logging.INFO) # Remove existing handlers to avoid duplicates logger.handlers.clear() # Create custom formatter that automatically injects request_id from context class RequestIDFormatter(logging.Formatter): """Formatter that injects request_id from ContextVar into log records.""" def format(self, record): if not hasattr(record, "request_id"): record.request_id = request_id_ctx.get() return super().format(record) formatter = RequestIDFormatter( fmt="%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) # File handler with rotation by day and size # Rotates daily at midnight OR when file exceeds 100MB, keeps 30 days log_file = log_dir / "doc_processer.log" file_handler = TimedRotatingAndSizeFileHandler( filename=str(log_file), when="midnight", interval=1, backupCount=30, maxBytes=100 * 1024 * 1024, # 100MB encoding="utf-8", ) file_handler.setLevel(logging.DEBUG if settings.debug else logging.INFO) file_handler.setFormatter(formatter) # Console handler console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) console_handler.setFormatter(formatter) # Add handlers logger.addHandler(file_handler) logger.addHandler(console_handler) return logger # Global logger instance _logger: logging.Logger | None = None def get_logger() -> logging.Logger: """Get the global logger instance.""" global _logger if _logger is None: _logger = setup_logging() return _logger class RequestIDAdapter(logging.LoggerAdapter): """Logger adapter that adds request_id to log records.""" def process(self, msg, kwargs): """Add request_id to extra if not present.""" if "extra" not in kwargs: kwargs["extra"] = {} if "request_id" not in kwargs["extra"]: kwargs["extra"]["request_id"] = getattr(self, "request_id", "unknown") return msg, kwargs