2026-02-07 09:26:45 +08:00
|
|
|
"""Logging configuration with rotation by day and size."""
|
|
|
|
|
|
|
|
|
|
import logging
import logging.handlers
import os
from pathlib import Path
from typing import Any

from app.core.config import get_settings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler):
|
|
|
|
|
"""File handler that rotates by both time (daily) and size (100MB)."""
|
|
|
|
|
|
|
|
|
|
def __init__(
|
|
|
|
|
self,
|
|
|
|
|
filename: str,
|
|
|
|
|
when: str = "midnight",
|
|
|
|
|
interval: int = 1,
|
|
|
|
|
backupCount: int = 30,
|
|
|
|
|
maxBytes: int = 100 * 1024 * 1024, # 100MB
|
2026-03-10 19:52:22 +08:00
|
|
|
encoding: str | None = None,
|
2026-02-07 09:26:45 +08:00
|
|
|
delay: bool = False,
|
|
|
|
|
utc: bool = False,
|
2026-03-10 19:52:22 +08:00
|
|
|
atTime: Any | None = None,
|
2026-02-07 09:26:45 +08:00
|
|
|
):
|
|
|
|
|
"""Initialize handler with both time and size rotation.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
filename: Log file path
|
|
|
|
|
when: When to rotate (e.g., 'midnight', 'H', 'M')
|
|
|
|
|
interval: Rotation interval
|
|
|
|
|
backupCount: Number of backup files to keep
|
|
|
|
|
maxBytes: Maximum file size before rotation (in bytes)
|
|
|
|
|
encoding: File encoding
|
|
|
|
|
delay: Delay file opening until first emit
|
|
|
|
|
utc: Use UTC time
|
|
|
|
|
atTime: Time to rotate (for 'midnight' rotation)
|
|
|
|
|
"""
|
|
|
|
|
super().__init__(
|
|
|
|
|
filename=filename,
|
|
|
|
|
when=when,
|
|
|
|
|
interval=interval,
|
|
|
|
|
backupCount=backupCount,
|
|
|
|
|
encoding=encoding,
|
|
|
|
|
delay=delay,
|
|
|
|
|
utc=utc,
|
|
|
|
|
atTime=atTime,
|
|
|
|
|
)
|
|
|
|
|
self.maxBytes = maxBytes
|
|
|
|
|
|
|
|
|
|
def shouldRollover(self, record):
|
|
|
|
|
"""Check if rollover should occur based on time or size."""
|
|
|
|
|
# Check time-based rotation first
|
|
|
|
|
if super().shouldRollover(record):
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
# Check size-based rotation
|
|
|
|
|
if self.stream is None:
|
|
|
|
|
self.stream = self._open()
|
|
|
|
|
if self.maxBytes > 0:
|
2026-03-10 19:52:22 +08:00
|
|
|
msg = f"{self.format(record)}\n"
|
2026-02-07 09:26:45 +08:00
|
|
|
self.stream.seek(0, 2) # Seek to end
|
|
|
|
|
if self.stream.tell() + len(msg) >= self.maxBytes:
|
|
|
|
|
return True
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
2026-03-10 19:52:22 +08:00
|
|
|
def setup_logging(log_dir: str | None = None) -> logging.Logger:
    """Setup application logging with rotation by day and size.

    Args:
        log_dir: Directory for log files. Defaults to /app/logs in container or ./logs locally.

    Returns:
        Configured logger instance.
    """
    settings = get_settings()

    # Determine log directory: prefer the container mount point when present.
    if log_dir is None:
        log_path = Path("/app/logs") if Path("/app/logs").exists() else Path("./logs")
    else:
        log_path = Path(log_dir)

    # Create log directory if it doesn't exist
    log_path.mkdir(parents=True, exist_ok=True)

    # Create the application logger; level follows the debug setting.
    logger = logging.getLogger("doc_processer")
    logger.setLevel(logging.DEBUG if settings.debug else logging.INFO)

    # Remove existing handlers to avoid duplicates when setup is called again.
    logger.handlers.clear()

    class RequestIDFormatter(logging.Formatter):
        """Formatter that tolerates records logged without a request_id."""

        def format(self, record):
            # Records logged without extra={"request_id": ...} get a
            # placeholder so the format string below never raises.
            if not hasattr(record, "request_id"):
                record.request_id = "unknown"
            return super().format(record)

    formatter = RequestIDFormatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # File handler with rotation by day and size:
    # rotates daily at midnight OR when file exceeds 100MB, keeps 30 backups.
    log_file = log_path / "doc_processer.log"
    file_handler = TimedRotatingAndSizeFileHandler(
        filename=str(log_file),
        when="midnight",
        interval=1,
        backupCount=30,
        maxBytes=100 * 1024 * 1024,  # 100MB
        encoding="utf-8",
    )
    file_handler.setLevel(logging.DEBUG if settings.debug else logging.INFO)
    file_handler.setFormatter(formatter)

    # Console handler stays at INFO regardless of debug mode.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    # Add handlers
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Global logger instance — lazily created singleton, see get_logger().
_logger: logging.Logger | None = None
|
2026-02-07 09:26:45 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_logger() -> logging.Logger:
    """Return the shared application logger, configuring it on first use."""
    global _logger
    if _logger is not None:
        return _logger
    _logger = setup_logging()
    return _logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RequestIDAdapter(logging.LoggerAdapter):
    """Logger adapter that adds request_id to log records."""

    def process(self, msg, kwargs):
        """Inject a request_id into ``kwargs['extra']`` unless one is given."""
        extra = kwargs.setdefault("extra", {})
        # Fall back to "unknown" when no request_id was set on the adapter.
        # NOTE(review): this reads self.request_id, which the stock
        # LoggerAdapter constructor never sets — callers presumably assign it.
        extra.setdefault("request_id", getattr(self, "request_id", "unknown"))
        return msg, kwargs
|