Files
doc_processer/app/core/logging_config.py
liuyuanchuang 30d2c2f45b fix: remove padding from GLMOCREndToEndService and clean up ruff violations
- Drop image padding in GLMOCREndToEndService.recognize(); use raw image directly
- Fix F821 undefined `padded` references replaced with `image`
- Fix F601 duplicate dict key "≠" in converter
- Fix F841 unused `image_cls_ids` variable in layout_postprocess
- Fix E702 semicolon-separated statements in layout_postprocess
- Fix UP031 percent-format replaced with f-string in logging_config
- Auto-fix 44 additional ruff violations (import order, UP035/UP045/UP006, F401, F541)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-10 19:52:22 +08:00

158 lines
4.8 KiB
Python

"""Logging configuration with rotation by day and size."""
import logging
import logging.handlers
import time
from pathlib import Path
from typing import Any

from app.core.config import get_settings
class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler):
"""File handler that rotates by both time (daily) and size (100MB)."""
def __init__(
self,
filename: str,
when: str = "midnight",
interval: int = 1,
backupCount: int = 30,
maxBytes: int = 100 * 1024 * 1024, # 100MB
encoding: str | None = None,
delay: bool = False,
utc: bool = False,
atTime: Any | None = None,
):
"""Initialize handler with both time and size rotation.
Args:
filename: Log file path
when: When to rotate (e.g., 'midnight', 'H', 'M')
interval: Rotation interval
backupCount: Number of backup files to keep
maxBytes: Maximum file size before rotation (in bytes)
encoding: File encoding
delay: Delay file opening until first emit
utc: Use UTC time
atTime: Time to rotate (for 'midnight' rotation)
"""
super().__init__(
filename=filename,
when=when,
interval=interval,
backupCount=backupCount,
encoding=encoding,
delay=delay,
utc=utc,
atTime=atTime,
)
self.maxBytes = maxBytes
def shouldRollover(self, record):
"""Check if rollover should occur based on time or size."""
# Check time-based rotation first
if super().shouldRollover(record):
return True
# Check size-based rotation
if self.stream is None:
self.stream = self._open()
if self.maxBytes > 0:
msg = f"{self.format(record)}\n"
self.stream.seek(0, 2) # Seek to end
if self.stream.tell() + len(msg) >= self.maxBytes:
return True
return False
def setup_logging(log_dir: str | Path | None = None) -> logging.Logger:
    """Set up the "doc_processer" logger with a rotating file handler and a console handler.

    Calling this again reconfigures the same logger: handlers attached by a
    previous call are removed and closed first, so log files are not left
    open and records are not duplicated.

    Args:
        log_dir: Directory for log files. When None, ``/app/logs`` is used if
            that directory exists, otherwise ``./logs``.

    Returns:
        Configured logger instance.
    """
    settings = get_settings()
    level = logging.DEBUG if settings.debug else logging.INFO

    # Determine log directory
    if log_dir is None:
        container_dir = Path("/app/logs")
        log_path = container_dir if container_dir.exists() else Path("./logs")
    else:
        log_path = Path(log_dir)

    # Create log directory if it doesn't exist
    log_path.mkdir(parents=True, exist_ok=True)

    # Create logger
    logger = logging.getLogger("doc_processer")
    logger.setLevel(level)

    # Remove existing handlers to avoid duplicates, closing each one so the
    # file handler releases its open log file.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    class RequestIDFormatter(logging.Formatter):
        """Formatter that tolerates records without a request_id."""

        def format(self, record):
            # The format string references %(request_id)s, which would fail
            # for records logged without that extra field.
            if not hasattr(record, "request_id"):
                record.request_id = "unknown"
            return super().format(record)

    formatter = RequestIDFormatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # File handler with rotation by day and size:
    # rotates daily at midnight OR when the file reaches 100MB, keeping at
    # most 30 backup files.
    file_handler = TimedRotatingAndSizeFileHandler(
        filename=str(log_path / "doc_processer.log"),
        when="midnight",
        interval=1,
        backupCount=30,
        maxBytes=100 * 1024 * 1024,  # 100MB
        encoding="utf-8",
    )
    file_handler.setLevel(level)
    file_handler.setFormatter(formatter)

    # Console handler (always INFO, even when debug is enabled)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    # Add handlers
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger
# Lazily created, process-wide logger; populated on the first get_logger() call.
_logger: logging.Logger | None = None


def get_logger() -> logging.Logger:
    """Return the shared logger, configuring it via setup_logging() on first use."""
    global _logger
    if _logger is not None:
        return _logger
    _logger = setup_logging()
    return _logger
class RequestIDAdapter(logging.LoggerAdapter):
    """Logger adapter that adds request_id to log records.

    The request id is resolved in this order: a ``request_id`` already present
    in the call's ``extra``, the adapter's ``request_id`` attribute, a
    ``request_id`` entry in the ``extra`` mapping given to the adapter's
    constructor, and finally the string ``"unknown"``.
    """

    def process(self, msg, kwargs):
        """Return ``msg`` and ``kwargs`` with ``extra["request_id"]`` guaranteed.

        The caller's ``extra`` mapping is copied rather than modified, so a
        dict reused across log calls never retains a request id from an
        earlier call. An explicit ``extra=None`` is treated as empty.
        """
        merged = dict(kwargs.get("extra") or {})
        if "request_id" not in merged:
            merged["request_id"] = self._current_request_id()
        kwargs["extra"] = merged
        return msg, kwargs

    def _current_request_id(self):
        """Resolve the adapter-level request id, defaulting to "unknown"."""
        try:
            return self.request_id
        except AttributeError:
            pass
        adapter_extra = getattr(self, "extra", None) or {}
        return adapter_extra.get("request_id", "unknown")