feat: add request-scoped logging with daily/size-based rotation

This commit is contained in:
liuyuanchuang
2026-02-07 09:26:45 +08:00
parent 23e2160668
commit aec030b071
5 changed files with 218 additions and 2 deletions

View File

@@ -1,19 +1,32 @@
"""Image OCR endpoint."""
from fastapi import APIRouter, Depends, HTTPException
import time
import uuid
from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service, get_glmocr_service
from fastapi import APIRouter, Depends, HTTPException, Request, Response
from app.core.dependencies import (
get_image_processor,
get_layout_detector,
get_ocr_service,
get_mineru_ocr_service,
get_glmocr_service,
)
from app.core.logging_config import get_logger, RequestIDAdapter
from app.schemas.image import ImageOCRRequest, ImageOCRResponse
from app.services.image_processor import ImageProcessor
from app.services.layout_detector import LayoutDetector
from app.services.ocr_service import OCRService, MineruOCRService, GLMOCRService
router = APIRouter()
logger = get_logger()
@router.post("/ocr", response_model=ImageOCRResponse)
async def process_image_ocr(
request: ImageOCRRequest,
http_request: Request,
response: Response,
image_processor: ImageProcessor = Depends(get_image_processor),
layout_detector: LayoutDetector = Depends(get_layout_detector),
mineru_service: MineruOCRService = Depends(get_mineru_ocr_service),
@@ -33,19 +46,53 @@ async def process_image_ocr(
Note: OMML conversion is not included due to performance overhead.
Use the /convert/latex-to-omml endpoint to convert LaTeX to OMML separately.
"""
# Get or generate request ID
request_id = http_request.headers.get("x-request-id", str(uuid.uuid4()))
response.headers["x-request-id"] = request_id
# Create logger adapter with request_id
log = RequestIDAdapter(logger, {"request_id": request_id})
log.request_id = request_id
try:
log.info("Starting image OCR processing")
# Preprocess image
preprocess_start = time.time()
image = image_processor.preprocess(
image_url=request.image_url,
image_base64=request.image_base64,
)
preprocess_time = time.time() - preprocess_start
log.debug(f"Image preprocessing completed in {preprocess_time:.3f}s")
# Layout detection
layout_start = time.time()
layout_info = layout_detector.detect(image)
layout_time = time.time() - layout_start
log.info(f"Layout detection completed in {layout_time:.3f}s")
# OCR recognition
ocr_start = time.time()
if layout_info.MixedRecognition:
recognition_method = "MixedRecognition (MinerU)"
log.info(f"Using {recognition_method}")
ocr_result = mineru_service.recognize(image)
else:
recognition_method = "FormulaOnly (GLMOCR)"
log.info(f"Using {recognition_method}")
ocr_result = glmocr_service.recognize(image)
ocr_time = time.time() - ocr_start
total_time = time.time() - preprocess_start
log.info(f"OCR processing completed - Method: {recognition_method}, " f"Layout time: {layout_time:.3f}s, OCR time: {ocr_time:.3f}s, " f"Total time: {total_time:.3f}s")
except RuntimeError as e:
log.error(f"OCR processing failed: {str(e)}", exc_info=True)
raise HTTPException(status_code=503, detail=str(e))
except Exception as e:
log.error(f"Unexpected error during OCR processing: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error")
return ImageOCRResponse(
latex=ocr_result.get("latex", ""),

View File

@@ -47,6 +47,10 @@ class Settings(BaseSettings):
host: str = "0.0.0.0"
port: int = 8053
# Logging Settings
log_dir: Optional[str] = None # Defaults to /app/logs in container or ./logs locally
log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
@property
def pp_doclayout_dir(self) -> Path:
"""Get the PP-DocLayout model directory path."""

157
app/core/logging_config.py Normal file
View File

@@ -0,0 +1,157 @@
"""Logging configuration with rotation by day and size."""
import datetime
import logging
import logging.handlers
from pathlib import Path
from typing import Optional

from app.core.config import get_settings
class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler):
    """File handler that rotates by both time (daily) and size (default 100MB).

    Time-based rotation is inherited from TimedRotatingFileHandler;
    size-based rotation is layered on top in shouldRollover, so whichever
    threshold is reached first triggers the rollover.
    """

    def __init__(
        self,
        filename: str,
        when: str = "midnight",
        interval: int = 1,
        backupCount: int = 30,
        maxBytes: int = 100 * 1024 * 1024,  # 100MB
        encoding: Optional[str] = None,
        delay: bool = False,
        utc: bool = False,
        # Bug fix: the original annotation referenced
        # logging.handlers.BaseRotatingHandler.atTime, which does not exist
        # (atTime is an instance attribute), raising AttributeError at import.
        # TimedRotatingFileHandler documents this parameter as datetime.time.
        atTime: Optional[datetime.time] = None,
    ):
        """Initialize handler with both time and size rotation.

        Args:
            filename: Log file path.
            when: When to rotate (e.g., 'midnight', 'H', 'M').
            interval: Rotation interval.
            backupCount: Number of backup files to keep.
            maxBytes: Maximum file size before rotation (in bytes);
                0 disables size-based rotation.
            encoding: File encoding.
            delay: Delay file opening until first emit.
            utc: Use UTC time.
            atTime: Time of day to rotate (for 'midnight' rotation).
        """
        super().__init__(
            filename=filename,
            when=when,
            interval=interval,
            backupCount=backupCount,
            encoding=encoding,
            delay=delay,
            utc=utc,
            atTime=atTime,
        )
        self.maxBytes = maxBytes

    def shouldRollover(self, record):
        """Return a truthy value when rollover should occur (time OR size)."""
        # Time-based rotation takes precedence.
        if super().shouldRollover(record):
            return True
        # Size-based rotation: open the stream lazily (delay=True leaves it None).
        if self.stream is None:
            self.stream = self._open()
        if self.maxBytes > 0:
            msg = "%s\n" % self.format(record)
            self.stream.seek(0, 2)  # Seek to end to get current file size
            if self.stream.tell() + len(msg) >= self.maxBytes:
                return True
        return False
def setup_logging(log_dir: Optional[str] = None) -> logging.Logger:
    """Setup application logging with rotation by day and size.

    Args:
        log_dir: Directory for log files. Defaults to ``settings.log_dir``,
            then /app/logs (in container) or ./logs (locally).

    Returns:
        Configured logger instance.
    """
    settings = get_settings()

    # Determine log directory: explicit arg > settings.log_dir > auto-detect.
    # (settings.log_dir was previously defined in config but never read.)
    if log_dir is None:
        log_dir = getattr(settings, "log_dir", None)
    if log_dir is None:
        log_dir = Path("/app/logs") if Path("/app/logs").exists() else Path("./logs")
    else:
        log_dir = Path(log_dir)

    # Create log directory if it doesn't exist
    log_dir.mkdir(parents=True, exist_ok=True)

    # Resolve log level: debug mode forces DEBUG, otherwise honor the
    # configured log_level (previously defined in config but never read).
    if settings.debug:
        level = logging.DEBUG
    else:
        level = getattr(logging, str(getattr(settings, "log_level", "INFO")).upper(), logging.INFO)

    # Create logger
    logger = logging.getLogger("doc_processer")
    logger.setLevel(level)

    # Remove existing handlers to avoid duplicates on repeated setup calls
    logger.handlers.clear()

    # Custom formatter that tolerates records emitted without a request_id
    class RequestIDFormatter(logging.Formatter):
        """Formatter that handles request_id in log records."""

        def format(self, record):
            # Records logged outside a request have no request_id attribute;
            # default it so the format string never raises.
            if not hasattr(record, "request_id"):
                record.request_id = "unknown"
            return super().format(record)

    formatter = RequestIDFormatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # File handler with rotation by day and size:
    # rotates daily at midnight OR when file exceeds 100MB, keeps 30 days
    log_file = log_dir / "doc_processer.log"
    file_handler = TimedRotatingAndSizeFileHandler(
        filename=str(log_file),
        when="midnight",
        interval=1,
        backupCount=30,
        maxBytes=100 * 1024 * 1024,  # 100MB
        encoding="utf-8",
    )
    file_handler.setLevel(level)
    file_handler.setFormatter(formatter)

    # Console handler (kept at INFO regardless of file level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    # Add handlers
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger
# Process-wide logger singleton, configured lazily on first access.
_logger: Optional[logging.Logger] = None


def get_logger() -> logging.Logger:
    """Return the shared application logger, configuring it on first use."""
    global _logger
    if _logger is not None:
        return _logger
    _logger = setup_logging()
    return _logger
class RequestIDAdapter(logging.LoggerAdapter):
    """Logger adapter that stamps a request_id onto every log record.

    The request_id is resolved in priority order from: the per-call
    ``extra`` dict, the ``extra`` mapping passed to the constructor,
    a ``request_id`` attribute set on the adapter, then "unknown".
    """

    def process(self, msg, kwargs):
        """Inject request_id into the record's ``extra`` mapping."""
        extra = kwargs.setdefault("extra", {})
        if "request_id" not in extra:
            # Bug fix: LoggerAdapter stores the constructor's mapping in
            # self.extra, which the original ignored — it only read a
            # manually-assigned self.request_id attribute, forcing callers
            # to set both the constructor extra and the attribute.
            extra["request_id"] = (self.extra or {}).get(
                "request_id", getattr(self, "request_id", "unknown")
            )
        return msg, kwargs

View File

@@ -7,9 +7,13 @@ from fastapi import FastAPI
from app.api.v1.router import api_router
from app.core.config import get_settings
from app.core.dependencies import init_layout_detector
from app.core.logging_config import setup_logging
settings = get_settings()
# Initialize logging
setup_logging()
@asynccontextmanager
async def lifespan(app: FastAPI):

View File

@@ -17,6 +17,8 @@ services:
# Mount pre-downloaded models (adjust paths as needed)
- ./models/DocLayout:/app/models/DocLayout:ro
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
# Mount logs directory to persist logs across container restarts
- ./logs:/app/logs
deploy:
resources:
reservations:
@@ -47,6 +49,8 @@ services:
volumes:
- ./models/DocLayout:/app/models/DocLayout:ro
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
# Mount logs directory to persist logs across container restarts
- ./logs:/app/logs
profiles:
- cpu
restart: unless-stopped