feat: add log
This commit is contained in:
@@ -1,19 +1,32 @@
|
||||
"""Image OCR endpoint."""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service, get_glmocr_service
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response
|
||||
|
||||
from app.core.dependencies import (
|
||||
get_image_processor,
|
||||
get_layout_detector,
|
||||
get_ocr_service,
|
||||
get_mineru_ocr_service,
|
||||
get_glmocr_service,
|
||||
)
|
||||
from app.core.logging_config import get_logger, RequestIDAdapter
|
||||
from app.schemas.image import ImageOCRRequest, ImageOCRResponse
|
||||
from app.services.image_processor import ImageProcessor
|
||||
from app.services.layout_detector import LayoutDetector
|
||||
from app.services.ocr_service import OCRService, MineruOCRService, GLMOCRService
|
||||
|
||||
router = APIRouter()
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
@router.post("/ocr", response_model=ImageOCRResponse)
|
||||
async def process_image_ocr(
|
||||
request: ImageOCRRequest,
|
||||
http_request: Request,
|
||||
response: Response,
|
||||
image_processor: ImageProcessor = Depends(get_image_processor),
|
||||
layout_detector: LayoutDetector = Depends(get_layout_detector),
|
||||
mineru_service: MineruOCRService = Depends(get_mineru_ocr_service),
|
||||
@@ -33,19 +46,53 @@ async def process_image_ocr(
|
||||
Note: OMML conversion is not included due to performance overhead.
|
||||
Use the /convert/latex-to-omml endpoint to convert LaTeX to OMML separately.
|
||||
"""
|
||||
# Get or generate request ID
|
||||
request_id = http_request.headers.get("x-request-id", str(uuid.uuid4()))
|
||||
response.headers["x-request-id"] = request_id
|
||||
|
||||
# Create logger adapter with request_id
|
||||
log = RequestIDAdapter(logger, {"request_id": request_id})
|
||||
log.request_id = request_id
|
||||
|
||||
try:
|
||||
log.info("Starting image OCR processing")
|
||||
|
||||
# Preprocess image
|
||||
preprocess_start = time.time()
|
||||
image = image_processor.preprocess(
|
||||
image_url=request.image_url,
|
||||
image_base64=request.image_base64,
|
||||
)
|
||||
preprocess_time = time.time() - preprocess_start
|
||||
log.debug(f"Image preprocessing completed in {preprocess_time:.3f}s")
|
||||
|
||||
# Layout detection
|
||||
layout_start = time.time()
|
||||
layout_info = layout_detector.detect(image)
|
||||
layout_time = time.time() - layout_start
|
||||
log.info(f"Layout detection completed in {layout_time:.3f}s")
|
||||
|
||||
# OCR recognition
|
||||
ocr_start = time.time()
|
||||
if layout_info.MixedRecognition:
|
||||
recognition_method = "MixedRecognition (MinerU)"
|
||||
log.info(f"Using {recognition_method}")
|
||||
ocr_result = mineru_service.recognize(image)
|
||||
else:
|
||||
recognition_method = "FormulaOnly (GLMOCR)"
|
||||
log.info(f"Using {recognition_method}")
|
||||
ocr_result = glmocr_service.recognize(image)
|
||||
ocr_time = time.time() - ocr_start
|
||||
|
||||
total_time = time.time() - preprocess_start
|
||||
log.info(f"OCR processing completed - Method: {recognition_method}, " f"Layout time: {layout_time:.3f}s, OCR time: {ocr_time:.3f}s, " f"Total time: {total_time:.3f}s")
|
||||
|
||||
except RuntimeError as e:
|
||||
log.error(f"OCR processing failed: {str(e)}", exc_info=True)
|
||||
raise HTTPException(status_code=503, detail=str(e))
|
||||
except Exception as e:
|
||||
log.error(f"Unexpected error during OCR processing: {str(e)}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
return ImageOCRResponse(
|
||||
latex=ocr_result.get("latex", ""),
|
||||
|
||||
@@ -47,6 +47,10 @@ class Settings(BaseSettings):
|
||||
host: str = "0.0.0.0"
|
||||
port: int = 8053
|
||||
|
||||
# Logging Settings
|
||||
log_dir: Optional[str] = None # Defaults to /app/logs in container or ./logs locally
|
||||
log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||
|
||||
@property
|
||||
def pp_doclayout_dir(self) -> Path:
|
||||
"""Get the PP-DocLayout model directory path."""
|
||||
|
||||
157
app/core/logging_config.py
Normal file
157
app/core/logging_config.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""Logging configuration with rotation by day and size."""
|
||||
|
||||
"""Logging configuration with rotation by day and size."""

import datetime
import logging
import logging.handlers
from pathlib import Path
from typing import Optional

from app.core.config import get_settings
|
||||
|
||||
|
||||
class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler):
    """File handler that rotates by both time (daily) and size (default 100MB).

    Combines ``TimedRotatingFileHandler``'s schedule-based rollover with the
    size threshold of ``RotatingFileHandler``: a rollover is triggered as soon
    as either condition is met.
    """

    def __init__(
        self,
        filename: str,
        when: str = "midnight",
        interval: int = 1,
        backupCount: int = 30,
        maxBytes: int = 100 * 1024 * 1024,  # 100MB
        encoding: Optional[str] = None,
        delay: bool = False,
        utc: bool = False,
        # BUGFIX: was annotated `logging.handlers.BaseRotatingHandler.atTime`,
        # which does not exist as a class attribute and raises AttributeError
        # at import time. The stdlib parameter type is datetime.time.
        atTime: Optional[datetime.time] = None,
    ):
        """Initialize handler with both time and size rotation.

        Args:
            filename: Log file path
            when: When to rotate (e.g., 'midnight', 'H', 'M')
            interval: Rotation interval
            backupCount: Number of backup files to keep
            maxBytes: Maximum file size before rotation (in bytes); 0 disables
                the size-based check
            encoding: File encoding
            delay: Delay file opening until first emit
            utc: Use UTC time
            atTime: Time of day to rotate (for 'midnight' rotation)
        """
        super().__init__(
            filename=filename,
            when=when,
            interval=interval,
            backupCount=backupCount,
            encoding=encoding,
            delay=delay,
            utc=utc,
            atTime=atTime,
        )
        self.maxBytes = maxBytes

    def shouldRollover(self, record: logging.LogRecord) -> bool:
        """Return True if the record should trigger a rollover (time or size)."""
        # Time-based rotation takes precedence.
        if super().shouldRollover(record):
            return True

        # Size-based rotation, mirroring RotatingFileHandler.shouldRollover.
        if self.stream is None:
            # delay=True means the file has not been opened yet.
            self.stream = self._open()
        if self.maxBytes > 0:
            msg = "%s\n" % self.format(record)
            self.stream.seek(0, 2)  # seek to end; stream may not be positioned there
            if self.stream.tell() + len(msg) >= self.maxBytes:
                return True
        return False
|
||||
|
||||
|
||||
def setup_logging(log_dir: Optional[str] = None) -> logging.Logger:
    """Setup application logging with rotation by day and size.

    Args:
        log_dir: Directory for log files. Defaults to /app/logs when that
            directory exists (container) or ./logs otherwise (local).

    Returns:
        Configured logger instance.
    """
    settings = get_settings()

    # Determine log directory; use a separate Path variable instead of
    # rebinding the str parameter to a different type.
    if log_dir is None:
        log_path = Path("/app/logs") if Path("/app/logs").exists() else Path("./logs")
    else:
        log_path = Path(log_dir)

    # Create log directory if it doesn't exist.
    log_path.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger("doc_processer")
    logger.setLevel(logging.DEBUG if settings.debug else logging.INFO)

    # Remove existing handlers so repeated setup calls don't duplicate output.
    logger.handlers.clear()

    class RequestIDFormatter(logging.Formatter):
        """Formatter that tolerates records without a request_id attribute."""

        def format(self, record):
            # Records logged outside a request context carry no request_id;
            # default it so the format string below never raises.
            # (Was `getattr(record, "request_id", "unknown")` inside a
            # not-hasattr branch — the getattr could only return "unknown".)
            if not hasattr(record, "request_id"):
                record.request_id = "unknown"
            return super().format(record)

    formatter = RequestIDFormatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # File handler with rotation by day and size:
    # rotates daily at midnight OR when file exceeds 100MB, keeps 30 backups.
    log_file = log_path / "doc_processer.log"
    file_handler = TimedRotatingAndSizeFileHandler(
        filename=str(log_file),
        when="midnight",
        interval=1,
        backupCount=30,
        maxBytes=100 * 1024 * 1024,  # 100MB
        encoding="utf-8",
    )
    file_handler.setLevel(logging.DEBUG if settings.debug else logging.INFO)
    file_handler.setFormatter(formatter)

    # Console handler stays at INFO regardless of debug mode.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger
|
||||
|
||||
|
||||
# Global logger instance, created lazily by get_logger().
_logger: Optional[logging.Logger] = None


def get_logger() -> logging.Logger:
    """Return the process-wide logger, building it on first access."""
    global _logger
    if _logger is not None:
        return _logger
    _logger = setup_logging()
    return _logger
|
||||
|
||||
|
||||
class RequestIDAdapter(logging.LoggerAdapter):
    """Logger adapter that injects a request_id into every log record."""

    def process(self, msg, kwargs):
        """Ensure kwargs['extra'] carries a request_id before the record is emitted."""
        extra = kwargs.setdefault("extra", {})
        # Fall back to the adapter's own request_id attribute (or "unknown")
        # when the caller didn't supply one explicitly.
        extra.setdefault("request_id", getattr(self, "request_id", "unknown"))
        return msg, kwargs
|
||||
@@ -7,9 +7,13 @@ from fastapi import FastAPI
|
||||
from app.api.v1.router import api_router
|
||||
from app.core.config import get_settings
|
||||
from app.core.dependencies import init_layout_detector
|
||||
from app.core.logging_config import setup_logging
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
# Initialize logging
|
||||
setup_logging()
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
|
||||
@@ -17,6 +17,8 @@ services:
|
||||
# Mount pre-downloaded models (adjust paths as needed)
|
||||
- ./models/DocLayout:/app/models/DocLayout:ro
|
||||
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
|
||||
# Mount logs directory to persist logs across container restarts
|
||||
- ./logs:/app/logs
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -47,6 +49,8 @@ services:
|
||||
volumes:
|
||||
- ./models/DocLayout:/app/models/DocLayout:ro
|
||||
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
|
||||
# Mount logs directory to persist logs across container restarts
|
||||
- ./logs:/app/logs
|
||||
profiles:
|
||||
- cpu
|
||||
restart: unless-stopped
|
||||
|
||||
Reference in New Issue
Block a user