feat: add log
This commit is contained in:
@@ -1,19 +1,32 @@
|
|||||||
"""Image OCR endpoint."""
|
"""Image OCR endpoint."""
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service, get_glmocr_service
|
from fastapi import APIRouter, Depends, HTTPException, Request, Response
|
||||||
|
|
||||||
|
from app.core.dependencies import (
|
||||||
|
get_image_processor,
|
||||||
|
get_layout_detector,
|
||||||
|
get_ocr_service,
|
||||||
|
get_mineru_ocr_service,
|
||||||
|
get_glmocr_service,
|
||||||
|
)
|
||||||
|
from app.core.logging_config import get_logger, RequestIDAdapter
|
||||||
from app.schemas.image import ImageOCRRequest, ImageOCRResponse
|
from app.schemas.image import ImageOCRRequest, ImageOCRResponse
|
||||||
from app.services.image_processor import ImageProcessor
|
from app.services.image_processor import ImageProcessor
|
||||||
from app.services.layout_detector import LayoutDetector
|
from app.services.layout_detector import LayoutDetector
|
||||||
from app.services.ocr_service import OCRService, MineruOCRService, GLMOCRService
|
from app.services.ocr_service import OCRService, MineruOCRService, GLMOCRService
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
@router.post("/ocr", response_model=ImageOCRResponse)
|
@router.post("/ocr", response_model=ImageOCRResponse)
|
||||||
async def process_image_ocr(
|
async def process_image_ocr(
|
||||||
request: ImageOCRRequest,
|
request: ImageOCRRequest,
|
||||||
|
http_request: Request,
|
||||||
|
response: Response,
|
||||||
image_processor: ImageProcessor = Depends(get_image_processor),
|
image_processor: ImageProcessor = Depends(get_image_processor),
|
||||||
layout_detector: LayoutDetector = Depends(get_layout_detector),
|
layout_detector: LayoutDetector = Depends(get_layout_detector),
|
||||||
mineru_service: MineruOCRService = Depends(get_mineru_ocr_service),
|
mineru_service: MineruOCRService = Depends(get_mineru_ocr_service),
|
||||||
@@ -33,19 +46,53 @@ async def process_image_ocr(
|
|||||||
Note: OMML conversion is not included due to performance overhead.
|
Note: OMML conversion is not included due to performance overhead.
|
||||||
Use the /convert/latex-to-omml endpoint to convert LaTeX to OMML separately.
|
Use the /convert/latex-to-omml endpoint to convert LaTeX to OMML separately.
|
||||||
"""
|
"""
|
||||||
|
# Get or generate request ID
|
||||||
|
request_id = http_request.headers.get("x-request-id", str(uuid.uuid4()))
|
||||||
|
response.headers["x-request-id"] = request_id
|
||||||
|
|
||||||
|
# Create logger adapter with request_id
|
||||||
|
log = RequestIDAdapter(logger, {"request_id": request_id})
|
||||||
|
log.request_id = request_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
log.info("Starting image OCR processing")
|
||||||
|
|
||||||
|
# Preprocess image
|
||||||
|
preprocess_start = time.time()
|
||||||
image = image_processor.preprocess(
|
image = image_processor.preprocess(
|
||||||
image_url=request.image_url,
|
image_url=request.image_url,
|
||||||
image_base64=request.image_base64,
|
image_base64=request.image_base64,
|
||||||
)
|
)
|
||||||
|
preprocess_time = time.time() - preprocess_start
|
||||||
|
log.debug(f"Image preprocessing completed in {preprocess_time:.3f}s")
|
||||||
|
|
||||||
|
# Layout detection
|
||||||
|
layout_start = time.time()
|
||||||
layout_info = layout_detector.detect(image)
|
layout_info = layout_detector.detect(image)
|
||||||
|
layout_time = time.time() - layout_start
|
||||||
|
log.info(f"Layout detection completed in {layout_time:.3f}s")
|
||||||
|
|
||||||
|
# OCR recognition
|
||||||
|
ocr_start = time.time()
|
||||||
if layout_info.MixedRecognition:
|
if layout_info.MixedRecognition:
|
||||||
|
recognition_method = "MixedRecognition (MinerU)"
|
||||||
|
log.info(f"Using {recognition_method}")
|
||||||
ocr_result = mineru_service.recognize(image)
|
ocr_result = mineru_service.recognize(image)
|
||||||
else:
|
else:
|
||||||
|
recognition_method = "FormulaOnly (GLMOCR)"
|
||||||
|
log.info(f"Using {recognition_method}")
|
||||||
ocr_result = glmocr_service.recognize(image)
|
ocr_result = glmocr_service.recognize(image)
|
||||||
|
ocr_time = time.time() - ocr_start
|
||||||
|
|
||||||
|
total_time = time.time() - preprocess_start
|
||||||
|
log.info(f"OCR processing completed - Method: {recognition_method}, " f"Layout time: {layout_time:.3f}s, OCR time: {ocr_time:.3f}s, " f"Total time: {total_time:.3f}s")
|
||||||
|
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
|
log.error(f"OCR processing failed: {str(e)}", exc_info=True)
|
||||||
raise HTTPException(status_code=503, detail=str(e))
|
raise HTTPException(status_code=503, detail=str(e))
|
||||||
|
except Exception as e:
|
||||||
|
log.error(f"Unexpected error during OCR processing: {str(e)}", exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail="Internal server error")
|
||||||
|
|
||||||
return ImageOCRResponse(
|
return ImageOCRResponse(
|
||||||
latex=ocr_result.get("latex", ""),
|
latex=ocr_result.get("latex", ""),
|
||||||
|
|||||||
@@ -47,6 +47,10 @@ class Settings(BaseSettings):
|
|||||||
host: str = "0.0.0.0"
|
host: str = "0.0.0.0"
|
||||||
port: int = 8053
|
port: int = 8053
|
||||||
|
|
||||||
|
# Logging Settings
|
||||||
|
log_dir: Optional[str] = None # Defaults to /app/logs in container or ./logs locally
|
||||||
|
log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pp_doclayout_dir(self) -> Path:
|
def pp_doclayout_dir(self) -> Path:
|
||||||
"""Get the PP-DocLayout model directory path."""
|
"""Get the PP-DocLayout model directory path."""
|
||||||
|
|||||||
157
app/core/logging_config.py
Normal file
157
app/core/logging_config.py
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
"""Logging configuration with rotation by day and size."""
|
||||||
|
|
||||||
|
import datetime
import logging
import logging.handlers
from pathlib import Path
from typing import Optional

from app.core.config import get_settings
|
||||||
|
|
||||||
|
|
||||||
|
class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler):
    """File handler that rotates on a time schedule or when a size limit is hit.

    Time-based rotation is delegated to ``TimedRotatingFileHandler``; this
    subclass adds a ``maxBytes`` check on top of it.

    NOTE(review): a size-triggered rollover reuses the backup file name the
    parent class derives from the current time interval, so several size
    rollovers inside one interval may target the same backup file. Confirm
    how the installed Python version treats an already-existing target.
    """

    def __init__(
        self,
        filename: str,
        when: str = "midnight",
        interval: int = 1,
        backupCount: int = 30,
        maxBytes: int = 100 * 1024 * 1024,  # 100MB
        encoding: Optional[str] = None,
        delay: bool = False,
        utc: bool = False,
        # The previous annotation referenced a non-existent attribute
        # (BaseRotatingHandler.atTime) and raised AttributeError on import.
        atTime: Optional[datetime.time] = None,
    ):
        """Initialize the handler with both time and size rotation.

        Args:
            filename: Log file path.
            when: When to rotate (e.g., 'midnight', 'H', 'M').
            interval: Rotation interval.
            backupCount: Number of backup files to keep.
            maxBytes: Maximum file size before rotation (in bytes). A value
                of 0 or less disables the size check.
            encoding: File encoding.
            delay: Delay file opening until first emit.
            utc: Use UTC time.
            atTime: Time of day to rotate (for 'midnight' rotation).
        """
        super().__init__(
            filename=filename,
            when=when,
            interval=interval,
            backupCount=backupCount,
            encoding=encoding,
            delay=delay,
            utc=utc,
            atTime=atTime,
        )
        self.maxBytes = maxBytes

    def shouldRollover(self, record):
        """Return True when either the time or the size condition is met.

        Args:
            record: The log record about to be written.

        Returns:
            True if the parent's time-based check fires, or if appending the
            formatted record would bring the file to ``maxBytes`` or beyond.
        """
        # Time-based rotation takes priority.
        if super().shouldRollover(record):
            return True

        # The stream may not be open yet when the handler uses delay=True.
        if self.stream is None:
            self.stream = self._open()

        if self.maxBytes > 0:
            msg = "%s\n" % self.format(record)
            # Seek to the end so tell() reports the current file size.
            self.stream.seek(0, 2)
            if self.stream.tell() + len(msg) >= self.maxBytes:
                return True
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logging(log_dir: Optional[str] = None) -> logging.Logger:
    """Set up application logging with rotation by day and size.

    Installs a rotating file handler and a console handler on the
    "doc_processer" logger. Calling this function again replaces the
    previously installed handlers, closing them first.

    Args:
        log_dir: Directory for log files. When omitted, the ``log_dir``
            setting is used if present; otherwise /app/logs when that
            directory exists, else ./logs.

    Returns:
        Configured logger instance.
    """
    settings = get_settings()

    # Resolve the directory: explicit argument, then settings, then defaults.
    if log_dir is None:
        log_dir = getattr(settings, "log_dir", None)
    if log_dir is None:
        container_dir = Path("/app/logs")
        log_path = container_dir if container_dir.exists() else Path("./logs")
    else:
        log_path = Path(log_dir)

    # Create log directory if it doesn't exist
    log_path.mkdir(parents=True, exist_ok=True)

    # Debug mode forces DEBUG; otherwise honour the log_level setting and
    # fall back to INFO when it is missing or not a known level name.
    configured_level = getattr(
        logging, str(getattr(settings, "log_level", "INFO")).upper(), None
    )
    if settings.debug:
        level = logging.DEBUG
    elif isinstance(configured_level, int):
        level = configured_level
    else:
        level = logging.INFO

    logger = logging.getLogger("doc_processer")
    logger.setLevel(level)

    # Remove existing handlers to avoid duplicates. Close each one so a
    # repeated call does not leak the previously opened log file.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    class RequestIDFormatter(logging.Formatter):
        """Formatter that tolerates records without a request_id."""

        def format(self, record):
            # Records not emitted through a request-scoped adapter carry no
            # request_id, which the format string below requires.
            if not hasattr(record, "request_id"):
                record.request_id = "unknown"
            return super().format(record)

    formatter = RequestIDFormatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # File handler with rotation by day and size:
    # rotates daily at midnight OR when the file reaches 100MB, keeps 30 backups.
    log_file = log_path / "doc_processer.log"
    file_handler = TimedRotatingAndSizeFileHandler(
        filename=str(log_file),
        when="midnight",
        interval=1,
        backupCount=30,
        maxBytes=100 * 1024 * 1024,  # 100MB
        encoding="utf-8",
    )
    file_handler.setLevel(level)
    file_handler.setFormatter(formatter)

    # Console handler stays at INFO regardless of the file handler's level.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide logger, created lazily by get_logger().
_logger: Optional[logging.Logger] = None


def get_logger() -> logging.Logger:
    """Return the shared application logger, configuring it on first use."""
    global _logger
    if _logger is not None:
        return _logger
    _logger = setup_logging()
    return _logger
|
||||||
|
|
||||||
|
|
||||||
|
class RequestIDAdapter(logging.LoggerAdapter):
    """Logger adapter that adds request_id to log records."""

    def process(self, msg, kwargs):
        """Ensure the logging call carries a ``request_id`` in ``extra``.

        The id is taken, in order of precedence, from:
        1. ``extra["request_id"]`` passed to the individual logging call,
        2. a ``request_id`` attribute set on the adapter instance,
        3. the ``extra`` mapping given to the adapter's constructor,
        4. the literal ``"unknown"``.

        Args:
            msg: The log message.
            kwargs: Keyword arguments of the logging call.

        Returns:
            The unchanged message and the keyword arguments with
            ``extra["request_id"]`` populated.
        """
        # Copy so the caller's dict is never mutated; also tolerates extra=None.
        call_extra = dict(kwargs.get("extra") or {})
        if "request_id" not in call_extra:
            request_id = getattr(self, "request_id", None)
            if request_id is None:
                adapter_extra = self.extra or {}
                request_id = adapter_extra.get("request_id", "unknown")
            call_extra["request_id"] = request_id
        kwargs["extra"] = call_extra
        return msg, kwargs
|
||||||
@@ -7,9 +7,13 @@ from fastapi import FastAPI
|
|||||||
from app.api.v1.router import api_router
|
from app.api.v1.router import api_router
|
||||||
from app.core.config import get_settings
|
from app.core.config import get_settings
|
||||||
from app.core.dependencies import init_layout_detector
|
from app.core.dependencies import init_layout_detector
|
||||||
|
from app.core.logging_config import setup_logging
|
||||||
|
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
|
|
||||||
|
# Initialize logging
|
||||||
|
setup_logging()
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ services:
|
|||||||
# Mount pre-downloaded models (adjust paths as needed)
|
# Mount pre-downloaded models (adjust paths as needed)
|
||||||
- ./models/DocLayout:/app/models/DocLayout:ro
|
- ./models/DocLayout:/app/models/DocLayout:ro
|
||||||
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
|
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
|
||||||
|
# Mount logs directory to persist logs across container restarts
|
||||||
|
- ./logs:/app/logs
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
@@ -47,6 +49,8 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./models/DocLayout:/app/models/DocLayout:ro
|
- ./models/DocLayout:/app/models/DocLayout:ro
|
||||||
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
|
- ./models/PP-DocLayout:/app/models/PP-DocLayout:ro
|
||||||
|
# Mount logs directory to persist logs across container restarts
|
||||||
|
- ./logs:/app/logs
|
||||||
profiles:
|
profiles:
|
||||||
- cpu
|
- cpu
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
Reference in New Issue
Block a user