feat: add log for export api

2026-03-12 11:40:19 +08:00
parent bb1cf66137
commit 92b56d61d8
7 changed files with 101 additions and 5 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -8,7 +8,8 @@
      "WebFetch(domain:raw.githubusercontent.com)",
      "Bash(python -c \"\nfrom app.services.glm_postprocess import GLMResultFormatter, clean_repeated_content, clean_formula_number\nf = GLMResultFormatter\\(\\)\nprint\\('GLMResultFormatter OK'\\)\nprint\\('clean_formula_number:', clean_formula_number\\('\\(2.1\\)'\\)\\)\nregions = [\n    {'index': 0, 'label': 'text', 'native_label': 'doc_title', 'content': 'Introduction', 'bbox_2d': [10,10,990,50]},\n    {'index': 1, 'label': 'formula', 'native_label': 'display_formula', 'content': r'\\\\frac{a}{b}', 'bbox_2d': [10,60,990,200]},\n    {'index': 2, 'label': 'text', 'native_label': 'formula_number', 'content': '\\(1\\)', 'bbox_2d': [900,60,990,200]},\n]\nmd = f.process\\(regions\\)\nprint\\('process output:'\\)\nprint\\(md\\)\n\" 2>&1 | grep -v \"^$\")",
      "Bash(python3 -c \"\nfrom app.services.glm_postprocess import GLMResultFormatter, clean_repeated_content, clean_formula_number\nf = GLMResultFormatter\\(\\)\nprint\\('GLMResultFormatter OK'\\)\nprint\\('clean_formula_number:', clean_formula_number\\('\\(2.1\\)'\\)\\)\nregions = [\n    {'index': 0, 'label': 'text', 'native_label': 'doc_title', 'content': 'Introduction', 'bbox_2d': [10,10,990,50]},\n    {'index': 1, 'label': 'formula', 'native_label': 'display_formula', 'content': r'\\\\frac{a}{b}', 'bbox_2d': [10,60,990,200]},\n    {'index': 2, 'label': 'text', 'native_label': 'formula_number', 'content': '\\(1\\)', 'bbox_2d': [900,60,990,200]},\n]\nmd = f.process\\(regions\\)\nprint\\('process output:'\\)\nprint\\(repr\\(md\\)\\)\n\" 2>&1)",
-      "Bash(ls .venv 2>/dev/null || ls venv 2>/dev/null || echo \"no venv found\" && find . -name \"activate\" -path \"*/bin/activate\" 2>/dev/null | head -3)"
+      "Bash(ls .venv 2>/dev/null || ls venv 2>/dev/null || echo \"no venv found\" && find . -name \"activate\" -path \"*/bin/activate\" 2>/dev/null | head -3)",
      "Bash(ruff check:*)"
    ]
  }
 }
--- a/app/api/v1/endpoints/convert.py
+++ b/app/api/v1/endpoints/convert.py
@@ -1,5 +1,7 @@
 """Format conversion endpoints."""
 import logging
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import Response
@@ -7,6 +9,8 @@ from app.core.dependencies import get_converter
 from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
 from app.services.converter import Converter
 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -19,14 +23,25 @@ async def convert_markdown_to_docx(
    Returns the generated DOCX file as a binary response.
    """
    logger.info(
        "Converting markdown to DOCX, filename=%s, content_length=%d",
        request.filename,
        len(request.markdown),
    )
    try:
        docx_bytes = converter.export_to_file(request.markdown, export_type="docx")
        logger.info(
            "DOCX conversion successful, filename=%s, size=%d bytes",
            request.filename,
            len(docx_bytes),
        )
        return Response(
            content=docx_bytes,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'},
        )
    except Exception as e:
        logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)
        raise HTTPException(status_code=500, detail=f"Conversion failed: {e}")
@@ -55,12 +70,17 @@ async def convert_latex_to_omml(
        ```
    """
    if not request.latex or not request.latex.strip():
        logger.warning("LaTeX to OMML request received with empty formula")
        raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty")
    logger.info("Converting LaTeX to OMML, latex=%r", request.latex)
    try:
        omml = converter.convert_to_omml(request.latex)
        logger.info("LaTeX to OMML conversion successful")
        return LatexToOmmlResponse(omml=omml)
    except ValueError as e:
        logger.warning("LaTeX to OMML conversion invalid input: %s", e)
        raise HTTPException(status_code=400, detail=str(e))
    except RuntimeError as e:
        logger.error("LaTeX to OMML conversion runtime error: %s", e)
        raise HTTPException(status_code=503, detail=str(e))
--- a/app/core/logging_config.py
+++ b/app/core/logging_config.py
@@ -2,11 +2,15 @@
 import logging
 import logging.handlers
 from contextvars import ContextVar
 from pathlib import Path
 from typing import Any
 from app.core.config import get_settings
 # Context variable to hold the current request_id across async boundaries
 request_id_ctx: ContextVar[str] = ContextVar("request_id", default="-")
 class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler):
    """File handler that rotates by both time (daily) and size (100MB)."""
@@ -92,14 +96,13 @@ def setup_logging(log_dir: str | None = None) -> logging.Logger:
    # Remove existing handlers to avoid duplicates
    logger.handlers.clear()
-    # Create custom formatter that handles missing request_id
+    # Create custom formatter that automatically injects request_id from context
    class RequestIDFormatter(logging.Formatter):
-        """Formatter that handles request_id in log records."""
+        """Formatter that injects request_id from ContextVar into log records."""
        def format(self, record):
            # Add request_id if not present
            if not hasattr(record, "request_id"):
-                record.request_id = getattr(record, "request_id", "unknown")
+                record.request_id = request_id_ctx.get()
            return super().format(record)
    formatter = RequestIDFormatter(
--- a/app/main.py
+++ b/app/main.py
@@ -8,6 +8,7 @@ from app.api.v1.router import api_router
 from app.core.config import get_settings
 from app.core.dependencies import init_layout_detector
 from app.core.logging_config import setup_logging
 from app.middleware.request_id import RequestIDMiddleware
 settings = get_settings()
@@ -33,6 +34,8 @@ app = FastAPI(
    lifespan=lifespan,
 )
 app.add_middleware(RequestIDMiddleware)
 # Include API router
 app.include_router(api_router, prefix=settings.api_prefix)
--- a/app/middleware/__init__.py
+++ b/app/middleware/__init__.py
--- a/app/middleware/request_id.py
+++ b/app/middleware/request_id.py
@@ -0,0 +1,34 @@
 """Middleware to propagate or generate request_id for every request."""
 import uuid
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.requests import Request
 from starlette.responses import Response
 from app.core.logging_config import request_id_ctx
 REQUEST_ID_HEADER = "X-Request-ID"
 class RequestIDMiddleware(BaseHTTPMiddleware):
    """Attach a request ID to every request handled by the application.

    The ID is taken from the incoming ``X-Request-ID`` header when that value
    is acceptable; otherwise a fresh UUID4 string is generated. While the
    downstream handlers run, the ID is held in ``request_id_ctx`` so code that
    reads that context variable sees it without it being passed explicitly.
    The same ID is written to the ``X-Request-ID`` response header so callers
    can correlate their request with server-side records.

    The incoming header is client-controlled, so it is accepted only when it
    is non-empty, at most ``_MAX_REQUEST_ID_LENGTH`` characters long, and
    consists solely of printable ASCII characters without spaces. Anything
    else is discarded in favour of a generated ID.
    """

    # Upper bound on the length of a client-supplied request ID
    _MAX_REQUEST_ID_LENGTH = 128

    @staticmethod
    def _is_acceptable(candidate: str) -> bool:
        """Return True if *candidate* may be used as the request ID as-is."""
        return (
            0 < len(candidate) <= RequestIDMiddleware._MAX_REQUEST_ID_LENGTH
            and candidate.isascii()
            and candidate.isprintable()
            and " " not in candidate
        )

    async def dispatch(self, request: Request, call_next) -> Response:
        """Set the request ID for the duration of the request and echo it back.

        Args:
            request: The incoming request whose headers are inspected.
            call_next: Callable that forwards the request downstream and
                returns the response.

        Returns:
            The downstream response with ``X-Request-ID`` set to the ID used.
        """
        candidate = request.headers.get(REQUEST_ID_HEADER, "")
        if self._is_acceptable(candidate):
            request_id = candidate
        else:
            request_id = str(uuid.uuid4())

        token = request_id_ctx.set(request_id)
        try:
            response = await call_next(request)
        finally:
            # Restore the previous value even when the downstream call raises
            request_id_ctx.reset(token)

        response.headers[REQUEST_ID_HEADER] = request_id
        return response
--- a/tests/tools/layout.py
+++ b/tests/tools/layout.py
@@ -0,0 +1,35 @@
 import cv2
 from app.core.config import get_settings
 from app.services.layout_detector import LayoutDetector
 settings = get_settings()
 def debug_layout_detector(
    image_path: str = "test/image2.png",
    output_path: str = "test/layout_debug.png",
 ) -> None:
    """Run layout detection on one image and save an annotated copy.

    Each detected region is drawn onto the image as a rectangle with its
    ``native_label`` written at the box's top-left corner, and the result is
    written to *output_path*.

    Args:
        image_path: Path of the image to analyse.
        output_path: Path where the annotated image is written.

    Raises:
        FileNotFoundError: If the input image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    layout_detector = LayoutDetector()

    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    print(f"Image shape: {image.shape}")

    layout_info = layout_detector.detect(image)

    # Red in OpenCV's BGR channel order
    box_color = (0, 0, 255)
    for region in layout_info.regions:
        x1, y1, x2, y2 = (int(value) for value in region.bbox)
        cv2.putText(
            image,
            region.native_label,
            (x1, y1),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            box_color,
            2,
        )
        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)

    # cv2.imwrite reports failure through its boolean return value
    if not cv2.imwrite(output_path, image):
        raise OSError(f"Could not write annotated image: {output_path}")
 # Run the layout debug pass only when this file is executed as a script
 if __name__ == "__main__":
    debug_layout_detector()