feat: add log for export api

This commit is contained in:
liuyuanchuang
2026-03-12 11:40:19 +08:00
parent bb1cf66137
commit 92b56d61d8
7 changed files with 101 additions and 5 deletions

View File

@@ -8,7 +8,8 @@
"WebFetch(domain:raw.githubusercontent.com)", "WebFetch(domain:raw.githubusercontent.com)",
"Bash(python -c \"\nfrom app.services.glm_postprocess import GLMResultFormatter, clean_repeated_content, clean_formula_number\nf = GLMResultFormatter\\(\\)\nprint\\('GLMResultFormatter OK'\\)\nprint\\('clean_formula_number:', clean_formula_number\\('\\(2.1\\)'\\)\\)\nregions = [\n {'index': 0, 'label': 'text', 'native_label': 'doc_title', 'content': 'Introduction', 'bbox_2d': [10,10,990,50]},\n {'index': 1, 'label': 'formula', 'native_label': 'display_formula', 'content': r'\\\\frac{a}{b}', 'bbox_2d': [10,60,990,200]},\n {'index': 2, 'label': 'text', 'native_label': 'formula_number', 'content': '\\(1\\)', 'bbox_2d': [900,60,990,200]},\n]\nmd = f.process\\(regions\\)\nprint\\('process output:'\\)\nprint\\(md\\)\n\" 2>&1 | grep -v \"^$\")", "Bash(python -c \"\nfrom app.services.glm_postprocess import GLMResultFormatter, clean_repeated_content, clean_formula_number\nf = GLMResultFormatter\\(\\)\nprint\\('GLMResultFormatter OK'\\)\nprint\\('clean_formula_number:', clean_formula_number\\('\\(2.1\\)'\\)\\)\nregions = [\n {'index': 0, 'label': 'text', 'native_label': 'doc_title', 'content': 'Introduction', 'bbox_2d': [10,10,990,50]},\n {'index': 1, 'label': 'formula', 'native_label': 'display_formula', 'content': r'\\\\frac{a}{b}', 'bbox_2d': [10,60,990,200]},\n {'index': 2, 'label': 'text', 'native_label': 'formula_number', 'content': '\\(1\\)', 'bbox_2d': [900,60,990,200]},\n]\nmd = f.process\\(regions\\)\nprint\\('process output:'\\)\nprint\\(md\\)\n\" 2>&1 | grep -v \"^$\")",
"Bash(python3 -c \"\nfrom app.services.glm_postprocess import GLMResultFormatter, clean_repeated_content, clean_formula_number\nf = GLMResultFormatter\\(\\)\nprint\\('GLMResultFormatter OK'\\)\nprint\\('clean_formula_number:', clean_formula_number\\('\\(2.1\\)'\\)\\)\nregions = [\n {'index': 0, 'label': 'text', 'native_label': 'doc_title', 'content': 'Introduction', 'bbox_2d': [10,10,990,50]},\n {'index': 1, 'label': 'formula', 'native_label': 'display_formula', 'content': r'\\\\frac{a}{b}', 'bbox_2d': [10,60,990,200]},\n {'index': 2, 'label': 'text', 'native_label': 'formula_number', 'content': '\\(1\\)', 'bbox_2d': [900,60,990,200]},\n]\nmd = f.process\\(regions\\)\nprint\\('process output:'\\)\nprint\\(repr\\(md\\)\\)\n\" 2>&1)", "Bash(python3 -c \"\nfrom app.services.glm_postprocess import GLMResultFormatter, clean_repeated_content, clean_formula_number\nf = GLMResultFormatter\\(\\)\nprint\\('GLMResultFormatter OK'\\)\nprint\\('clean_formula_number:', clean_formula_number\\('\\(2.1\\)'\\)\\)\nregions = [\n {'index': 0, 'label': 'text', 'native_label': 'doc_title', 'content': 'Introduction', 'bbox_2d': [10,10,990,50]},\n {'index': 1, 'label': 'formula', 'native_label': 'display_formula', 'content': r'\\\\frac{a}{b}', 'bbox_2d': [10,60,990,200]},\n {'index': 2, 'label': 'text', 'native_label': 'formula_number', 'content': '\\(1\\)', 'bbox_2d': [900,60,990,200]},\n]\nmd = f.process\\(regions\\)\nprint\\('process output:'\\)\nprint\\(repr\\(md\\)\\)\n\" 2>&1)",
"Bash(ls .venv 2>/dev/null || ls venv 2>/dev/null || echo \"no venv found\" && find . -name \"activate\" -path \"*/bin/activate\" 2>/dev/null | head -3)" "Bash(ls .venv 2>/dev/null || ls venv 2>/dev/null || echo \"no venv found\" && find . -name \"activate\" -path \"*/bin/activate\" 2>/dev/null | head -3)",
"Bash(ruff check:*)"
] ]
} }
} }

View File

@@ -1,5 +1,7 @@
"""Format conversion endpoints.""" """Format conversion endpoints."""
import logging
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import Response from fastapi.responses import Response
@@ -7,6 +9,8 @@ from app.core.dependencies import get_converter
from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
from app.services.converter import Converter from app.services.converter import Converter
logger = logging.getLogger(__name__)
router = APIRouter() router = APIRouter()
@@ -19,14 +23,25 @@ async def convert_markdown_to_docx(
Returns the generated DOCX file as a binary response. Returns the generated DOCX file as a binary response.
""" """
logger.info(
"Converting markdown to DOCX, filename=%s, content_length=%d",
request.filename,
len(request.markdown),
)
try: try:
docx_bytes = converter.export_to_file(request.markdown, export_type="docx") docx_bytes = converter.export_to_file(request.markdown, export_type="docx")
logger.info(
"DOCX conversion successful, filename=%s, size=%d bytes",
request.filename,
len(docx_bytes),
)
return Response( return Response(
content=docx_bytes, content=docx_bytes,
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'}, headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'},
) )
except Exception as e: except Exception as e:
logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)
raise HTTPException(status_code=500, detail=f"Conversion failed: {e}") raise HTTPException(status_code=500, detail=f"Conversion failed: {e}")
@@ -55,12 +70,17 @@ async def convert_latex_to_omml(
``` ```
""" """
if not request.latex or not request.latex.strip(): if not request.latex or not request.latex.strip():
logger.warning("LaTeX to OMML request received with empty formula")
raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty") raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty")
logger.info("Converting LaTeX to OMML, latex=%r", request.latex)
try: try:
omml = converter.convert_to_omml(request.latex) omml = converter.convert_to_omml(request.latex)
logger.info("LaTeX to OMML conversion successful")
return LatexToOmmlResponse(omml=omml) return LatexToOmmlResponse(omml=omml)
except ValueError as e: except ValueError as e:
logger.warning("LaTeX to OMML conversion invalid input: %s", e)
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
except RuntimeError as e: except RuntimeError as e:
logger.error("LaTeX to OMML conversion runtime error: %s", e)
raise HTTPException(status_code=503, detail=str(e)) raise HTTPException(status_code=503, detail=str(e))

View File

@@ -2,11 +2,15 @@
import logging import logging
import logging.handlers import logging.handlers
from contextvars import ContextVar
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
from app.core.config import get_settings from app.core.config import get_settings
# Context variable to hold the current request_id across async boundaries
request_id_ctx: ContextVar[str] = ContextVar("request_id", default="-")
class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler): class TimedRotatingAndSizeFileHandler(logging.handlers.TimedRotatingFileHandler):
"""File handler that rotates by both time (daily) and size (100MB).""" """File handler that rotates by both time (daily) and size (100MB)."""
@@ -92,14 +96,13 @@ def setup_logging(log_dir: str | None = None) -> logging.Logger:
# Remove existing handlers to avoid duplicates # Remove existing handlers to avoid duplicates
logger.handlers.clear() logger.handlers.clear()
# Create custom formatter that handles missing request_id # Create custom formatter that automatically injects request_id from context
class RequestIDFormatter(logging.Formatter): class RequestIDFormatter(logging.Formatter):
"""Formatter that handles request_id in log records.""" """Formatter that injects request_id from ContextVar into log records."""
def format(self, record): def format(self, record):
# Add request_id if not present
if not hasattr(record, "request_id"): if not hasattr(record, "request_id"):
record.request_id = getattr(record, "request_id", "unknown") record.request_id = request_id_ctx.get()
return super().format(record) return super().format(record)
formatter = RequestIDFormatter( formatter = RequestIDFormatter(

View File

@@ -8,6 +8,7 @@ from app.api.v1.router import api_router
from app.core.config import get_settings from app.core.config import get_settings
from app.core.dependencies import init_layout_detector from app.core.dependencies import init_layout_detector
from app.core.logging_config import setup_logging from app.core.logging_config import setup_logging
from app.middleware.request_id import RequestIDMiddleware
settings = get_settings() settings = get_settings()
@@ -33,6 +34,8 @@ app = FastAPI(
lifespan=lifespan, lifespan=lifespan,
) )
app.add_middleware(RequestIDMiddleware)
# Include API router # Include API router
app.include_router(api_router, prefix=settings.api_prefix) app.include_router(api_router, prefix=settings.api_prefix)

View File

View File

@@ -0,0 +1,34 @@
"""Middleware to propagate or generate request_id for every request."""
import uuid
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
from app.core.logging_config import request_id_ctx
# Name of the HTTP header that carries the request ID, both on the incoming
# request and on the outgoing response.
REQUEST_ID_HEADER = "X-Request-ID"
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Attach a request ID to every request handled by the application.

    The ID is taken from the incoming ``X-Request-ID`` header when the caller
    supplies one; otherwise a fresh UUID4 string is generated.

    While the request is being processed the ID is held in ``request_id_ctx``
    so that log records emitted during the request can be tagged with it
    without threading the value through every call.

    The ID is also written to the response under the same header name, which
    lets callers correlate their request with the server-side logs.
    """

    async def dispatch(self, request: Request, call_next) -> Response:
        """Bind the request ID for the downstream call and echo it back."""
        supplied_id = request.headers.get(REQUEST_ID_HEADER)
        # A missing header and an empty header both fall back to a new ID.
        current_id = supplied_id if supplied_id else str(uuid.uuid4())

        reset_token = request_id_ctx.set(current_id)
        try:
            response = await call_next(request)
        finally:
            # Restore the previous value even when the downstream call raises.
            request_id_ctx.reset(reset_token)

        response.headers[REQUEST_ID_HEADER] = current_id
        return response

35
tests/tools/layout.py Normal file
View File

@@ -0,0 +1,35 @@
import cv2
from app.core.config import get_settings
from app.services.layout_detector import LayoutDetector
# Loaded at import time. NOTE(review): `settings` is not referenced anywhere
# else in this script; confirm whether anything relies on get_settings()
# having been called before this can be removed.
settings = get_settings()
def debug_layout_detector():
    """Run layout detection on a sample image and save an annotated copy.

    Reads ``test/image2.png``, passes it to ``LayoutDetector.detect``, draws
    each detected region's bounding box and ``native_label`` onto the image,
    and writes the result to ``test/layout_debug.png``. Both paths are
    relative to the current working directory.

    Raises:
        FileNotFoundError: If the sample image is missing or cannot be decoded.
    """
    layout_detector = LayoutDetector()

    image_path = "test/image2.png"
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, so
    # fail here with a clear message instead of an AttributeError on `.shape`.
    if image is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unreadable): {image_path}"
        )
    print(f"Image shape: {image.shape}")

    # padded_image = ImageProcessor(padding_ratio=0.15).add_padding(image)
    layout_info = layout_detector.detect(image)

    # Draw each region's label and bounding box; (0, 0, 255) is red in
    # OpenCV's default BGR channel order.
    for region in layout_info.regions:
        x1, y1, x2, y2 = region.bbox
        top_left = (int(x1), int(y1))
        bottom_right = (int(x2), int(y2))
        cv2.putText(
            image,
            region.native_label,
            top_left,
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 255),
            2,
        )
        cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), 2)

    cv2.imwrite("test/layout_debug.png", image)
# Run the debug routine only when this file is executed as a script.
if __name__ == "__main__":
    debug_layout_detector()