Files
doc_processer/app/api/v1/endpoints/convert.py

89 lines
3.2 KiB
Python
Raw Normal View History

2026-02-04 12:35:14 +08:00
"""Format conversion endpoints."""
2025-12-29 17:34:58 +08:00
from urllib.parse import quote
2025-12-29 17:34:58 +08:00
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import Response
2025-12-31 17:38:32 +08:00
from app.core.dependencies import get_converter
2026-03-12 12:41:26 +08:00
from app.core.logging_config import get_logger
from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
2025-12-31 17:38:32 +08:00
from app.services.converter import Converter
2025-12-29 17:34:58 +08:00
2026-03-12 12:41:26 +08:00
logger = get_logger()
2026-03-12 11:40:19 +08:00
2025-12-29 17:34:58 +08:00
router = APIRouter()
2025-12-31 17:38:32 +08:00
@router.post("/file")
async def convert_markdown_to_docx(
    request: MarkdownToDocxRequest,
    converter: Converter = Depends(get_converter),
) -> Response:
    """Convert markdown content to DOCX file.

    Returns the generated DOCX file as a binary response.

    Args:
        request: Carries the markdown text to convert and the base filename;
            the ``.docx`` suffix is appended here.
        converter: Converter service, injected via ``get_converter``.

    Returns:
        A response whose body is the DOCX bytes, sent as an attachment whose
        name is percent-encoded in the ``filename*`` parameter.

    Raises:
        HTTPException: 500 if anything fails while converting or while
            building the response.
    """
    logger.info(
        "Converting markdown to DOCX, filename=%s, content_length=%d",
        request.filename,
        len(request.markdown),
    )

    try:
        docx_bytes = converter.export_to_file(request.markdown, export_type="docx")

        logger.info(
            "DOCX conversion successful, filename=%s, size=%d bytes",
            request.filename,
            len(docx_bytes),
        )

        # quote() leaves "/" unescaped by default, but "/" is not a valid
        # attr-char in an RFC 5987 ext-value, so escape everything reserved.
        encoded_name = quote(f"{request.filename}.docx", safe="")
        return Response(
            content=docx_bytes,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}"},
        )
    except Exception as e:
        logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)
        # Chain the cause so the original failure stays attached to the HTTP error.
        raise HTTPException(status_code=500, detail=f"Conversion failed: {e}") from e
2026-02-04 12:35:14 +08:00
@router.post("/latex-to-omml", response_model=LatexToOmmlResponse)
async def convert_latex_to_omml(
    request: LatexToOmmlRequest,
    converter: Converter = Depends(get_converter),
) -> LatexToOmmlResponse:
    """Convert LaTeX formula to OMML (Office Math Markup Language).

    OMML is the math format used by Microsoft Word and other Office applications.
    This endpoint is separate from the main OCR endpoint due to the performance
    overhead of OMML conversion (requires creating a temporary DOCX file).

    Args:
        request: Contains the LaTeX formula to convert (without $ or $$ delimiters).
        converter: Converter service, injected via ``get_converter``.

    Returns:
        OMML representation of the formula.

    Raises:
        HTTPException: 400 if the formula is empty or whitespace-only, or if
            the converter raises ``ValueError``; 503 if the converter raises
            ``RuntimeError``.

    Example:
        ```bash
        curl -X POST "http://localhost:8000/api/v1/convert/latex-to-omml" \\
             -H "Content-Type: application/json" \\
             -d '{"latex": "\\\\frac{a}{b} + \\\\sqrt{c}"}'
        ```
    """
    # Reject missing, empty, and whitespace-only formulas before converting.
    if not (request.latex and request.latex.strip()):
        logger.warning("LaTeX to OMML request received with empty formula")
        raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty")

    logger.info("Converting LaTeX to OMML, latex=%r", request.latex)

    try:
        omml = converter.convert_to_omml(request.latex)
        logger.info("LaTeX to OMML conversion successful")
        return LatexToOmmlResponse(omml=omml)
    except ValueError as e:
        # ValueError from the converter is reported as a client error.
        logger.warning("LaTeX to OMML conversion invalid input: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        # RuntimeError from the converter is reported as service unavailable.
        logger.error("LaTeX to OMML conversion runtime error: %s", e)
        raise HTTPException(status_code=503, detail=str(e)) from e