Use RFC 5987 filename*=UTF-8'' percent-encoding to support Chinese and other Unicode characters in DOCX download filenames. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
89 lines
3.2 KiB
Python
89 lines
3.2 KiB
Python
"""Format conversion endpoints."""
|
|
|
|
from urllib.parse import quote
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from fastapi.responses import Response
|
|
|
|
from app.core.dependencies import get_converter
|
|
from app.core.logging_config import get_logger
|
|
from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
|
|
from app.services.converter import Converter
|
|
|
|
# Module-level logger shared by the endpoints in this file; obtained from the
# project's logging configuration helper (app.core.logging_config.get_logger).
logger = get_logger()
|
|
|
|
# Router on which the conversion endpoints in this file are registered via
# the @router.post decorators.
router = APIRouter()
|
|
|
|
|
|
@router.post("/file")
async def convert_markdown_to_docx(
    request: MarkdownToDocxRequest,
    converter: Converter = Depends(get_converter),
) -> Response:
    """Convert markdown content to DOCX file.

    Returns the generated DOCX file as a binary response. The download name is
    sent in the RFC 5987 ``filename*=UTF-8''<percent-encoded>`` form so that
    non-ASCII names (e.g. Chinese) survive the HTTP header.

    Args:
        request: Carries the markdown source (``markdown``) and the download
            name (``filename``); ``.docx`` is appended to the name here.
        converter: Conversion service injected through ``get_converter``.

    Returns:
        Response whose body is the converter output and whose
        ``Content-Disposition`` header marks it as an attachment.

    Raises:
        HTTPException: 500 if the conversion or the response construction
            raises any exception; the original exception is chained as cause.
    """
    logger.info(
        "Converting markdown to DOCX, filename=%s, content_length=%d",
        request.filename,
        len(request.markdown),
    )
    try:
        docx_bytes = converter.export_to_file(request.markdown, export_type="docx")
        logger.info(
            "DOCX conversion successful, filename=%s, size=%d bytes",
            request.filename,
            len(docx_bytes),
        )
        # safe="" makes quote() percent-encode "/" as well: by default it is
        # left untouched, but "/" is not a valid attr-char in an RFC 5987
        # ext-value, so it must not appear raw in filename*.
        encoded_name = quote(f"{request.filename}.docx", safe="")
        return Response(
            content=docx_bytes,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}"},
        )
    except Exception as e:
        logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)
        # Chain explicitly so the underlying failure stays attached as __cause__.
        raise HTTPException(status_code=500, detail=f"Conversion failed: {e}") from e
|
|
|
|
|
|
@router.post("/latex-to-omml", response_model=LatexToOmmlResponse)
async def convert_latex_to_omml(
    request: LatexToOmmlRequest,
    converter: Converter = Depends(get_converter),
) -> LatexToOmmlResponse:
    """Convert LaTeX formula to OMML (Office Math Markup Language).

    OMML is the math format used by Microsoft Word and other Office applications.
    This endpoint is separate from the main OCR endpoint due to the performance
    overhead of OMML conversion (requires creating a temporary DOCX file).

    Args:
        request: Contains the LaTeX formula to convert (without $ or $$ delimiters).
        converter: Conversion service injected through ``get_converter``.

    Returns:
        OMML representation of the formula.

    Raises:
        HTTPException: 400 when the formula is empty or whitespace-only, or
            when the converter raises ``ValueError``; 503 when the converter
            raises ``RuntimeError``. The converter's exception is chained as
            the cause in the latter two cases.

    Example:
        ```bash
        curl -X POST "http://localhost:8000/api/v1/convert/latex-to-omml" \\
             -H "Content-Type: application/json" \\
             -d '{"latex": "\\\\frac{a}{b} + \\\\sqrt{c}"}'
        ```
    """
    # Reject missing/blank formulas up front, before invoking the converter.
    if not request.latex or not request.latex.strip():
        logger.warning("LaTeX to OMML request received with empty formula")
        raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty")

    logger.info("Converting LaTeX to OMML, latex=%r", request.latex)
    try:
        omml = converter.convert_to_omml(request.latex)
        logger.info("LaTeX to OMML conversion successful")
        return LatexToOmmlResponse(omml=omml)
    except ValueError as e:
        # ValueError from the converter is reported to the client as a bad request.
        logger.warning("LaTeX to OMML conversion invalid input: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        # RuntimeError from the converter is reported as service unavailable.
        logger.error("LaTeX to OMML conversion runtime error: %s", e)
        raise HTTPException(status_code=503, detail=str(e)) from e
|