feat: add omml api

This commit is contained in:
liuyuanchuang
2026-02-04 12:35:14 +08:00
parent 27f25d9f4d
commit 69f9a70ae5
5 changed files with 174 additions and 47 deletions

View File

@@ -2,12 +2,11 @@
from fastapi import APIRouter, Depends, HTTPException
from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service, get_converter
from app.schemas.image import ImageOCRRequest, ImageOCRResponse, LatexToOmmlRequest, LatexToOmmlResponse
from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service
from app.schemas.image import ImageOCRRequest, ImageOCRResponse
from app.services.image_processor import ImageProcessor
from app.services.layout_detector import LayoutDetector
from app.services.ocr_service import OCRService, MineruOCRService
from app.services.converter import Converter
router = APIRouter()
@@ -31,7 +30,7 @@ async def process_image_ocr(
4. Convert output to LaTeX, Markdown, and MathML formats
Note: OMML conversion is not included due to performance overhead.
Use the /latex-to-omml endpoint to convert LaTeX to OMML separately.
Use the /convert/latex-to-omml endpoint to convert LaTeX to OMML separately.
"""
image = image_processor.preprocess(
@@ -55,32 +54,3 @@ async def process_image_ocr(
mathml=ocr_result.get("mathml", ""),
mml=ocr_result.get("mml", ""),
)
@router.post("/latex-to-omml", response_model=LatexToOmmlResponse)
async def convert_latex_to_omml(
    request: LatexToOmmlRequest,
    converter: Converter = Depends(get_converter),
) -> LatexToOmmlResponse:
    """Convert a LaTeX formula to OMML (Office Math Markup Language).

    OMML is the math format used by Microsoft Word and other Office
    applications. This endpoint is separate from the main OCR endpoint due to
    the performance overhead of OMML conversion (requires creating a temporary
    DOCX file).

    Args:
        request: Contains the LaTeX formula to convert (without $ or $$
            delimiters).
        converter: Converter service injected through ``get_converter``.

    Returns:
        OMML representation of the formula.

    Raises:
        HTTPException: 400 when the formula is empty or whitespace-only, or
            when the converter raises ``ValueError``; 503 when the converter
            raises ``RuntimeError``.
    """
    # Reject missing/blank input up front so the converter is never invoked
    # for a formula that cannot produce output.
    if not request.latex or not request.latex.strip():
        raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty")

    try:
        omml = converter.convert_to_omml(request.latex)
        return LatexToOmmlResponse(omml=omml)
    except ValueError as e:
        # Chain the original exception so the root cause stays visible in
        # tracebacks and logs; the HTTP response itself is unchanged.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e