feat: add omml api
This commit is contained in:
@@ -1,10 +1,10 @@
|
||||
"""Markdown to DOCX conversion endpoint."""
|
||||
"""Format conversion endpoints."""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import Response
|
||||
|
||||
from app.core.dependencies import get_converter
|
||||
from app.schemas.convert import MarkdownToDocxRequest
|
||||
from app.schemas.convert import MarkdownToDocxRequest, LatexToOmmlRequest, LatexToOmmlResponse
|
||||
from app.services.converter import Converter
|
||||
|
||||
router = APIRouter()
|
||||
@@ -28,3 +28,39 @@ async def convert_markdown_to_docx(
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Conversion failed: {e}")
|
||||
|
||||
|
||||
@router.post("/latex-to-omml", response_model=LatexToOmmlResponse)
async def convert_latex_to_omml(
    request: LatexToOmmlRequest,
    converter: Converter = Depends(get_converter),
) -> LatexToOmmlResponse:
    """Convert LaTeX formula to OMML (Office Math Markup Language).

    OMML is the math format used by Microsoft Word and other Office applications.
    This endpoint is separate from the main OCR endpoint due to the performance
    overhead of OMML conversion (requires creating a temporary DOCX file).

    Args:
        request: Contains the LaTeX formula to convert (without $ or $$ delimiters).
        converter: Conversion service injected through ``get_converter``.

    Returns:
        OMML representation of the formula.

    Raises:
        HTTPException: 400 when the formula is empty or whitespace-only, or when
            the converter raises ``ValueError``; 503 when the converter raises
            ``RuntimeError``.

    Example:
        ```bash
        curl -X POST "http://localhost:8000/api/v1/convert/latex-to-omml" \\
             -H "Content-Type: application/json" \\
             -d '{"latex": "\\\\frac{a}{b} + \\\\sqrt{c}"}'
        ```
    """
    # Guard at the handler level so a blank formula never reaches the converter.
    if not request.latex or not request.latex.strip():
        raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty")

    # NOTE(review): convert_to_omml is called synchronously inside an async
    # handler; if it is as slow as the docstring suggests it may stall the event
    # loop -- confirm whether it should be offloaded to a worker thread.
    try:
        omml = converter.convert_to_omml(request.latex)
        return LatexToOmmlResponse(omml=omml)
    except ValueError as e:
        # Chain the original error so its traceback stays attached to the
        # HTTP error for logging and debugging.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service, get_converter
|
||||
from app.schemas.image import ImageOCRRequest, ImageOCRResponse, LatexToOmmlRequest, LatexToOmmlResponse
|
||||
from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service
|
||||
from app.schemas.image import ImageOCRRequest, ImageOCRResponse
|
||||
from app.services.image_processor import ImageProcessor
|
||||
from app.services.layout_detector import LayoutDetector
|
||||
from app.services.ocr_service import OCRService, MineruOCRService
|
||||
from app.services.converter import Converter
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -31,7 +30,7 @@ async def process_image_ocr(
|
||||
4. Convert output to LaTeX, Markdown, and MathML formats
|
||||
|
||||
Note: OMML conversion is not included due to performance overhead.
|
||||
Use the /latex-to-omml endpoint to convert LaTeX to OMML separately.
|
||||
Use the /convert/latex-to-omml endpoint to convert LaTeX to OMML separately.
|
||||
"""
|
||||
|
||||
image = image_processor.preprocess(
|
||||
@@ -55,32 +54,3 @@ async def process_image_ocr(
|
||||
mathml=ocr_result.get("mathml", ""),
|
||||
mml=ocr_result.get("mml", ""),
|
||||
)
|
||||
|
||||
|
||||
@router.post("/latex-to-omml", response_model=LatexToOmmlResponse)
async def convert_latex_to_omml(
    request: LatexToOmmlRequest,
    converter: Converter = Depends(get_converter),
) -> LatexToOmmlResponse:
    """Convert LaTeX formula to OMML (Office Math Markup Language).

    OMML is the math format used by Microsoft Word and other Office applications.
    This endpoint is separate from the main OCR endpoint due to the performance
    overhead of OMML conversion (requires creating a temporary DOCX file).

    Args:
        request: Contains the LaTeX formula to convert (without $ or $$ delimiters).
        converter: Conversion service injected through ``get_converter``.

    Returns:
        OMML representation of the formula.

    Raises:
        HTTPException: 400 when the formula is empty or whitespace-only, or when
            the converter raises ``ValueError``; 503 when the converter raises
            ``RuntimeError``.
    """
    # Reject blank input before handing it to the converter.
    if not request.latex or not request.latex.strip():
        raise HTTPException(status_code=400, detail="LaTeX formula cannot be empty")

    try:
        omml = converter.convert_to_omml(request.latex)
        return LatexToOmmlResponse(omml=omml)
    except ValueError as e:
        # Keep the converter's exception as the explicit cause of the HTTP error.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Request and response schemas for markdown to DOCX conversion endpoint."""
|
||||
"""Request and response schemas for format conversion endpoints."""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
@@ -17,3 +17,23 @@ class MarkdownToDocxRequest(BaseModel):
|
||||
raise ValueError("Markdown content cannot be empty")
|
||||
return v
|
||||
|
||||
|
||||
class LatexToOmmlRequest(BaseModel):
    """Payload accepted by the LaTeX to OMML conversion endpoint."""

    # Required field; the formula is expected without surrounding $ or $$.
    latex: str = Field(
        ...,
        description="Pure LaTeX formula (without $ or $$ delimiters)",
    )

    @field_validator("latex")
    @classmethod
    def validate_latex_not_empty(cls, v: str) -> str:
        """Reject formulas that are empty or contain only whitespace."""
        if v and v.strip():
            return v
        raise ValueError("LaTeX formula cannot be empty")
|
||||
|
||||
|
||||
class LatexToOmmlResponse(BaseModel):
    """Payload returned by the LaTeX to OMML conversion endpoint."""

    # Falls back to an empty string when no OMML value is supplied.
    omml: str = Field(
        default="",
        description="OMML (Office Math Markup Language) representation",
    )
|
||||
|
||||
|
||||
@@ -47,14 +47,3 @@ class ImageOCRResponse(BaseModel):
|
||||
layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
|
||||
recognition_mode: str = Field("", description="Recognition mode used: mixed_recognition or formula_recognition")
|
||||
|
||||
|
||||
class LatexToOmmlRequest(BaseModel):
    """Payload accepted by the LaTeX to OMML conversion endpoint."""

    # Required field (no default); carries the formula text to convert.
    latex: str = Field(
        ...,
        description="Pure LaTeX formula (without $ or $$ delimiters)",
    )
|
||||
|
||||
|
||||
class LatexToOmmlResponse(BaseModel):
    """Payload returned by the LaTeX to OMML conversion endpoint."""

    # Optional in the schema: an empty string is used when omitted.
    omml: str = Field(
        default="",
        description="OMML (Office Math Markup Language) representation",
    )
|
||||
|
||||
112
test_omml_api.py
Normal file
112
test_omml_api.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Test script for OMML conversion API endpoint."""
|
||||
|
||||
import requests
|
||||
import json
|
||||
|
||||
|
||||
def test_latex_to_omml():
    """Post sample formulas to /convert/latex-to-omml and print each outcome.

    Every sample is sent as ``{"latex": ...}`` with a 10 second timeout. A 200
    reply prints the OMML length and a 150-character preview; any other status
    prints the response text. Request errors are reported and the loop moves on
    to the next sample.
    """
    samples = [
        {"name": "Simple fraction", "latex": "\\frac{a}{b}"},
        {
            "name": "Quadratic formula",
            "latex": "x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}",
        },
        {
            "name": "Integral",
            "latex": "\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}",
        },
        {
            "name": "Matrix",
            "latex": "\\begin{matrix} a & b \\\\ c & d \\end{matrix}",
        },
    ]
    endpoint = "http://localhost:8000/api/v1/convert/latex-to-omml"
    heavy_rule = "=" * 80

    print("Testing OMML Conversion API")
    print(heavy_rule)

    for number, sample in enumerate(samples, start=1):
        title = sample["name"]
        formula = sample["latex"]

        print(f"\nTest {number}: {title}")
        print("-" * 80)
        print(f"LaTeX: {formula}")

        try:
            reply = requests.post(
                endpoint,
                json={"latex": formula},
                headers={"Content-Type": "application/json"},
                timeout=10,
            )

            # Report failures first so the success path reads straight through.
            if reply.status_code != 200:
                print(f"✗ Status: {reply.status_code}")
                print(f"Error: {reply.text}")
                continue

            omml = reply.json().get("omml", "")
            print(f"✓ Status: {reply.status_code}")
            print(f"OMML length: {len(omml)} characters")
            print(f"OMML preview: {omml[:150]}...")
        except requests.exceptions.RequestException as request_error:
            print(f"✗ Request failed: {request_error}")
        except Exception as other_error:
            # Anything else (for example an undecodable body) is also only reported.
            print(f"✗ Error: {other_error}")

    print("\n" + heavy_rule)
|
||||
|
||||
|
||||
def test_invalid_input():
    """Send invalid payloads to the endpoint and print how the server responds.

    Two requests are made: one with an empty ``latex`` string and one with the
    ``latex`` field missing. For each, the HTTP status code and the decoded
    JSON body are printed.

    Raises:
        requests.exceptions.RequestException: If the server cannot be reached
            or does not answer within the timeout. The error is not handled
            here and propagates to the caller.
    """
    print("\nTesting Error Handling")
    print("=" * 80)

    base_url = "http://localhost:8000/api/v1/convert/latex-to-omml"

    # (label, JSON body) pairs, sent in order.
    invalid_payloads = [
        ("Empty LaTeX", {"latex": ""}),
        ("Missing LaTeX field", {}),
    ]

    for label, payload in invalid_payloads:
        print(f"\nTest: {label}")
        response = requests.post(
            base_url,
            json=payload,
            headers={"Content-Type": "application/json"},
            # Without a timeout requests waits indefinitely on a stalled
            # server; 10 seconds matches the value used in test_latex_to_omml.
            timeout=10,
        )
        print(f"Status: {response.status_code}")
        print(f"Response: {response.json()}")

    print("\n" + "=" * 80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: runs both checks in order and reports how the run ended.
    print("OMML API Test Suite")
    print("Make sure the API server is running on http://localhost:8000")
    print()

    try:
        for run_check in (test_latex_to_omml, test_invalid_input):
            run_check()
        print("\n✓ All tests completed!")
    except KeyboardInterrupt:
        # Ctrl+C gets its own message instead of a traceback.
        print("\n\n✗ Tests interrupted by user")
    except Exception as failure:
        print(f"\n✗ Test suite failed: {failure}")
|
||||
Reference in New Issue
Block a user