61 lines
2.6 KiB
Python
61 lines
2.6 KiB
Python
"""Request and response schemas for image OCR endpoint."""
|
|
|
|
from pydantic import BaseModel, Field, model_validator
|
|
|
|
|
|
class LayoutRegion(BaseModel):
    """One region found by layout detection.

    All four fields are required.

    Attributes:
        type: Region category; the field description lists text, formula,
            table and figure.
        bbox: Bounding box given as [x1, y1, x2, y2].
        confidence: Detection confidence score.
        score: Detection score.
            NOTE(review): how this differs from ``confidence`` is not
            visible in this file - confirm with the producer of the data.
    """

    type: str = Field(
        description="Region type: text, formula, table, figure",
    )
    bbox: list[float] = Field(
        description="Bounding box [x1, y1, x2, y2]",
    )
    confidence: float = Field(description="Detection confidence score")
    score: float = Field(description="Detection score")
|
|
|
|
|
|
class LayoutInfo(BaseModel):
    """Result of layout detection for a document.

    Attributes:
        regions: Detected layout regions; defaults to a new empty list.
        MixedRecognition: Whether mixed recognition was used; defaults to
            False.
    """

    regions: list[LayoutRegion] = Field(default_factory=list)
    MixedRecognition: bool = Field(
        default=False,
        description="Whether mixed recognition was used",
    )
    # Currently disabled field, kept for reference:
    # FormulaRecognition: bool = Field(False, description="Whether formula recognition (with prompt) was used")
|
|
|
|
|
|
class ImageOCRRequest(BaseModel):
    """Request body for image OCR endpoint.

    The image must be supplied through exactly one of ``image_url`` or
    ``image_base64``; this is enforced after field validation by
    ``validate_input``.

    Attributes:
        image_url: URL to fetch the image from, or None.
        image_base64: Base64-encoded image data, or None.
        model_name: Name of the model to use for OCR; defaults to "mineru".
    """

    # The ``model_name`` field starts with ``model_``, a prefix that pydantic
    # v2 protects by default; releases that protect the whole prefix emit a
    # UserWarning when this class is created. Clearing the protected
    # namespaces is the remedy that warning itself suggests. A plain dict is
    # used so that no extra import is required.
    model_config = {"protected_namespaces": ()}

    image_url: str | None = Field(
        default=None,
        description="URL to fetch the image from",
    )
    image_base64: str | None = Field(
        default=None,
        description="Base64-encoded image data",
    )
    model_name: str = Field(
        default="mineru",
        description="Name of the model to use for OCR",
    )

    @model_validator(mode="after")
    def validate_input(self) -> "ImageOCRRequest":
        """Validate that exactly one of image_url or image_base64 is provided.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: If both sources are None, or if both are set.
        """
        has_url = self.image_url is not None
        has_base64 = self.image_base64 is not None

        if not (has_url or has_base64):
            raise ValueError("Either image_url or image_base64 must be provided")
        if has_url and has_base64:
            raise ValueError("Only one of image_url or image_base64 should be provided")
        return self
|
|
|
|
|
|
class ImageOCRResponse(BaseModel):
    """Response body for image OCR endpoint.

    Every text field defaults to the empty string. Per the field
    descriptions, the LaTeX and MathML variants are empty when the content
    is mixed.

    Attributes:
        latex: LaTeX representation of the content.
        markdown: Markdown representation of the content.
        mathml: Standard MathML representation.
        mml: XML MathML using the ``mml:`` namespace prefix.
        layout_info: Layout detection details; defaults to a fresh
            ``LayoutInfo``.
        recognition_mode: Recognition mode used (mixed_recognition or
            formula_recognition).
    """

    latex: str = Field(
        default="",
        description="LaTeX representation of the content (empty if mixed content)",
    )
    markdown: str = Field(
        default="",
        description="Markdown representation of the content",
    )
    mathml: str = Field(
        default="",
        description="Standard MathML representation (empty if mixed content)",
    )
    mml: str = Field(
        default="",
        description="XML MathML with mml: namespace prefix (empty if mixed content)",
    )
    layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
    recognition_mode: str = Field(
        default="",
        description="Recognition mode used: mixed_recognition or formula_recognition",
    )
|
|
|
|
|
|
class LatexToOmmlRequest(BaseModel):
    """Request body for LaTeX to OMML conversion endpoint.

    Attributes:
        latex: Required. A pure LaTeX formula, given without ``$`` or ``$$``
            delimiters.
    """

    latex: str = Field(
        description="Pure LaTeX formula (without $ or $$ delimiters)",
    )
|
|
|
|
|
|
class LatexToOmmlResponse(BaseModel):
    """Response body for LaTeX to OMML conversion endpoint.

    Attributes:
        omml: OMML (Office Math Markup Language) representation; defaults
            to the empty string.
    """

    omml: str = Field(
        default="",
        description="OMML (Office Math Markup Language) representation",
    )
|