49 lines
2.0 KiB
Python
49 lines
2.0 KiB
Python
"""Request and response schemas for image OCR endpoint."""
|
|
|
|
from pydantic import BaseModel, Field, model_validator
|
|
|
|
|
|
class LayoutRegion(BaseModel):
    """A detected layout region in the document."""

    # Every field below is required: none of them declares a default.
    type: str = Field(
        description="Region type: text, formula, table, figure",
    )
    bbox: list[float] = Field(
        description="Bounding box [x1, y1, x2, y2]",
    )
    confidence: float = Field(
        description="Detection confidence score",
    )
|
|
|
|
|
|
class LayoutInfo(BaseModel):
    """Layout detection information."""

    # A default factory gives each instance its own empty list of regions.
    regions: list[LayoutRegion] = Field(default_factory=list)

    # Both flags are optional and start out False.
    has_plain_text: bool = Field(
        default=False,
        description="Whether plain text was detected",
    )
    has_formula: bool = Field(
        default=False,
        description="Whether formulas were detected",
    )
|
|
|
|
|
|
class ImageOCRRequest(BaseModel):
    """Request body for image OCR endpoint."""

    # Both sources default to None; the validator below requires that exactly
    # one of them is set.
    image_url: str | None = Field(
        default=None,
        description="URL to fetch the image from",
    )
    image_base64: str | None = Field(
        default=None,
        description="Base64-encoded image data",
    )

    @model_validator(mode="after")
    def validate_input(self):
        """Ensure the request carries exactly one image source.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: If both ``image_url`` and ``image_base64`` are None,
                or if both are set.
        """
        # Presence is decided by ``is not None`` only, so an empty string
        # still counts as a provided value.
        url_given = self.image_url is not None
        base64_given = self.image_base64 is not None

        if not (url_given or base64_given):
            raise ValueError("Either image_url or image_base64 must be provided")
        if url_given and base64_given:
            raise ValueError("Only one of image_url or image_base64 should be provided")
        return self
|
|
|
|
|
|
class ImageOCRResponse(BaseModel):
    """Response body for image OCR endpoint."""

    # Every field is optional: the text fields default to the empty string
    # and layout_info defaults to a freshly constructed LayoutInfo.
    latex: str = Field(
        default="",
        description="LaTeX representation of the content",
    )
    markdown: str = Field(
        default="",
        description="Markdown representation of the content",
    )
    mathml: str = Field(
        default="",
        description="MathML representation (empty if no math detected)",
    )
    layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
    recognition_mode: str = Field(
        default="",
        description="Recognition mode used: mixed_recognition or formula_recognition",
    )
|
|
|