"""Request and response schemas for image OCR endpoint.""" from pydantic import BaseModel, Field, model_validator class LayoutRegion(BaseModel): """A detected layout region in the document.""" type: str = Field(..., description="Region type: text, formula, table, figure") bbox: list[float] = Field(..., description="Bounding box [x1, y1, x2, y2]") confidence: float = Field(..., description="Detection confidence score") score: float = Field(..., description="Detection score") class LayoutInfo(BaseModel): """Layout detection information.""" regions: list[LayoutRegion] = Field(default_factory=list) MixedRecognition: bool = Field(False, description="Whether mixed recognition was used") # FormulaRecognition: bool = Field(False, description="Whether formula recognition (with prompt) was used") class ImageOCRRequest(BaseModel): """Request body for image OCR endpoint.""" image_url: str | None = Field(None, description="URL to fetch the image from") image_base64: str | None = Field(None, description="Base64-encoded image data") @model_validator(mode="after") def validate_input(self): """Validate that exactly one of image_url or image_base64 is provided.""" if self.image_url is None and self.image_base64 is None: raise ValueError("Either image_url or image_base64 must be provided") if self.image_url is not None and self.image_base64 is not None: raise ValueError("Only one of image_url or image_base64 should be provided") return self class ImageOCRResponse(BaseModel): """Response body for image OCR endpoint.""" latex: str = Field("", description="LaTeX representation of the content") markdown: str = Field("", description="Markdown representation of the content") mathml: str = Field("", description="MathML representation (empty if no math detected)") layout_info: LayoutInfo = Field(default_factory=LayoutInfo) recognition_mode: str = Field( "", description="Recognition mode used: mixed_recognition or formula_recognition" )