# Files: doc_processer/app/schemas/image.py
# 2026-01-05 17:30:54 +08:00
#
# 51 lines
# 2.1 KiB
# Python

"""Request and response schemas for image OCR endpoint."""
from pydantic import BaseModel, Field, model_validator
class LayoutRegion(BaseModel):
    """A detected layout region in the document."""

    # All four fields are required: each one passes Ellipsis as its default.
    # NOTE(review): `confidence` and `score` are both mandatory floats with
    # near-identical descriptions; how they differ is not visible in this
    # file — confirm with whatever produces these regions.

    # Category label. The description lists text/formula/table/figure, but
    # the annotation is a plain `str`, so any string is accepted here.
    type: str = Field(
        default=...,
        description="Region type: text, formula, table, figure",
    )
    # Declared as a list of floats; the four-element [x1, y1, x2, y2] layout
    # is stated only in the description and is not enforced by this schema.
    bbox: list[float] = Field(
        default=...,
        description="Bounding box [x1, y1, x2, y2]",
    )
    confidence: float = Field(
        default=...,
        description="Detection confidence score",
    )
    score: float = Field(
        default=...,
        description="Detection score",
    )
class LayoutInfo(BaseModel):
    """Layout detection information."""

    # Omitting `regions` yields a fresh empty list via the `list` factory.
    regions: list[LayoutRegion] = Field(default_factory=list)
    # The PascalCase spelling is the field's actual name in the schema.
    MixedRecognition: bool = Field(
        default=False,
        description="Whether mixed recognition was used",
    )
    # NOTE(review): a sibling `FormulaRecognition` flag existed here only as
    # commented-out code, so it is not a field of this model — either restore
    # it as a real field or drop this note.
class ImageOCRRequest(BaseModel):
    """Request body for image OCR endpoint."""

    # Image source: both default to None, and the validator below insists
    # that exactly one of the two is supplied.
    image_url: str | None = Field(
        default=None,
        description="URL to fetch the image from",
    )
    image_base64: str | None = Field(
        default=None,
        description="Base64-encoded image data",
    )
    # NOTE(review): on pydantic releases that reserve the `model_` prefix,
    # this field name triggers a protected-namespace warning — confirm
    # against the pinned pydantic version.
    model_name: str = Field(
        default="mineru",
        description="Name of the model to use for OCR",
    )

    @model_validator(mode="after")
    def validate_input(self):
        """Reject requests that supply zero or two image sources.

        A source counts as supplied when it is not ``None``; an empty
        string therefore still counts as supplied.

        Returns:
            This same instance, unmodified.

        Raises:
            ValueError: If neither ``image_url`` nor ``image_base64`` is
                set, or if both are set.
        """
        # Booleans sum as 0/1, giving the number of sources present.
        supplied = sum(
            source is not None
            for source in (self.image_url, self.image_base64)
        )
        if supplied == 0:
            raise ValueError("Either image_url or image_base64 must be provided")
        if supplied == 2:
            raise ValueError("Only one of image_url or image_base64 should be provided")
        return self
class ImageOCRResponse(BaseModel):
    """Response body for image OCR endpoint."""

    # Every field carries a default: the three text fields fall back to an
    # empty string and `layout_info` to a freshly built LayoutInfo.
    latex: str = Field(
        default="",
        description="LaTeX representation of the content",
    )
    markdown: str = Field(
        default="",
        description="Markdown representation of the content",
    )
    mathml: str = Field(
        default="",
        description="MathML representation (empty if no math detected)",
    )
    layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
    # Plain string: the two mode names appear only in the description and
    # are not validated by this schema.
    recognition_mode: str = Field(
        default="",
        description="Recognition mode used: mixed_recognition or formula_recognition",
    )