init repo
This commit is contained in:
0
app/schemas/__init__.py
Normal file
0
app/schemas/__init__.py
Normal file
19
app/schemas/convert.py
Normal file
19
app/schemas/convert.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Request and response schemas for markdown to DOCX conversion endpoint."""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
|
||||
class MarkdownToDocxRequest(BaseModel):
    """Payload accepted by the markdown to DOCX conversion endpoint.

    Attributes:
        markdown: Required markdown source. Rejected when it is empty or
            consists only of whitespace.
        filename: Optional name for the output file, given without an
            extension. Defaults to ``None``.
    """

    markdown: str = Field(default=..., description="Markdown content to convert")
    filename: str | None = Field(
        default=None,
        description="Optional output filename (without extension)",
    )

    @field_validator("markdown")
    @classmethod
    def validate_markdown_not_empty(cls, v: str) -> str:
        """Return ``v`` unchanged when it has non-whitespace content.

        Raises:
            ValueError: If ``v`` is empty or whitespace-only.
        """
        # Accept on the happy path first; anything falling through is blank.
        if v and v.strip():
            return v
        raise ValueError("Markdown content cannot be empty")
|
||||
|
||||
48
app/schemas/image.py
Normal file
48
app/schemas/image.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""Request and response schemas for image OCR endpoint."""
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
|
||||
class LayoutRegion(BaseModel):
    """One layout region detected in a document.

    All three fields are required.

    Attributes:
        type: Region type; the field description lists text, formula,
            table and figure.
        bbox: Bounding box as a list of floats, described as
            ``[x1, y1, x2, y2]``.
        confidence: Detection confidence score.
    """

    type: str = Field(
        default=...,
        description="Region type: text, formula, table, figure",
    )
    bbox: list[float] = Field(
        default=...,
        description="Bounding box [x1, y1, x2, y2]",
    )
    confidence: float = Field(
        default=...,
        description="Detection confidence score",
    )
|
||||
|
||||
|
||||
class LayoutInfo(BaseModel):
    """Summary of layout detection results.

    Every field has a default, so an instance can be built with no arguments.

    Attributes:
        regions: Detected layout regions; a fresh empty list by default.
        has_plain_text: Whether plain text was detected. Defaults to ``False``.
        has_formula: Whether formulas were detected. Defaults to ``False``.
    """

    # default_factory gives each instance its own list rather than a shared one.
    regions: list[LayoutRegion] = Field(default_factory=list)
    has_plain_text: bool = Field(
        default=False,
        description="Whether plain text was detected",
    )
    has_formula: bool = Field(
        default=False,
        description="Whether formulas were detected",
    )
|
||||
|
||||
|
||||
class ImageOCRRequest(BaseModel):
    """Payload accepted by the image OCR endpoint.

    The image is supplied through exactly one of the two fields below.

    Attributes:
        image_url: URL to fetch the image from. Defaults to ``None``.
        image_base64: Base64-encoded image data. Defaults to ``None``.
    """

    image_url: str | None = Field(
        default=None,
        description="URL to fetch the image from",
    )
    image_base64: str | None = Field(
        default=None,
        description="Base64-encoded image data",
    )

    @model_validator(mode="after")
    def validate_input(self):
        """Ensure exactly one image source is set and return the model.

        Raises:
            ValueError: If both ``image_url`` and ``image_base64`` are set,
                or if neither is set.
        """
        # Only ``None`` counts as "not provided"; an empty string is still a value.
        url_given = self.image_url is not None
        base64_given = self.image_base64 is not None

        if url_given and base64_given:
            raise ValueError("Only one of image_url or image_base64 should be provided")
        if not (url_given or base64_given):
            raise ValueError("Either image_url or image_base64 must be provided")
        return self
|
||||
|
||||
|
||||
class ImageOCRResponse(BaseModel):
    """Payload returned by the image OCR endpoint.

    Every field has a default, so an empty response is valid.

    Attributes:
        latex: LaTeX representation of the content. Defaults to ``""``.
        markdown: Markdown representation of the content. Defaults to ``""``.
        mathml: MathML representation; described as empty when no math is
            detected. Defaults to ``""``.
        layout_info: Layout detection information; a default-constructed
            ``LayoutInfo`` when not supplied.
        recognition_mode: Recognition mode used; the field description names
            ``mixed_recognition`` and ``formula_recognition``. Defaults to ``""``.
    """

    latex: str = Field(
        default="",
        description="LaTeX representation of the content",
    )
    markdown: str = Field(
        default="",
        description="Markdown representation of the content",
    )
    mathml: str = Field(
        default="",
        description="MathML representation (empty if no math detected)",
    )
    # A factory builds a new LayoutInfo per response instead of sharing one.
    layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
    recognition_mode: str = Field(
        default="",
        description="Recognition mode used: mixed_recognition or formula_recognition",
    )
|
||||
|
||||
Reference in New Issue
Block a user