init repo

This commit is contained in:
liuyuanchuang
2025-12-29 17:34:58 +08:00
commit 874fd383cc
36 changed files with 2641 additions and 0 deletions

48
app/schemas/image.py Normal file
View File

@@ -0,0 +1,48 @@
"""Request and response schemas for image OCR endpoint."""
from pydantic import BaseModel, Field, model_validator
class LayoutRegion(BaseModel):
    """A detected layout region in the document."""

    # All three fields are required: none of them declares a default.
    type: str = Field(
        description="Region type: text, formula, table, figure",
    )
    # Coordinates are carried as a plain list of floats; the model itself
    # does not enforce the number of entries.
    bbox: list[float] = Field(
        description="Bounding box [x1, y1, x2, y2]",
    )
    confidence: float = Field(
        description="Detection confidence score",
    )
class LayoutInfo(BaseModel):
    """Layout detection information."""

    # default_factory gives every instance its own fresh, empty list.
    regions: list[LayoutRegion] = Field(default_factory=list)
    # Both flags default to False when not supplied.
    has_plain_text: bool = Field(
        default=False,
        description="Whether plain text was detected",
    )
    has_formula: bool = Field(
        default=False,
        description="Whether formulas were detected",
    )
class ImageOCRRequest(BaseModel):
    """Request body for image OCR endpoint."""

    # Two alternative ways of supplying the image. Both default to None and
    # the validator below insists that exactly one of them is set.
    image_url: str | None = Field(
        default=None,
        description="URL to fetch the image from",
    )
    image_base64: str | None = Field(
        default=None,
        description="Base64-encoded image data",
    )

    @model_validator(mode="after")
    def validate_input(self):
        """Require exactly one image source on the populated model.

        Only ``None`` counts as "not provided"; any other value, including an
        empty string, is treated as provided.

        Returns:
            The model instance itself, unchanged.

        Raises:
            ValueError: If neither field is set, or if both are set.
        """
        url_given = self.image_url is not None
        base64_given = self.image_base64 is not None

        if not (url_given or base64_given):
            raise ValueError("Either image_url or image_base64 must be provided")
        if url_given and base64_given:
            raise ValueError("Only one of image_url or image_base64 should be provided")
        return self
class ImageOCRResponse(BaseModel):
    """Response body for image OCR endpoint."""

    # Every field has a default, so an empty response can be built with no
    # arguments; the three text representations default to "".
    latex: str = Field(
        default="",
        description="LaTeX representation of the content",
    )
    markdown: str = Field(
        default="",
        description="Markdown representation of the content",
    )
    mathml: str = Field(
        default="",
        description="MathML representation (empty if no math detected)",
    )
    # A fresh LayoutInfo is created per response when none is supplied.
    layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
    recognition_mode: str = Field(
        default="",
        description="Recognition mode used: mixed_recognition or formula_recognition",
    )