init repo

2025-12-29 17:34:58 +08:00
commit 874fd383cc
36 changed files with 2641 additions and 0 deletions
--- a/app/schemas/init.py
+++ b/app/schemas/init.py
--- a/app/schemas/convert.py
+++ b/app/schemas/convert.py
@@ -0,0 +1,19 @@
+"""Request and response schemas for markdown to DOCX conversion endpoint."""
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class MarkdownToDocxRequest(BaseModel):
+    """Request body for markdown to DOCX conversion endpoint."""
+
+    # Required field: no default is declared, so callers must always send it.
+    markdown: str = Field(description="Markdown content to convert")
+    # Optional field: falls back to None when the caller omits it.
+    filename: str | None = Field(
+        default=None,
+        description="Optional output filename (without extension)",
+    )
+
+    @field_validator("markdown")
+    @classmethod
+    def validate_markdown_not_empty(cls, v: str) -> str:
+        """Reject empty or whitespace-only markdown.
+
+        Returns ``v`` unchanged (surrounding whitespace is not trimmed) and
+        raises ``ValueError`` when it holds no non-whitespace characters.
+        """
+        if v and v.strip():
+            return v
+        raise ValueError("Markdown content cannot be empty")
+
--- a/app/schemas/image.py
+++ b/app/schemas/image.py
@@ -0,0 +1,48 @@
+"""Request and response schemas for image OCR endpoint."""
+
+from pydantic import BaseModel, Field, model_validator
+
+
+class LayoutRegion(BaseModel):
+    """A detected layout region in the document."""
+
+    # All three fields are required; none of them declares a default.
+    # `type` is a plain str: the region kinds named in its description are
+    # informational only and are not enforced by this schema.
+    type: str = Field(description="Region type: text, formula, table, figure")
+    # The list length is not validated here; the description gives the layout.
+    bbox: list[float] = Field(description="Bounding box [x1, y1, x2, y2]")
+    confidence: float = Field(description="Detection confidence score")
+
+
+class LayoutInfo(BaseModel):
+    """Layout detection information."""
+
+    # Every field has a default, so LayoutInfo() builds a valid empty result.
+    # default_factory gives each instance its own fresh list of regions.
+    regions: list[LayoutRegion] = Field(default_factory=list)
+    has_plain_text: bool = Field(
+        default=False, description="Whether plain text was detected"
+    )
+    has_formula: bool = Field(
+        default=False, description="Whether formulas were detected"
+    )
+
+
+class ImageOCRRequest(BaseModel):
+    """Request body for image OCR endpoint."""
+
+    # Both sources default to None; validate_input enforces that exactly one
+    # of them is set and actually carries content.
+    image_url: str | None = Field(None, description="URL to fetch the image from")
+    image_base64: str | None = Field(None, description="Base64-encoded image data")
+
+    @model_validator(mode="after")
+    def validate_input(self) -> "ImageOCRRequest":
+        """Validate that exactly one of image_url or image_base64 is provided.
+
+        An empty or whitespace-only string counts as not provided, so a
+        request such as ``{"image_url": ""}`` is rejected here instead of
+        being accepted with nothing to process.
+
+        Returns:
+            The validated model instance, unchanged.
+
+        Raises:
+            ValueError: If neither field carries content, or if both fields
+                are set.
+        """
+        if self.image_url is not None and self.image_base64 is not None:
+            raise ValueError("Only one of image_url or image_base64 should be provided")
+        # At most one field is set at this point; it must not be blank.
+        supplied = self.image_url if self.image_url is not None else self.image_base64
+        if supplied is None or not supplied.strip():
+            raise ValueError("Either image_url or image_base64 must be provided")
+        return self
+
+
+class ImageOCRResponse(BaseModel):
+    """Response body for image OCR endpoint."""
+
+    # Every field is optional: the text fields default to "" and layout_info
+    # to a freshly built LayoutInfo, so an empty response can be constructed.
+    latex: str = Field(default="", description="LaTeX representation of the content")
+    markdown: str = Field(default="", description="Markdown representation of the content")
+    mathml: str = Field(
+        default="",
+        description="MathML representation (empty if no math detected)",
+    )
+    layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
+    # Plain str: the mode names in the description are not enforced here.
+    recognition_mode: str = Field(
+        default="",
+        description="Recognition mode used: mixed_recognition or formula_recognition",
+    )
+