init repo

2025-12-29 17:34:58 +08:00
commit 874fd383cc
36 changed files with 2641 additions and 0 deletions
--- a/app/api/__init__.py
+++ b/app/api/__init__.py
--- a/app/api/v1/__init__.py
+++ b/app/api/v1/__init__.py
--- a/app/api/v1/endpoints/__init__.py
+++ b/app/api/v1/endpoints/__init__.py
--- a/app/api/v1/endpoints/convert.py
+++ b/app/api/v1/endpoints/convert.py
@@ -0,0 +1,37 @@
+"""Markdown to DOCX conversion endpoint."""
+
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import Response
+
+from app.core.dependencies import get_docx_converter
+from app.schemas.convert import MarkdownToDocxRequest
+from app.services.docx_converter import DocxConverter
+
+router = APIRouter()
+
+
+@router.post("/docx")
+async def convert_markdown_to_docx(
+    request: MarkdownToDocxRequest,
+    converter: DocxConverter = Depends(get_docx_converter),
+) -> Response:
+    """Convert markdown content to a DOCX file returned as an attachment.
+
+    The download name is ``request.filename`` (default "output") with ".docx"
+    appended when missing. Raises HTTPException(500) if the converter fails.
+    """
+    from urllib.parse import quote  # function-scope: only this handler needs it
+    try:
+        docx_bytes = converter.convert(request.markdown)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Conversion failed: {e}") from e
+    filename = request.filename or "output"
+    if not filename.endswith(".docx"):
+        filename = f"{filename}.docx"
+    # Names that need escaping (quotes, spaces, non-ASCII) are sent percent-encoded
+    # in the RFC 5987 ``filename*`` form; raw, they would break the header value.
+    encoded = quote(filename)
+    name = f'filename="{filename}"' if encoded == filename else f"filename*=utf-8''{encoded}"
+    media_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+    headers = {"Content-Disposition": f"attachment; {name}"}
+    return Response(content=docx_bytes, media_type=media_type, headers=headers)
--- a/app/api/v1/endpoints/image.py
+++ b/app/api/v1/endpoints/image.py
@@ -0,0 +1,59 @@
+"""Image OCR endpoint."""
+
+from fastapi import APIRouter, Depends, HTTPException
+
+from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service
+from app.schemas.image import ImageOCRRequest, ImageOCRResponse
+from app.services.image_processor import ImageProcessor
+from app.services.layout_detector import LayoutDetector
+from app.services.ocr_service import OCRService
+
+router = APIRouter()
+
+
+@router.post("/ocr", response_model=ImageOCRResponse)
+async def process_image_ocr(
+    request: ImageOCRRequest,
+    image_processor: ImageProcessor = Depends(get_image_processor),
+    layout_detector: LayoutDetector = Depends(get_layout_detector),
+    ocr_service: OCRService = Depends(get_ocr_service),
+) -> ImageOCRResponse:
+    """Process an image and extract content as LaTeX, Markdown, and MathML.
+
+    The processing pipeline (model details live in the injected services):
+    1. Load and preprocess image (add 30% whitespace padding)
+    2. Detect layout using DocLayout-YOLO
+    3. Based on layout:
+       - If plain text exists: use PP-DocLayoutV2 for mixed recognition
+       - Otherwise: use PaddleOCR-VL with formula prompt
+    4. Convert output to LaTeX, Markdown, and MathML formats
+
+    Raises:
+        HTTPException: 400 if preprocessing raises ValueError; 500 / 503 if layout
+            detection / recognition raises RuntimeError (cause is chained).
+    """
+    try:  # 1. Load and preprocess image
+        image = image_processor.preprocess(
+            image_url=request.image_url,
+            image_base64=request.image_base64,
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e)) from e
+
+    try:  # 2. Detect layout
+        layout_info = layout_detector.detect(image)
+    except RuntimeError as e:
+        raise HTTPException(status_code=500, detail=f"Layout detection failed: {e}") from e
+
+    try:  # 3. Perform OCR based on layout
+        ocr_result = ocr_service.recognize(image, layout_info)
+    except RuntimeError as e:
+        raise HTTPException(status_code=503, detail=str(e)) from e
+
+    return ImageOCRResponse(  # 4. keys missing from the OCR result default to ""
+        latex=ocr_result.get("latex", ""),
+        markdown=ocr_result.get("markdown", ""),
+        mathml=ocr_result.get("mathml", ""),
+        layout_info=layout_info,
+        recognition_mode=ocr_result.get("recognition_mode", ""),
+    )
--- a/app/api/v1/router.py
+++ b/app/api/v1/router.py
@@ -0,0 +1,13 @@
+"""API v1 router combining all endpoints."""
+
+from fastapi import APIRouter
+
+from app.api.v1.endpoints import convert, image
+
+api_router = APIRouter()
+
+# Mount every endpoint module under its own URL prefix and OpenAPI tag group;
+# image OCR is registered first, then document conversion.
+api_router.include_router(router=image.router, tags=["Image OCR"], prefix="/image")
+
+api_router.include_router(router=convert.router, tags=["Conversion"], prefix="/convert")