fix: refactor logic

2025-12-31 17:38:32 +08:00
parent 6ac50f7d2f
commit 35928c2484
17 changed files with 678 additions and 738 deletions
--- a/app/api/v1/endpoints/convert.py
+++ b/app/api/v1/endpoints/convert.py
@@ -3,34 +3,28 @@
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import Response

-from app.core.dependencies import get_docx_converter
+from app.core.dependencies import get_converter
 from app.schemas.convert import MarkdownToDocxRequest
-from app.services.docx_converter import DocxConverter
+from app.services.converter import Converter

 router = APIRouter()


-@router.post("/docx")
+@router.post("/file")
 async def convert_markdown_to_docx(
    request: MarkdownToDocxRequest,
-    converter: DocxConverter = Depends(get_docx_converter),
+    converter: Converter = Depends(get_converter),
 ) -> Response:
    """Convert markdown content to DOCX file.

-    Returns the generated DOCX file as a binary download.
+    Returns the generated DOCX file as a binary response.
    """
    try:
-        docx_bytes = converter.convert(request.markdown)
+        docx_bytes = converter.export_to_file(request.markdown, export_type="docx")
+        return Response(
+            content=docx_bytes,
+            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'},
+        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Conversion failed: {e}")
-
-    # Determine filename
-    filename = request.filename or "output"
-    if not filename.endswith(".docx"):
-        filename = f"{filename}.docx"
-
-    return Response(
-        content=docx_bytes,
-        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
-    )
--- a/app/api/v1/endpoints/image.py
+++ b/app/api/v1/endpoints/image.py
@@ -28,24 +28,15 @@ async def process_image_ocr(
       - Otherwise: use PaddleOCR-VL with formula prompt
    4. Convert output to LaTeX, Markdown, and MathML formats
    """
-    try:
-        # 1. Load and preprocess image
-        image = image_processor.preprocess(
-            image_url=request.image_url,
-            image_base64=request.image_base64,
-        )
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))

-    try:
-        # 2. Detect layout
-        layout_info = layout_detector.detect(image)
-    except RuntimeError as e:
-        raise HTTPException(status_code=500, detail=f"Layout detection failed: {e}")
+    image = image_processor.preprocess(
+        image_url=request.image_url,
+        image_base64=request.image_base64,
+    )

    try:
        # 3. Perform OCR based on layout
-        ocr_result = ocr_service.recognize(image, layout_info)
+        ocr_result = ocr_service.recognize(image)
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e))

@@ -54,6 +45,4 @@ async def process_image_ocr(
        latex=ocr_result.get("latex", ""),
        markdown=ocr_result.get("markdown", ""),
        mathml=ocr_result.get("mathml", ""),
-        layout_info=layout_info,
-        recognition_mode=ocr_result.get("recognition_mode", ""),
    )