"""Image OCR endpoint.""" from fastapi import APIRouter, Depends, HTTPException from app.core.dependencies import get_image_processor, get_layout_detector, get_ocr_service, get_mineru_ocr_service from app.schemas.image import ImageOCRRequest, ImageOCRResponse from app.services.image_processor import ImageProcessor from app.services.layout_detector import LayoutDetector from app.services.ocr_service import OCRService, MineruOCRService router = APIRouter() @router.post("/ocr", response_model=ImageOCRResponse) async def process_image_ocr( request: ImageOCRRequest, image_processor: ImageProcessor = Depends(get_image_processor), layout_detector: LayoutDetector = Depends(get_layout_detector), mineru_service: MineruOCRService = Depends(get_mineru_ocr_service), paddle_service: OCRService = Depends(get_ocr_service), ) -> ImageOCRResponse: """Process an image and extract content as LaTeX, Markdown, and MathML. The processing pipeline: 1. Load and preprocess image (add 30% whitespace padding) 2. Detect layout using DocLayout-YOLO 3. Based on layout: - If plain text exists: use PP-DocLayoutV2 for mixed recognition - Otherwise: use PaddleOCR-VL with formula prompt 4. Convert output to LaTeX, Markdown, and MathML formats """ image = image_processor.preprocess( image_url=request.image_url, image_base64=request.image_base64, ) try: if request.model_name == "mineru": ocr_result = mineru_service.recognize(image) elif request.model_name == "paddle": ocr_result = paddle_service.recognize(image) else: raise HTTPException(status_code=400, detail="Invalid model name") except RuntimeError as e: raise HTTPException(status_code=503, detail=str(e)) return ImageOCRResponse( latex=ocr_result.get("latex", ""), markdown=ocr_result.get("markdown", ""), mathml=ocr_result.get("mathml", ""), )