"""PP-DocLayoutV3 wrapper for document layout detection."""

import numpy as np
from paddleocr import LayoutDetection

from app.core.config import get_settings
from app.schemas.image import LayoutInfo, LayoutRegion
from app.services.layout_postprocess import apply_layout_postprocess

# Application settings, resolved once when this module is imported.
# NOTE(review): nothing in LayoutDetector reads this name; only the __main__
# script at the bottom of the file uses settings (and rebinds it first).
# Confirm no other module imports it from here before removing.
settings = get_settings()


class LayoutDetector:
    """Layout detector backed by the PP-DocLayoutV3 model.

    The underlying ``LayoutDetection`` instance is created lazily and stored on
    the class, so every ``LayoutDetector`` object shares the same model handle.
    """

    # Shared model handle; populated on first use by _get_layout_detector().
    _layout_detector: LayoutDetection | None = None

    # A region whose normalized type is "text" must score strictly above this
    # value for detect() to set the MixedRecognition flag.
    MIXED_RECOGNITION_MIN_SCORE: float = 0.3

    # Class ID to label mapping; used only as a fallback when a detected box
    # carries no "label" of its own.
    # NOTE(review): this table was originally documented as the PP-DocLayoutV2
    # mapping -- confirm it also matches the PP-DocLayoutV3 class list.
    CLS_ID_TO_LABEL: dict[int, str] = {
        0: "abstract",
        1: "algorithm",
        2: "aside_text",
        3: "chart",
        4: "content",
        5: "display_formula",
        6: "doc_title",
        7: "figure_title",
        8: "footer",
        9: "footer_image",
        10: "footnote",
        11: "formula_number",
        12: "header",
        13: "header_image",
        14: "image",
        15: "inline_formula",
        16: "number",
        17: "paragraph_title",
        18: "reference",
        19: "reference_content",
        20: "seal",
        21: "table",
        22: "text",
        23: "vertical_text",
        24: "vision_footnote",
    }

    # Mapping from raw labels to normalized region types. Labels missing from
    # this table are treated as "text" by detect().
    LABEL_TO_TYPE: dict[str, str] = {
        # Text types
        "abstract": "text",
        "algorithm": "text",
        "aside_text": "text",
        "content": "text",
        "doc_title": "text",
        "footer": "text",
        "footnote": "text",
        "header": "text",
        "number": "text",
        "paragraph_title": "text",
        "reference": "text",
        "reference_content": "text",
        "text": "text",
        "vertical_text": "text",
        "vision_footnote": "text",
        # Formula types
        "display_formula": "formula",
        "inline_formula": "formula",
        # formula_number is a plain text annotation "(2.9)" next to a formula,
        # not a formula itself — use text prompt so vLLM returns plain text
        "formula_number": "text",
        # Table types
        "table": "table",
        # Figure types
        "chart": "figure",
        "figure_title": "figure",
        "footer_image": "figure",
        "header_image": "figure",
        "image": "figure",
        "seal": "figure",
    }

    def __init__(self) -> None:
        """Initialize the detector, creating the shared model if necessary."""
        # Called for its side effect only: the model is created here rather
        # than on the first detect() call.
        self._get_layout_detector()

    def _get_layout_detector(self):
        """Get or create the shared LayoutDetection instance.

        Returns:
            The class-level ``LayoutDetection`` object for PP-DocLayoutV3.
        """
        if LayoutDetector._layout_detector is None:
            LayoutDetector._layout_detector = LayoutDetection(model_name="PP-DocLayoutV3")
        return LayoutDetector._layout_detector

    @staticmethod
    def _extract_boxes(result) -> list:
        """Return the ``boxes`` of the first prediction, or ``[]`` if unavailable.

        Expected result format:
        ``[{'input_path': ..., 'page_index': None, 'boxes': [...]}]``.
        Anything else (non-list, empty list, non-dict first item, missing or
        ``None`` "boxes") yields an empty list.
        """
        if not isinstance(result, list) or not result:
            return []
        first_result = result[0]
        if not isinstance(first_result, dict):
            return []
        # "or []" also covers a present-but-None value, which would otherwise
        # break the iteration in detect().
        return first_result.get("boxes") or []

    def _to_region(self, box: dict) -> LayoutRegion:
        """Convert one raw detection box into a LayoutRegion.

        The box's own "label" wins; otherwise the label is looked up from
        "cls_id", falling back to "other". The score is stored under both
        ``confidence`` and ``score``.
        """
        label = box.get("label") or self.CLS_ID_TO_LABEL.get(box.get("cls_id"), "other")
        score = box.get("score", 0.0)
        return LayoutRegion(
            # Normalize label to region type
            type=self.LABEL_TO_TYPE.get(label, "text"),
            native_label=label,
            bbox=box.get("coordinate", [0, 0, 0, 0]),
            confidence=score,
            score=score,
        )

    def detect(self, image: np.ndarray) -> LayoutInfo:
        """Detect layout of the image using PP-DocLayoutV3.

        Args:
            image: Input image as numpy array; its first two dimensions are
                read as (height, width).

        Returns:
            LayoutInfo with the detected regions and the ``MixedRecognition``
            flag, which is true when at least one "text" region scores above
            ``MIXED_RECOGNITION_MIN_SCORE``.
        """
        boxes = self._extract_boxes(self._get_layout_detector().predict(image))

        # Apply GLM-OCR layout post-processing (NMS, containment, unclip, clamp)
        if boxes:
            height, width = image.shape[:2]
            boxes = apply_layout_postprocess(
                boxes,
                img_size=(width, height),
                layout_nms=True,
                layout_unclip_ratio=None,
                layout_merge_bboxes_mode="large",
            )

        regions: list[LayoutRegion] = [self._to_region(box) for box in boxes]

        mixed_recognition = any(
            region.type == "text" and region.score > self.MIXED_RECOGNITION_MIN_SCORE
            for region in regions
        )

        return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)


if __name__ == "__main__":
    # Manual smoke test: run the end-to-end OCR service on one sample image
    # and print the three output formats it returns.
    import cv2

    from app.core.config import get_settings
    from app.services.converter import Converter
    from app.services.image_processor import ImageProcessor
    from app.services.ocr_service import GLMOCREndToEndService

    settings = get_settings()

    # Collaborators required by the OCR service
    detector = LayoutDetector()
    processor = ImageProcessor(padding_ratio=settings.image_padding_ratio)
    output_converter = Converter()

    # End-to-end OCR service wired to the collaborators above
    service = GLMOCREndToEndService(
        vl_server_url=settings.glm_ocr_url,
        layout_detector=detector,
        image_processor=processor,
        converter=output_converter,
    )

    # Sample input; the None check below handles an image that failed to load
    sample_path = "test/image2.png"
    sample_image = cv2.imread(sample_path)

    if sample_image is not None:
        print(f"Image loaded: {sample_image.shape}")

        # Run recognition and dump each output format
        recognized = service.recognize(sample_image)

        print("\n=== OCR Result ===")
        print(f"Markdown:\n{recognized['markdown']}")
        print(f"\nLaTeX:\n{recognized['latex']}")
        print(f"\nMathML:\n{recognized['mathml']}")
    else:
        print(f"Failed to load image: {sample_path}")