feat: add GLM-OCR core

2026-03-09 16:51:06 +08:00
parent d74130914c
commit 6dfaf9668b
17 changed files with 1687 additions and 140 deletions
--- a/app/services/layout_detector.py
+++ b/app/services/layout_detector.py
@@ -1,9 +1,10 @@
-"""PP-DocLayoutV2 wrapper for document layout detection."""
+"""PP-DocLayoutV3 wrapper for document layout detection."""

 import numpy as np

 from app.schemas.image import LayoutInfo, LayoutRegion
 from app.core.config import get_settings
+from app.services.layout_postprocess import apply_layout_postprocess
 from paddleocr import LayoutDetection
 from typing import Optional

@@ -116,6 +117,17 @@ class LayoutDetector:
        else:
            boxes = []

+        # Apply GLM-OCR layout post-processing (NMS, containment, unclip, clamp)
+        if boxes:
+            h, w = image.shape[:2]
+            boxes = apply_layout_postprocess(
+                boxes,
+                img_size=(w, h),
+                layout_nms=True,
+                layout_unclip_ratio=None,
+                layout_merge_bboxes_mode="large",
+            )
+
        for box in boxes:
            cls_id = box.get("cls_id")
            label = box.get("label") or self.CLS_ID_TO_LABEL.get(cls_id, "other")
@@ -128,6 +140,7 @@ class LayoutDetector:
            regions.append(
                LayoutRegion(
                    type=region_type,
+                    native_label=label,
                    bbox=coordinate,
                    confidence=score,
                    score=score,