feat: add GLM-OCR core

This commit is contained in:
liuyuanchuang
2026-03-09 16:51:06 +08:00
parent d74130914c
commit 6dfaf9668b
17 changed files with 1687 additions and 140 deletions

View File

@@ -1,9 +1,10 @@
"""PP-DocLayoutV2 wrapper for document layout detection."""
"""PP-DocLayoutV3 wrapper for document layout detection."""
import numpy as np
from app.schemas.image import LayoutInfo, LayoutRegion
from app.core.config import get_settings
from app.services.layout_postprocess import apply_layout_postprocess
from paddleocr import LayoutDetection
from typing import Optional
@@ -116,6 +117,17 @@ class LayoutDetector:
else:
boxes = []
# Apply GLM-OCR layout post-processing (NMS, containment, unclip, clamp)
if boxes:
h, w = image.shape[:2]
boxes = apply_layout_postprocess(
boxes,
img_size=(w, h),
layout_nms=True,
layout_unclip_ratio=None,
layout_merge_bboxes_mode="large",
)
for box in boxes:
cls_id = box.get("cls_id")
label = box.get("label") or self.CLS_ID_TO_LABEL.get(cls_id, "other")
@@ -128,6 +140,7 @@ class LayoutDetector:
regions.append(
LayoutRegion(
type=region_type,
native_label=label,
bbox=coordinate,
confidence=score,
score=score,