fix: remove padding from GLMOCREndToEndService and clean up ruff violations

- Drop image padding in GLMOCREndToEndService.recognize(); use raw image directly
- Fix F821: undefined `padded` references replaced with `image`
- Fix F601: duplicate dict key "≠" in converter
- Fix F841: unused `image_cls_ids` variable in layout_postprocess
- Fix E702: semicolon-separated statements in layout_postprocess
- Fix UP031: percent-format replaced with f-string in logging_config
- Auto-fix 44 additional ruff violations (import order, UP035/UP045/UP006, F401, F541)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-10 19:52:22 +08:00
parent f8173f7c0a
commit 30d2c2f45b
16 changed files with 162 additions and 140 deletions
--- a/app/services/layout_detector.py
+++ b/app/services/layout_detector.py
@@ -1,12 +1,11 @@
 """PP-DocLayoutV3 wrapper for document layout detection."""

 import numpy as np
-
-from app.schemas.image import LayoutInfo, LayoutRegion
-from app.core.config import get_settings
-from app.services.layout_postprocess import apply_layout_postprocess
 from paddleocr import LayoutDetection
-from typing import Optional
+
+from app.core.config import get_settings
+from app.schemas.image import LayoutInfo, LayoutRegion
+from app.services.layout_postprocess import apply_layout_postprocess

 settings = get_settings()

@@ -14,7 +13,7 @@ settings = get_settings()
 class LayoutDetector:
    """Layout detector for PP-DocLayoutV2."""

-    _layout_detector: Optional[LayoutDetection] = None
+    _layout_detector: LayoutDetection | None = None

    # PP-DocLayoutV2 class ID to label mapping
    CLS_ID_TO_LABEL: dict[int, str] = {
@@ -156,10 +155,11 @@ class LayoutDetector:

 if __name__ == "__main__":
    import cv2
+
    from app.core.config import get_settings
-    from app.services.image_processor import ImageProcessor
    from app.services.converter import Converter
-    from app.services.ocr_service import OCRService
+    from app.services.image_processor import ImageProcessor
+    from app.services.ocr_service import GLMOCREndToEndService

    settings = get_settings()

@@ -169,15 +169,15 @@ if __name__ == "__main__":
    converter = Converter()

    # Initialize OCR service
-    ocr_service = OCRService(
-        vl_server_url=settings.paddleocr_vl_url,
+    ocr_service = GLMOCREndToEndService(
+        vl_server_url=settings.glm_ocr_url,
        layout_detector=layout_detector,
        image_processor=image_processor,
        converter=converter,
    )

    # Load test image
-    image_path = "test/timeout.jpg"
+    image_path = "test/image2.png"
    image = cv2.imread(image_path)

    if image is None: