fix: remove padding from GLMOCREndToEndService and clean up ruff violations

- Drop image padding in GLMOCREndToEndService.recognize(); use the raw image directly
- Fix F821: replace undefined `padded` references with `image`
- Fix F601 duplicate dict key "≠" in converter
- Fix F841 unused `image_cls_ids` variable in layout_postprocess
- Fix E702 semicolon-separated statements in layout_postprocess
- Fix UP031: replace percent-format with f-string in logging_config
- Auto-fix 44 additional ruff violations (import order, UP035/UP045/UP006, F401, F541)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-10 19:52:22 +08:00
parent f8173f7c0a
commit 30d2c2f45b
16 changed files with 162 additions and 140 deletions
--- a/app/schemas/convert.py
+++ b/app/schemas/convert.py
@@ -36,4 +36,3 @@ class LatexToOmmlResponse(BaseModel):
    """Response body for LaTeX to OMML conversion endpoint."""

    omml: str = Field("", description="OMML (Office Math Markup Language) representation")
-
--- a/app/schemas/image.py
+++ b/app/schemas/image.py
@@ -7,7 +7,9 @@ class LayoutRegion(BaseModel):
    """A detected layout region in the document."""

    type: str = Field(..., description="Region type: text, formula, table, figure")
-    native_label: str = Field("", description="Raw label before type mapping (e.g. doc_title, formula_number)")
+    native_label: str = Field(
+        "", description="Raw label before type mapping (e.g. doc_title, formula_number)"
+    )
    bbox: list[float] = Field(..., description="Bounding box [x1, y1, x2, y2]")
    confidence: float = Field(..., description="Detection confidence score")
    score: float = Field(..., description="Detection score")
@@ -41,10 +43,15 @@ class ImageOCRRequest(BaseModel):
 class ImageOCRResponse(BaseModel):
    """Response body for image OCR endpoint."""

-    latex: str = Field("", description="LaTeX representation of the content (empty if mixed content)")
+    latex: str = Field(
+        "", description="LaTeX representation of the content (empty if mixed content)"
+    )
    markdown: str = Field("", description="Markdown representation of the content")
    mathml: str = Field("", description="Standard MathML representation (empty if mixed content)")
-    mml: str = Field("", description="XML MathML with mml: namespace prefix (empty if mixed content)")
+    mml: str = Field(
+        "", description="XML MathML with mml: namespace prefix (empty if mixed content)"
+    )
    layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
-    recognition_mode: str = Field("", description="Recognition mode used: mixed_recognition or formula_recognition")
-
+    recognition_mode: str = Field(
+        "", description="Recognition mode used: mixed_recognition or formula_recognition"
+    )