fix: remove padding from GLMOCREndToEndService and clean up ruff violations

- Drop image padding in GLMOCREndToEndService.recognize(); use raw image directly
- Fix F821: replace undefined `padded` references with `image`
- Fix F601 duplicate dict key "≠" in converter
- Fix F841 unused `image_cls_ids` variable in layout_postprocess
- Fix E702 semicolon-separated statements in layout_postprocess
- Fix UP031: replace percent-format with an f-string in logging_config
- Auto-fix 44 additional ruff violations (import order, UP035/UP045/UP006, F401, F541)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
liuyuanchuang
2026-03-10 19:52:22 +08:00
parent f8173f7c0a
commit 30d2c2f45b
16 changed files with 162 additions and 140 deletions

View File

@@ -36,4 +36,3 @@ class LatexToOmmlResponse(BaseModel):
"""Response body for LaTeX to OMML conversion endpoint."""
omml: str = Field("", description="OMML (Office Math Markup Language) representation")

View File

@@ -7,7 +7,9 @@ class LayoutRegion(BaseModel):
"""A detected layout region in the document."""
type: str = Field(..., description="Region type: text, formula, table, figure")
native_label: str = Field("", description="Raw label before type mapping (e.g. doc_title, formula_number)")
native_label: str = Field(
"", description="Raw label before type mapping (e.g. doc_title, formula_number)"
)
bbox: list[float] = Field(..., description="Bounding box [x1, y1, x2, y2]")
confidence: float = Field(..., description="Detection confidence score")
score: float = Field(..., description="Detection score")
@@ -41,10 +43,15 @@ class ImageOCRRequest(BaseModel):
class ImageOCRResponse(BaseModel):
"""Response body for image OCR endpoint."""
latex: str = Field("", description="LaTeX representation of the content (empty if mixed content)")
latex: str = Field(
"", description="LaTeX representation of the content (empty if mixed content)"
)
markdown: str = Field("", description="Markdown representation of the content")
mathml: str = Field("", description="Standard MathML representation (empty if mixed content)")
mml: str = Field("", description="XML MathML with mml: namespace prefix (empty if mixed content)")
mml: str = Field(
"", description="XML MathML with mml: namespace prefix (empty if mixed content)"
)
layout_info: LayoutInfo = Field(default_factory=LayoutInfo)
recognition_mode: str = Field("", description="Recognition mode used: mixed_recognition or formula_recognition")
recognition_mode: str = Field(
"", description="Recognition mode used: mixed_recognition or formula_recognition"
)