fix: remove padding from GLMOCREndToEndService and clean up ruff violations
- Drop image padding in GLMOCREndToEndService.recognize(); use the raw image directly
- Fix F821: replace undefined `padded` references with `image`
- Fix F601: remove duplicate dict key "≠" in converter
- Fix F841: unused `image_cls_ids` variable in layout_postprocess
- Fix E702: semicolon-separated statements in layout_postprocess
- Fix UP031: replace percent-format with f-string in logging_config
- Auto-fix 44 additional ruff violations (import order, UP035/UP045/UP006, F401, F541)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -112,14 +112,18 @@ class Converter:
|
||||
# Pre-compiled regex patterns for preprocessing
|
||||
_RE_VSPACE = re.compile(r"\\\[1mm\]")
|
||||
_RE_BLOCK_FORMULA_INLINE = re.compile(r"([^\n])(\s*)\\\[(.*?)\\\]([^\n])", re.DOTALL)
|
||||
_RE_BLOCK_FORMULA_LINE = re.compile(r"^(\s*)\\\[(.*?)\\\](\s*)(?=\n|$)", re.MULTILINE | re.DOTALL)
|
||||
_RE_BLOCK_FORMULA_LINE = re.compile(
|
||||
r"^(\s*)\\\[(.*?)\\\](\s*)(?=\n|$)", re.MULTILINE | re.DOTALL
|
||||
)
|
||||
_RE_ARITHMATEX = re.compile(r'<span class="arithmatex">(.*?)</span>')
|
||||
_RE_INLINE_SPACE = re.compile(r"(?<!\$)\$ +(.+?) +\$(?!\$)")
|
||||
_RE_ARRAY_SPECIFIER = re.compile(r"\\begin\{array\}\{([^}]+)\}")
|
||||
_RE_LEFT_BRACE = re.compile(r"\\left\\\{\s+")
|
||||
_RE_RIGHT_BRACE = re.compile(r"\s+\\right\\\}")
|
||||
_RE_CASES = re.compile(r"\\begin\{cases\}(.*?)\\end\{cases\}", re.DOTALL)
|
||||
_RE_ALIGNED_BRACE = re.compile(r"\\left\\\{\\begin\{aligned\}(.*?)\\end\{aligned\}\\right\.", re.DOTALL)
|
||||
_RE_ALIGNED_BRACE = re.compile(
|
||||
r"\\left\\\{\\begin\{aligned\}(.*?)\\end\{aligned\}\\right\.", re.DOTALL
|
||||
)
|
||||
_RE_ALIGNED = re.compile(r"\\begin\{aligned\}(.*?)\\end\{aligned\}", re.DOTALL)
|
||||
_RE_TAG = re.compile(r"\$\$(.*?)\\tag\s*\{([^}]+)\}\s*\$\$", re.DOTALL)
|
||||
_RE_VMATRIX = re.compile(r"\\begin\{vmatrix\}(.*?)\\end\{vmatrix\}", re.DOTALL)
|
||||
@@ -368,7 +372,9 @@ class Converter:
|
||||
mathml = latex_to_mathml(latex_formula)
|
||||
return Converter._postprocess_mathml_for_word(mathml)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"MathML conversion failed: {pandoc_error}. latex2mathml fallback also failed: {e}") from e
|
||||
raise RuntimeError(
|
||||
f"MathML conversion failed: {pandoc_error}. latex2mathml fallback also failed: {e}"
|
||||
) from e
|
||||
|
||||
@staticmethod
|
||||
def _postprocess_mathml_for_word(mathml: str) -> str:
|
||||
@@ -583,7 +589,6 @@ class Converter:
|
||||
"⇓": "⇓", # Downarrow
|
||||
"↕": "↕", # updownarrow
|
||||
"⇕": "⇕", # Updownarrow
|
||||
"≠": "≠", # ne
|
||||
"≪": "≪", # ll
|
||||
"≫": "≫", # gg
|
||||
"⩽": "⩽", # leqslant
|
||||
@@ -962,7 +967,7 @@ class Converter:
|
||||
"""Export to DOCX format using pypandoc."""
|
||||
extra_args = [
|
||||
"--highlight-style=pygments",
|
||||
f"--reference-doc=app/pkg/reference.docx",
|
||||
"--reference-doc=app/pkg/reference.docx",
|
||||
]
|
||||
pypandoc.convert_file(
|
||||
input_path,
|
||||
|
||||
Reference in New Issue
Block a user