fix: add preprocess

This commit is contained in:
liuyuanchuang
2026-02-04 12:45:34 +08:00
parent 69f9a70ae5
commit e31017cfe7
3 changed files with 354 additions and 1 deletions

View File

@@ -217,6 +217,9 @@ class Converter:
This is a separate method due to the performance overhead of OMML conversion,
which requires creating a temporary DOCX file.
The formula is preprocessed using the same logic as export_to_file to ensure
proper conversion.
Args:
latex_formula: Pure LaTeX formula (without delimiters like $ or $$).
@@ -230,7 +233,37 @@ class Converter:
if not latex_formula or not latex_formula.strip():
raise ValueError("LaTeX formula cannot be empty")
return self._latex_to_omml(latex_formula.strip())
# Preprocess formula using the same preprocessing as export
preprocessed = self._preprocess_formula_for_omml(latex_formula.strip())
return self._latex_to_omml(preprocessed)
def _preprocess_formula_for_omml(self, latex_formula: str) -> str:
    """Run the OMML preprocessing pipeline over a LaTeX formula.

    The formula is passed through four helper methods of this object in a
    fixed order; each helper receives the text produced by the previous one.
    The steps are meant to match the preprocessing used on the export path
    (NOTE(review): that path is not visible here - confirm the two stay in
    sync) and to work around inputs that make Pandoc's OMML conversion fail.

    Args:
        latex_formula: Pure LaTeX formula.

    Returns:
        Preprocessed LaTeX formula.
    """
    # Step 1: rewrite matrix environments.
    with_matrices = self._convert_matrix_environments(latex_formula)

    # Step 2: normalise array column specifiers (strip the spaces in them).
    with_columns = self._fix_array_column_specifiers(with_matrices)

    # Step 3: repair spacing around braces.
    with_braces = self._fix_brace_spacing(with_columns)

    # Step 4: rewrite special environments (cases, aligned).
    return self._convert_special_environments(with_braces)
def _extract_latex_formula(self, text: str) -> str:
"""Extract LaTeX formula from text by removing delimiters.