fix: update mathml
This commit is contained in:
@@ -296,29 +296,77 @@ class Converter:
|
||||
def _latex_to_mathml_cached(latex_formula: str) -> str:
|
||||
"""Cached conversion of LaTeX formula to MathML.
|
||||
|
||||
Uses Pandoc for conversion to ensure Word compatibility.
|
||||
Pandoc generates standard MathML that Word can properly import.
|
||||
|
||||
Uses LRU cache to avoid recomputing for repeated formulas.
|
||||
"""
|
||||
try:
|
||||
# Use latex2mathml library for conversion (fast, pure Python)
|
||||
return latex_to_mathml(latex_formula)
|
||||
except Exception as e:
|
||||
# Fallback: try with Pandoc (slower, but more robust)
|
||||
# Use Pandoc for Word-compatible MathML (primary method)
|
||||
mathml_html = pypandoc.convert_text(
|
||||
f"${latex_formula}$",
|
||||
"html",
|
||||
format="markdown+tex_math_dollars",
|
||||
extra_args=["--mathml"],
|
||||
)
|
||||
# Extract just the <math> element from the HTML
|
||||
match = Converter._RE_MATH_ELEMENT.search(mathml_html)
|
||||
if match:
|
||||
mathml = match.group(0)
|
||||
# Post-process for Word compatibility
|
||||
return Converter._postprocess_mathml_for_word(mathml)
|
||||
|
||||
# If no match, return as-is
|
||||
return mathml_html.rstrip("\n")
|
||||
|
||||
except Exception as pandoc_error:
|
||||
# Fallback: try latex2mathml (less Word-compatible)
|
||||
try:
|
||||
mathml_html = pypandoc.convert_text(
|
||||
f"${latex_formula}$",
|
||||
"html",
|
||||
format="markdown+tex_math_dollars",
|
||||
extra_args=["--mathml"],
|
||||
)
|
||||
# Extract just the <math> element from the HTML
|
||||
match = Converter._RE_MATH_ELEMENT.search(mathml_html)
|
||||
if match:
|
||||
return match.group(0)
|
||||
return mathml_html.rstrip("\n")
|
||||
except Exception as pandoc_error:
|
||||
mathml = latex_to_mathml(latex_formula)
|
||||
return Converter._postprocess_mathml_for_word(mathml)
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
f"MathML conversion failed: {e}. Pandoc fallback also failed: {pandoc_error}"
|
||||
f"MathML conversion failed: {pandoc_error}. latex2mathml fallback also failed: {e}"
|
||||
) from e
|
||||
|
||||
@staticmethod
def _postprocess_mathml_for_word(mathml: str) -> str:
    """Post-process MathML to improve Word compatibility.

    Applies the following transformations:

    - Rewrites ``display="inline"`` to ``display="block"``.
    - Adds ``display="block"`` to the first ``<math`` occurrence when the
      markup contains no ``display=`` attribute at all.
    - Decodes numeric character references (``&#x0002B;``, ``&#43;``, ...)
      for a small set of operator/punctuation characters into the literal
      characters.
    - Replaces a horizontal ellipsis (U+2026), literal or referenced, with
      a midline ellipsis (U+22EF).

    Named entities and numeric references outside that set (for example
    ``&lt;``, ``&amp;`` or ``&#x3C;``) are left untouched, so characters
    that are significant to XML are never un-escaped.

    Args:
        mathml: MathML string.

    Returns:
        Word-compatible MathML string.
    """
    import re  # local import keeps the block independent of file-level imports

    # Change display to block for better Word rendering.
    mathml = mathml.replace('display="inline"', 'display="block"')

    # If no display attribute is present anywhere, add one to the <math> tag.
    if 'display=' not in mathml and '<math' in mathml:
        mathml = mathml.replace('<math', '<math display="block"', 1)

    # Code points whose numeric character references are decoded.  Keying on
    # the code point (not on one spelling of the reference) makes the
    # decoding independent of hex/decimal form, letter case and zero padding.
    decoded_by_codepoint = {
        0x2B: "+",
        0x2026: "\u22ef",  # horizontal ellipsis becomes a midline ellipsis
        0x22EE: "\u22ee",  # vertical ellipsis
        0x3D: "=",
        0x7C: "|",
        0x2C: ",",
        0x28: "(",
        0x29: ")",
    }

    def _decode_reference(ref):
        hex_digits, dec_digits = ref.group(1), ref.group(2)
        if hex_digits is not None:
            codepoint = int(hex_digits, 16)
        else:
            codepoint = int(dec_digits)
        # Unknown code points keep their original reference text verbatim.
        return decoded_by_codepoint.get(codepoint, ref.group(0))

    mathml = re.sub(
        r"&#(?:[xX]([0-9A-Fa-f]+)|([0-9]+));",
        _decode_reference,
        mathml,
    )

    # A literal horizontal ellipsis is normalised the same way as its
    # character reference.
    return mathml.replace("\u2026", "\u22ef")
|
||||
|
||||
def _latex_to_mathml(self, latex_formula: str) -> str:
|
||||
"""Convert LaTeX formula to standard MathML.
|
||||
|
||||
Reference in New Issue
Block a user