fix: apply formula preprocessing to the MathML conversion path
This commit is contained in:
@@ -200,8 +200,11 @@ class Converter:
|
||||
# Extract the LaTeX formula content (remove delimiters)
|
||||
latex_formula = self._extract_latex_formula(md_text)
|
||||
|
||||
# Preprocess formula for better conversion (fix array specifiers, etc.)
|
||||
preprocessed_formula = self._preprocess_formula_for_conversion(latex_formula)
|
||||
|
||||
# Convert to MathML
|
||||
mathml = self._latex_to_mathml(latex_formula)
|
||||
mathml = self._latex_to_mathml(preprocessed_formula)
|
||||
|
||||
# Convert MathML to mml:math format (with namespace prefix)
|
||||
mml = self._mathml_to_mml(mathml)
|
||||
@@ -234,15 +237,16 @@ class Converter:
|
||||
raise ValueError("LaTeX formula cannot be empty")
|
||||
|
||||
# Preprocess formula using the same preprocessing as export
|
||||
preprocessed = self._preprocess_formula_for_omml(latex_formula.strip())
|
||||
preprocessed = self._preprocess_formula_for_conversion(latex_formula.strip())
|
||||
|
||||
return self._latex_to_omml(preprocessed)
|
||||
|
||||
def _preprocess_formula_for_omml(self, latex_formula: str) -> str:
|
||||
"""Preprocess LaTeX formula for OMML conversion.
|
||||
def _preprocess_formula_for_conversion(self, latex_formula: str) -> str:
|
||||
"""Preprocess LaTeX formula for any conversion (MathML, OMML, etc.).
|
||||
|
||||
Applies the same preprocessing steps as preprocess_for_export to ensure
|
||||
consistency. This fixes common issues that cause Pandoc OMML conversion to fail.
|
||||
consistency across all conversion paths. This fixes common issues that
|
||||
cause Pandoc conversion to fail.
|
||||
|
||||
Args:
|
||||
latex_formula: Pure LaTeX formula.
|
||||
@@ -254,7 +258,7 @@ class Converter:
|
||||
# 1. Convert matrix environments
|
||||
latex_formula = self._convert_matrix_environments(latex_formula)
|
||||
|
||||
# 2. Fix array column specifiers (remove spaces)
|
||||
# 2. Fix array column specifiers (remove spaces) - THIS IS THE KEY FIX
|
||||
latex_formula = self._fix_array_column_specifiers(latex_formula)
|
||||
|
||||
# 3. Fix brace spacing
|
||||
|
||||
254
test_array_fix_complete.py
Normal file
254
test_array_fix_complete.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""Comprehensive test for array column specifier fix in all conversion paths."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_problematic_array():
    """Run the LaTeX from the error log through every conversion path.

    Three stages are exercised in order and a progress report is printed:

    1. ``Converter._preprocess_formula_for_conversion`` must not leave the
       spaced ``{c c c c}`` column specifier in the formula.
    2. ``Converter.convert_to_formats`` must produce a non-empty ``mathml``.
    3. ``Converter.convert_to_omml`` must produce a non-empty result.

    Returns:
        bool: ``False`` as soon as one stage fails (including when a
        conversion raises), ``True`` when all three stages succeed.
    """
    # Kept in variables so the braces never have to be escaped inside
    # f-string replacement fields (backslashes are not allowed there).
    spaced_spec = "{c c c c}"
    compact_spec = "{cccc}"

    print("=" * 80)
    print("Testing Problematic Array (from error log)")
    print("=" * 80)

    converter = Converter()

    # The exact LaTeX from the error log
    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""

    print(f"\nLaTeX length: {len(latex)} characters")
    print(f"Contains '{spaced_spec}': {spaced_spec in latex}")

    # Test 1: Preprocessing
    print("\n" + "-" * 80)
    print("Test 1: Preprocessing")
    print("-" * 80)

    preprocessed = converter._preprocess_formula_for_conversion(latex)

    leftover_at = preprocessed.find(spaced_spec)
    if leftover_at != -1:
        snippet = preprocessed[leftover_at:leftover_at + 15]
        print("✗ FAILED: Spaces NOT removed from array specifiers")
        print(f" Still found: {snippet}")
        return False
    if compact_spec in preprocessed:
        print("✓ SUCCESS: Spaces removed from array specifiers")
        print(f" '{spaced_spec}' → '{compact_spec}'")
    else:
        # Neither form was found; report it but do not fail the run.
        print("? WARNING: Could not verify specifier fix")

    # Test 2: MathML Conversion
    print("\n" + "-" * 80)
    print("Test 2: MathML Conversion (via convert_to_formats)")
    print("-" * 80)

    try:
        result = converter.convert_to_formats(f"$${latex}$$")

        if result.mathml:
            print(f"✓ SUCCESS: MathML generated ({len(result.mathml)} chars)")

            # Check for Word compatibility
            if 'display="block"' in result.mathml:
                print(" ✓ Has display='block' (Word-friendly)")

            # Look for entity-encoded operators rather than the bare
            # characters: any attribute such as display="block" already
            # contains a literal "=", so testing for the bare character
            # could never report success.
            operator_entities = (
                "&#x0002B;", "&#x2B;", "&plus;",
                "&#x0003D;", "&#x3D;", "&equals;",
            )
            if not any(entity in result.mathml for entity in operator_entities):
                print(" ✓ No problematic Unicode entities")

            print(f"\n MathML preview:\n {result.mathml[:200]}...")
        else:
            print("✗ FAILED: No MathML generated")
            return False

    except Exception as e:
        # Deliberately broad: any conversion failure is reported as a
        # failed stage instead of aborting the whole report.
        print(f"✗ FAILED: MathML conversion error: {e}")
        return False

    # Test 3: OMML Conversion
    print("\n" + "-" * 80)
    print("Test 3: OMML Conversion")
    print("-" * 80)

    try:
        omml = converter.convert_to_omml(latex)

        if omml:
            print(f"✓ SUCCESS: OMML generated ({len(omml)} chars)")

            if 'oMath' in omml:
                print(" ✓ Valid OMML structure")

            print(f"\n OMML preview:\n {omml[:200]}...")
        else:
            print("✗ FAILED: No OMML generated")
            return False

    except Exception as e:
        # Deliberately broad, for the same reason as the MathML stage.
        print(f"✗ FAILED: OMML conversion error: {e}")
        return False

    print("\n" + "=" * 80)
    print("✓✓✓ ALL CONVERSION PATHS WORKING ✓✓✓")
    print("=" * 80)

    return True
|
||||
|
||||
|
||||
def test_simple_arrays():
    """Convert a handful of small arrays whose column specifiers contain spaces.

    For each sample the formula is preprocessed, checked for leftover spaced
    specifiers, and converted with ``Converter.convert_to_formats``.

    Returns:
        bool: ``True`` only if every sample converts to both MathML and MML
        and none of the known spaced specifiers survives preprocessing.
    """
    print("\n" + "=" * 80)
    print("Testing Simple Arrays")
    print("=" * 80)

    converter = Converter()

    samples = [
        ("2x2 array", r"\begin{array}{c c} a & b \\ c & d \end{array}"),
        ("3x3 array", r"\begin{array}{c c c} 1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9 \end{array}"),
        ("Array with pipes", r"\left| \begin{array}{c c} a & b \\ c & d \end{array} \right|"),
        ("Mixed alignment", r"\begin{array}{l r c} left & right & center \end{array}"),
    ]

    # Spaced specifiers used by the samples above; none may survive preprocessing.
    spaced_specs = ("{c c}", "{c c c}", "{l r c}")

    ok = True

    for label, formula in samples:
        print(f"\n{label}")
        print("-" * 40)
        print(f"LaTeX: {formula}")

        # Check preprocessing
        cleaned = converter._preprocess_formula_for_conversion(formula)
        still_spaced = any(spec in cleaned for spec in spaced_specs)

        try:
            converted = converter.convert_to_formats(f"${formula}$")

            if not (converted.mathml and converted.mml):
                print("✗ Conversion failed")
                ok = False
            else:
                mark = "✗" if still_spaced else "✓"
                print(f"{mark} MathML: {len(converted.mathml)} chars, MML: {len(converted.mml)} chars")

                if still_spaced:
                    print(" ✗ Array specifiers still have spaces")
                    ok = False
                else:
                    print(" ✓ Array specifiers fixed")

        except Exception as e:
            # Report the failure for this sample and keep going with the rest.
            print(f"✗ Error: {e}")
            ok = False

    return ok
|
||||
|
||||
|
||||
def test_conversion_consistency():
    """Check that preprocessing and both conversion entry points agree.

    A formula combining a spaced ``array`` specifier, a ``vmatrix`` and a
    ``cases`` environment is preprocessed and inspected, then converted via
    ``convert_to_formats`` and ``convert_to_omml``.

    Returns:
        bool: ``True`` if every preprocessing check holds and neither
        conversion call raises.
    """
    print("\n" + "=" * 80)
    print("Testing Conversion Consistency")
    print("=" * 80)

    converter = Converter()

    # Test formula with multiple issues
    latex = "\n".join([
        r"\left\{ \begin{array}{l c}",
        r"\begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\",
        r"\begin{cases} x & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{sign}",
        r"\end{array} \right.",
    ])

    print("\nComplex formula with:")
    print(" - array with spaces: {l c}")
    print(" - vmatrix environment")
    print(" - cases environment")

    rule = "-" * 80

    print("\n" + rule)
    print("Preprocessing check:")
    print(rule)

    cleaned = converter._preprocess_formula_for_conversion(latex)

    checks = {}
    checks["Array spaces removed"] = '{l c}' not in cleaned and '{lc}' in cleaned
    checks["vmatrix converted"] = 'vmatrix' not in cleaned
    checks["cases converted"] = 'cases' not in cleaned and 'array' in cleaned

    for description, held in checks.items():
        mark = "✓" if held else "✗"
        print(f"{mark} {description}")

    print("\n" + rule)
    print("Conversion paths:")
    print(rule)

    paths_ok = True

    # Test MathML
    try:
        formats = converter.convert_to_formats(f"$${latex}$$")
        print(f"✓ MathML: {len(formats.mathml)} chars")
        print(f"✓ MML: {len(formats.mml)} chars")
    except Exception as e:
        print(f"✗ MathML failed: {e}")
        paths_ok = False

    # Test OMML
    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ OMML: {len(omml)} chars")
    except Exception as e:
        print(f"✗ OMML failed: {e}")
        paths_ok = False

    if not paths_ok:
        return False
    return all(checks.values())
|
||||
|
||||
|
||||
# Script entry point: run the three test functions in order and print a
# pass/fail summary for each, followed by an overall verdict.
if __name__ == "__main__":
    banner = "=" * 80

    print(banner)
    print("COMPREHENSIVE ARRAY FIX TEST SUITE")
    print("Testing all conversion paths with preprocessing")
    print(banner)

    try:
        # List elements are evaluated left to right, so the tests run in
        # the order they are listed.
        outcomes = [
            ("Problematic array fix", test_problematic_array()),
            ("Simple arrays", test_simple_arrays()),
            ("Conversion consistency", test_conversion_consistency()),
        ]

        print("\n" + banner)
        print("FINAL SUMMARY")
        print(banner)

        for title, ok in outcomes:
            verdict = "✓ PASS" if ok else "✗ FAIL"
            print(f"{verdict}: {title}")

        print("\n" + "-" * 80)

        if not all(ok for _, ok in outcomes):
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
        else:
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nThe array column specifier fix is working in ALL conversion paths:")
            for path in (
                "MathML conversion (for Word paste)",
                "MML conversion (namespaced MathML)",
                "OMML conversion (Word native)",
            ):
                print(f" • {path}")

        print(banner)

    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Last-resort handler: show the error and the full traceback.
        print(f"\n\nTest error: {e}")
        import traceback
        traceback.print_exc()
|
||||
Reference in New Issue
Block a user