fix: handle mathml preprocess

This commit is contained in:
liuyuanchuang
2026-02-04 15:52:04 +08:00
parent 56a02eb6da
commit 720cd05add
2 changed files with 264 additions and 6 deletions

254
test_array_fix_complete.py Normal file
View File

@@ -0,0 +1,254 @@
"""Comprehensive test for array column specifier fix in all conversion paths."""
from app.services.converter import Converter
def test_problematic_array():
    """Run the LaTeX formula from the error log through every conversion path.

    Three stages are exercised in order and the function stops at the first
    stage that fails:

    1. ``Converter._preprocess_formula_for_conversion`` must not leave the
       spaced column specifier ``{c c c c}`` in its output.
    2. ``Converter.convert_to_formats`` must yield a truthy ``mathml``.
    3. ``Converter.convert_to_omml`` must yield a truthy result.

    Returns:
        bool: ``True`` when all three stages succeed, ``False`` otherwise.

    NOTE(review): pytest does not treat a returned ``False`` as a failure, so
    the boolean only matters when this file is run as a script.
    """
    import re

    heavy_rule = "=" * 80
    light_rule = "-" * 80
    spaced_spec = "{c c c c}"
    compact_spec = "{cccc}"

    print(heavy_rule)
    print("Testing Problematic Array (from error log)")
    print(heavy_rule)

    converter = Converter()

    # The exact LaTeX from the error log
    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""

    print(f"\nLaTeX length: {len(latex)} characters")
    # The specifier is interpolated from a variable: escaping quotes inside an
    # f-string replacement field (the previous form) does not parse.
    print(f"Contains '{spaced_spec}': {spaced_spec in latex}")

    # Test 1: Preprocessing
    print("\n" + light_rule)
    print("Test 1: Preprocessing")
    print(light_rule)

    preprocessed = converter._preprocess_formula_for_conversion(latex)
    if spaced_spec in preprocessed:
        start = preprocessed.find(spaced_spec)
        print("✗ FAILED: Spaces NOT removed from array specifiers")
        print(f" Still found: {preprocessed[start:start + 15]}")
        return False
    elif compact_spec in preprocessed:
        print("✓ SUCCESS: Spaces removed from array specifiers")
        print(f" '{spaced_spec}' → '{compact_spec}'")
    else:
        # Neither form is present; this stage is inconclusive, not a failure.
        print("? WARNING: Could not verify specifier fix")

    # Test 2: MathML Conversion
    print("\n" + light_rule)
    print("Test 2: MathML Conversion (via convert_to_formats)")
    print(light_rule)

    try:
        result = converter.convert_to_formats(f"$${latex}$$")
        if result.mathml:
            print(f"✓ SUCCESS: MathML generated ({len(result.mathml)} chars)")

            # Check for Word compatibility
            if 'display="block"' in result.mathml:
                print(" ✓ Has display='block' (Word-friendly)")
            # Look for the *entity* spellings of "+" and "=". Testing for the
            # literal characters can never succeed, because "=" appears in
            # every attribute (e.g. display="block").
            # NOTE(review): assumes the entities of concern are the hex
            # character references for U+002B / U+003D - confirm against the
            # converter's raw output.
            if not re.search(r"&#x0*(?:2B|3D);", result.mathml, re.IGNORECASE):
                print(" ✓ No problematic Unicode entities")

            print(f"\n MathML preview:\n {result.mathml[:200]}...")
        else:
            print("✗ FAILED: No MathML generated")
            return False
    except Exception as e:
        # Any converter error is reported as a failed stage, not propagated.
        print(f"✗ FAILED: MathML conversion error: {e}")
        return False

    # Test 3: OMML Conversion
    print("\n" + light_rule)
    print("Test 3: OMML Conversion")
    print(light_rule)

    try:
        omml = converter.convert_to_omml(latex)
        if omml:
            print(f"✓ SUCCESS: OMML generated ({len(omml)} chars)")
            if 'oMath' in omml:
                print(" ✓ Valid OMML structure")
            print(f"\n OMML preview:\n {omml[:200]}...")
        else:
            print("✗ FAILED: No OMML generated")
            return False
    except Exception as e:
        print(f"✗ FAILED: OMML conversion error: {e}")
        return False

    print("\n" + heavy_rule)
    print("✓✓✓ ALL CONVERSION PATHS WORKING ✓✓✓")
    print(heavy_rule)
    return True
def test_simple_arrays():
    """Check small array formulas whose column specifiers contain spaces.

    For each case the formula is preprocessed with
    ``Converter._preprocess_formula_for_conversion`` and converted with
    ``Converter.convert_to_formats``. A case passes when both ``mathml`` and
    ``mml`` on the result are truthy and none of the known spaced specifiers
    is still present in the preprocessed text.

    Returns:
        bool: ``True`` if every case passes, ``False`` if any case fails or
        its conversion raises.
    """
    print("\n" + "=" * 80)
    print("Testing Simple Arrays")
    print("=" * 80)

    converter = Converter()

    test_cases = [
        ("2x2 array", r"\begin{array}{c c} a & b \\ c & d \end{array}"),
        ("3x3 array", r"\begin{array}{c c c} 1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9 \end{array}"),
        ("Array with pipes", r"\left| \begin{array}{c c} a & b \\ c & d \end{array} \right|"),
        ("Mixed alignment", r"\begin{array}{l r c} left & right & center \end{array}"),
    ]
    # The spaced specifiers used by the cases above; none may survive
    # preprocessing.
    spaced_specifiers = ("{c c}", "{c c c}", "{l r c}")

    all_passed = True
    for name, latex in test_cases:
        print(f"\n{name}")
        print("-" * 40)
        print(f"LaTeX: {latex}")

        # Check preprocessing
        preprocessed = converter._preprocess_formula_for_conversion(latex)
        has_spaces = any(spec in preprocessed for spec in spaced_specifiers)

        try:
            result = converter.convert_to_formats(f"${latex}$")
            if result.mathml and result.mml:
                # Previously both branches were empty strings, so the line
                # carried no pass/fail marker at all.
                status = "✗" if has_spaces else "✓"
                print(f"{status} MathML: {len(result.mathml)} chars, MML: {len(result.mml)} chars")
                if has_spaces:
                    print(" ✗ Array specifiers still have spaces")
                    all_passed = False
                else:
                    print(" ✓ Array specifiers fixed")
            else:
                print("✗ Conversion failed")
                all_passed = False
        except Exception as e:
            # A raising conversion fails this case; later cases still run.
            print(f"✗ Error: {e}")
            all_passed = False

    return all_passed
def test_conversion_consistency():
    """Check one formula that mixes a spaced array, ``vmatrix`` and ``cases``.

    The preprocessed text is inspected for three properties (spaced specifier
    gone, no ``vmatrix`` left, no ``cases`` left while ``array`` is present).
    The formula is then sent through ``Converter.convert_to_formats`` and
    ``Converter.convert_to_omml``.

    Returns:
        bool: ``True`` only if neither conversion raised and all three
        preprocessing checks held.
    """
    print("\n" + "=" * 80)
    print("Testing Conversion Consistency")
    print("=" * 80)

    converter = Converter()

    # Test formula with multiple issues
    latex = r"""
\left\{ \begin{array}{l c}
\begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\
\begin{cases} x & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{sign}
\end{array} \right.
""".strip()

    print("\nComplex formula with:")
    print(" - array with spaces: {l c}")
    print(" - vmatrix environment")
    print(" - cases environment")

    print("\n" + "-" * 80)
    print("Preprocessing check:")
    print("-" * 80)

    preprocessed = converter._preprocess_formula_for_conversion(latex)
    checks = {
        "Array spaces removed": '{l c}' not in preprocessed and '{lc}' in preprocessed,
        "vmatrix converted": 'vmatrix' not in preprocessed,
        "cases converted": 'cases' not in preprocessed and 'array' in preprocessed,
    }
    for check, passed in checks.items():
        # Previously both branches were empty strings, so a failed check was
        # printed exactly like a passing one.
        status = "✓" if passed else "✗"
        print(f"{status} {check}")

    print("\n" + "-" * 80)
    print("Conversion paths:")
    print("-" * 80)

    all_passed = True

    # Test MathML
    try:
        result = converter.convert_to_formats(f"$${latex}$$")
        # len() raises if a field is None; the except below reports that as a
        # failed path.
        print(f"✓ MathML: {len(result.mathml)} chars")
        print(f"✓ MML: {len(result.mml)} chars")
    except Exception as e:
        print(f"✗ MathML failed: {e}")
        all_passed = False

    # Test OMML
    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ OMML: {len(omml)} chars")
    except Exception as e:
        print(f"✗ OMML failed: {e}")
        all_passed = False

    return all_passed and all(checks.values())
if __name__ == "__main__":
    # Script entry point: run the three suites in order, then print a
    # per-suite PASS/FAIL summary and an overall verdict.
    banner = "=" * 80

    print(banner)
    print("COMPREHENSIVE ARRAY FIX TEST SUITE")
    print("Testing all conversion paths with preprocessing")
    print(banner)

    try:
        # List elements are evaluated left to right, so the suites run in the
        # order they are listed.
        outcomes = [
            ("Problematic array fix", test_problematic_array()),
            ("Simple arrays", test_simple_arrays()),
            ("Conversion consistency", test_conversion_consistency()),
        ]

        print("\n" + banner)
        print("FINAL SUMMARY")
        print(banner)

        for label, ok in outcomes:
            marker = "✓ PASS" if ok else "✗ FAIL"
            print(f"{marker}: {label}")

        everything_ok = all(ok for _, ok in outcomes)

        print("\n" + "-" * 80)
        if not everything_ok:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
        else:
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nThe array column specifier fix is working in ALL conversion paths:")
            print(" • MathML conversion (for Word paste)")
            print(" • MML conversion (namespaced MathML)")
            print(" • OMML conversion (Word native)")
        print(banner)

    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Last-resort handler: report the error with a full traceback.
        print(f"\n\nTest error: {e}")
        import traceback
        traceback.print_exc()