fix: update mathml
This commit is contained in:
202
test_word_mathml.py
Normal file
202
test_word_mathml.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""Test Word-compatible MathML generation."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_mathml_word_compatibility():
    """Convert a determinant-style matrix and report Word-compatibility checks.

    The LaTeX input is wrapped in ``$$...$$`` and passed to
    ``Converter.convert_to_formats``; the resulting ``mathml`` string is then
    inspected with plain substring checks and the findings are printed.

    Returns:
        False when the converter produced no MathML, True otherwise. The
        individual checks are informational: their outcome is printed but does
        not change the return value.
    """
    converter = Converter()

    banner = "=" * 80
    rule = "-" * 80

    print(banner)
    print("Testing Word-Compatible MathML Generation")
    print(banner)

    # Test case: Matrix with determinant (the problematic example)
    latex = r"""\left| \begin{array}{cccc} a_{11} & a_{12} & \dots & a_{1n} \\ \vdots & \vdots & & \vdots \\ a_{i1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a_{n1} & a_{n2} & \dots & a_{nn} \end{array} \right|"""

    print(f"\nLaTeX: {latex[:80]}...")
    print("\n" + rule)

    # Convert to formats
    result = converter.convert_to_formats(f"$${latex}$$")

    if not result.mathml:
        print("✗ No MathML generated")
        return False

    mathml = result.mathml

    print("Checking Word compatibility features:")
    print(rule)

    # Check 1: Display attribute
    if 'display="block"' in mathml:
        print("✓ Has display='block' attribute")
    else:
        print("✗ Missing or wrong display attribute")
        print(f" Found: {mathml[:100]}...")

    # Check 2: No numeric character references for common symbols.
    # The references are assembled from their hex code points (plus sign,
    # horizontal ellipsis, vertical ellipsis, equals sign, vertical bar) so
    # that the check looks for the escaped form and not the bare character:
    # a bare "=" occurs in every attribute and would always be "found".
    problematic_code_points = ("0002B", "02026", "022EE", "0003D", "0007C")
    problematic_entities = [f"&#x{code};" for code in problematic_code_points]
    unicode_issues = [
        entity for entity in problematic_entities if entity in mathml
    ]

    if unicode_issues:
        print(f"✗ Contains Unicode entities: {unicode_issues}")
    else:
        print("✓ No problematic Unicode entities")

    # Check 3: Uses mfenced for brackets (Word-friendly)
    if (
        '<mfenced' in mathml
        or '<mo fence="true"' in mathml
        or 'stretchy="true"' in mathml
    ):
        print("✓ Uses fence elements")
    else:
        print("? No fence elements found (might be OK)")

    # Check 4: Has proper namespace
    if 'xmlns="http://www.w3.org/1998/Math/MathML"' in mathml:
        print("✓ Has MathML namespace")
    else:
        print("✗ Missing MathML namespace")

    # Show preview
    print("\n" + rule)
    print("MathML Preview (first 500 chars):")
    print(rule)
    print(mathml[:500])
    if len(mathml) > 500:
        print("...")

    print("\n" + rule)
    print(f"Total length: {len(mathml)} characters")

    # Check if this looks like Pandoc-generated MathML
    if 'mfenced' in mathml or 'columnalign' in mathml:
        print("✓ Appears to be Pandoc-generated (good for Word)")
    elif 'stretchy' in mathml and 'fence' in mathml:
        print("✓ Uses standard fence attributes")
    else:
        print("? MathML structure unclear")

    return True
|
||||
|
||||
|
||||
def test_simple_formulas():
    """Run quick Word-compatibility checks over a handful of simple formulas.

    Each formula is wrapped in ``$...$``, converted with
    ``Converter.convert_to_formats`` and its ``mathml`` output is checked by
    substring for the display attribute, a namespace declaration, and the
    absence of escaped plus/equals character references. A one-line report is
    printed per formula.

    Returns:
        True when every formula passed every check; False when any check
        failed or any conversion raised an exception.
    """
    converter = Converter()

    print("\n" + "=" * 80)
    print("Testing Simple Formulas")
    print("=" * 80)

    test_cases = [
        ("Fraction", r"\frac{a}{b}"),
        ("Square root", r"\sqrt{x^2 + y^2}"),
        ("Summation", r"\sum_{i=1}^{n} i"),
        ("Equation", r"E = mc^2"),
        ("Matrix", r"\begin{pmatrix} a & b \\ c & d \end{pmatrix}"),
    ]

    # Escaped forms of "+" and "=", assembled from their hex code points.
    # Looking for the bare characters is meaningless here: "=" is part of
    # every attribute (xmlns=..., display=...), so that check could never pass.
    plus_entity = f"&#x{'0002B'};"
    equals_entity = f"&#x{'0003D'};"

    all_passed = True

    for name, latex in test_cases:
        print(f"\n{name}: ${latex}$")

        # Best-effort per formula: an error in one case is reported and
        # counted as a failure, and the remaining cases still run.
        try:
            result = converter.convert_to_formats(f"${latex}$")
            mathml = result.mathml

            # Quick checks
            checks = [
                ('display="block"' in mathml, "display=block"),
                (plus_entity not in mathml, "no +entity"),
                (equals_entity not in mathml, "no =entity"),
                ('xmlns=' in mathml, "namespace"),
            ]

            failed_checks = [label for ok, label in checks if not ok]
            status = "✗" if failed_checks else "✓"

            print(f" {status} Length: {len(mathml)} chars", end="")
            if failed_checks:
                print(f" | Issues: {', '.join(failed_checks)}")
                all_passed = False
            else:
                print(" | All checks passed")

        except Exception as e:
            print(f" ✗ Error: {e}")
            all_passed = False

    return all_passed
|
||||
|
||||
|
||||
def compare_with_reference():
    """Print a structural feature summary of the MathML for a 2x2 determinant.

    The LaTeX is wrapped in ``$$...$$`` and converted with
    ``Converter.convert_to_formats``; the ``mathml`` output is probed by
    substring for a fixed set of features, which are printed one per line.

    Returns:
        True when none of the probed escaped character references
        (code points 0002B, 0003D, 0007C) occur in the MathML, else False.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print("Comparison with Reference MathML")
    print(heavy_rule)

    converter = Converter()

    # Simple matrix example
    latex = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
    our_mathml = converter.convert_to_formats(f"$${latex}$$").mathml

    print("\nOur MathML structure:")
    print(light_rule)

    # Analyze structure
    has_mfenced = "<mfenced" in our_mathml
    has_mo_fence = (
        '<mo fence="' in our_mathml or '<mo stretchy="true"' in our_mathml
    )
    has_mtable = "<mtable" in our_mathml
    has_display_block = 'display="block"' in our_mathml
    has_entities = False
    for code_point in ("0002B", "0003D", "0007C"):
        if f"&#x{code_point};" in our_mathml:
            has_entities = True
            break

    features = {
        "mfenced": has_mfenced,
        "mo fence": has_mo_fence,
        "mtable": has_mtable,
        "display block": has_display_block,
        "unicode entities": has_entities,
    }

    print("Features:")
    for feature, present in features.items():
        # Every feature is wanted except "unicode entities", which is
        # a good sign only when absent.
        if feature == "unicode entities":
            good = not present
        else:
            good = present
        status = "✓" if good else "✗"
        print(f" {status} {feature}: {present}")

    print(f"\nLength: {len(our_mathml)} characters")
    print(f"Preview:\n{our_mathml[:300]}...")

    return not has_entities
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the three checks in order, then print a summary.
    print("Word-Compatible MathML Test Suite\n")

    try:
        # The list is built left to right, so the checks run in this order and
        # all three complete before the summary is printed.
        outcomes = [
            test_mathml_word_compatibility(),
            test_simple_formulas(),
            compare_with_reference(),
        ]

        separator = "=" * 80
        print("\n" + separator)
        print("SUMMARY")
        print(separator)

        if not all(outcomes):
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
            print("\nMathML may not be fully Word-compatible.")
        else:
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nMathML should be Word-compatible!")
            print("Try copying the mathml output and pasting into Word.")

        print(separator)

    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Top-level boundary: report the error together with its traceback.
        print(f"\n\nTest error: {e}")
        import traceback

        traceback.print_exc()
|
||||
Reference in New Issue
Block a user