237 lines
7.4 KiB
Python
237 lines
7.4 KiB
Python
|
|
"""Diagnostic tool for MathML Word compatibility issues."""
|
||
|
|
|
||
|
|
from app.services.converter import Converter
|
||
|
|
|
||
|
|
|
||
|
|
def diagnose_mathml(latex: str, converter: "Converter | None" = None) -> dict:
    """Diagnose MathML generation and Word compatibility.

    Converts ``latex`` (wrapped in ``$...$``), prints a human-readable
    report to stdout, and returns the findings.

    Args:
        latex: LaTeX formula to convert, without the surrounding ``$``.
        converter: Optional object exposing ``convert_to_formats(text)`` whose
            result has a ``mathml`` attribute. A new ``Converter()`` is
            created when omitted, so existing callers are unaffected.

    Returns:
        On success, a dictionary with ``success`` (True), ``mathml``,
        ``issues`` (issue codes in the order the checks run) and ``length``.
        If the conversion raises, ``{"success": False, "error": <message>}``.
    """
    if converter is None:
        converter = Converter()

    print("=" * 80)
    print("MathML Word Compatibility Diagnostic")
    print("=" * 80)

    print(f"\nInput LaTeX: {latex}")

    # Convert. Failures are reported instead of raised: this function is the
    # outermost boundary of a diagnostic tool and must always produce a report.
    try:
        result = converter.convert_to_formats(f"${latex}$")
        mathml = result.mathml

        print("\n✓ Conversion successful")
        print(f"MathML length: {len(mathml)} characters")

    except Exception as e:
        print(f"\n✗ Conversion failed: {e}")
        return {"success": False, "error": str(e)}

    # Diagnostic checks
    print("\n" + "-" * 80)
    print("Word Compatibility Checks:")
    print("-" * 80)

    issues = []

    # Check 1: Has proper namespace
    if 'xmlns="http://www.w3.org/1998/Math/MathML"' in mathml:
        print("✓ Has correct MathML namespace")
    else:
        print("✗ Missing or incorrect MathML namespace")
        issues.append("namespace")

    # Check 2: Display attribute
    if 'display="block"' in mathml:
        print("✓ Has display='block' attribute")
    elif 'display="inline"' in mathml:
        print("⚠ Has display='inline' (Word prefers 'block')")
        issues.append("display_inline")
    else:
        print("✗ Missing display attribute")
        issues.append("no_display")

    # Check 3: Check for problematic elements.
    # Match on the tag prefix so an opening tag carrying attributes is also
    # detected (same approach as the <annotation check below).
    if '<semantics' in mathml:
        print("⚠ Contains <semantics> element")
        print(" Note: Word may ignore semantics wrapper")
        issues.append("semantics")

    if '<annotation' in mathml:
        print("⚠ Contains <annotation> element")
        print(" Note: Word doesn't need annotation, may cause issues")
        issues.append("annotation")

    # Check 4: Encoded entities.
    # The markers must be the *escaped* spellings: raw '<', '>' or '&' occur
    # in the markup of every MathML document and would flag all input.
    entity_markers = ("&#x", "&gt;", "&lt;", "&amp;")
    if any(marker in mathml for marker in entity_markers):
        print("⚠ Contains encoded entities (Word prefers actual characters)")
        issues.append("entities")
    else:
        print("✓ No problematic entities")

    # Check 5: Root element structure (leading whitespace is harmless)
    if mathml.lstrip().startswith('<math'):
        print("✓ Starts with <math> element")
    else:
        print("✗ Doesn't start with <math> element")
        issues.append("no_math_root")

    # Check 6: Common Word-incompatible attributes (informational only;
    # these are not recorded as issues)
    if 'class=' in mathml:
        print("⚠ Contains 'class' attribute (Word ignores these)")

    if 'style=' in mathml:
        print("⚠ Contains 'style' attribute (Word ignores these)")

    # Print MathML structure
    print("\n" + "-" * 80)
    print("MathML Structure:")
    print("-" * 80)

    # Show the first 500 chars, plus the tail when the document is longer
    print(mathml[:500])
    if len(mathml) > 500:
        print("...")
        print(mathml[-200:])

    # Recommendations
    print("\n" + "-" * 80)
    print("Recommendations:")
    print("-" * 80)

    if not issues:
        print("✓ MathML appears to be Word-compatible!")
        print("\nHow to paste into Word:")
        print(" 1. Copy the MathML XML")
        print(" 2. In Word: Insert → Equation → Ink Equation")
        print(" 3. Right-click the equation → 'Professional'")
        print(" 4. Right-click again → 'Save as new equation'")
        print("\nOR use Alt text method:")
        print(" 1. Insert → Equation")
        print(" 2. Type any formula")
        print(" 3. Right-click → Edit Alt Text")
        print(" 4. Paste MathML in Alt Text field")
    else:
        print("Issues found:")
        if "semantics" in issues or "annotation" in issues:
            print("\n1. Remove <semantics> and <annotation> wrappers")
            print(" Word only needs the <mrow> content inside")

        if "display_inline" in issues:
            print("\n2. Change display='inline' to display='block'")

        if "entities" in issues:
            print("\n3. Decode HTML entities to actual characters")

        if "namespace" in issues:
            print("\n4. Add xmlns='http://www.w3.org/1998/Math/MathML'")

        # Every recorded issue code gets a recommendation, so the
        # "Issues found:" heading is never followed by nothing.
        if "no_display" in issues:
            print("\n5. Add display='block' to the <math> element")

        if "no_math_root" in issues:
            print("\n6. Make sure the output starts with a <math> root element")

    return {
        "success": True,
        "mathml": mathml,
        "issues": issues,
        "length": len(mathml),
    }
|
||
|
|
|
||
|
|
|
||
|
|
def test_simple_formula():
    """Run the diagnostic on a basic fraction and print its report."""
    formula = r"\frac{a}{b}"
    print("\nTest 1: Simple formula")
    diagnose_mathml(formula)
|
||
|
|
|
||
|
|
|
||
|
|
def test_complex_formula():
    """Run the diagnostic on a delimited 2x2 array and print its report."""
    formula = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
    print("\n\nTest 2: Complex formula with matrix")
    diagnose_mathml(formula)
|
||
|
|
|
||
|
|
|
||
|
|
def test_problematic_formula():
    """Run the diagnostic on the formula reported by the user."""
    formula = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"
    print("\n\nTest 3: User's formula (after OCR fix)")
    diagnose_mathml(formula)
|
||
|
|
|
||
|
|
|
||
|
|
def generate_clean_mathml(
    latex: str = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}",
    converter: "Converter | None" = None,
) -> str:
    """Generate a clean MathML without semantics/annotation.

    Converts ``latex`` (wrapped in ``$...$``) and, when the result contains a
    ``<semantics>`` wrapper, rebuilds a bare ``<math>`` element around the
    wrapper's presentation content. The MathML is printed to stdout as well
    as returned.

    Args:
        latex: LaTeX formula to convert, without the surrounding ``$``.
            Defaults to the formula this script was written to investigate.
        converter: Optional object exposing ``convert_to_formats(text)`` whose
            result has a ``mathml`` attribute. A new ``Converter()`` is
            created when omitted.

    Returns:
        The rebuilt MathML when a ``<semantics>`` wrapper was found, otherwise
        the converter's MathML unchanged.
    """
    import re

    print("\n" + "=" * 80)
    print("Generating Clean MathML for Word")
    print("=" * 80)

    if converter is None:
        converter = Converter()

    result = converter.convert_to_formats(f"${latex}$")
    mathml = result.mathml

    # Capture what sits between the opening <semantics ...> tag and the first
    # <annotation...> child. The opening tag may carry attributes, and the
    # annotation is optional, so the capture also stops at </semantics>.
    match = re.search(
        r'<semantics(?:\s[^>]*)?>(.*?)(?:<annotation|</semantics>)',
        mathml,
        re.DOTALL,
    )
    if match:
        print("\n⚠ Original has <semantics> wrapper")

        content = match.group(1).strip()

        # Rebuild without semantics
        clean_mathml = f'<math display="block" xmlns="http://www.w3.org/1998/Math/MathML">{content}</math>'

        print("\nCleaned MathML (without semantics):")
        print("-" * 80)
        print(clean_mathml)

        print("\n✓ Try pasting this version into Word")
        return clean_mathml

    print("\nGenerated MathML:")
    print("-" * 80)
    print(mathml)

    return mathml
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> int:
    """Run every diagnostic scenario and print the summary.

    Returns:
        0 when all scenarios ran, 1 when any of them raised. The error and
        its traceback are printed before returning.
    """
    import traceback

    print("MathML Word Compatibility Diagnostic Tool\n")

    try:
        test_simple_formula()
        test_complex_formula()
        test_problematic_formula()

        print("\n\n")
        # The cleaned MathML is printed by the call itself; the return value
        # is not needed here.
        generate_clean_mathml()

        print("\n" + "=" * 80)
        print("SUMMARY")
        print("=" * 80)
        print("\nCommon reasons MathML doesn't work in Word:")
        print(" 1. <semantics> wrapper - Word may not parse it correctly")
        print(" 2. <annotation> element - Word doesn't need it")
        print(" 3. HTML entities - Word prefers actual Unicode characters")
        print(" 4. Missing xmlns attribute")
        print(" 5. Wrong paste location in Word")

        print("\nBest practice for Word:")
        print(" • Use simple MathML without semantics wrapper")
        print(" • Include xmlns attribute")
        print(" • Use display='block'")
        print(" • Use actual characters, not entities")

        print("\n" + "=" * 80)

    except Exception as e:
        print(f"\nError: {e}")
        traceback.print_exc()
        # Report the failure through the exit status so shells and CI do not
        # mistake a crashed run for a successful one.
        return 1

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|