"""Diagnostic tool for MathML Word compatibility issues."""

import re
import traceback

from app.services.converter import Converter

# NOTE(review): the angle-bracket tag names and the "&...;" entity strings in
# this module were reconstructed after a markup-stripping step removed them
# from the string literals. Confirm them against the pre-extraction file.

MATHML_NAMESPACE = "http://www.w3.org/1998/Math/MathML"

_BANNER = "=" * 80
_RULE = "-" * 80

# Number of characters shown from the start / end of the MathML preview.
_HEAD_CHARS = 500
_TAIL_CHARS = 200

# Formula shared by the "user's formula" test and the clean-MathML generator.
_USER_FORMULA = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"

# Substrings reported as "encoded entities" by check 4.
_ENTITY_MARKERS = ("&#x", "&gt;", "&lt;", "&amp;")

# Captures everything between the opening <semantics> tag and the first
# <annotation ...> tag, i.e. the presentation markup only.
_SEMANTICS_BODY = re.compile(r"<semantics>(.*?)<annotation", re.DOTALL)

# (issue codes that trigger it, headline, optional detail line).
# Entries are printed in this order and numbered sequentially, so a single
# issue is always listed as "1." rather than under a fixed number.
_RECOMMENDATIONS = (
    (
        ("semantics", "annotation"),
        "Remove <semantics> and <annotation> wrappers",
        " Word only needs the content inside",
    ),
    (("display_inline",), "Change display='inline' to display='block'", None),
    (("no_display",), "Add display='block' to the <math> element", None),
    (("entities",), "Decode HTML entities to actual characters", None),
    (("namespace",), f"Add xmlns='{MATHML_NAMESPACE}'", None),
    (("no_math_root",), "Make <math> the first element of the output", None),
)


def _check_word_compatibility(mathml: str) -> list:
    """Print the result of each compatibility check and return issue codes."""
    issues = []

    # Check 1: Has proper namespace
    if f'xmlns="{MATHML_NAMESPACE}"' in mathml:
        print("✓ Has correct MathML namespace")
    else:
        print("✗ Missing or incorrect MathML namespace")
        issues.append("namespace")

    # Check 2: Display attribute
    if 'display="block"' in mathml:
        print("✓ Has display='block' attribute")
    elif 'display="inline"' in mathml:
        print("⚠ Has display='inline' (Word prefers 'block')")
        issues.append("display_inline")
    else:
        print("✗ Missing display attribute")
        issues.append("no_display")

    # Check 3: Check for problematic elements
    if "<semantics>" in mathml:
        print("⚠ Contains <semantics> element")
        print(" Note: Word may ignore semantics wrapper")
        issues.append("semantics")

    # No closing ">" so that <annotation encoding="..."> is matched as well.
    if "<annotation" in mathml:
        print("⚠ Contains <annotation> element")
        print(" Note: Word doesn't need annotation, may cause issues")
        issues.append("annotation")

    # Check 4: Unicode entities
    if any(marker in mathml for marker in _ENTITY_MARKERS):
        print("⚠ Contains encoded entities (Word prefers actual characters)")
        issues.append("entities")
    else:
        print("✓ No problematic entities")

    # Check 5: Root element structure
    if mathml.startswith("<math"):
        print("✓ Starts with <math> element")
    else:
        print("✗ Doesn't start with <math> element")
        issues.append("no_math_root")

    # Check 6: Common Word-incompatible attributes. These are reported only
    # and deliberately not added to the issue list.
    if "class=" in mathml:
        print("⚠ Contains 'class' attribute (Word ignores these)")
    if "style=" in mathml:
        print("⚠ Contains 'style' attribute (Word ignores these)")

    return issues


def _print_structure(mathml: str) -> None:
    """Print the head of the MathML and, for long output, its tail."""
    print("\n" + _RULE)
    print("MathML Structure:")
    print(_RULE)

    print(mathml[:_HEAD_CHARS])
    if len(mathml) > _HEAD_CHARS:
        print("...")
        # Start the tail no earlier than the end of the head so that text is
        # not printed twice when the two windows would overlap.
        tail_start = max(_HEAD_CHARS, len(mathml) - _TAIL_CHARS)
        print(mathml[tail_start:])


def _print_recommendations(issues: list) -> None:
    """Print paste instructions, or one numbered fix per detected issue."""
    print("\n" + _RULE)
    print("Recommendations:")
    print(_RULE)

    if not issues:
        print("✓ MathML appears to be Word-compatible!")
        print("\nHow to paste into Word:")
        print(" 1. Copy the MathML XML")
        print(" 2. In Word: Insert → Equation → Ink Equation")
        print(" 3. Right-click the equation → 'Professional'")
        print(" 4. Right-click again → 'Save as new equation'")
        print("\nOR use Alt text method:")
        print(" 1. Insert → Equation")
        print(" 2. Type any formula")
        print(" 3. Right-click → Edit Alt Text")
        print(" 4. Paste MathML in Alt Text field")
        return

    print("Issues found:")
    found = set(issues)
    number = 0
    for triggers, headline, detail in _RECOMMENDATIONS:
        if found.isdisjoint(triggers):
            continue
        number += 1
        print(f"\n{number}. {headline}")
        if detail:
            print(detail)


def diagnose_mathml(latex: str) -> dict:
    """Diagnose MathML generation and Word compatibility.

    Converts the formula, prints a report of the compatibility checks, a
    preview of the generated MathML and a list of recommendations.

    Args:
        latex: LaTeX formula to convert (without the surrounding ``$``).

    Returns:
        On success, a dictionary with ``success`` (True), ``mathml``,
        ``issues`` (list of issue codes) and ``length``. If the conversion
        raises, a dictionary with ``success`` (False) and ``error``.
    """
    converter = Converter()

    print(_BANNER)
    print("MathML Word Compatibility Diagnostic")
    print(_BANNER)

    print(f"\nInput LaTeX: {latex}")

    # Convert. This is a diagnostic boundary: any failure is reported and
    # returned to the caller instead of propagating.
    try:
        result = converter.convert_to_formats(f"${latex}$")
        mathml = result.mathml
        print("\n✓ Conversion successful")
        print(f"MathML length: {len(mathml)} characters")
    except Exception as e:
        print(f"\n✗ Conversion failed: {e}")
        return {"success": False, "error": str(e)}

    print("\n" + _RULE)
    print("Word Compatibility Checks:")
    print(_RULE)

    issues = _check_word_compatibility(mathml)
    _print_structure(mathml)
    _print_recommendations(issues)

    return {
        "success": True,
        "mathml": mathml,
        "issues": issues,
        "length": len(mathml),
    }


def test_simple_formula():
    """Test with a simple formula."""
    print("\nTest 1: Simple formula")
    diagnose_mathml(r"\frac{a}{b}")


def test_complex_formula():
    """Test with a complex formula."""
    print("\n\nTest 2: Complex formula with matrix")
    diagnose_mathml(
        r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
    )


def test_problematic_formula():
    """Test with the user's problematic formula."""
    print("\n\nTest 3: User's formula (after OCR fix)")
    diagnose_mathml(_USER_FORMULA)


def generate_clean_mathml(latex: str = _USER_FORMULA) -> str:
    """Generate a clean MathML without semantics/annotation.

    Args:
        latex: LaTeX formula to convert (without the surrounding ``$``).
            Defaults to the formula used by ``test_problematic_formula``.

    Returns:
        The MathML rebuilt around the content of the ``<semantics>`` wrapper
        when that content can be extracted, otherwise the MathML exactly as
        produced by the converter.
    """
    print("\n" + _BANNER)
    print("Generating Clean MathML for Word")
    print(_BANNER)

    converter = Converter()
    result = converter.convert_to_formats(f"${latex}$")
    mathml = result.mathml

    # Remove the semantics wrapper if present.
    if "<semantics>" in mathml:
        print("\n⚠ Original has <semantics> wrapper")

        # Try to extract just the mrow content.
        match = _SEMANTICS_BODY.search(mathml)
        if match:
            content = match.group(1)

            # Rebuild without semantics.
            clean_mathml = (
                f'<math xmlns="{MATHML_NAMESPACE}" display="block">'
                f"{content}</math>"
            )

            print("\nCleaned MathML (without semantics):")
            print(_RULE)
            print(clean_mathml)
            print("\n✓ Try pasting this version into Word")

            return clean_mathml

    # No wrapper, or its content could not be isolated: show the raw output.
    print("\nGenerated MathML:")
    print(_RULE)
    print(mathml)

    return mathml


def main() -> None:
    """Run every diagnostic scenario and print a summary of common causes."""
    print("MathML Word Compatibility Diagnostic Tool\n")

    # Top-level boundary of the script: report any failure with a traceback
    # rather than exiting on an unhandled exception.
    try:
        test_simple_formula()
        test_complex_formula()
        test_problematic_formula()

        print("\n\n")
        generate_clean_mathml()

        print("\n" + _BANNER)
        print("SUMMARY")
        print(_BANNER)
        print("\nCommon reasons MathML doesn't work in Word:")
        print(" 1. <semantics> wrapper - Word may not parse it correctly")
        print(" 2. <annotation> element - Word doesn't need it")
        print(" 3. HTML entities - Word prefers actual Unicode characters")
        print(" 4. Missing xmlns attribute")
        print(" 5. Wrong paste location in Word")
        print("\nBest practice for Word:")
        print(" • Use simple MathML without semantics wrapper")
        print(" • Include xmlns attribute")
        print(" • Use display='block'")
        print(" • Use actual characters, not entities")
        print("\n" + _BANNER)

    except Exception as e:
        print(f"\nError: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()