"""Diagnostic tool for MathML Word compatibility issues."""

from app.services.converter import Converter


def diagnose_mathml(latex: str) -> dict:
    """Convert LaTeX to MathML and report Word-compatibility problems.

    The formula is wrapped in ``$...$`` and handed to
    ``Converter.convert_to_formats``; the ``mathml`` attribute of the result
    is then inspected with plain substring checks (it is not parsed as XML).
    Progress, findings and recommendations are printed to stdout.

    Args:
        latex: LaTeX formula to convert; the ``$`` delimiters are added here.

    Returns:
        ``{"success": False, "error": <message>}`` if the conversion step
        raised, otherwise ``{"success": True, "mathml": ..., "issues": [...],
        "length": ...}`` where ``issues`` lists the issue codes that were
        detected and ``length`` is ``len(mathml)``.
    """
    converter = Converter()

    print("=" * 80)
    print("MathML Word Compatibility Diagnostic")
    print("=" * 80)

    print(f"\nInput LaTeX: {latex}")

    # Convert. The length report stays inside the try so that an unusable
    # result (e.g. one without a sized ``mathml``) is reported as a failure
    # instead of propagating to the caller.
    try:
        result = converter.convert_to_formats(f"${latex}$")
        mathml = result.mathml

        print("\n✓ Conversion successful")
        print(f"MathML length: {len(mathml)} characters")

    except Exception as e:
        print(f"\n✗ Conversion failed: {e}")
        return {"success": False, "error": str(e)}

    # Diagnostic checks
    print("\n" + "-" * 80)
    print("Word Compatibility Checks:")
    print("-" * 80)

    issues = _check_word_compatibility(mathml)

    # Print MathML structure
    print("\n" + "-" * 80)
    print("MathML Structure:")
    print("-" * 80)

    # Show the first 500 chars, then (for longer markup) the closing part.
    print(mathml[:500])
    if len(mathml) > 500:
        print("...")
        # Never start the tail before index 500, otherwise markup that is
        # only slightly longer than the head would be printed twice.
        tail_start = max(500, len(mathml) - 200)
        print(mathml[tail_start:])

    # Recommendations
    print("\n" + "-" * 80)
    print("Recommendations:")
    print("-" * 80)

    _print_recommendations(issues)

    return {
        "success": True,
        "mathml": mathml,
        "issues": issues,
        "length": len(mathml)
    }


def _check_word_compatibility(mathml: str) -> list:
    """Print each Word-compatibility check for ``mathml`` and collect issues.

    Args:
        mathml: MathML markup to inspect.

    Returns:
        Issue codes in the order the checks ran. Possible codes are
        ``namespace``, ``display_inline``, ``no_display``, ``semantics``,
        ``annotation``, ``entities`` and ``no_math_root``.
    """
    issues = []

    # Check 1: Has proper namespace
    if 'xmlns="http://www.w3.org/1998/Math/MathML"' in mathml:
        print("✓ Has correct MathML namespace")
    else:
        print("✗ Missing or incorrect MathML namespace")
        issues.append("namespace")

    # Check 2: Display attribute
    if 'display="block"' in mathml:
        print("✓ Has display='block' attribute")
    elif 'display="inline"' in mathml:
        print("⚠ Has display='inline' (Word prefers 'block')")
        issues.append("display_inline")
    else:
        print("✗ Missing display attribute")
        issues.append("no_display")

    # Check 3: Check for problematic elements
    if '<semantics>' in mathml:
        print("⚠ Contains <semantics> element")
        print("  Note: Word may ignore semantics wrapper")
        issues.append("semantics")

    if '<annotation' in mathml:
        print("⚠ Contains <annotation> element")
        print("  Note: Word doesn't need annotation, may cause issues")
        issues.append("annotation")

    # Check 4: Unicode entities
    # NOTE(review): &lt; and &amp; are the escapes XML itself requires for a
    # literal '<' or '&', so flagging them may be a false positive - confirm.
    problematic_entities = ['&#x', '&gt;', '&lt;', '&amp;']
    if any(entity in mathml for entity in problematic_entities):
        print("⚠ Contains encoded entities (Word prefers actual characters)")
        issues.append("entities")
    else:
        print("✓ No problematic entities")

    # Check 5: Root element structure
    if mathml.startswith('<math'):
        print("✓ Starts with <math> element")
    else:
        print("✗ Doesn't start with <math> element")
        issues.append("no_math_root")

    # Check 6: Common Word-incompatible attributes. These are reported for
    # information only and are deliberately not recorded as issues.
    if 'class=' in mathml:
        print("⚠ Contains 'class' attribute (Word ignores these)")

    if 'style=' in mathml:
        print("⚠ Contains 'style' attribute (Word ignores these)")

    return issues


def _print_recommendations(issues: list) -> None:
    """Print paste instructions for clean MathML, or one fix per issue found.

    Args:
        issues: Issue codes as returned by ``_check_word_compatibility``.
    """
    if not issues:
        print("✓ MathML appears to be Word-compatible!")
        print("\nHow to paste into Word:")
        print("  1. Copy the MathML XML")
        print("  2. In Word: Insert → Equation → Ink Equation")
        print("  3. Right-click the equation → 'Professional'")
        print("  4. Right-click again → 'Save as new equation'")
        print("\nOR use Alt text method:")
        print("  1. Insert → Equation")
        print("  2. Type any formula")
        print("  3. Right-click → Edit Alt Text")
        print("  4. Paste MathML in Alt Text field")
        return

    print("Issues found:")

    # Each entry: (issue codes that trigger it, headline, extra detail lines).
    # Every code the checks can emit has an entry, so the "Issues found:"
    # header is always followed by at least one recommendation.
    advice = (
        (
            ("semantics", "annotation"),
            "Remove <semantics> and <annotation> wrappers",
            ("   Word only needs the <mrow> content inside",),
        ),
        (("display_inline",), "Change display='inline' to display='block'", ()),
        (("no_display",), "Add display='block' to the <math> element", ()),
        (("entities",), "Decode HTML entities to actual characters", ()),
        (("namespace",), "Add xmlns='http://www.w3.org/1998/Math/MathML'", ()),
        (
            ("no_math_root",),
            "Make sure the markup starts with the <math> element",
            (),
        ),
    )

    step = 0
    for codes, headline, details in advice:
        if not any(code in issues for code in codes):
            continue
        # Number sequentially so the list never jumps (e.g. a lone "4.").
        step += 1
        print(f"\n{step}. {headline}")
        for detail in details:
            print(detail)


def test_simple_formula():
    """Run the diagnostic on a plain fraction and print its report."""
    print("\nTest 1: Simple formula")
    formula = r"\frac{a}{b}"
    diagnose_mathml(formula)


def test_complex_formula():
    """Run the diagnostic on a 2x2 array wrapped in ``\\left| ... \\right|``."""
    print("\n\nTest 2: Complex formula with matrix")
    formula = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
    diagnose_mathml(formula)


def test_problematic_formula():
    """Run the diagnostic on the formula a user reported as problematic."""
    print("\n\nTest 3: User's formula (after OCR fix)")
    formula = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"
    diagnose_mathml(formula)


def generate_clean_mathml(
    latex: str = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}",
):
    """Generate MathML for a formula with any <semantics> wrapper stripped.

    The formula is wrapped in ``$...$`` and converted with
    ``Converter.convert_to_formats``. If the resulting MathML contains a
    ``<semantics>`` element, the content in front of its first
    ``<annotation...>`` child (or in front of ``</semantics>`` when there is
    no annotation) is re-wrapped in a fresh
    ``<math display="block" xmlns=...>`` root. The outcome is printed to
    stdout as well as returned.

    Args:
        latex: LaTeX formula to convert; the ``$`` delimiters are added here.
            Defaults to the sample formula this tool was written to debug.

    Returns:
        The rebuilt markup when a ``<semantics>`` wrapper was stripped,
        otherwise the converter's MathML unchanged.
    """
    import re

    print("\n" + "=" * 80)
    print("Generating Clean MathML for Word")
    print("=" * 80)

    converter = Converter()

    result = converter.convert_to_formats(f"${latex}$")
    mathml = result.mathml

    # Extract content from semantics if present
    if '<semantics>' in mathml:
        print("\n⚠ Original has <semantics> wrapper")

        # Capture everything up to the first annotation child. Accepting
        # </semantics> as an alternative terminator also unwraps markup whose
        # <semantics> element carries no annotation at all.
        match = re.search(
            r'<semantics>(.*?)(?:<annotation|</semantics>)',
            mathml,
            re.DOTALL,
        )
        if match:
            content = match.group(1).strip()

            # Rebuild without semantics
            clean_mathml = f'<math display="block" xmlns="http://www.w3.org/1998/Math/MathML">{content}</math>'

            print("\nCleaned MathML (without semantics):")
            print("-" * 80)
            print(clean_mathml)

            print("\n✓ Try pasting this version into Word")
            return clean_mathml

    # No wrapper found: show and return the converter output as-is.
    print("\nGenerated MathML:")
    print("-" * 80)
    print(mathml)

    return mathml


if __name__ == "__main__":
    # Script entry point: run the canned diagnostics, produce the cleaned
    # MathML sample, then print a summary of common Word pitfalls.
    print("MathML Word Compatibility Diagnostic Tool\n")

    try:
        for run_case in (
            test_simple_formula,
            test_complex_formula,
            test_problematic_formula,
        ):
            run_case()

        print("\n\n")
        clean = generate_clean_mathml()

        banner = "=" * 80
        summary_lines = (
            "\n" + banner,
            "SUMMARY",
            banner,
            "\nCommon reasons MathML doesn't work in Word:",
            "  1. <semantics> wrapper - Word may not parse it correctly",
            "  2. <annotation> element - Word doesn't need it",
            "  3. HTML entities - Word prefers actual Unicode characters",
            "  4. Missing xmlns attribute",
            "  5. Wrong paste location in Word",
            "\nBest practice for Word:",
            "  • Use simple MathML without semantics wrapper",
            "  • Include xmlns attribute",
            "  • Use display='block'",
            "  • Use actual characters, not entities",
            "\n" + banner,
        )
        for summary_line in summary_lines:
            print(summary_line)

    except Exception as exc:
        # Any failure is reported with its traceback rather than re-raised.
        print(f"\nError: {exc}")
        import traceback
        traceback.print_exc()