refact: rm test file

2026-02-04 17:33:42 +08:00
parent cd790231ec
commit 808d29bd45
12 changed files with 0 additions and 1995 deletions
--- a/test_array_fix.py
+++ b/test_array_fix.py
@@ -1,102 +0,0 @@
-"""Test script for array column specifier fix."""
-
-from app.services.converter import Converter
-
-
-def test_array_specifier_fix():
-    """Test that array column specifiers with spaces are fixed."""
-    
-    converter = Converter()
-    
-    # The problematic LaTeX from the error
-    latex_formula = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""
-    
-    print("Testing array column specifier fix")
-    print("=" * 80)
-    print(f"\nOriginal LaTeX (first 200 chars):\n{latex_formula[:200]}...")
-    
-    # Test preprocessing
-    print("\n" + "-" * 80)
-    print("Step 1: Preprocessing")
-    preprocessed = converter._preprocess_formula_for_omml(latex_formula)
-    
-    # Check if spaces were removed from array specifiers
-    if "{c c c c}" in preprocessed:
-        print("✗ FAILED: Spaces not removed from array specifiers")
-        print(f"Found: {preprocessed[preprocessed.find('{c c c c}'):preprocessed.find('{c c c c}')+10]}")
-    elif "{cccc}" in preprocessed:
-        print("✓ SUCCESS: Spaces removed from array specifiers")
-        print(f"Changed '{{{\"c c c c\"}}}' → '{{cccc}}'")
-    else:
-        print("? Could not find array specifier in preprocessed output")
-    
-    # Test OMML conversion
-    print("\n" + "-" * 80)
-    print("Step 2: OMML Conversion")
-    try:
-        omml = converter.convert_to_omml(latex_formula)
-        print(f"✓ SUCCESS: OMML conversion completed")
-        print(f"OMML length: {len(omml)} characters")
-        print(f"OMML preview (first 300 chars):\n{omml[:300]}...")
-        
-        # Check if it contains oMath element
-        if "oMath" in omml:
-            print("\n✓ Valid OMML: Contains oMath element")
-        else:
-            print("\n✗ WARNING: OMML might be incomplete (no oMath element found)")
-            
-    except Exception as e:
-        print(f"✗ FAILED: OMML conversion error")
-        print(f"Error: {e}")
-        return False
-    
-    print("\n" + "=" * 80)
-    print("✓ All tests passed!")
-    return True
-
-
-def test_simple_array():
-    """Test with a simpler array example."""
-    
-    converter = Converter()
-    
-    print("\nTesting simple array")
-    print("=" * 80)
-    
-    # Simple array with spaces in column specifier
-    latex_formula = r"\begin{array}{c c c} a & b & c \\ d & e & f \end{array}"
-    
-    print(f"LaTeX: {latex_formula}")
-    
-    try:
-        omml = converter.convert_to_omml(latex_formula)
-        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
-        print(f"Preview: {omml[:200]}...")
-        return True
-    except Exception as e:
-        print(f"✗ FAILED: {e}")
-        return False
-
-
-if __name__ == "__main__":
-    print("Array Column Specifier Fix Test Suite\n")
-    
-    try:
-        test1 = test_simple_array()
-        test2 = test_array_specifier_fix()
-        
-        if test1 and test2:
-            print("\n" + "=" * 80)
-            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
-            print("=" * 80)
-        else:
-            print("\n" + "=" * 80)
-            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
-            print("=" * 80)
-            
-    except KeyboardInterrupt:
-        print("\n\nTests interrupted by user")
-    except Exception as e:
-        print(f"\n\nTest suite error: {e}")
-        import traceback
-        traceback.print_exc()
--- a/test_array_fix_complete.py
+++ b/test_array_fix_complete.py
@@ -1,254 +0,0 @@
-"""Comprehensive test for array column specifier fix in all conversion paths."""
-
-from app.services.converter import Converter
-
-
-def test_problematic_array():
-    """Test the exact LaTeX that caused the error."""
-    
-    print("=" * 80)
-    print("Testing Problematic Array (from error log)")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    # The exact LaTeX from the error log
-    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""
-    
-    print(f"\nLaTeX length: {len(latex)} characters")
-    print(f"Contains '{{{\"c c c c\"}}}': {'{c c c c}' in latex}")
-    
-    # Test 1: Preprocessing
-    print("\n" + "-" * 80)
-    print("Test 1: Preprocessing")
-    print("-" * 80)
-    
-    preprocessed = converter._preprocess_formula_for_conversion(latex)
-    
-    if '{c c c c}' in preprocessed:
-        print("✗ FAILED: Spaces NOT removed from array specifiers")
-        print(f"  Still found: {preprocessed[preprocessed.find('{c c c c}'):preprocessed.find('{c c c c}')+15]}")
-        return False
-    elif '{cccc}' in preprocessed:
-        print("✓ SUCCESS: Spaces removed from array specifiers")
-        print(f"  '{{{\"c c c c\"}}}' → '{{cccc}}'")
-    else:
-        print("? WARNING: Could not verify specifier fix")
-    
-    # Test 2: MathML Conversion
-    print("\n" + "-" * 80)
-    print("Test 2: MathML Conversion (via convert_to_formats)")
-    print("-" * 80)
-    
-    try:
-        result = converter.convert_to_formats(f"$${latex}$$")
-        
-        if result.mathml:
-            print(f"✓ SUCCESS: MathML generated ({len(result.mathml)} chars)")
-            
-            # Check for Word compatibility
-            if 'display="block"' in result.mathml:
-                print("  ✓ Has display='block' (Word-friendly)")
-            
-            if '&#x0002B;' not in result.mathml and '&#x0003D;' not in result.mathml:
-                print("  ✓ No problematic Unicode entities")
-            
-            print(f"\n  MathML preview:\n  {result.mathml[:200]}...")
-        else:
-            print("✗ FAILED: No MathML generated")
-            return False
-            
-    except Exception as e:
-        print(f"✗ FAILED: MathML conversion error: {e}")
-        return False
-    
-    # Test 3: OMML Conversion
-    print("\n" + "-" * 80)
-    print("Test 3: OMML Conversion")
-    print("-" * 80)
-    
-    try:
-        omml = converter.convert_to_omml(latex)
-        
-        if omml:
-            print(f"✓ SUCCESS: OMML generated ({len(omml)} chars)")
-            
-            if 'oMath' in omml:
-                print("  ✓ Valid OMML structure")
-            
-            print(f"\n  OMML preview:\n  {omml[:200]}...")
-        else:
-            print("✗ FAILED: No OMML generated")
-            return False
-            
-    except Exception as e:
-        print(f"✗ FAILED: OMML conversion error: {e}")
-        return False
-    
-    print("\n" + "=" * 80)
-    print("✓✓✓ ALL CONVERSION PATHS WORKING ✓✓✓")
-    print("=" * 80)
-    
-    return True
-
-
-def test_simple_arrays():
-    """Test simple arrays with spaces in column specifiers."""
-    
-    print("\n" + "=" * 80)
-    print("Testing Simple Arrays")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    test_cases = [
-        ("2x2 array", r"\begin{array}{c c} a & b \\ c & d \end{array}"),
-        ("3x3 array", r"\begin{array}{c c c} 1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9 \end{array}"),
-        ("Array with pipes", r"\left| \begin{array}{c c} a & b \\ c & d \end{array} \right|"),
-        ("Mixed alignment", r"\begin{array}{l r c} left & right & center \end{array}"),
-    ]
-    
-    all_passed = True
-    
-    for name, latex in test_cases:
-        print(f"\n{name}")
-        print("-" * 40)
-        print(f"LaTeX: {latex}")
-        
-        # Check preprocessing
-        preprocessed = converter._preprocess_formula_for_conversion(latex)
-        has_spaces = any(f"{{{'  '.join(chars)}}}" in preprocessed for chars in [['c', 'c'], ['c', 'c', 'c'], ['l', 'r', 'c']])
-        
-        try:
-            result = converter.convert_to_formats(f"${latex}$")
-            
-            if result.mathml and result.mml:
-                status = "✓" if not has_spaces else "✗"
-                print(f"{status} MathML: {len(result.mathml)} chars, MML: {len(result.mml)} chars")
-                
-                if not has_spaces:
-                    print("  ✓ Array specifiers fixed")
-                else:
-                    print("  ✗ Array specifiers still have spaces")
-                    all_passed = False
-            else:
-                print("✗ Conversion failed")
-                all_passed = False
-                
-        except Exception as e:
-            print(f"✗ Error: {e}")
-            all_passed = False
-    
-    return all_passed
-
-
-def test_conversion_consistency():
-    """Test that all conversion paths use the same preprocessing."""
-    
-    print("\n" + "=" * 80)
-    print("Testing Conversion Consistency")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    # Test formula with multiple issues
-    latex = r"""
-    \left\{ \begin{array}{l c}
-        \begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\
-        \begin{cases} x & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{sign}
-    \end{array} \right.
-    """.strip()
-    
-    print(f"\nComplex formula with:")
-    print("  - array with spaces: {l c}")
-    print("  - vmatrix environment")
-    print("  - cases environment")
-    
-    print("\n" + "-" * 80)
-    print("Preprocessing check:")
-    print("-" * 80)
-    
-    preprocessed = converter._preprocess_formula_for_conversion(latex)
-    
-    checks = {
-        "Array spaces removed": '{l c}' not in preprocessed and '{lc}' in preprocessed,
-        "vmatrix converted": 'vmatrix' not in preprocessed,
-        "cases converted": 'cases' not in preprocessed and 'array' in preprocessed,
-    }
-    
-    for check, passed in checks.items():
-        status = "✓" if passed else "✗"
-        print(f"{status} {check}")
-    
-    print("\n" + "-" * 80)
-    print("Conversion paths:")
-    print("-" * 80)
-    
-    all_passed = True
-    
-    # Test MathML
-    try:
-        result = converter.convert_to_formats(f"$${latex}$$")
-        print(f"✓ MathML: {len(result.mathml)} chars")
-        print(f"✓ MML: {len(result.mml)} chars")
-    except Exception as e:
-        print(f"✗ MathML failed: {e}")
-        all_passed = False
-    
-    # Test OMML
-    try:
-        omml = converter.convert_to_omml(latex)
-        print(f"✓ OMML: {len(omml)} chars")
-    except Exception as e:
-        print(f"✗ OMML failed: {e}")
-        all_passed = False
-    
-    return all_passed and all(checks.values())
-
-
-if __name__ == "__main__":
-    print("=" * 80)
-    print("COMPREHENSIVE ARRAY FIX TEST SUITE")
-    print("Testing all conversion paths with preprocessing")
-    print("=" * 80)
-    
-    try:
-        test1 = test_problematic_array()
-        test2 = test_simple_arrays()
-        test3 = test_conversion_consistency()
-        
-        print("\n" + "=" * 80)
-        print("FINAL SUMMARY")
-        print("=" * 80)
-        
-        results = [
-            ("Problematic array fix", test1),
-            ("Simple arrays", test2),
-            ("Conversion consistency", test3),
-        ]
-        
-        for name, passed in results:
-            status = "✓ PASS" if passed else "✗ FAIL"
-            print(f"{status}: {name}")
-        
-        all_passed = all(result[1] for result in results)
-        
-        print("\n" + "-" * 80)
-        
-        if all_passed:
-            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
-            print("\nThe array column specifier fix is working in ALL conversion paths:")
-            print("  • MathML conversion (for Word paste)")
-            print("  • MML conversion (namespaced MathML)")
-            print("  • OMML conversion (Word native)")
-        else:
-            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
-        
-        print("=" * 80)
-        
-    except KeyboardInterrupt:
-        print("\n\nTests interrupted")
-    except Exception as e:
-        print(f"\n\nTest error: {e}")
-        import traceback
-        traceback.print_exc()
--- a/test_converter.py
+++ b/test_converter.py
@@ -1,57 +0,0 @@
-"""Test script for converter functionality."""
-
-from app.services.converter import Converter
-
-
-def test_latex_only_conversion():
-    """Test conversion of LaTeX-only content."""
-    converter = Converter()
-
-    # Test case 1: Display math with $$...$$
-    latex_input = "$$E = mc^2$$"
-    result = converter.convert_to_formats(latex_input)
-
-    print("Test 1: Display math ($$...$$)")
-    print(f"Input: {latex_input}")
-    print(f"LaTeX: {result.latex}")
-    print(f"MathML: {result.mathml[:100]}...")
-    print(f"MML: {result.mml[:100]}...")
-    print(f"OMML: {result.omml[:100] if result.omml else 'Empty'}...")
-    print()
-
-    # Test case 2: Inline math with $...$
-    latex_input2 = "$\\frac{a}{b}$"
-    result2 = converter.convert_to_formats(latex_input2)
-
-    print("Test 2: Inline math ($...$)")
-    print(f"Input: {latex_input2}")
-    print(f"LaTeX: {result2.latex}")
-    print(f"MathML: {result2.mathml[:100]}...")
-    print()
-
-    # Test case 3: Complex formula
-    latex_input3 = "$$\\int_{0}^{\\infty} e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}$$"
-    result3 = converter.convert_to_formats(latex_input3)
-
-    print("Test 3: Complex formula")
-    print(f"Input: {latex_input3}")
-    print(f"LaTeX: {result3.latex}")
-    print(f"MathML: {result3.mathml[:150]}...")
-    print(f"OMML length: {len(result3.omml)}")
-    print()
-
-    # Test case 4: Regular markdown (not LaTeX-only)
-    markdown_input = "# Hello\n\nThis is a test with math: $x = 2$"
-    result4 = converter.convert_to_formats(markdown_input)
-
-    print("Test 4: Regular markdown")
-    print(f"Input: {markdown_input}")
-    print(f"LaTeX: {result4.latex[:100]}...")
-    print(f"MathML: {result4.mathml[:100]}...")
-    print(f"MML: {result4.mml}")
-    print(f"OMML: {result4.omml}")
-    print()
-
-
-if __name__ == "__main__":
-    test_latex_only_conversion()
--- a/test_mathml_comparison.py
+++ b/test_mathml_comparison.py
@@ -1,95 +0,0 @@
-"""对比测试：展示 MathML 简化前后的差异."""
-
-from app.services.converter import Converter
-
-
-def compare_simplification():
-    """对比简化前后的 MathML."""
-    
-    # 模拟简化前的 MathML（Pandoc 典型输出）
-    before_example = '''<math display="inline" xmlns="http://www.w3.org/1998/Math/MathML">
-<semantics>
-<mrow>
-<mi>γ</mi>
-<mo form="infix">=</mo>
-<mn>22</mn>
-<mo form="infix">.</mo>
-<mn>2</mn>
-<mo form="infix" separator="true">,</mo>
-<mi>c</mi>
-<mo form="infix">=</mo>
-<mn>30</mn>
-<mo form="infix">.</mo>
-<mn>4</mn>
-</mrow>
-<annotation encoding="application/x-tex">\\gamma = 22.2, c = 30.4</annotation>
-</semantics>
-</math>'''
-    
-    # 测试实际转换
-    converter = Converter()
-    result = converter.convert_to_formats(r"$\gamma = 22.2, c = 30.4$")
-    
-    print("=" * 80)
-    print("MathML 简化效果对比")
-    print("=" * 80)
-    
-    print("\n【简化前（典型 Pandoc 输出）】")
-    print(f"长度: {len(before_example)} 字符")
-    print(before_example)
-    
-    print("\n" + "-" * 80)
-    
-    print("\n【简化后（当前输出）】")
-    print(f"长度: {len(result.mathml)} 字符")
-    print(result.mathml)
-    
-    print("\n" + "-" * 80)
-    
-    # 计算减少的比例
-    reduction = ((len(before_example) - len(result.mathml)) / len(before_example)) * 100
-    print(f"\n📊 大小减少: {reduction:.1f}%")
-    
-    # 列出移除的冗余元素
-    print("\n✅ 已移除的冗余:")
-    removed = [
-        "<semantics> 包装器",
-        "<annotation> 元素",
-        'form="infix" 属性',
-        'form="prefix" 属性',
-        'form="postfix" 属性',
-        'separator="true" 属性',
-        'stretchy="true" 属性',
-        'fence="true" 属性',
-        'columnalign 属性',
-        'columnspacing 属性',
-        '不必要的空白',
-        'display="inline" → display="block"',
-        'Unicode 实体 → 实际字符'
-    ]
-    
-    for item in removed:
-        print(f"  • {item}")
-    
-    print("\n" + "=" * 80)
-    
-    # 测试更多示例
-    test_cases = [
-        (r"\frac{a}{b}", "分数"),
-        (r"x^{2} + y^{2} = r^{2}", "幂次"),
-        (r"\sqrt{a + b}", "根号"),
-        (r"\left| \frac{a}{b} \right|", "括号和分数"),
-    ]
-    
-    print("\n更多示例:")
-    print("=" * 80)
-    
-    for latex, desc in test_cases:
-        result = converter.convert_to_formats(f"${latex}$")
-        print(f"\n{desc}: ${latex}$")
-        print(f"长度: {len(result.mathml)} 字符")
-        print(result.mathml[:200] + ("..." if len(result.mathml) > 200 else ""))
-
-
-if __name__ == "__main__":
-    compare_simplification()
--- a/test_mathml_simplification.py
+++ b/test_mathml_simplification.py
@@ -1,55 +0,0 @@
-"""Test MathML simplification."""
-
-from app.services.converter import Converter
-
-
-def show_current_output():
-    """Show current MathML output."""
-    converter = Converter()
-    
-    test_cases = [
-        (r"\gamma = 22.2", "简单公式"),
-        (r"\frac{a}{b}", "分数"),
-        (r"x^{2} + y^{2}", "上标"),
-        (r"\sqrt{a + b}", "根号"),
-    ]
-    
-    print("=" * 80)
-    print("当前 MathML 输出分析")
-    print("=" * 80)
-    
-    for latex, desc in test_cases:
-        print(f"\n{desc}: ${latex}$")
-        print("-" * 80)
-        
-        result = converter.convert_to_formats(f"${latex}$")
-        mathml = result.mathml
-        
-        print(f"长度: {len(mathml)} 字符")
-        print(f"\n{mathml}\n")
-        
-        # 分析冗余
-        redundancies = []
-        
-        if '<mrow>' in mathml and mathml.count('<mrow>') > 1:
-            redundancies.append(f"多层 <mrow> 嵌套 ({mathml.count('<mrow>')} 个)")
-        
-        if 'columnalign="center"' in mathml:
-            redundancies.append("columnalign 属性（可能不必要）")
-        
-        if 'form="prefix"' in mathml or 'form="postfix"' in mathml:
-            redundancies.append("form 属性（可简化）")
-        
-        if 'stretchy="true"' in mathml:
-            redundancies.append("stretchy 属性（可简化）")
-        
-        if redundancies:
-            print("可能的冗余:")
-            for r in redundancies:
-                print(f"  • {r}")
-        else:
-            print("✓ 已经很简洁")
-
-
-if __name__ == "__main__":
-    show_current_output()
--- a/test_mathml_word_compatibility.py
+++ b/test_mathml_word_compatibility.py
@@ -1,236 +0,0 @@
-"""Diagnostic tool for MathML Word compatibility issues."""
-
-from app.services.converter import Converter
-
-
-def diagnose_mathml(latex: str) -> dict:
-    """Diagnose MathML generation and Word compatibility.
-    
-    Args:
-        latex: LaTeX formula to convert.
-        
-    Returns:
-        Dictionary with diagnostic information.
-    """
-    converter = Converter()
-    
-    print("=" * 80)
-    print("MathML Word Compatibility Diagnostic")
-    print("=" * 80)
-    
-    print(f"\nInput LaTeX: {latex}")
-    
-    # Convert
-    try:
-        result = converter.convert_to_formats(f"${latex}$")
-        mathml = result.mathml
-        
-        print(f"\n✓ Conversion successful")
-        print(f"MathML length: {len(mathml)} characters")
-        
-    except Exception as e:
-        print(f"\n✗ Conversion failed: {e}")
-        return {"success": False, "error": str(e)}
-    
-    # Diagnostic checks
-    print("\n" + "-" * 80)
-    print("Word Compatibility Checks:")
-    print("-" * 80)
-    
-    issues = []
-    
-    # Check 1: Has proper namespace
-    if 'xmlns="http://www.w3.org/1998/Math/MathML"' in mathml:
-        print("✓ Has correct MathML namespace")
-    else:
-        print("✗ Missing or incorrect MathML namespace")
-        issues.append("namespace")
-    
-    # Check 2: Display attribute
-    if 'display="block"' in mathml:
-        print("✓ Has display='block' attribute")
-    elif 'display="inline"' in mathml:
-        print("⚠ Has display='inline' (Word prefers 'block')")
-        issues.append("display_inline")
-    else:
-        print("✗ Missing display attribute")
-        issues.append("no_display")
-    
-    # Check 3: Check for problematic elements
-    if '<semantics>' in mathml:
-        print("⚠ Contains <semantics> element")
-        print("  Note: Word may ignore semantics wrapper")
-        issues.append("semantics")
-    
-    if '<annotation' in mathml:
-        print("⚠ Contains <annotation> element")
-        print("  Note: Word doesn't need annotation, may cause issues")
-        issues.append("annotation")
-    
-    # Check 4: Unicode entities
-    problematic_entities = ['&#x', '&gt;', '&lt;', '&amp;']
-    has_entities = any(entity in mathml for entity in problematic_entities)
-    if has_entities:
-        print("⚠ Contains encoded entities (Word prefers actual characters)")
-        issues.append("entities")
-    else:
-        print("✓ No problematic entities")
-    
-    # Check 5: Root element structure
-    if mathml.startswith('<math'):
-        print("✓ Starts with <math> element")
-    else:
-        print("✗ Doesn't start with <math> element")
-        issues.append("no_math_root")
-    
-    # Check 6: Check for common Word-incompatible attributes
-    if 'class=' in mathml:
-        print("⚠ Contains 'class' attribute (Word ignores these)")
-    
-    if 'style=' in mathml:
-        print("⚠ Contains 'style' attribute (Word ignores these)")
-    
-    # Print MathML structure
-    print("\n" + "-" * 80)
-    print("MathML Structure:")
-    print("-" * 80)
-    
-    # Show first 500 chars
-    print(mathml[:500])
-    if len(mathml) > 500:
-        print("...")
-        print(mathml[-200:])
-    
-    # Recommendations
-    print("\n" + "-" * 80)
-    print("Recommendations:")
-    print("-" * 80)
-    
-    if not issues:
-        print("✓ MathML appears to be Word-compatible!")
-        print("\nHow to paste into Word:")
-        print("  1. Copy the MathML XML")
-        print("  2. In Word: Insert → Equation → Ink Equation")
-        print("  3. Right-click the equation → 'Professional'")
-        print("  4. Right-click again → 'Save as new equation'")
-        print("\nOR use Alt text method:")
-        print("  1. Insert → Equation")
-        print("  2. Type any formula")
-        print("  3. Right-click → Edit Alt Text")
-        print("  4. Paste MathML in Alt Text field")
-    else:
-        print("Issues found:")
-        if "semantics" in issues or "annotation" in issues:
-            print("\n1. Remove <semantics> and <annotation> wrappers")
-            print("   Word only needs the <mrow> content inside")
-        
-        if "display_inline" in issues:
-            print("\n2. Change display='inline' to display='block'")
-        
-        if "entities" in issues:
-            print("\n3. Decode HTML entities to actual characters")
-        
-        if "namespace" in issues:
-            print("\n4. Add xmlns='http://www.w3.org/1998/Math/MathML'")
-    
-    return {
-        "success": True,
-        "mathml": mathml,
-        "issues": issues,
-        "length": len(mathml)
-    }
-
-
-def test_simple_formula():
-    """Test with a simple formula."""
-    print("\nTest 1: Simple formula")
-    diagnose_mathml(r"\frac{a}{b}")
-
-
-def test_complex_formula():
-    """Test with a complex formula."""
-    print("\n\nTest 2: Complex formula with matrix")
-    diagnose_mathml(r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|")
-
-
-def test_problematic_formula():
-    """Test with the user's problematic formula."""
-    print("\n\nTest 3: User's formula (after OCR fix)")
-    diagnose_mathml(r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}")
-
-
-def generate_clean_mathml():
-    """Generate a clean MathML without semantics/annotation."""
-    
-    print("\n" + "=" * 80)
-    print("Generating Clean MathML for Word")
-    print("=" * 80)
-    
-    converter = Converter()
-    latex = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"
-    
-    result = converter.convert_to_formats(f"${latex}$")
-    mathml = result.mathml
-    
-    # Remove semantics wrapper if present
-    import re
-    
-    # Extract content from semantics if present
-    if '<semantics>' in mathml:
-        print("\n⚠ Original has <semantics> wrapper")
-        
-        # Try to extract just the mrow content
-        match = re.search(r'<semantics>(.*?)<annotation', mathml, re.DOTALL)
-        if match:
-            content = match.group(1).strip()
-            
-            # Rebuild without semantics
-            clean_mathml = f'<math display="block" xmlns="http://www.w3.org/1998/Math/MathML">{content}</math>'
-            
-            print("\nCleaned MathML (without semantics):")
-            print("-" * 80)
-            print(clean_mathml)
-            
-            print("\n✓ Try pasting this version into Word")
-            return clean_mathml
-    
-    print("\nGenerated MathML:")
-    print("-" * 80)
-    print(mathml)
-    
-    return mathml
-
-
-if __name__ == "__main__":
-    print("MathML Word Compatibility Diagnostic Tool\n")
-    
-    try:
-        test_simple_formula()
-        test_complex_formula()
-        test_problematic_formula()
-        
-        print("\n\n")
-        clean = generate_clean_mathml()
-        
-        print("\n" + "=" * 80)
-        print("SUMMARY")
-        print("=" * 80)
-        print("\nCommon reasons MathML doesn't work in Word:")
-        print("  1. <semantics> wrapper - Word may not parse it correctly")
-        print("  2. <annotation> element - Word doesn't need it")
-        print("  3. HTML entities - Word prefers actual Unicode characters")
-        print("  4. Missing xmlns attribute")
-        print("  5. Wrong paste location in Word")
-        
-        print("\nBest practice for Word:")
-        print("  • Use simple MathML without semantics wrapper")
-        print("  • Include xmlns attribute")
-        print("  • Use display='block'")
-        print("  • Use actual characters, not entities")
-        
-        print("\n" + "=" * 80)
-        
-    except Exception as e:
-        print(f"\nError: {e}")
-        import traceback
-        traceback.print_exc()
--- a/test_mineru_fix.py
+++ b/test_mineru_fix.py
@@ -1,105 +0,0 @@
-"""Quick test to verify MinerU postprocessing is enabled."""
-
-from app.services.ocr_service import _postprocess_markdown
-
-
-def test_mineru_postprocessing():
-    """Test that postprocessing works for MinerU output."""
-    
-    print("=" * 80)
-    print("Testing MinerU Postprocessing")
-    print("=" * 80)
-    
-    # Simulate MinerU OCR output (with number errors)
-    mineru_markdown = r"""$$
-\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}
-$$"""
-    
-    print("\nMinerU OCR Output (raw):")
-    print(mineru_markdown)
-    
-    # Apply postprocessing
-    fixed = _postprocess_markdown(mineru_markdown)
-    
-    print("\nAfter Postprocessing:")
-    print(fixed)
-    
-    print("\n" + "-" * 80)
-    print("Verification:")
-    print("-" * 80)
-    
-    checks = [
-        ("Has '22.2'", "22.2" in fixed),
-        ("Has '30.4'", "30.4" in fixed),
-        ("Has '25.4'", "25.4" in fixed),
-        ("No '2 2'", "2 2" not in fixed),
-        ("No '3 0'", "3 0" not in fixed),
-        ("No '2 5'", "2 5" not in fixed),
-    ]
-    
-    all_passed = True
-    for check_name, passed in checks:
-        status = "✓" if passed else "✗"
-        print(f"{status} {check_name}")
-        if not passed:
-            all_passed = False
-    
-    if all_passed:
-        print("\n✓✓✓ MinerU postprocessing is working! ✓✓✓")
-    else:
-        print("\n✗✗✗ MinerU postprocessing has issues ✗✗✗")
-    
-    return all_passed
-
-
-def test_expected_api_response():
-    """Test what the API response should look like."""
-    
-    print("\n" + "=" * 80)
-    print("Expected API Response Format")
-    print("=" * 80)
-    
-    ocr_output = r"$$\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}$$"
-    fixed = _postprocess_markdown(ocr_output)
-    
-    print("\nBefore postprocessing:")
-    print(f"  markdown: {ocr_output}")
-    
-    print("\nAfter postprocessing (what API should return):")
-    print(f"  markdown: {fixed}")
-    
-    print("\nExpected changes:")
-    print("  • '2 2. 2' → '22.2'")
-    print("  • '3 0. 4' → '30.4'")
-    print("  • '2 5. 4' → '25.4'")
-    
-    print("\n" + "-" * 80)
-    print("Note: The API should return the FIXED markdown")
-    print("      All other formats (latex, mathml, mml) are derived from this")
-    print("-" * 80)
-
-
-if __name__ == "__main__":
-    print("MinerU Postprocessing Verification\n")
-    
-    try:
-        test1 = test_mineru_postprocessing()
-        test_expected_api_response()
-        
-        print("\n" + "=" * 80)
-        
-        if test1:
-            print("✓ MinerU postprocessing is NOW ENABLED")
-            print("\nNext steps:")
-            print("  1. Restart the server")
-            print("  2. Test with the same request")
-            print("  3. The markdown field should now have '22.2' instead of '2 2. 2'")
-        else:
-            print("✗ There may still be issues")
-        
-        print("=" * 80)
-        
-    except Exception as e:
-        print(f"\nError: {e}")
-        import traceback
-        traceback.print_exc()
--- a/test_ocr_number_fix.py
+++ b/test_ocr_number_fix.py
@@ -1,294 +0,0 @@
-"""Test OCR number error fixing."""
-
-from app.services.converter import Converter
-
-
-def test_ocr_number_errors():
-    """Test fixing of common OCR number errors."""
-    
-    print("=" * 80)
-    print("Testing OCR Number Error Fixes")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    # Test cases from the error
-    test_cases = [
-        {
-            "name": "Original error case",
-            "latex": r"\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}",
-            "expected_fixes": ["22.2", "30.4", "25.4"],
-            "should_not_have": ["2 2", "3 0", "2 5"],
-        },
-        {
-            "name": "Simple decimal with space",
-            "latex": r"x = 3. 14",
-            "expected_fixes": ["3.14"],
-            "should_not_have": ["3. 14"],
-        },
-        {
-            "name": "Multiple decimals",
-            "latex": r"a = 1 2. 5, b = 9. 8 7",
-            "expected_fixes": ["12.5", "9.87"],
-            "should_not_have": ["1 2", "9. 8"],
-        },
-        {
-            "name": "Large numbers with spaces",
-            "latex": r"n = 1 5 0, m = 2 0 0 0",
-            "expected_fixes": ["150", "2000"],
-            "should_not_have": ["1 5", "2 0 0"],
-        },
-        {
-            "name": "Don't merge across operators",
-            "latex": r"2 + 3 = 5",
-            "expected_fixes": ["2 + 3 = 5"],  # Should stay the same
-            "should_not_have": ["23=5"],
-        },
-    ]
-    
-    all_passed = True
-    
-    for i, test in enumerate(test_cases, 1):
-        print(f"\nTest {i}: {test['name']}")
-        print("-" * 80)
-        print(f"Input:  {test['latex']}")
-        
-        # Apply fix
-        fixed = converter._fix_ocr_number_errors(test['latex'])
-        print(f"Fixed:  {fixed}")
-        
-        # Check expected fixes
-        checks_passed = []
-        
-        for expected in test['expected_fixes']:
-            if expected in fixed:
-                checks_passed.append(f"✓ Contains '{expected}'")
-            else:
-                checks_passed.append(f"✗ Missing '{expected}'")
-                all_passed = False
-        
-        for should_not in test['should_not_have']:
-            if should_not not in fixed:
-                checks_passed.append(f"✓ Removed '{should_not}'")
-            else:
-                checks_passed.append(f"✗ Still has '{should_not}'")
-                all_passed = False
-        
-        for check in checks_passed:
-            print(f"  {check}")
-    
-    return all_passed
-
-
-def test_mathml_quality():
-    """Test that fixed LaTeX produces better MathML."""
-    
-    print("\n" + "=" * 80)
-    print("Testing MathML Quality After OCR Fix")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    # The problematic LaTeX from the error
-    latex = r"\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}"
-    
-    print(f"\nOriginal LaTeX: {latex}")
-    
-    # Convert to MathML
-    result = converter.convert_to_formats(f"${latex}$")
-    mathml = result.mathml
-    
-    print(f"\nMathML length: {len(mathml)} chars")
-    
-    # Check quality indicators
-    print("\nQuality checks:")
-    print("-" * 80)
-    
-    checks = {
-        "No separate digits for decimals": "<mn>22.2</mn>" in mathml or "22.2" in mathml,
-        "No dot as identifier": "<mi>.</mi>" not in mathml,
-        "Properly formatted numbers": "<mn>30.4</mn>" in mathml or "30.4" in mathml,
-        "Has namespace": 'xmlns=' in mathml,
-        "Display block": 'display="block"' in mathml,
-    }
-    
-    all_passed = True
-    
-    for check, passed in checks.items():
-        status = "✓" if passed else "✗"
-        print(f"{status} {check}")
-        if not passed:
-            all_passed = False
-    
-    # Show a preview
-    print("\n" + "-" * 80)
-    print("MathML preview:")
-    print("-" * 80)
-    print(mathml[:400])
-    if len(mathml) > 400:
-        print("...")
-    
-    return all_passed
-
-
-def test_edge_cases():
-    """Test edge cases for OCR number fixing."""
-    
-    print("\n" + "=" * 80)
-    print("Testing Edge Cases")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    test_cases = [
-        {
-            "name": "Should NOT merge: arithmetic",
-            "input": r"2 + 3 = 5",
-            "should_stay": "2 + 3 = 5",
-        },
-        {
-            "name": "Should NOT merge: multiplication",
-            "input": r"2 \times 3",
-            "should_stay": r"2 \times 3",
-        },
-        {
-            "name": "Should merge: decimal at end",
-            "input": r"x = 1 2. 5",
-            "should_become": "12.5",
-        },
-        {
-            "name": "Should merge: multiple spaces",
-            "input": r"n =  1  2  .  3  4",
-            "should_have": "12.34",
-        },
-        {
-            "name": "Complex: mixed scenarios",
-            "input": r"a = 1 2. 3 + 4 5. 6 - 7",
-            "should_have": ["12.3", "45.6", "- 7"],
-        },
-    ]
-    
-    all_passed = True
-    
-    for test in test_cases:
-        print(f"\n{test['name']}")
-        print(f"  Input:  {test['input']}")
-        
-        fixed = converter._fix_ocr_number_errors(test['input'])
-        print(f"  Output: {fixed}")
-        
-        if 'should_stay' in test:
-            if fixed == test['should_stay']:
-                print(f"  ✓ Correctly unchanged")
-            else:
-                print(f"  ✗ Should stay '{test['should_stay']}' but got '{fixed}'")
-                all_passed = False
-        
-        if 'should_become' in test:
-            if test['should_become'] in fixed:
-                print(f"  ✓ Contains '{test['should_become']}'")
-            else:
-                print(f"  ✗ Should contain '{test['should_become']}'")
-                all_passed = False
-        
-        if 'should_have' in test:
-            for expected in test['should_have']:
-                if expected in fixed:
-                    print(f"  ✓ Contains '{expected}'")
-                else:
-                    print(f"  ✗ Should contain '{expected}'")
-                    all_passed = False
-    
-    return all_passed
-
-
-def compare_before_after():
-    """Compare MathML before and after OCR fix."""
-    
-    print("\n" + "=" * 80)
-    print("Before/After Comparison")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    # Simulate OCR error
-    ocr_latex = r"\gamma = 2 2. 2, c = 3 0. 4"
-    correct_latex = r"\gamma = 22.2, c = 30.4"
-    
-    print(f"\nOCR LaTeX:     {ocr_latex}")
-    print(f"Correct LaTeX: {correct_latex}")
-    
-    # Convert both
-    ocr_result = converter.convert_to_formats(f"${ocr_latex}$")
-    correct_result = converter.convert_to_formats(f"${correct_latex}$")
-    
-    print("\n" + "-" * 80)
-    print("MathML comparison:")
-    print("-" * 80)
-    
-    # Check if they produce similar quality output
-    ocr_has_decimal = "22.2" in ocr_result.mathml
-    correct_has_decimal = "22.2" in correct_result.mathml
-    
-    ocr_has_dot_error = "<mi>.</mi>" in ocr_result.mathml
-    correct_has_dot_error = "<mi>.</mi>" in correct_result.mathml
-    
-    print(f"OCR output has proper decimals: {'✓' if ocr_has_decimal else '✗'}")
-    print(f"Correct output has proper decimals: {'✓' if correct_has_decimal else '✗'}")
-    print(f"OCR output has dot errors: {'✗ Yes' if ocr_has_dot_error else '✓ No'}")
-    print(f"Correct output has dot errors: {'✗ Yes' if correct_has_dot_error else '✓ No'}")
-    
-    if ocr_has_decimal and not ocr_has_dot_error:
-        print("\n✓ OCR fix is working! Output quality matches correct input.")
-        return True
-    else:
-        print("\n✗ OCR fix may need improvement.")
-        return False
-
-
-if __name__ == "__main__":
-    print("OCR Number Error Fix Test Suite\n")
-    
-    try:
-        test1 = test_ocr_number_errors()
-        test2 = test_mathml_quality()
-        test3 = test_edge_cases()
-        test4 = compare_before_after()
-        
-        print("\n" + "=" * 80)
-        print("SUMMARY")
-        print("=" * 80)
-        
-        results = [
-            ("OCR error fixes", test1),
-            ("MathML quality", test2),
-            ("Edge cases", test3),
-            ("Before/after comparison", test4),
-        ]
-        
-        for name, passed in results:
-            status = "✓ PASS" if passed else "✗ FAIL"
-            print(f"{status}: {name}")
-        
-        all_passed = all(r[1] for r in results)
-        
-        print("\n" + "-" * 80)
-        
-        if all_passed:
-            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
-            print("\nOCR number errors are being fixed automatically!")
-            print("Examples:")
-            print("  • '2 2. 2' → '22.2'")
-            print("  • '3 0. 4' → '30.4'")
-            print("  • '1 5 0' → '150'")
-        else:
-            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
-        
-        print("=" * 80)
-        
-    except KeyboardInterrupt:
-        print("\n\nTests interrupted")
-    except Exception as e:
-        print(f"\n\nTest error: {e}")
-        import traceback
-        traceback.print_exc()
--- a/test_ocr_pipeline.py
+++ b/test_ocr_pipeline.py
@@ -1,265 +0,0 @@
-"""Test OCR number error fixing in the complete pipeline."""
-
-from app.services.ocr_service import _postprocess_markdown
-
-
-def test_ocr_postprocessing():
-    """Test that OCR postprocessing fixes number errors."""
-    
-    print("=" * 80)
-    print("Testing OCR Postprocessing Pipeline")
-    print("=" * 80)
-    
-    # Simulate OCR output with common errors
-    test_cases = [
-        {
-            "name": "Inline formula with decimal errors",
-            "input": r"The value is $\gamma = 2 2. 2$ and $c = 3 0. 4$.",
-            "should_have": ["22.2", "30.4"],
-            "should_not_have": ["2 2", "3 0"],
-        },
-        {
-            "name": "Display formula with decimal errors",
-            "input": r"$$\phi = 2 5. 4 ^ {\circ}$$",
-            "should_have": ["25.4"],
-            "should_not_have": ["2 5"],
-        },
-        {
-            "name": "Multiple formulas",
-            "input": r"$a = 1 2. 5$, $b = 9. 8 7$, and $c = 1 5 0$",
-            "should_have": ["12.5", "9.87", "150"],
-            "should_not_have": ["1 2", "9. 8", "1 5"],
-        },
-        {
-            "name": "Mixed content (text + formulas)",
-            "input": r"The equation $x = 3. 14$ is approximately pi. Then $y = 2 7. 3$.",
-            "should_have": ["3.14", "27.3"],
-            "should_not_have": ["3. 14", "2 7"],
-        },
-        {
-            "name": "Normal arithmetic (should not be affected)",
-            "input": r"$2 + 3 = 5$ and $10 - 7 = 3$",
-            "should_stay": True,
-        },
-    ]
-    
-    all_passed = True
-    
-    for i, test in enumerate(test_cases, 1):
-        print(f"\nTest {i}: {test['name']}")
-        print("-" * 80)
-        print(f"Input:  {test['input']}")
-        
-        # Apply postprocessing
-        output = _postprocess_markdown(test['input'])
-        print(f"Output: {output}")
-        
-        # Check results
-        if 'should_have' in test:
-            for expected in test['should_have']:
-                if expected in output:
-                    print(f"  ✓ Contains '{expected}'")
-                else:
-                    print(f"  ✗ Missing '{expected}'")
-                    all_passed = False
-        
-        if 'should_not_have' in test:
-            for unexpected in test['should_not_have']:
-                if unexpected not in output:
-                    print(f"  ✓ Removed '{unexpected}'")
-                else:
-                    print(f"  ✗ Still has '{unexpected}'")
-                    all_passed = False
-        
-        if test.get('should_stay'):
-            if test['input'] == output:
-                print(f"  ✓ Correctly unchanged")
-            else:
-                print(f"  ✗ Should not change but did")
-                all_passed = False
-    
-    return all_passed
-
-
-def test_real_world_case():
-    """Test the exact case from the error report."""
-    
-    print("\n" + "=" * 80)
-    print("Testing Real-World Error Case")
-    print("=" * 80)
-    
-    # The exact input from the error report
-    ocr_output = r"$$\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}$$"
-    
-    print(f"\nOCR Output (with errors):")
-    print(f"  {ocr_output}")
-    
-    # Apply postprocessing
-    fixed = _postprocess_markdown(ocr_output)
-    
-    print(f"\nAfter Postprocessing:")
-    print(f"  {fixed}")
-    
-    # Check if fixed
-    checks = {
-        "Has 22.2": "22.2" in fixed,
-        "Has 30.4": "30.4" in fixed,
-        "Has 25.4": "25.4" in fixed,
-        "No '2 2'": "2 2" not in fixed,
-        "No '3 0'": "3 0" not in fixed,
-        "No '2 5'": "2 5" not in fixed,
-    }
-    
-    print("\nQuality Checks:")
-    print("-" * 80)
-    
-    all_passed = True
-    for check, passed in checks.items():
-        status = "✓" if passed else "✗"
-        print(f"{status} {check}")
-        if not passed:
-            all_passed = False
-    
-    if all_passed:
-        print("\n✓ Real-world case fixed successfully!")
-    else:
-        print("\n✗ Real-world case still has issues")
-    
-    return all_passed
-
-
-def test_edge_cases():
-    """Test edge cases to ensure we don't break valid formulas."""
-    
-    print("\n" + "=" * 80)
-    print("Testing Edge Cases")
-    print("=" * 80)
-    
-    test_cases = [
-        {
-            "name": "Arithmetic operations",
-            "input": r"$2 + 3 = 5$ and $10 - 7 = 3$",
-            "should_stay": True,
-        },
-        {
-            "name": "Multiplication",
-            "input": r"$2 \times 3 = 6$",
-            "should_stay": True,
-        },
-        {
-            "name": "Exponents",
-            "input": r"$x ^ 2 + y ^ 2 = r ^ 2$",
-            "should_stay": True,
-        },
-        {
-            "name": "Fractions",
-            "input": r"$\frac{1}{2} + \frac{3}{4}$",
-            "should_stay": True,
-        },
-        {
-            "name": "Subscripts",
-            "input": r"$x _ 1 + x _ 2$",
-            "should_stay": True,
-        },
-    ]
-    
-    all_passed = True
-    
-    for test in test_cases:
-        print(f"\n{test['name']}")
-        print(f"  Input:  {test['input']}")
-        
-        output = _postprocess_markdown(test['input'])
-        print(f"  Output: {output}")
-        
-        if test.get('should_stay'):
-            # For these cases, we allow some whitespace changes but structure should stay
-            if output.replace(" ", "") == test['input'].replace(" ", ""):
-                print(f"  ✓ Structure preserved")
-            else:
-                print(f"  ✗ Structure changed unexpectedly")
-                all_passed = False
-    
-    return all_passed
-
-
-def test_performance():
-    """Test performance with large content."""
-    
-    print("\n" + "=" * 80)
-    print("Testing Performance")
-    print("=" * 80)
-    
-    # Create a large markdown with many formulas
-    large_content = ""
-    for i in range(100):
-        large_content += f"Formula {i}: $x = {i} {i}. {i}$ and $y = {i*2} {i*2}. {i*2}$\n"
-    
-    print(f"\nContent size: {len(large_content)} characters")
-    print(f"Number of formulas: ~200")
-    
-    import time
-    start = time.time()
-    output = _postprocess_markdown(large_content)
-    elapsed = time.time() - start
-    
-    print(f"Processing time: {elapsed*1000:.2f}ms")
-    
-    if elapsed < 1.0:
-        print("✓ Performance is acceptable (< 1s)")
-        return True
-    else:
-        print("✗ Performance may need optimization")
-        return False
-
-
-if __name__ == "__main__":
-    print("OCR Pipeline Integration Test Suite\n")
-    
-    try:
-        test1 = test_ocr_postprocessing()
-        test2 = test_real_world_case()
-        test3 = test_edge_cases()
-        test4 = test_performance()
-        
-        print("\n" + "=" * 80)
-        print("SUMMARY")
-        print("=" * 80)
-        
-        results = [
-            ("OCR postprocessing", test1),
-            ("Real-world case", test2),
-            ("Edge cases", test3),
-            ("Performance", test4),
-        ]
-        
-        for name, passed in results:
-            status = "✓ PASS" if passed else "✗ FAIL"
-            print(f"{status}: {name}")
-        
-        all_passed = all(r[1] for r in results)
-        
-        print("\n" + "-" * 80)
-        
-        if all_passed:
-            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
-            print("\nOCR number error fixing is integrated into the pipeline!")
-            print("\nFlow:")
-            print("  1. OCR recognizes image → produces Markdown with LaTeX")
-            print("  2. _postprocess_markdown() fixes number errors")
-            print("  3. Clean LaTeX is used for all conversions")
-            print("\nBenefits:")
-            print("  • Fixed once at the source")
-            print("  • All output formats benefit (MathML, MML, OMML)")
-            print("  • Better performance (no repeated fixes)")
-        else:
-            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
-        
-        print("=" * 80)
-        
-    except KeyboardInterrupt:
-        print("\n\nTests interrupted")
-    except Exception as e:
-        print(f"\n\nTest error: {e}")
-        import traceback
-        traceback.print_exc()
--- a/test_omml_api.py
+++ b/test_omml_api.py
@@ -1,112 +0,0 @@
-"""Test script for OMML conversion API endpoint."""
-
-import requests
-import json
-
-
-def test_latex_to_omml():
-    """Test the /convert/latex-to-omml endpoint."""
-    
-    # Test cases
-    test_cases = [
-        {
-            "name": "Simple fraction",
-            "latex": "\\frac{a}{b}",
-        },
-        {
-            "name": "Quadratic formula",
-            "latex": "x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}",
-        },
-        {
-            "name": "Integral",
-            "latex": "\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}",
-        },
-        {
-            "name": "Matrix",
-            "latex": "\\begin{matrix} a & b \\\\ c & d \\end{matrix}",
-        },
-    ]
-    
-    base_url = "http://localhost:8000/api/v1/convert/latex-to-omml"
-    
-    print("Testing OMML Conversion API")
-    print("=" * 80)
-    
-    for i, test_case in enumerate(test_cases, 1):
-        print(f"\nTest {i}: {test_case['name']}")
-        print("-" * 80)
-        print(f"LaTeX: {test_case['latex']}")
-        
-        try:
-            response = requests.post(
-                base_url,
-                json={"latex": test_case["latex"]},
-                headers={"Content-Type": "application/json"},
-                timeout=10,
-            )
-            
-            if response.status_code == 200:
-                result = response.json()
-                omml = result.get("omml", "")
-                
-                print(f"✓ Status: {response.status_code}")
-                print(f"OMML length: {len(omml)} characters")
-                print(f"OMML preview: {omml[:150]}...")
-                
-            else:
-                print(f"✗ Status: {response.status_code}")
-                print(f"Error: {response.text}")
-                
-        except requests.exceptions.RequestException as e:
-            print(f"✗ Request failed: {e}")
-        except Exception as e:
-            print(f"✗ Error: {e}")
-    
-    print("\n" + "=" * 80)
-
-
-def test_invalid_input():
-    """Test error handling with invalid input."""
-    
-    print("\nTesting Error Handling")
-    print("=" * 80)
-    
-    base_url = "http://localhost:8000/api/v1/convert/latex-to-omml"
-    
-    # Empty LaTeX
-    print("\nTest: Empty LaTeX")
-    response = requests.post(
-        base_url,
-        json={"latex": ""},
-        headers={"Content-Type": "application/json"},
-    )
-    print(f"Status: {response.status_code}")
-    print(f"Response: {response.json()}")
-    
-    # Missing LaTeX field
-    print("\nTest: Missing LaTeX field")
-    response = requests.post(
-        base_url,
-        json={},
-        headers={"Content-Type": "application/json"},
-    )
-    print(f"Status: {response.status_code}")
-    print(f"Response: {response.json()}")
-    
-    print("\n" + "=" * 80)
-
-
-if __name__ == "__main__":
-    print("OMML API Test Suite")
-    print("Make sure the API server is running on http://localhost:8000")
-    print()
-    
-    try:
-        test_latex_to_omml()
-        test_invalid_input()
-        print("\n✓ All tests completed!")
-        
-    except KeyboardInterrupt:
-        print("\n\n✗ Tests interrupted by user")
-    except Exception as e:
-        print(f"\n✗ Test suite failed: {e}")
--- a/test_omml_preprocessing.py
+++ b/test_omml_preprocessing.py
@@ -1,218 +0,0 @@
-"""Comprehensive test for OMML conversion with preprocessing."""
-
-from app.services.converter import Converter
-
-
-def test_case_1_array_with_spaces():
-    """Test: Array with spaces in column specifier (the original issue)."""
-    print("\n" + "=" * 80)
-    print("Test 1: Array with spaces in column specifier")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    # The problematic LaTeX from the error
-    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""
-    
-    print(f"LaTeX length: {len(latex)} chars")
-    print(f"Preview: {latex[:100]}...")
-    
-    try:
-        omml = converter.convert_to_omml(latex)
-        print(f"\n✓ SUCCESS: Converted to OMML")
-        print(f"OMML length: {len(omml)} chars")
-        
-        if "oMath" in omml:
-            print("✓ Valid OMML structure detected")
-        
-        # Check preprocessing worked
-        preprocessed = converter._preprocess_formula_for_omml(latex)
-        if "{c c c c}" not in preprocessed and "{cccc}" in preprocessed:
-            print("✓ Array column specifiers fixed: '{c c c c}' → '{cccc}'")
-        
-        return True
-        
-    except Exception as e:
-        print(f"\n✗ FAILED: {e}")
-        return False
-
-
-def test_case_2_vmatrix():
-    """Test: vmatrix environment conversion."""
-    print("\n" + "=" * 80)
-    print("Test 2: vmatrix environment")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    latex = r"\begin{vmatrix} a & b \\ c & d \end{vmatrix}"
-    print(f"LaTeX: {latex}")
-    
-    try:
-        omml = converter.convert_to_omml(latex)
-        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
-        
-        # Check if vmatrix was converted
-        preprocessed = converter._preprocess_formula_for_omml(latex)
-        if "vmatrix" not in preprocessed and r"\left|" in preprocessed:
-            print("✓ vmatrix converted to \\left| ... \\right|")
-        
-        return True
-        
-    except Exception as e:
-        print(f"✗ FAILED: {e}")
-        return False
-
-
-def test_case_3_cases_environment():
-    """Test: cases environment conversion."""
-    print("\n" + "=" * 80)
-    print("Test 3: cases environment")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    latex = r"f(x) = \begin{cases} x^2 & x \geq 0 \\ -x & x < 0 \end{cases}"
-    print(f"LaTeX: {latex}")
-    
-    try:
-        omml = converter.convert_to_omml(latex)
-        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
-        
-        # Check if cases was converted to array
-        preprocessed = converter._preprocess_formula_for_omml(latex)
-        if "cases" not in preprocessed and "array" in preprocessed:
-            print("✓ cases converted to array environment")
-        
-        return True
-        
-    except Exception as e:
-        print(f"✗ FAILED: {e}")
-        return False
-
-
-def test_case_4_aligned_environment():
-    """Test: aligned environment conversion."""
-    print("\n" + "=" * 80)
-    print("Test 4: aligned environment")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    latex = r"\begin{aligned} x + y &= 5 \\ 2x - y &= 1 \end{aligned}"
-    print(f"LaTeX: {latex}")
-    
-    try:
-        omml = converter.convert_to_omml(latex)
-        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
-        
-        # Check if aligned was converted
-        preprocessed = converter._preprocess_formula_for_omml(latex)
-        if "aligned" not in preprocessed and "array" in preprocessed:
-            print("✓ aligned converted to array environment")
-        if "&" not in preprocessed or preprocessed.count("&") < latex.count("&"):
-            print("✓ Alignment markers removed")
-        
-        return True
-        
-    except Exception as e:
-        print(f"✗ FAILED: {e}")
-        return False
-
-
-def test_case_5_simple_formula():
-    """Test: Simple formula (should work without preprocessing)."""
-    print("\n" + "=" * 80)
-    print("Test 5: Simple formula")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    latex = r"x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}"
-    print(f"LaTeX: {latex}")
-    
-    try:
-        omml = converter.convert_to_omml(latex)
-        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
-        return True
-        
-    except Exception as e:
-        print(f"✗ FAILED: {e}")
-        return False
-
-
-def test_case_6_nested_structures():
-    """Test: Nested structures with multiple issues."""
-    print("\n" + "=" * 80)
-    print("Test 6: Nested structures")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    latex = r"\left\{ \begin{array}{l c} \begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\ f(x) = \begin{cases} 1 & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{step function} \end{array} \right."
-    print(f"LaTeX: {latex}")
-    
-    try:
-        omml = converter.convert_to_omml(latex)
-        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
-        
-        preprocessed = converter._preprocess_formula_for_omml(latex)
-        print("\nPreprocessing applied:")
-        if "vmatrix" not in preprocessed:
-            print("  ✓ vmatrix converted")
-        if "cases" not in preprocessed:
-            print("  ✓ cases converted")
-        if "{l c}" not in preprocessed and "{lc}" in preprocessed:
-            print("  ✓ Array specifiers fixed")
-        
-        return True
-        
-    except Exception as e:
-        print(f"✗ FAILED: {e}")
-        return False
-
-
-if __name__ == "__main__":
-    print("=" * 80)
-    print("OMML CONVERSION TEST SUITE")
-    print("Testing preprocessing and conversion")
-    print("=" * 80)
-    
-    results = []
-    
-    try:
-        results.append(("Simple formula", test_case_5_simple_formula()))
-        results.append(("Array with spaces", test_case_1_array_with_spaces()))
-        results.append(("vmatrix", test_case_2_vmatrix()))
-        results.append(("cases", test_case_3_cases_environment()))
-        results.append(("aligned", test_case_4_aligned_environment()))
-        results.append(("Nested structures", test_case_6_nested_structures()))
-        
-        # Summary
-        print("\n" + "=" * 80)
-        print("TEST SUMMARY")
-        print("=" * 80)
-        
-        passed = sum(1 for _, result in results if result)
-        total = len(results)
-        
-        for name, result in results:
-            status = "✓ PASS" if result else "✗ FAIL"
-            print(f"{status}: {name}")
-        
-        print("\n" + "-" * 80)
-        print(f"Total: {passed}/{total} tests passed")
-        
-        if passed == total:
-            print("\n✓✓✓ ALL TESTS PASSED ✓✓✓")
-        else:
-            print(f"\n✗✗✗ {total - passed} TESTS FAILED ✗✗✗")
-        
-        print("=" * 80)
-        
-    except KeyboardInterrupt:
-        print("\n\nTests interrupted by user")
-    except Exception as e:
-        print(f"\n\nTest suite error: {e}")
-        import traceback
-        traceback.print_exc()
--- a/test_word_mathml.py
+++ b/test_word_mathml.py
@@ -1,202 +0,0 @@
-"""Test Word-compatible MathML generation."""
-
-from app.services.converter import Converter
-
-
-def test_mathml_word_compatibility():
-    """Test that generated MathML is Word-compatible."""
-    
-    converter = Converter()
-    
-    print("=" * 80)
-    print("Testing Word-Compatible MathML Generation")
-    print("=" * 80)
-    
-    # Test case: Matrix with determinant (the problematic example)
-    latex = r"""\left| \begin{array}{cccc} a_{11} & a_{12} & \dots & a_{1n} \\ \vdots & \vdots & & \vdots \\ a_{i1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a_{n1} & a_{n2} & \dots & a_{nn} \end{array} \right|"""
-    
-    print(f"\nLaTeX: {latex[:80]}...")
-    print("\n" + "-" * 80)
-    
-    # Convert to formats
-    result = converter.convert_to_formats(f"$${latex}$$")
-    
-    if not result.mathml:
-        print("✗ No MathML generated")
-        return False
-    
-    mathml = result.mathml
-    
-    print("Checking Word compatibility features:")
-    print("-" * 80)
-    
-    # Check 1: Display attribute
-    if 'display="block"' in mathml:
-        print("✓ Has display='block' attribute")
-    else:
-        print("✗ Missing or wrong display attribute")
-        print(f"  Found: {mathml[:100]}...")
-    
-    # Check 2: No Unicode entities for common symbols
-    unicode_issues = []
-    problematic_entities = ['&#x0002B;', '&#x02026;', '&#x022EE;', '&#x0003D;', '&#x0007C;']
-    for entity in problematic_entities:
-        if entity in mathml:
-            unicode_issues.append(entity)
-    
-    if unicode_issues:
-        print(f"✗ Contains Unicode entities: {unicode_issues}")
-    else:
-        print("✓ No problematic Unicode entities")
-    
-    # Check 3: Uses mfenced for brackets (Word-friendly)
-    if '<mfenced' in mathml or '<mo fence="true"' in mathml or 'stretchy="true"' in mathml:
-        print("✓ Uses fence elements")
-    else:
-        print("? No fence elements found (might be OK)")
-    
-    # Check 4: Has proper namespace
-    if 'xmlns="http://www.w3.org/1998/Math/MathML"' in mathml:
-        print("✓ Has MathML namespace")
-    else:
-        print("✗ Missing MathML namespace")
-    
-    # Show preview
-    print("\n" + "-" * 80)
-    print("MathML Preview (first 500 chars):")
-    print("-" * 80)
-    print(mathml[:500])
-    if len(mathml) > 500:
-        print("...")
-    
-    print("\n" + "-" * 80)
-    print(f"Total length: {len(mathml)} characters")
-    
-    # Check if this looks like Pandoc-generated MathML
-    if 'mfenced' in mathml or 'columnalign' in mathml:
-        print("✓ Appears to be Pandoc-generated (good for Word)")
-    elif 'stretchy' in mathml and 'fence' in mathml:
-        print("✓ Uses standard fence attributes")
-    else:
-        print("? MathML structure unclear")
-    
-    return True
-
-
-def test_simple_formulas():
-    """Test simple formulas for Word compatibility."""
-    
-    converter = Converter()
-    
-    print("\n" + "=" * 80)
-    print("Testing Simple Formulas")
-    print("=" * 80)
-    
-    test_cases = [
-        ("Fraction", r"\frac{a}{b}"),
-        ("Square root", r"\sqrt{x^2 + y^2}"),
-        ("Summation", r"\sum_{i=1}^{n} i"),
-        ("Equation", r"E = mc^2"),
-        ("Matrix", r"\begin{pmatrix} a & b \\ c & d \end{pmatrix}"),
-    ]
-    
-    all_passed = True
-    
-    for name, latex in test_cases:
-        print(f"\n{name}: ${latex}$")
-        
-        try:
-            result = converter.convert_to_formats(f"${latex}$")
-            mathml = result.mathml
-            
-            # Quick checks
-            checks = [
-                ('display="block"' in mathml, "display=block"),
-                ('&#x0002B;' not in mathml, "no +entity"),
-                ('&#x0003D;' not in mathml, "no =entity"),
-                ('xmlns=' in mathml, "namespace"),
-            ]
-            
-            status = "✓" if all(check[0] for check in checks) else "✗"
-            failed_checks = [check[1] for check in checks if not check[0]]
-            
-            print(f"  {status} Length: {len(mathml)} chars", end="")
-            if failed_checks:
-                print(f" | Issues: {', '.join(failed_checks)}")
-                all_passed = False
-            else:
-                print(" | All checks passed")
-                
-        except Exception as e:
-            print(f"  ✗ Error: {e}")
-            all_passed = False
-    
-    return all_passed
-
-
-def compare_with_reference():
-    """Compare our MathML with reference Word-compatible MathML."""
-    
-    print("\n" + "=" * 80)
-    print("Comparison with Reference MathML")
-    print("=" * 80)
-    
-    converter = Converter()
-    
-    # Simple matrix example
-    latex = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
-    
-    result = converter.convert_to_formats(f"$${latex}$$")
-    our_mathml = result.mathml
-    
-    print("\nOur MathML structure:")
-    print("-" * 80)
-    
-    # Analyze structure
-    features = {
-        "mfenced": "<mfenced" in our_mathml,
-        "mo fence": '<mo fence="' in our_mathml or '<mo stretchy="true"' in our_mathml,
-        "mtable": "<mtable" in our_mathml,
-        "display block": 'display="block"' in our_mathml,
-        "unicode entities": any(f"&#x{x};" in our_mathml for x in ["0002B", "0003D", "0007C"]),
-    }
-    
-    print("Features:")
-    for feature, present in features.items():
-        status = "✓" if present != (feature == "unicode entities") else "✗"
-        print(f"  {status} {feature}: {present}")
-    
-    print(f"\nLength: {len(our_mathml)} characters")
-    print(f"Preview:\n{our_mathml[:300]}...")
-    
-    return not features["unicode entities"]
-
-
-if __name__ == "__main__":
-    print("Word-Compatible MathML Test Suite\n")
-    
-    try:
-        test1 = test_mathml_word_compatibility()
-        test2 = test_simple_formulas()
-        test3 = compare_with_reference()
-        
-        print("\n" + "=" * 80)
-        print("SUMMARY")
-        print("=" * 80)
-        
-        if test1 and test2 and test3:
-            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
-            print("\nMathML should be Word-compatible!")
-            print("Try copying the mathml output and pasting into Word.")
-        else:
-            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
-            print("\nMathML may not be fully Word-compatible.")
-        
-        print("=" * 80)
-        
-    except KeyboardInterrupt:
-        print("\n\nTests interrupted")
-    except Exception as e:
-        print(f"\n\nTest error: {e}")
-        import traceback
-        traceback.print_exc()