refact: rm test file

This commit is contained in:
liuyuanchuang
2026-02-04 17:33:42 +08:00
parent cd790231ec
commit 808d29bd45
12 changed files with 0 additions and 1995 deletions

View File

@@ -1,102 +0,0 @@
"""Test script for array column specifier fix."""
from app.services.converter import Converter
def test_array_specifier_fix():
    """Check that spaces inside array column specifiers are removed.

    The formula is passed through ``Converter._preprocess_formula_for_omml``
    and then through ``Converter.convert_to_omml``; progress is printed.

    Returns:
        bool: True when both steps succeed. False when the preprocessed
        formula still contains ``{c c c c}`` or when the OMML conversion
        raises.
    """
    converter = Converter()
    # The problematic LaTeX from the error
    latex_formula = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""
    print("Testing array column specifier fix")
    print("=" * 80)
    print(f"\nOriginal LaTeX (first 200 chars):\n{latex_formula[:200]}...")

    # Step 1: preprocessing must collapse "{c c c c}" into "{cccc}".
    print("\n" + "-" * 80)
    print("Step 1: Preprocessing")
    preprocessed = converter._preprocess_formula_for_omml(latex_formula)
    spaced_spec = "{c c c c}"
    compact_spec = "{cccc}"
    passed = True
    if spaced_spec in preprocessed:
        start = preprocessed.find(spaced_spec)
        print("✗ FAILED: Spaces not removed from array specifiers")
        print(f"Found: {preprocessed[start:start + 10]}")
        # Remember the failure so the final verdict reflects it.
        passed = False
    elif compact_spec in preprocessed:
        print("✓ SUCCESS: Spaces removed from array specifiers")
        # Plain names in the replacement fields: a backslash-escaped quote
        # inside an f-string expression is a SyntaxError.
        print(f"Changed '{spaced_spec}' → '{compact_spec}'")
    else:
        print("? Could not find array specifier in preprocessed output")

    # Step 2: the full OMML conversion must not raise.
    print("\n" + "-" * 80)
    print("Step 2: OMML Conversion")
    try:
        omml = converter.convert_to_omml(latex_formula)
        print("✓ SUCCESS: OMML conversion completed")
        print(f"OMML length: {len(omml)} characters")
        print(f"OMML preview (first 300 chars):\n{omml[:300]}...")
        # Check if it contains oMath element
        if "oMath" in omml:
            print("\n✓ Valid OMML: Contains oMath element")
        else:
            print("\n✗ WARNING: OMML might be incomplete (no oMath element found)")
    except Exception as e:
        print("✗ FAILED: OMML conversion error")
        print(f"Error: {e}")
        return False

    print("\n" + "=" * 80)
    if passed:
        print("✓ All tests passed!")
    else:
        print("✗ Some tests failed")
    return passed
def test_simple_array():
    """Convert a small array whose column specifier contains spaces.

    Returns:
        bool: True when ``convert_to_omml`` completes and its result can be
        previewed, False when any exception is raised along the way.
    """
    converter = Converter()
    print("\nTesting simple array")
    print("=" * 80)

    # Simple array with spaces in column specifier
    latex_formula = r"\begin{array}{c c c} a & b & c \\ d & e & f \end{array}"
    print(f"LaTeX: {latex_formula}")

    succeeded = False
    try:
        converted = converter.convert_to_omml(latex_formula)
        print(f"✓ SUCCESS: Converted to OMML ({len(converted)} chars)")
        print(f"Preview: {converted[:200]}...")
        succeeded = True
    except Exception as err:
        print(f"✗ FAILED: {err}")
    return succeeded
if __name__ == "__main__":
    # Script entry point: run both checks and print one combined verdict.
    print("Array Column Specifier Fix Test Suite\n")
    try:
        simple_ok = test_simple_array()
        specifier_ok = test_array_specifier_fix()
        if simple_ok and specifier_ok:
            verdict = "✓✓✓ ALL TESTS PASSED ✓✓✓"
        else:
            verdict = "✗✗✗ SOME TESTS FAILED ✗✗✗"
        print("\n" + "=" * 80)
        print(verdict)
        print("=" * 80)
    except KeyboardInterrupt:
        print("\n\nTests interrupted by user")
    except Exception as err:
        print(f"\n\nTest suite error: {err}")
        import traceback
        traceback.print_exc()

View File

@@ -1,254 +0,0 @@
"""Comprehensive test for array column specifier fix in all conversion paths."""
from app.services.converter import Converter
def test_problematic_array():
    """Run the formula from the error log through every conversion path.

    Three stages are exercised in order: preprocessing, MathML conversion via
    ``convert_to_formats`` and OMML conversion via ``convert_to_omml``.

    Returns:
        bool: False as soon as a stage fails (``{c c c c}`` still present
        after preprocessing, an empty MathML/OMML result, or an exception
        during conversion); True otherwise.
    """
    print("=" * 80)
    print("Testing Problematic Array (from error log)")
    print("=" * 80)
    converter = Converter()
    # The exact LaTeX from the error log
    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""
    spaced_spec = '{c c c c}'
    compact_spec = '{cccc}'
    print(f"\nLaTeX length: {len(latex)} characters")
    # Plain names in the replacement fields: a backslash-escaped quote inside
    # an f-string expression is a SyntaxError.
    print(f"Contains '{spaced_spec}': {spaced_spec in latex}")

    # Test 1: Preprocessing
    print("\n" + "-" * 80)
    print("Test 1: Preprocessing")
    print("-" * 80)
    preprocessed = converter._preprocess_formula_for_conversion(latex)
    if spaced_spec in preprocessed:
        start = preprocessed.find(spaced_spec)
        print("✗ FAILED: Spaces NOT removed from array specifiers")
        print(f" Still found: {preprocessed[start:start + 15]}")
        return False
    elif compact_spec in preprocessed:
        print("✓ SUCCESS: Spaces removed from array specifiers")
        print(f" '{spaced_spec}' → '{compact_spec}'")
    else:
        print("? WARNING: Could not verify specifier fix")

    # Test 2: MathML Conversion
    print("\n" + "-" * 80)
    print("Test 2: MathML Conversion (via convert_to_formats)")
    print("-" * 80)
    try:
        result = converter.convert_to_formats(f"$${latex}$$")
        if result.mathml:
            print(f"✓ SUCCESS: MathML generated ({len(result.mathml)} chars)")
            # Check for Word compatibility
            if 'display="block"' in result.mathml:
                print(" ✓ Has display='block' (Word-friendly)")
            # NOTE(review): '+' and '=' occur literally in this formula, so
            # this check looks like it was meant to search for encoded
            # entities instead - confirm the intended patterns.
            if '+' not in result.mathml and '=' not in result.mathml:
                print(" ✓ No problematic Unicode entities")
            print(f"\n MathML preview:\n {result.mathml[:200]}...")
        else:
            print("✗ FAILED: No MathML generated")
            return False
    except Exception as e:
        print(f"✗ FAILED: MathML conversion error: {e}")
        return False

    # Test 3: OMML Conversion
    print("\n" + "-" * 80)
    print("Test 3: OMML Conversion")
    print("-" * 80)
    try:
        omml = converter.convert_to_omml(latex)
        if omml:
            print(f"✓ SUCCESS: OMML generated ({len(omml)} chars)")
            if 'oMath' in omml:
                print(" ✓ Valid OMML structure")
            print(f"\n OMML preview:\n {omml[:200]}...")
        else:
            print("✗ FAILED: No OMML generated")
            return False
    except Exception as e:
        print(f"✗ FAILED: OMML conversion error: {e}")
        return False

    print("\n" + "=" * 80)
    print("✓✓✓ ALL CONVERSION PATHS WORKING ✓✓✓")
    print("=" * 80)
    return True
def test_simple_arrays():
    """Convert several small arrays whose column specifiers contain spaces.

    Each case is preprocessed and converted with ``convert_to_formats``. A
    case passes when both ``mathml`` and ``mml`` are produced and none of the
    spaced specifiers remain in the preprocessed formula.

    Returns:
        bool: True when every case passes, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Testing Simple Arrays")
    print("=" * 80)
    converter = Converter()
    test_cases = [
        ("2x2 array", r"\begin{array}{c c} a & b \\ c & d \end{array}"),
        ("3x3 array", r"\begin{array}{c c c} 1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9 \end{array}"),
        ("Array with pipes", r"\left| \begin{array}{c c} a & b \\ c & d \end{array} \right|"),
        ("Mixed alignment", r"\begin{array}{l r c} left & right & center \end{array}"),
    ]
    # Spaced specifiers that must not survive preprocessing.
    spaced_specs = ("{c c}", "{c c c}", "{l r c}")
    all_passed = True
    for name, latex in test_cases:
        print(f"\n{name}")
        print("-" * 40)
        print(f"LaTeX: {latex}")
        # Check preprocessing
        preprocessed = converter._preprocess_formula_for_conversion(latex)
        has_spaces = any(spec in preprocessed for spec in spaced_specs)
        try:
            result = converter.convert_to_formats(f"${latex}$")
            if result.mathml and result.mml:
                # The marker was an empty string in both branches, so the
                # line never showed whether the case passed.
                status = "✗" if has_spaces else "✓"
                print(f"{status} MathML: {len(result.mathml)} chars, MML: {len(result.mml)} chars")
                if not has_spaces:
                    print(" ✓ Array specifiers fixed")
                else:
                    print(" ✗ Array specifiers still have spaces")
                    all_passed = False
            else:
                print("✗ Conversion failed")
                all_passed = False
        except Exception as e:
            print(f"✗ Error: {e}")
            all_passed = False
    return all_passed
def test_conversion_consistency():
    """Check preprocessing and both conversion paths on one compound formula.

    The formula combines an ``array`` with a spaced specifier, a ``vmatrix``
    and a ``cases`` environment. Substring checks are run on the preprocessed
    text, then ``convert_to_formats`` and ``convert_to_omml`` are called.

    Returns:
        bool: True when neither conversion raises and every preprocessing
        check holds, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Testing Conversion Consistency")
    print("=" * 80)
    converter = Converter()
    # Test formula with multiple issues
    latex = r"""
\left\{ \begin{array}{l c}
\begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\
\begin{cases} x & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{sign}
\end{array} \right.
""".strip()
    print("\nComplex formula with:")
    print(" - array with spaces: {l c}")
    print(" - vmatrix environment")
    print(" - cases environment")
    print("\n" + "-" * 80)
    print("Preprocessing check:")
    print("-" * 80)
    preprocessed = converter._preprocess_formula_for_conversion(latex)
    checks = {
        "Array spaces removed": '{l c}' not in preprocessed and '{lc}' in preprocessed,
        "vmatrix converted": 'vmatrix' not in preprocessed,
        "cases converted": 'cases' not in preprocessed and 'array' in preprocessed,
    }
    for check, passed in checks.items():
        # The marker was an empty string in both branches, so a failed check
        # printed exactly like a passing one.
        status = "✓" if passed else "✗"
        print(f"{status} {check}")

    print("\n" + "-" * 80)
    print("Conversion paths:")
    print("-" * 80)
    all_passed = True
    # Test MathML
    try:
        result = converter.convert_to_formats(f"$${latex}$$")
        print(f"✓ MathML: {len(result.mathml)} chars")
        print(f"✓ MML: {len(result.mml)} chars")
    except Exception as e:
        print(f"✗ MathML failed: {e}")
        all_passed = False
    # Test OMML
    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ OMML: {len(omml)} chars")
    except Exception as e:
        print(f"✗ OMML failed: {e}")
        all_passed = False
    return all_passed and all(checks.values())
if __name__ == "__main__":
    # Script entry point: run the three suites, then print a summary table.
    heavy_rule = "=" * 80
    print(heavy_rule)
    print("COMPREHENSIVE ARRAY FIX TEST SUITE")
    print("Testing all conversion paths with preprocessing")
    print(heavy_rule)
    try:
        # All suites run before the summary header is printed.
        outcomes = [
            ("Problematic array fix", test_problematic_array()),
            ("Simple arrays", test_simple_arrays()),
            ("Conversion consistency", test_conversion_consistency()),
        ]
        print("\n" + heavy_rule)
        print("FINAL SUMMARY")
        print(heavy_rule)
        for label, ok in outcomes:
            if ok:
                print(f"✓ PASS: {label}")
            else:
                print(f"✗ FAIL: {label}")
        print("\n" + "-" * 80)
        if all(ok for _, ok in outcomes):
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nThe array column specifier fix is working in ALL conversion paths:")
            print(" • MathML conversion (for Word paste)")
            print(" • MML conversion (namespaced MathML)")
            print(" • OMML conversion (Word native)")
        else:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
        print(heavy_rule)
    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as err:
        print(f"\n\nTest error: {err}")
        import traceback
        traceback.print_exc()

View File

@@ -1,57 +0,0 @@
"""Test script for converter functionality."""
from app.services.converter import Converter
def test_latex_only_conversion():
    """Print converter output for three LaTeX-only inputs and one Markdown input.

    Nothing is asserted; the output is meant to be inspected by hand.
    """
    converter = Converter()

    # Case 1: display math delimited by $$...$$
    display_src = "$$E = mc^2$$"
    display_out = converter.convert_to_formats(display_src)
    print("Test 1: Display math ($$...$$)")
    print(f"Input: {display_src}")
    print(f"LaTeX: {display_out.latex}")
    print(f"MathML: {display_out.mathml[:100]}...")
    print(f"MML: {display_out.mml[:100]}...")
    if display_out.omml:
        omml_preview = display_out.omml[:100]
    else:
        omml_preview = 'Empty'
    print(f"OMML: {omml_preview}...")
    print()

    # Case 2: inline math delimited by $...$
    inline_src = "$\\frac{a}{b}$"
    inline_out = converter.convert_to_formats(inline_src)
    print("Test 2: Inline math ($...$)")
    print(f"Input: {inline_src}")
    print(f"LaTeX: {inline_out.latex}")
    print(f"MathML: {inline_out.mathml[:100]}...")
    print()

    # Case 3: a longer formula
    complex_src = "$$\\int_{0}^{\\infty} e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}$$"
    complex_out = converter.convert_to_formats(complex_src)
    print("Test 3: Complex formula")
    print(f"Input: {complex_src}")
    print(f"LaTeX: {complex_out.latex}")
    print(f"MathML: {complex_out.mathml[:150]}...")
    print(f"OMML length: {len(complex_out.omml)}")
    print()

    # Case 4: regular Markdown that merely contains a formula
    markdown_input = "# Hello\n\nThis is a test with math: $x = 2$"
    markdown_out = converter.convert_to_formats(markdown_input)
    print("Test 4: Regular markdown")
    print(f"Input: {markdown_input}")
    print(f"LaTeX: {markdown_out.latex[:100]}...")
    print(f"MathML: {markdown_out.mathml[:100]}...")
    print(f"MML: {markdown_out.mml}")
    print(f"OMML: {markdown_out.omml}")
    print()
# Script entry point: print the converter output when the file is run directly.
if __name__ == "__main__":
    test_latex_only_conversion()

View File

@@ -1,95 +0,0 @@
"""对比测试:展示 MathML 简化前后的差异."""
from app.services.converter import Converter
def compare_simplification():
    """Print a hand-written verbose MathML sample next to the converter's output.

    The sizes of both are reported together with the percentage reduction,
    followed by a fixed list of removed items and a few more conversions.
    """
    # Sample of the MathML before simplification (typical Pandoc output)
    before_example = '''<math display="inline" xmlns="http://www.w3.org/1998/Math/MathML">
<semantics>
<mrow>
<mi>γ</mi>
<mo form="infix">=</mo>
<mn>22</mn>
<mo form="infix">.</mo>
<mn>2</mn>
<mo form="infix" separator="true">,</mo>
<mi>c</mi>
<mo form="infix">=</mo>
<mn>30</mn>
<mo form="infix">.</mo>
<mn>4</mn>
</mrow>
<annotation encoding="application/x-tex">\\gamma = 22.2, c = 30.4</annotation>
</semantics>
</math>'''

    # Run the real conversion
    converter = Converter()
    result = converter.convert_to_formats(r"$\gamma = 22.2, c = 30.4$")
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(heavy_rule)
    print("MathML 简化效果对比")
    print(heavy_rule)
    print("\n【简化前(典型 Pandoc 输出)】")
    print(f"长度: {len(before_example)} 字符")
    print(before_example)
    print("\n" + light_rule)
    print("\n【简化后(当前输出)】")
    print(f"长度: {len(result.mathml)} 字符")
    print(result.mathml)
    print("\n" + light_rule)

    # Percentage by which the output shrank relative to the sample
    before_len = len(before_example)
    reduction = ((before_len - len(result.mathml)) / before_len) * 100
    print(f"\n📊 大小减少: {reduction:.1f}%")

    # List the redundant items that were removed
    print("\n✅ 已移除的冗余:")
    removed = [
        "<semantics> 包装器",
        "<annotation> 元素",
        'form="infix" 属性',
        'form="prefix" 属性',
        'form="postfix" 属性',
        'separator="true" 属性',
        'stretchy="true" 属性',
        'fence="true" 属性',
        'columnalign 属性',
        'columnspacing 属性',
        '不必要的空白',
        'display="inline" → display="block"',
        'Unicode 实体 → 实际字符'
    ]
    for entry in removed:
        print(entry)
    print("\n" + heavy_rule)

    # A few more examples
    test_cases = [
        (r"\frac{a}{b}", "分数"),
        (r"x^{2} + y^{2} = r^{2}", "幂次"),
        (r"\sqrt{a + b}", "根号"),
        (r"\left| \frac{a}{b} \right|", "括号和分数"),
    ]
    print("\n更多示例:")
    print(heavy_rule)
    for latex, desc in test_cases:
        converted = converter.convert_to_formats(f"${latex}$")
        print(f"\n{desc}: ${latex}$")
        print(f"长度: {len(converted.mathml)} 字符")
        preview = converted.mathml[:200]
        if len(converted.mathml) > 200:
            preview += "..."
        print(preview)
# Script entry point: print the before/after comparison when run directly.
if __name__ == "__main__":
    compare_simplification()

View File

@@ -1,55 +0,0 @@
"""Test MathML simplification."""
from app.services.converter import Converter
def show_current_output():
    """Print the MathML produced for a few formulas and flag likely redundancy.

    For each formula the MathML and its length are printed, followed by any
    of four substring-based redundancy hints that apply.
    """
    converter = Converter()
    test_cases = [
        (r"\gamma = 22.2", "简单公式"),
        (r"\frac{a}{b}", "分数"),
        (r"x^{2} + y^{2}", "上标"),
        (r"\sqrt{a + b}", "根号"),
    ]
    heavy_rule = "=" * 80
    print(heavy_rule)
    print("当前 MathML 输出分析")
    print(heavy_rule)
    for latex, desc in test_cases:
        print(f"\n{desc}: ${latex}$")
        print("-" * 80)
        mathml = converter.convert_to_formats(f"${latex}$").mathml
        print(f"长度: {len(mathml)} 字符")
        print(f"\n{mathml}\n")

        # Collect redundancy hints
        redundancies = []
        mrow_count = mathml.count('<mrow>')
        if mrow_count > 1:
            redundancies.append(f"多层 <mrow> 嵌套 ({mrow_count} 个)")
        if 'columnalign="center"' in mathml:
            redundancies.append("columnalign 属性(可能不必要)")
        if 'form="prefix"' in mathml or 'form="postfix"' in mathml:
            redundancies.append("form 属性(可简化)")
        if 'stretchy="true"' in mathml:
            redundancies.append("stretchy 属性(可简化)")

        if not redundancies:
            print("✓ 已经很简洁")
            continue
        print("可能的冗余:")
        for hint in redundancies:
            print(hint)
# Script entry point: print the MathML analysis when run directly.
if __name__ == "__main__":
    show_current_output()

View File

@@ -1,236 +0,0 @@
"""Diagnostic tool for MathML Word compatibility issues."""
from app.services.converter import Converter
def diagnose_mathml(latex: str) -> dict:
    """Convert a formula to MathML and print substring-based Word checks.

    Args:
        latex: LaTeX formula to convert; it is wrapped in ``$...$`` before
            being passed to ``convert_to_formats``.

    Returns:
        On success, a dict with ``success`` (True), ``mathml``, ``issues``
        (list of issue tags) and ``length``. When conversion raises, a dict
        with ``success`` (False) and ``error``.
    """
    converter = Converter()
    heavy_rule = "=" * 80
    light_rule = "-" * 80
    print(heavy_rule)
    print("MathML Word Compatibility Diagnostic")
    print(heavy_rule)
    print(f"\nInput LaTeX: {latex}")

    # Convert
    try:
        mathml = converter.convert_to_formats(f"${latex}$").mathml
        print("\n✓ Conversion successful")
        print(f"MathML length: {len(mathml)} characters")
    except Exception as err:
        print(f"\n✗ Conversion failed: {err}")
        return {"success": False, "error": str(err)}

    # Diagnostic checks
    print("\n" + light_rule)
    print("Word Compatibility Checks:")
    print(light_rule)
    issues = []

    # Check 1: namespace declaration
    if 'xmlns="http://www.w3.org/1998/Math/MathML"' not in mathml:
        print("✗ Missing or incorrect MathML namespace")
        issues.append("namespace")
    else:
        print("✓ Has correct MathML namespace")

    # Check 2: display attribute
    if 'display="block"' in mathml:
        print("✓ Has display='block' attribute")
    elif 'display="inline"' in mathml:
        print("⚠ Has display='inline' (Word prefers 'block')")
        issues.append("display_inline")
    else:
        print("✗ Missing display attribute")
        issues.append("no_display")

    # Check 3: wrapper elements
    if '<semantics>' in mathml:
        print("⚠ Contains <semantics> element")
        print(" Note: Word may ignore semantics wrapper")
        issues.append("semantics")
    if '<annotation' in mathml:
        print("⚠ Contains <annotation> element")
        print(" Note: Word doesn't need annotation, may cause issues")
        issues.append("annotation")

    # Check 4: encoded entities
    entity_markers = ('&#x', '&gt;', '&lt;', '&amp;')
    if any(marker in mathml for marker in entity_markers):
        print("⚠ Contains encoded entities (Word prefers actual characters)")
        issues.append("entities")
    else:
        print("✓ No problematic entities")

    # Check 5: root element
    if not mathml.startswith('<math'):
        print("✗ Doesn't start with <math> element")
        issues.append("no_math_root")
    else:
        print("✓ Starts with <math> element")

    # Check 6: attributes that are reported but not recorded as issues
    if 'class=' in mathml:
        print("⚠ Contains 'class' attribute (Word ignores these)")
    if 'style=' in mathml:
        print("⚠ Contains 'style' attribute (Word ignores these)")

    # Print MathML structure: the head, plus the tail when it is long
    print("\n" + light_rule)
    print("MathML Structure:")
    print(light_rule)
    print(mathml[:500])
    if len(mathml) > 500:
        print("...")
        print(mathml[-200:])

    # Recommendations
    print("\n" + light_rule)
    print("Recommendations:")
    print(light_rule)
    if issues:
        print("Issues found:")
        if "semantics" in issues or "annotation" in issues:
            print("\n1. Remove <semantics> and <annotation> wrappers")
            print(" Word only needs the <mrow> content inside")
        if "display_inline" in issues:
            print("\n2. Change display='inline' to display='block'")
        if "entities" in issues:
            print("\n3. Decode HTML entities to actual characters")
        if "namespace" in issues:
            print("\n4. Add xmlns='http://www.w3.org/1998/Math/MathML'")
    else:
        print("✓ MathML appears to be Word-compatible!")
        print("\nHow to paste into Word:")
        print(" 1. Copy the MathML XML")
        print(" 2. In Word: Insert → Equation → Ink Equation")
        print(" 3. Right-click the equation → 'Professional'")
        print(" 4. Right-click again → 'Save as new equation'")
        print("\nOR use Alt text method:")
        print(" 1. Insert → Equation")
        print(" 2. Type any formula")
        print(" 3. Right-click → Edit Alt Text")
        print(" 4. Paste MathML in Alt Text field")

    report = {
        "success": True,
        "mathml": mathml,
        "issues": issues,
        "length": len(mathml)
    }
    return report
def test_simple_formula():
    """Run the diagnostic on a plain fraction."""
    print("\nTest 1: Simple formula")
    formula = r"\frac{a}{b}"
    diagnose_mathml(formula)
def test_complex_formula():
    """Run the diagnostic on a delimited two-by-two array."""
    print("\n\nTest 2: Complex formula with matrix")
    formula = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
    diagnose_mathml(formula)
def test_problematic_formula():
    """Run the diagnostic on the user-reported formula."""
    print("\n\nTest 3: User's formula (after OCR fix)")
    formula = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"
    diagnose_mathml(formula)
def generate_clean_mathml():
    """Convert a fixed formula and strip a ``<semantics>`` wrapper if present.

    Returns:
        The rebuilt ``<math>`` string when the text between ``<semantics>``
        and ``<annotation`` can be extracted; otherwise the converter's
        MathML unchanged.
    """
    import re

    heavy_rule = "=" * 80
    light_rule = "-" * 80
    print("\n" + heavy_rule)
    print("Generating Clean MathML for Word")
    print(heavy_rule)
    converter = Converter()
    latex = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"
    mathml = converter.convert_to_formats(f"${latex}$").mathml

    # Look for wrapped content only when a <semantics> element is present.
    wrapped = None
    if '<semantics>' in mathml:
        print("\n⚠ Original has <semantics> wrapper")
        wrapped = re.search(r'<semantics>(.*?)<annotation', mathml, re.DOTALL)

    if wrapped is None:
        print("\nGenerated MathML:")
        print(light_rule)
        print(mathml)
        return mathml

    # Rebuild the root element around the extracted inner content.
    inner = wrapped.group(1).strip()
    clean_mathml = f'<math display="block" xmlns="http://www.w3.org/1998/Math/MathML">{inner}</math>'
    print("\nCleaned MathML (without semantics):")
    print(light_rule)
    print(clean_mathml)
    print("\n✓ Try pasting this version into Word")
    return clean_mathml
if __name__ == "__main__":
    # Script entry point: run the three diagnostics, then print general advice.
    print("MathML Word Compatibility Diagnostic Tool\n")
    try:
        for run_diagnostic in (
            test_simple_formula,
            test_complex_formula,
            test_problematic_formula,
        ):
            run_diagnostic()
        print("\n\n")
        clean = generate_clean_mathml()
        heavy_rule = "=" * 80
        print("\n" + heavy_rule)
        print("SUMMARY")
        print(heavy_rule)
        print("\nCommon reasons MathML doesn't work in Word:")
        print(" 1. <semantics> wrapper - Word may not parse it correctly")
        print(" 2. <annotation> element - Word doesn't need it")
        print(" 3. HTML entities - Word prefers actual Unicode characters")
        print(" 4. Missing xmlns attribute")
        print(" 5. Wrong paste location in Word")
        print("\nBest practice for Word:")
        print(" • Use simple MathML without semantics wrapper")
        print(" • Include xmlns attribute")
        print(" • Use display='block'")
        print(" • Use actual characters, not entities")
        print("\n" + heavy_rule)
    except Exception as err:
        print(f"\nError: {err}")
        import traceback
        traceback.print_exc()

View File

@@ -1,105 +0,0 @@
"""Quick test to verify MinerU postprocessing is enabled."""
from app.services.ocr_service import _postprocess_markdown
def test_mineru_postprocessing():
    """Check that ``_postprocess_markdown`` repairs split numbers in a formula.

    A display formula with digits separated by spaces is postprocessed and
    the result is checked for the merged numbers and against the split ones.

    Returns:
        bool: True when all six substring checks hold, False otherwise.
    """
    print("=" * 80)
    print("Testing MinerU Postprocessing")
    print("=" * 80)
    # Simulate MinerU OCR output (with number errors)
    mineru_markdown = r"""$$
\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}
$$"""
    print("\nMinerU OCR Output (raw):")
    print(mineru_markdown)
    # Apply postprocessing
    fixed = _postprocess_markdown(mineru_markdown)
    print("\nAfter Postprocessing:")
    print(fixed)
    print("\n" + "-" * 80)
    print("Verification:")
    print("-" * 80)
    checks = [
        ("Has '22.2'", "22.2" in fixed),
        ("Has '30.4'", "30.4" in fixed),
        ("Has '25.4'", "25.4" in fixed),
        ("No '2 2'", "2 2" not in fixed),
        ("No '3 0'", "3 0" not in fixed),
        ("No '2 5'", "2 5" not in fixed),
    ]
    for check_name, passed in checks:
        # The marker was an empty string in both branches, so a failed check
        # printed exactly like a passing one.
        status = "✓" if passed else "✗"
        print(f"{status} {check_name}")
    all_passed = all(passed for _, passed in checks)
    if all_passed:
        print("\n✓✓✓ MinerU postprocessing is working! ✓✓✓")
    else:
        print("\n✗✗✗ MinerU postprocessing has issues ✗✗✗")
    return all_passed
def test_expected_api_response():
    """Print a formula before and after ``_postprocess_markdown``.

    Nothing is asserted; the output illustrates the markdown the API is
    expected to return.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80
    print("\n" + heavy_rule)
    print("Expected API Response Format")
    print(heavy_rule)

    raw_markdown = r"$$\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}$$"
    cleaned_markdown = _postprocess_markdown(raw_markdown)

    print("\nBefore postprocessing:")
    print(f" markdown: {raw_markdown}")
    print("\nAfter postprocessing (what API should return):")
    print(f" markdown: {cleaned_markdown}")
    print("\nExpected changes:")
    for change in ("'2 2. 2''22.2'", "'3 0. 4''30.4'", "'2 5. 4''25.4'"):
        print(change)
    print("\n" + light_rule)
    print("Note: The API should return the FIXED markdown")
    print(" All other formats (latex, mathml, mml) are derived from this")
    print(light_rule)
if __name__ == "__main__":
    # Script entry point: verify postprocessing, then print follow-up steps.
    print("MinerU Postprocessing Verification\n")
    try:
        postprocessing_ok = test_mineru_postprocessing()
        test_expected_api_response()
        heavy_rule = "=" * 80
        print("\n" + heavy_rule)
        if not postprocessing_ok:
            print("✗ There may still be issues")
        else:
            print("✓ MinerU postprocessing is NOW ENABLED")
            print("\nNext steps:")
            print(" 1. Restart the server")
            print(" 2. Test with the same request")
            print(" 3. The markdown field should now have '22.2' instead of '2 2. 2'")
        print(heavy_rule)
    except Exception as err:
        print(f"\nError: {err}")
        import traceback
        traceback.print_exc()

View File

@@ -1,294 +0,0 @@
"""Test OCR number error fixing."""
from app.services.converter import Converter
def test_ocr_number_errors():
    """Run ``Converter._fix_ocr_number_errors`` over a table of OCR samples.

    Each sample lists substrings that must appear in the fixed text and
    substrings that must not.

    Returns:
        bool: True when every expectation in every sample holds.
    """
    print("=" * 80)
    print("Testing OCR Number Error Fixes")
    print("=" * 80)
    converter = Converter()
    # Test cases from the error
    test_cases = [
        {
            "name": "Original error case",
            "latex": r"\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}",
            "expected_fixes": ["22.2", "30.4", "25.4"],
            "should_not_have": ["2 2", "3 0", "2 5"],
        },
        {
            "name": "Simple decimal with space",
            "latex": r"x = 3. 14",
            "expected_fixes": ["3.14"],
            "should_not_have": ["3. 14"],
        },
        {
            "name": "Multiple decimals",
            "latex": r"a = 1 2. 5, b = 9. 8 7",
            "expected_fixes": ["12.5", "9.87"],
            "should_not_have": ["1 2", "9. 8"],
        },
        {
            "name": "Large numbers with spaces",
            "latex": r"n = 1 5 0, m = 2 0 0 0",
            "expected_fixes": ["150", "2000"],
            "should_not_have": ["1 5", "2 0 0"],
        },
        {
            "name": "Don't merge across operators",
            "latex": r"2 + 3 = 5",
            "expected_fixes": ["2 + 3 = 5"],  # Should stay the same
            "should_not_have": ["23=5"],
        },
    ]
    all_passed = True
    for number, case in enumerate(test_cases, 1):
        print(f"\nTest {number}: {case['name']}")
        print("-" * 80)
        print(f"Input: {case['latex']}")
        # Apply fix
        fixed = converter._fix_ocr_number_errors(case['latex'])
        print(f"Fixed: {fixed}")

        # Gather one report line per expectation, then print them together.
        report = []
        for wanted in case['expected_fixes']:
            present = wanted in fixed
            report.append(f"✓ Contains '{wanted}'" if present else f"✗ Missing '{wanted}'")
            all_passed = all_passed and present
        for unwanted in case['should_not_have']:
            absent = unwanted not in fixed
            report.append(f"✓ Removed '{unwanted}'" if absent else f"✗ Still has '{unwanted}'")
            all_passed = all_passed and absent
        for line in report:
            print(f" {line}")
    return all_passed
def test_mathml_quality():
    """Check the MathML produced for a formula containing OCR number errors.

    The formula is converted with ``convert_to_formats`` and the resulting
    MathML is inspected with five substring checks.

    Returns:
        bool: True when every check holds, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Testing MathML Quality After OCR Fix")
    print("=" * 80)
    converter = Converter()
    # The problematic LaTeX from the error
    latex = r"\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}"
    print(f"\nOriginal LaTeX: {latex}")
    # Convert to MathML
    result = converter.convert_to_formats(f"${latex}$")
    mathml = result.mathml
    print(f"\nMathML length: {len(mathml)} chars")
    # Check quality indicators
    print("\nQuality checks:")
    print("-" * 80)
    checks = {
        "No separate digits for decimals": "<mn>22.2</mn>" in mathml or "22.2" in mathml,
        "No dot as identifier": "<mi>.</mi>" not in mathml,
        "Properly formatted numbers": "<mn>30.4</mn>" in mathml or "30.4" in mathml,
        "Has namespace": 'xmlns=' in mathml,
        "Display block": 'display="block"' in mathml,
    }
    for check, passed in checks.items():
        # The marker was an empty string in both branches, so a failed check
        # printed exactly like a passing one.
        status = "✓" if passed else "✗"
        print(f"{status} {check}")
    all_passed = all(checks.values())
    # Show a preview
    print("\n" + "-" * 80)
    print("MathML preview:")
    print("-" * 80)
    print(mathml[:400])
    if len(mathml) > 400:
        print("...")
    return all_passed
def test_edge_cases():
    """Run ``Converter._fix_ocr_number_errors`` over edge-case inputs.

    A case may define ``should_stay`` (output must equal this string),
    ``should_become`` (a substring the output must contain) and/or
    ``should_have`` (one substring, or a list of substrings, the output must
    contain).

    Returns:
        bool: True when every expectation holds, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Testing Edge Cases")
    print("=" * 80)
    converter = Converter()
    test_cases = [
        {
            "name": "Should NOT merge: arithmetic",
            "input": r"2 + 3 = 5",
            "should_stay": "2 + 3 = 5",
        },
        {
            "name": "Should NOT merge: multiplication",
            "input": r"2 \times 3",
            "should_stay": r"2 \times 3",
        },
        {
            "name": "Should merge: decimal at end",
            "input": r"x = 1 2. 5",
            "should_become": "12.5",
        },
        {
            "name": "Should merge: multiple spaces",
            "input": r"n = 1 2 . 3 4",
            "should_have": "12.34",
        },
        {
            "name": "Complex: mixed scenarios",
            "input": r"a = 1 2. 3 + 4 5. 6 - 7",
            "should_have": ["12.3", "45.6", "- 7"],
        },
    ]
    all_passed = True
    for test in test_cases:
        print(f"\n{test['name']}")
        print(f" Input: {test['input']}")
        fixed = converter._fix_ocr_number_errors(test['input'])
        print(f" Output: {fixed}")
        if 'should_stay' in test:
            if fixed == test['should_stay']:
                print(" ✓ Correctly unchanged")
            else:
                print(f" ✗ Should stay '{test['should_stay']}' but got '{fixed}'")
                all_passed = False
        if 'should_become' in test:
            if test['should_become'] in fixed:
                print(f" ✓ Contains '{test['should_become']}'")
            else:
                print(f" ✗ Should contain '{test['should_become']}'")
                all_passed = False
        if 'should_have' in test:
            expected_parts = test['should_have']
            # A bare string would be iterated character by character, so the
            # check would pass whenever each character occurs anywhere in the
            # output; wrap it so the whole substring is tested.
            if isinstance(expected_parts, str):
                expected_parts = [expected_parts]
            for expected in expected_parts:
                if expected in fixed:
                    print(f" ✓ Contains '{expected}'")
                else:
                    print(f" ✗ Should contain '{expected}'")
                    all_passed = False
    return all_passed
def compare_before_after():
    """Compare the MathML for an OCR-damaged formula with a clean one.

    Both formulas are converted with ``convert_to_formats`` and each result
    is checked for the substring ``22.2`` and for ``<mi>.</mi>``.

    Returns:
        bool: True when the OCR-damaged input's MathML contains ``22.2`` and
        no ``<mi>.</mi>``, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Before/After Comparison")
    print("=" * 80)
    converter = Converter()
    # Simulate OCR error
    ocr_latex = r"\gamma = 2 2. 2, c = 3 0. 4"
    correct_latex = r"\gamma = 22.2, c = 30.4"
    print(f"\nOCR LaTeX: {ocr_latex}")
    print(f"Correct LaTeX: {correct_latex}")
    # Convert both
    ocr_result = converter.convert_to_formats(f"${ocr_latex}$")
    correct_result = converter.convert_to_formats(f"${correct_latex}$")
    print("\n" + "-" * 80)
    print("MathML comparison:")
    print("-" * 80)
    # Check if they produce similar quality output
    ocr_has_decimal = "22.2" in ocr_result.mathml
    correct_has_decimal = "22.2" in correct_result.mathml
    ocr_has_dot_error = "<mi>.</mi>" in ocr_result.mathml
    correct_has_dot_error = "<mi>.</mi>" in correct_result.mathml
    # These two lines printed an empty string for both outcomes, so the
    # decimal check result was never visible.
    print(f"OCR output has proper decimals: {'✓' if ocr_has_decimal else '✗'}")
    print(f"Correct output has proper decimals: {'✓' if correct_has_decimal else '✗'}")
    print(f"OCR output has dot errors: {'✗ Yes' if ocr_has_dot_error else '✓ No'}")
    print(f"Correct output has dot errors: {'✗ Yes' if correct_has_dot_error else '✓ No'}")
    if ocr_has_decimal and not ocr_has_dot_error:
        print("\n✓ OCR fix is working! Output quality matches correct input.")
        return True
    else:
        print("\n✗ OCR fix may need improvement.")
        return False
if __name__ == "__main__":
    # Script entry point: run the four suites, then print a summary table.
    print("OCR Number Error Fix Test Suite\n")
    try:
        # All suites run before the summary header is printed.
        outcomes = [
            ("OCR error fixes", test_ocr_number_errors()),
            ("MathML quality", test_mathml_quality()),
            ("Edge cases", test_edge_cases()),
            ("Before/after comparison", compare_before_after()),
        ]
        heavy_rule = "=" * 80
        print("\n" + heavy_rule)
        print("SUMMARY")
        print(heavy_rule)
        for label, ok in outcomes:
            if ok:
                print(f"✓ PASS: {label}")
            else:
                print(f"✗ FAIL: {label}")
        print("\n" + "-" * 80)
        if all(ok for _, ok in outcomes):
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nOCR number errors are being fixed automatically!")
            print("Examples:")
            print("'2 2. 2''22.2'")
            print("'3 0. 4''30.4'")
            print("'1 5 0''150'")
        else:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
        print(heavy_rule)
    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as err:
        print(f"\n\nTest error: {err}")
        import traceback
        traceback.print_exc()

View File

@@ -1,265 +0,0 @@
"""Test OCR number error fixing in the complete pipeline."""
from app.services.ocr_service import _postprocess_markdown
def test_ocr_postprocessing():
    """Run `_postprocess_markdown` over sample OCR output and report the result.

    Each scenario lists substrings that must appear in the output
    (``should_have``), substrings that must be gone (``should_not_have``),
    or a ``should_stay`` flag meaning the text must come back unchanged.

    Returns:
        True when every expectation of every scenario holds, else False.
    """
    rule = "=" * 80
    print(rule)
    print("Testing OCR Postprocessing Pipeline")
    print(rule)

    # Simulated OCR output containing the usual digit-spacing mistakes.
    scenarios = [
        {
            "name": "Inline formula with decimal errors",
            "input": r"The value is $\gamma = 2 2. 2$ and $c = 3 0. 4$.",
            "should_have": ["22.2", "30.4"],
            "should_not_have": ["2 2", "3 0"],
        },
        {
            "name": "Display formula with decimal errors",
            "input": r"$$\phi = 2 5. 4 ^ {\circ}$$",
            "should_have": ["25.4"],
            "should_not_have": ["2 5"],
        },
        {
            "name": "Multiple formulas",
            "input": r"$a = 1 2. 5$, $b = 9. 8 7$, and $c = 1 5 0$",
            "should_have": ["12.5", "9.87", "150"],
            "should_not_have": ["1 2", "9. 8", "1 5"],
        },
        {
            "name": "Mixed content (text + formulas)",
            "input": r"The equation $x = 3. 14$ is approximately pi. Then $y = 2 7. 3$.",
            "should_have": ["3.14", "27.3"],
            "should_not_have": ["3. 14", "2 7"],
        },
        {
            "name": "Normal arithmetic (should not be affected)",
            "input": r"$2 + 3 = 5$ and $10 - 7 = 3$",
            "should_stay": True,
        },
    ]

    ok = True
    for number, scenario in enumerate(scenarios, 1):
        source_text = scenario["input"]
        print(f"\nTest {number}: {scenario['name']}")
        print("-" * 80)
        print(f"Input: {source_text}")

        output = _postprocess_markdown(source_text)
        print(f"Output: {output}")

        # Required substrings (none when the key is absent).
        for wanted in scenario.get("should_have", ()):
            if wanted in output:
                print(f" ✓ Contains '{wanted}'")
                continue
            print(f" ✗ Missing '{wanted}'")
            ok = False

        # Forbidden substrings (none when the key is absent).
        for banned in scenario.get("should_not_have", ()):
            if banned in output:
                print(f" ✗ Still has '{banned}'")
                ok = False
            else:
                print(f" ✓ Removed '{banned}'")

        # Inputs flagged should_stay must round-trip exactly.
        if scenario.get("should_stay"):
            if source_text == output:
                print(" ✓ Correctly unchanged")
            else:
                print(" ✗ Should not change but did")
                ok = False

    return ok
def test_real_world_case():
    """Check the formula from the original error report after postprocessing.

    Passes one display formula with three split decimals through
    `_postprocess_markdown` and verifies that the merged numbers are present
    and the split digit pairs are gone.

    Returns:
        True when all six substring checks pass, else False.
    """
    print("\n" + "=" * 80)
    print("Testing Real-World Error Case")
    print("=" * 80)

    # The exact input from the error report.
    ocr_output = r"$$\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}$$"
    print("\nOCR Output (with errors):")
    print(f" {ocr_output}")

    fixed = _postprocess_markdown(ocr_output)
    print("\nAfter Postprocessing:")
    print(f" {fixed}")

    checks = {
        "Has 22.2": "22.2" in fixed,
        "Has 30.4": "30.4" in fixed,
        "Has 25.4": "25.4" in fixed,
        "No '2 2'": "2 2" not in fixed,
        "No '3 0'": "3 0" not in fixed,
        "No '2 5'": "2 5" not in fixed,
    }

    print("\nQuality Checks:")
    print("-" * 80)
    all_passed = True
    for check, passed in checks.items():
        # Both branches used to be the empty string, so the report never
        # showed which checks failed; use the markers the rest of the
        # script prints.
        status = "✓" if passed else "✗"
        print(f"{status} {check}")
        if not passed:
            all_passed = False

    if all_passed:
        print("\n✓ Real-world case fixed successfully!")
    else:
        print("\n✗ Real-world case still has issues")
    return all_passed
def test_edge_cases():
    """Confirm that already-valid formulas survive `_postprocess_markdown`.

    Whitespace differences are tolerated: input and output are compared
    with every space removed.

    Returns:
        True when every formula keeps its structure, else False.
    """
    print("\n" + "=" * 80)
    print("Testing Edge Cases")
    print("=" * 80)

    samples = [
        {
            "name": "Arithmetic operations",
            "input": r"$2 + 3 = 5$ and $10 - 7 = 3$",
            "should_stay": True,
        },
        {
            "name": "Multiplication",
            "input": r"$2 \times 3 = 6$",
            "should_stay": True,
        },
        {
            "name": "Exponents",
            "input": r"$x ^ 2 + y ^ 2 = r ^ 2$",
            "should_stay": True,
        },
        {
            "name": "Fractions",
            "input": r"$\frac{1}{2} + \frac{3}{4}$",
            "should_stay": True,
        },
        {
            "name": "Subscripts",
            "input": r"$x _ 1 + x _ 2$",
            "should_stay": True,
        },
    ]

    intact = True
    for sample in samples:
        original_text = sample["input"]
        print(f"\n{sample['name']}")
        print(f" Input: {original_text}")
        output = _postprocess_markdown(original_text)
        print(f" Output: {output}")

        if not sample.get("should_stay"):
            continue

        # Only the non-space characters have to match.
        squeezed_before = original_text.replace(" ", "")
        squeezed_after = output.replace(" ", "")
        if squeezed_after == squeezed_before:
            print(" ✓ Structure preserved")
        else:
            print(" ✗ Structure changed unexpectedly")
            intact = False

    return intact
def test_performance():
    """Time `_postprocess_markdown` on a 100-line document with ~200 formulas.

    Returns:
        True when processing takes less than one second, else False.
    """
    import time

    print("\n" + "=" * 80)
    print("Testing Performance")
    print("=" * 80)

    # Build the whole document in one join rather than repeated `+=`.
    large_content = "".join(
        f"Formula {i}: $x = {i} {i}. {i}$ and $y = {i*2} {i*2}. {i*2}$\n"
        for i in range(100)
    )
    print(f"\nContent size: {len(large_content)} characters")
    print("Number of formulas: ~200")

    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which would skew the measurement.
    start = time.perf_counter()
    _postprocess_markdown(large_content)  # result not needed, only timing
    elapsed = time.perf_counter() - start

    print(f"Processing time: {elapsed*1000:.2f}ms")
    if elapsed < 1.0:
        print("✓ Performance is acceptable (< 1s)")
        return True
    print("✗ Performance may need optimization")
    return False
if __name__ == "__main__":
    # Entry point for running this file directly: execute the four pipeline
    # checks in order and print a per-check and overall verdict.
    print("OCR Pipeline Integration Test Suite\n")
    try:
        # List elements are evaluated left to right, so the checks still run
        # in this order before any summary output.
        outcomes = [
            ("OCR postprocessing", test_ocr_postprocessing()),
            ("Real-world case", test_real_world_case()),
            ("Edge cases", test_edge_cases()),
            ("Performance", test_performance()),
        ]

        heavy_rule = "=" * 80
        print("\n" + heavy_rule)
        print("SUMMARY")
        print(heavy_rule)

        for label, succeeded in outcomes:
            verdict = "✓ PASS" if succeeded else "✗ FAIL"
            print(f"{verdict}: {label}")

        print("\n" + "-" * 80)
        if all(succeeded for _, succeeded in outcomes):
            for line in (
                "✓✓✓ ALL TESTS PASSED ✓✓✓",
                "\nOCR number error fixing is integrated into the pipeline!",
                "\nFlow:",
                " 1. OCR recognizes image → produces Markdown with LaTeX",
                " 2. _postprocess_markdown() fixes number errors",
                " 3. Clean LaTeX is used for all conversions",
                "\nBenefits:",
                " • Fixed once at the source",
                " • All output formats benefit (MathML, MML, OMML)",
                " • Better performance (no repeated fixes)",
            ):
                print(line)
        else:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
        print(heavy_rule)
    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Report any unexpected failure with its traceback.
        print(f"\n\nTest error: {e}")
        import traceback

        traceback.print_exc()

View File

@@ -1,112 +0,0 @@
"""Test script for OMML conversion API endpoint."""
import requests
import json
def test_latex_to_omml():
    """POST sample formulas to the /convert/latex-to-omml endpoint.

    For each formula the HTTP status is printed; on 200 the length and the
    first 150 characters of the returned ``omml`` field are shown, otherwise
    the response text. Request failures are reported and do not stop the loop.
    """
    samples = [
        ("Simple fraction", r"\frac{a}{b}"),
        ("Quadratic formula", r"x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}"),
        ("Integral", r"\int_0^\infty e^{-x^2} dx = \frac{\sqrt{\pi}}{2}"),
        ("Matrix", r"\begin{matrix} a & b \\ c & d \end{matrix}"),
    ]
    endpoint = "http://localhost:8000/api/v1/convert/latex-to-omml"

    print("Testing OMML Conversion API")
    print("=" * 80)

    for position, (title, formula) in enumerate(samples, 1):
        print(f"\nTest {position}: {title}")
        print("-" * 80)
        print(f"LaTeX: {formula}")
        try:
            reply = requests.post(
                endpoint,
                json={"latex": formula},
                headers={"Content-Type": "application/json"},
                timeout=10,
            )
            if reply.status_code != 200:
                print(f"✗ Status: {reply.status_code}")
                print(f"Error: {reply.text}")
                continue

            payload = reply.json()
            omml = payload.get("omml", "")
            print(f"✓ Status: {reply.status_code}")
            print(f"OMML length: {len(omml)} characters")
            print(f"OMML preview: {omml[:150]}...")
        except requests.exceptions.RequestException as e:
            # Connection errors, timeouts and other transport-level failures.
            print(f"✗ Request failed: {e}")
        except Exception as e:
            # Anything else raised while handling the reply.
            print(f"✗ Error: {e}")

    print("\n" + "=" * 80)
def test_invalid_input():
    """Send invalid payloads to the OMML endpoint and print the responses.

    Two requests are made: one with an empty ``latex`` string and one with
    the ``latex`` field missing. The status code and decoded JSON body of
    each response are printed.

    Raises:
        requests.exceptions.RequestException: if a request fails or times out.
    """
    print("\nTesting Error Handling")
    print("=" * 80)
    base_url = "http://localhost:8000/api/v1/convert/latex-to-omml"

    cases = (
        ("Empty LaTeX", {"latex": ""}),
        ("Missing LaTeX field", {}),
    )
    for label, payload in cases:
        print(f"\nTest: {label}")
        response = requests.post(
            base_url,
            json=payload,
            headers={"Content-Type": "application/json"},
            # Without a timeout requests waits forever on an unresponsive
            # server; use the same limit as test_latex_to_omml.
            timeout=10,
        )
        print(f"Status: {response.status_code}")
        print(f"Response: {response.json()}")

    print("\n" + "=" * 80)
if __name__ == "__main__":
    # Entry point for running this file directly; needs the API server to be
    # listening on localhost:8000.
    print("OMML API Test Suite")
    print("Make sure the API server is running on http://localhost:8000")
    print()
    try:
        for run_check in (test_latex_to_omml, test_invalid_input):
            run_check()
    except KeyboardInterrupt:
        print("\n\n✗ Tests interrupted by user")
    except Exception as e:
        print(f"\n✗ Test suite failed: {e}")
    else:
        # Reached only when both checks returned without raising.
        print("\n✓ All tests completed!")

View File

@@ -1,218 +0,0 @@
"""Comprehensive test for OMML conversion with preprocessing."""
from app.services.converter import Converter
def test_case_1_array_with_spaces():
    """Test: array with spaces in the column specifier (the original issue).

    Converts a large determinant expansion whose ``array`` environments use
    ``{c c c c}`` column specifiers, then inspects the preprocessed formula
    to see whether the specifiers were collapsed to ``{cccc}``.

    Returns:
        True when conversion and preprocessing finish without raising,
        False otherwise.
    """
    print("\n" + "=" * 80)
    print("Test 1: Array with spaces in column specifier")
    print("=" * 80)
    converter = Converter()

    # The problematic LaTeX from the error report.
    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""
    print(f"LaTeX length: {len(latex)} chars")
    print(f"Preview: {latex[:100]}...")

    try:
        omml = converter.convert_to_omml(latex)
        print("\n✓ SUCCESS: Converted to OMML")
        print(f"OMML length: {len(omml)} chars")
        if "oMath" in omml:
            print("✓ Valid OMML structure detected")

        # Check that preprocessing collapsed the spaced specifiers.
        preprocessed = converter._preprocess_formula_for_omml(latex)
        if "{c c c c}" not in preprocessed and "{cccc}" in preprocessed:
            # The separator keeps the before/after values from running
            # together in the report.
            print("✓ Array column specifiers fixed: '{c c c c}' → '{cccc}'")
        return True
    except Exception as e:
        # Any failure in conversion or preprocessing counts as a failed test.
        print(f"\n✗ FAILED: {e}")
        return False
def test_case_2_vmatrix():
    """Convert a 2x2 ``vmatrix`` to OMML and inspect its preprocessing.

    Returns:
        True if conversion and preprocessing complete, False if either raises.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 2: vmatrix environment")
    print(rule)

    engine = Converter()
    formula = r"\begin{vmatrix} a & b \\ c & d \end{vmatrix}"
    print(f"LaTeX: {formula}")

    try:
        converted = engine.convert_to_omml(formula)
        print(f"✓ SUCCESS: Converted to OMML ({len(converted)} chars)")

        # The environment name should be gone, replaced by \left| delimiters.
        prepared = engine._preprocess_formula_for_omml(formula)
        env_removed = "vmatrix" not in prepared
        has_left_bar = r"\left|" in prepared
        if env_removed and has_left_bar:
            print("✓ vmatrix converted to \\left| ... \\right|")
    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
    return True
def test_case_3_cases_environment():
    """Convert a piecewise ``cases`` definition to OMML and inspect preprocessing.

    Returns:
        True if conversion and preprocessing complete, False if either raises.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 3: cases environment")
    print(rule)

    engine = Converter()
    formula = r"f(x) = \begin{cases} x^2 & x \geq 0 \\ -x & x < 0 \end{cases}"
    print(f"LaTeX: {formula}")

    try:
        converted = engine.convert_to_omml(formula)
        print(f"✓ SUCCESS: Converted to OMML ({len(converted)} chars)")

        # After preprocessing, "cases" should be gone and "array" present.
        prepared = engine._preprocess_formula_for_omml(formula)
        if not ("cases" in prepared or "array" not in prepared):
            print("✓ cases converted to array environment")
    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
    return True
def test_case_4_aligned_environment():
    """Convert an ``aligned`` system of equations to OMML and inspect preprocessing.

    Returns:
        True if conversion and preprocessing complete, False if either raises.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 4: aligned environment")
    print(rule)

    engine = Converter()
    formula = r"\begin{aligned} x + y &= 5 \\ 2x - y &= 1 \end{aligned}"
    print(f"LaTeX: {formula}")

    try:
        converted = engine.convert_to_omml(formula)
        print(f"✓ SUCCESS: Converted to OMML ({len(converted)} chars)")

        prepared = engine._preprocess_formula_for_omml(formula)

        # "aligned" should be gone and "array" present after preprocessing.
        if "aligned" not in prepared and "array" in prepared:
            print("✓ aligned converted to array environment")

        # Fewer (or no) '&' markers than in the input counts as removed.
        ampersands_after = prepared.count("&")
        if "&" not in prepared or ampersands_after < formula.count("&"):
            print("✓ Alignment markers removed")
    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
    return True
def test_case_5_simple_formula():
    """Convert the quadratic formula to OMML.

    Returns:
        True if the conversion completes, False if it raises.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 5: Simple formula")
    print(rule)

    engine = Converter()
    formula = r"x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}"
    print(f"LaTeX: {formula}")

    try:
        converted = engine.convert_to_omml(formula)
        print(f"✓ SUCCESS: Converted to OMML ({len(converted)} chars)")
    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
    return True
def test_case_6_nested_structures():
    """Convert a formula nesting ``vmatrix`` and ``cases`` inside an ``array``.

    After conversion, the preprocessed formula is inspected and one line is
    printed for each transformation that appears to have been applied.

    Returns:
        True if conversion and preprocessing complete, False if either raises.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 6: Nested structures")
    print(rule)

    engine = Converter()
    formula = r"\left\{ \begin{array}{l c} \begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\ f(x) = \begin{cases} 1 & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{step function} \end{array} \right."
    print(f"LaTeX: {formula}")

    try:
        converted = engine.convert_to_omml(formula)
        print(f"✓ SUCCESS: Converted to OMML ({len(converted)} chars)")

        prepared = engine._preprocess_formula_for_omml(formula)
        print("\nPreprocessing applied:")

        # (condition, message) pairs, reported in this fixed order.
        findings = (
            ("vmatrix" not in prepared, " ✓ vmatrix converted"),
            ("cases" not in prepared, " ✓ cases converted"),
            (
                "{l c}" not in prepared and "{lc}" in prepared,
                " ✓ Array specifiers fixed",
            ),
        )
        for applied, message in findings:
            if applied:
                print(message)
    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
    return True
if __name__ == "__main__":
    # Entry point for running this file directly: execute the six conversion
    # checks (simple formula first) and print a pass count.
    banner = "=" * 80
    print(banner)
    print("OMML CONVERSION TEST SUITE")
    print("Testing preprocessing and conversion")
    print(banner)

    results = []
    try:
        # Execution order is deliberate: the simple formula runs first.
        plan = (
            ("Simple formula", test_case_5_simple_formula),
            ("Array with spaces", test_case_1_array_with_spaces),
            ("vmatrix", test_case_2_vmatrix),
            ("cases", test_case_3_cases_environment),
            ("aligned", test_case_4_aligned_environment),
            ("Nested structures", test_case_6_nested_structures),
        )
        for label, check in plan:
            results.append((label, check()))

        # Summary
        print("\n" + banner)
        print("TEST SUMMARY")
        print(banner)

        total = len(results)
        passed = len([outcome for _, outcome in results if outcome])
        for label, outcome in results:
            verdict = "✓ PASS" if outcome else "✗ FAIL"
            print(f"{verdict}: {label}")

        print("\n" + "-" * 80)
        print(f"Total: {passed}/{total} tests passed")
        if passed != total:
            print(f"\n✗✗✗ {total - passed} TESTS FAILED ✗✗✗")
        else:
            print("\n✓✓✓ ALL TESTS PASSED ✓✓✓")
        print(banner)
    except KeyboardInterrupt:
        print("\n\nTests interrupted by user")
    except Exception as e:
        # Report any unexpected failure with its traceback.
        print(f"\n\nTest suite error: {e}")
        import traceback

        traceback.print_exc()

View File

@@ -1,202 +0,0 @@
"""Test Word-compatible MathML generation."""
from app.services.converter import Converter
def test_mathml_word_compatibility():
    """Convert a determinant formula and report Word-related MathML features.

    The checks (display attribute, numeric entities, fence elements,
    namespace) are informational: they print a line each but do not affect
    the return value.

    Returns:
        False when no MathML was produced, True otherwise.
    """
    engine = Converter()
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(heavy_rule)
    print("Testing Word-Compatible MathML Generation")
    print(heavy_rule)

    # Matrix with determinant bars (the problematic example).
    latex = r"""\left| \begin{array}{cccc} a_{11} & a_{12} & \dots & a_{1n} \\ \vdots & \vdots & & \vdots \\ a_{i1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a_{n1} & a_{n2} & \dots & a_{nn} \end{array} \right|"""
    print(f"\nLaTeX: {latex[:80]}...")
    print("\n" + light_rule)

    result = engine.convert_to_formats(f"$${latex}$$")
    if not result.mathml:
        print("✗ No MathML generated")
        return False
    mathml = result.mathml

    print("Checking Word compatibility features:")
    print(light_rule)

    # Check 1: display attribute
    if 'display="block"' not in mathml:
        print("✗ Missing or wrong display attribute")
        print(f" Found: {mathml[:100]}...")
    else:
        print("✓ Has display='block' attribute")

    # Check 2: no numeric entities for common symbols
    watched_entities = ['&#x0002B;', '&#x02026;', '&#x022EE;', '&#x0003D;', '&#x0007C;']
    unicode_issues = [entity for entity in watched_entities if entity in mathml]
    if not unicode_issues:
        print("✓ No problematic Unicode entities")
    else:
        print(f"✗ Contains Unicode entities: {unicode_issues}")

    # Check 3: fence markup for the brackets
    fence_markers = ('<mfenced', '<mo fence="true"', 'stretchy="true"')
    if any(marker in mathml for marker in fence_markers):
        print("✓ Uses fence elements")
    else:
        print("? No fence elements found (might be OK)")

    # Check 4: MathML namespace declaration
    if 'xmlns="http://www.w3.org/1998/Math/MathML"' not in mathml:
        print("✗ Missing MathML namespace")
    else:
        print("✓ Has MathML namespace")

    # Preview of the generated markup
    print("\n" + light_rule)
    print("MathML Preview (first 500 chars):")
    print(light_rule)
    print(mathml[:500])
    if len(mathml) > 500:
        print("...")

    print("\n" + light_rule)
    print(f"Total length: {len(mathml)} characters")

    # Rough guess at the origin of the markup, based on marker substrings.
    if 'mfenced' in mathml or 'columnalign' in mathml:
        print("✓ Appears to be Pandoc-generated (good for Word)")
    elif 'stretchy' in mathml and 'fence' in mathml:
        print("✓ Uses standard fence attributes")
    else:
        print("? MathML structure unclear")

    return True
def test_simple_formulas():
    """Test simple formulas for Word compatibility.

    Each formula is converted as inline math and its MathML is checked for
    a block display attribute, the absence of the ``+`` and ``=`` numeric
    entities, and a namespace declaration.

    Returns:
        True when every formula converts and passes all checks, else False.
    """
    converter = Converter()
    print("\n" + "=" * 80)
    print("Testing Simple Formulas")
    print("=" * 80)

    test_cases = [
        ("Fraction", r"\frac{a}{b}"),
        ("Square root", r"\sqrt{x^2 + y^2}"),
        ("Summation", r"\sum_{i=1}^{n} i"),
        ("Equation", r"E = mc^2"),
        ("Matrix", r"\begin{pmatrix} a & b \\ c & d \end{pmatrix}"),
    ]

    all_passed = True
    for name, latex in test_cases:
        print(f"\n{name}: ${latex}$")
        try:
            result = converter.convert_to_formats(f"${latex}$")
            mathml = result.mathml

            # (passed, label) pairs; labels of failed checks are reported.
            checks = [
                ('display="block"' in mathml, "display=block"),
                ('&#x0002B;' not in mathml, "no +entity"),
                ('&#x0003D;' not in mathml, "no =entity"),
                ('xmlns=' in mathml, "namespace"),
            ]
            failed_checks = [label for ok, label in checks if not ok]

            # Both branches used to be the empty string, so the line never
            # showed a pass/fail marker.
            status = "✗" if failed_checks else "✓"
            print(f" {status} Length: {len(mathml)} chars", end="")
            if failed_checks:
                print(f" | Issues: {', '.join(failed_checks)}")
                all_passed = False
            else:
                print(" | All checks passed")
        except Exception as e:
            # A conversion error fails this formula but the loop continues.
            print(f" ✗ Error: {e}")
            all_passed = False

    return all_passed
def compare_with_reference():
    """Report structural features of the MathML generated for a 2x2 determinant.

    Returns:
        True when the MathML contains none of the ``+``, ``=`` and ``|``
        numeric entities, else False. The other features are only printed.
    """
    print("\n" + "=" * 80)
    print("Comparison with Reference MathML")
    print("=" * 80)
    converter = Converter()

    # Simple matrix example
    latex = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
    result = converter.convert_to_formats(f"$${latex}$$")
    our_mathml = result.mathml

    print("\nOur MathML structure:")
    print("-" * 80)

    # Analyze structure
    features = {
        "mfenced": "<mfenced" in our_mathml,
        "mo fence": '<mo fence="' in our_mathml or '<mo stretchy="true"' in our_mathml,
        "mtable": "<mtable" in our_mathml,
        "display block": 'display="block"' in our_mathml,
        "unicode entities": any(f"&#x{x};" in our_mathml for x in ["0002B", "0003D", "0007C"]),
    }

    print("Features:")
    for feature, present in features.items():
        # "unicode entities" is the one feature that is good when absent;
        # every other feature is good when present.
        favourable = present != (feature == "unicode entities")
        # Both branches used to be the empty string, so no marker was shown.
        status = "✓" if favourable else "✗"
        print(f" {status} {feature}: {present}")

    print(f"\nLength: {len(our_mathml)} characters")
    print(f"Preview:\n{our_mathml[:300]}...")
    return not features["unicode entities"]
if __name__ == "__main__":
    # Entry point for running this file directly: execute the three MathML
    # checks in order and print an overall verdict.
    print("Word-Compatible MathML Test Suite\n")
    try:
        # Tuple elements are evaluated left to right, preserving run order.
        outcomes = (
            test_mathml_word_compatibility(),
            test_simple_formulas(),
            compare_with_reference(),
        )

        heavy_rule = "=" * 80
        print("\n" + heavy_rule)
        print("SUMMARY")
        print(heavy_rule)

        if not all(outcomes):
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
            print("\nMathML may not be fully Word-compatible.")
        else:
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nMathML should be Word-compatible!")
            print("Try copying the mathml output and pasting into Word.")
        print(heavy_rule)
    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Report any unexpected failure with its traceback.
        print(f"\n\nTest error: {e}")
        import traceback

        traceback.print_exc()