refact: rm test file
This commit is contained in:
@@ -1,102 +0,0 @@
|
||||
"""Test script for array column specifier fix."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_array_specifier_fix():
    """Verify that spaces inside array column specifiers are removed.

    Feeds the formula from the original error report through
    ``Converter._preprocess_formula_for_omml`` and then through
    ``Converter.convert_to_omml``, printing a step-by-step report.

    Returns:
        bool: False when the spaced specifier survives preprocessing or the
        OMML conversion raises; True otherwise.
    """
    converter = Converter()
    spaced_spec = "{c c c c}"
    compact_spec = "{cccc}"

    # The problematic LaTeX from the error
    latex_formula = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""

    print("Testing array column specifier fix")
    print("=" * 80)
    print(f"\nOriginal LaTeX (first 200 chars):\n{latex_formula[:200]}...")

    # Step 1: preprocessing should collapse "{c c c c}" into "{cccc}".
    print("\n" + "-" * 80)
    print("Step 1: Preprocessing")
    preprocessed = converter._preprocess_formula_for_omml(latex_formula)

    preprocessing_ok = True
    if spaced_spec in preprocessed:
        start = preprocessed.find(spaced_spec)
        print("✗ FAILED: Spaces not removed from array specifiers")
        print(f"Found: {preprocessed[start:start + 10]}")
        # Remember the failure so the final verdict cannot claim success.
        preprocessing_ok = False
    elif compact_spec in preprocessed:
        print("✓ SUCCESS: Spaces removed from array specifiers")
        # Plain interpolation: a backslash-escaped literal inside the
        # replacement field is a SyntaxError before Python 3.12.
        print(f"Changed '{spaced_spec}' → '{compact_spec}'")
    else:
        print("? Could not find array specifier in preprocessed output")

    # Step 2: the full OMML conversion must not raise.
    print("\n" + "-" * 80)
    print("Step 2: OMML Conversion")
    try:
        omml = converter.convert_to_omml(latex_formula)
        print("✓ SUCCESS: OMML conversion completed")
        print(f"OMML length: {len(omml)} characters")
        print(f"OMML preview (first 300 chars):\n{omml[:300]}...")

        # A missing oMath element is only reported as a warning.
        if "oMath" in omml:
            print("\n✓ Valid OMML: Contains oMath element")
        else:
            print("\n✗ WARNING: OMML might be incomplete (no oMath element found)")
    except Exception as e:
        print("✗ FAILED: OMML conversion error")
        print(f"Error: {e}")
        return False

    print("\n" + "=" * 80)
    if not preprocessing_ok:
        print("✗ Preprocessing check failed")
        return False
    print("✓ All tests passed!")
    return True
|
||||
|
||||
|
||||
def test_simple_array():
    """Convert a small array with a spaced column specifier to OMML.

    Returns:
        bool: True when ``convert_to_omml`` completes and the report lines
        print, False when any exception is raised on the way.
    """
    converter = Converter()

    print("\nTesting simple array")
    print("=" * 80)

    # Simple array with spaces in column specifier
    latex_formula = r"\begin{array}{c c c} a & b & c \\ d & e & f \end{array}"
    print(f"LaTeX: {latex_formula}")

    try:
        omml = converter.convert_to_omml(latex_formula)
        size = len(omml)
        print(f"✓ SUCCESS: Converted to OMML ({size} chars)")
        print(f"Preview: {omml[:200]}...")
    except Exception as exc:
        print(f"✗ FAILED: {exc}")
        return False
    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run both checks and print one overall banner.
    print("Array Column Specifier Fix Test Suite\n")

    try:
        test1 = test_simple_array()
        test2 = test_array_specifier_fix()

        if test1 and test2:
            verdict = "✓✓✓ ALL TESTS PASSED ✓✓✓"
        else:
            verdict = "✗✗✗ SOME TESTS FAILED ✗✗✗"

        rule = "=" * 80
        print("\n" + rule)
        print(verdict)
        print(rule)

    except KeyboardInterrupt:
        print("\n\nTests interrupted by user")
    except Exception as e:
        # Last-resort boundary: report the error with its traceback.
        print(f"\n\nTest suite error: {e}")
        import traceback
        traceback.print_exc()
|
||||
@@ -1,254 +0,0 @@
|
||||
"""Comprehensive test for array column specifier fix in all conversion paths."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_problematic_array():
    """Run the formula from the error log through every conversion path.

    Three stages are reported: preprocessing via
    ``Converter._preprocess_formula_for_conversion``, MathML generation via
    ``Converter.convert_to_formats`` and OMML generation via
    ``Converter.convert_to_omml``.

    Returns:
        bool: False as soon as a stage fails (spaced specifier survives,
        empty output, or an exception); True when all stages complete.
    """
    print("=" * 80)
    print("Testing Problematic Array (from error log)")
    print("=" * 80)

    converter = Converter()
    spaced_spec = "{c c c c}"
    compact_spec = "{cccc}"

    # The exact LaTeX from the error log
    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""

    print(f"\nLaTeX length: {len(latex)} characters")
    # Plain interpolation: a backslash-escaped literal inside the replacement
    # field is a SyntaxError before Python 3.12.
    print(f"Contains '{spaced_spec}': {spaced_spec in latex}")

    # Test 1: Preprocessing
    print("\n" + "-" * 80)
    print("Test 1: Preprocessing")
    print("-" * 80)

    preprocessed = converter._preprocess_formula_for_conversion(latex)

    if spaced_spec in preprocessed:
        start = preprocessed.find(spaced_spec)
        print("✗ FAILED: Spaces NOT removed from array specifiers")
        print(f" Still found: {preprocessed[start:start + 15]}")
        return False
    elif compact_spec in preprocessed:
        print("✓ SUCCESS: Spaces removed from array specifiers")
        print(f" '{spaced_spec}' → '{compact_spec}'")
    else:
        print("? WARNING: Could not verify specifier fix")

    # Test 2: MathML Conversion
    print("\n" + "-" * 80)
    print("Test 2: MathML Conversion (via convert_to_formats)")
    print("-" * 80)

    try:
        result = converter.convert_to_formats(f"$${latex}$$")

        if result.mathml:
            print(f"✓ SUCCESS: MathML generated ({len(result.mathml)} chars)")

            # Check for Word compatibility
            if 'display="block"' in result.mathml:
                print(" ✓ Has display='block' (Word-friendly)")

            # NOTE(review): the check as found tested for the literal
            # characters '+' and '=', which this formula always contains, so
            # the line below could never print. Presumably the numeric
            # character references were meant — confirm the exact spellings
            # against the converter's entity handling.
            encoded_refs = ("&#x0002B;", "&#x0003D;")
            if not any(ref in result.mathml for ref in encoded_refs):
                print(" ✓ No problematic Unicode entities")

            print(f"\n MathML preview:\n {result.mathml[:200]}...")
        else:
            print("✗ FAILED: No MathML generated")
            return False

    except Exception as e:
        print(f"✗ FAILED: MathML conversion error: {e}")
        return False

    # Test 3: OMML Conversion
    print("\n" + "-" * 80)
    print("Test 3: OMML Conversion")
    print("-" * 80)

    try:
        omml = converter.convert_to_omml(latex)

        if omml:
            print(f"✓ SUCCESS: OMML generated ({len(omml)} chars)")

            if 'oMath' in omml:
                print(" ✓ Valid OMML structure")

            print(f"\n OMML preview:\n {omml[:200]}...")
        else:
            print("✗ FAILED: No OMML generated")
            return False

    except Exception as e:
        print(f"✗ FAILED: OMML conversion error: {e}")
        return False

    print("\n" + "=" * 80)
    print("✓✓✓ ALL CONVERSION PATHS WORKING ✓✓✓")
    print("=" * 80)

    return True
|
||||
|
||||
|
||||
def test_simple_arrays():
    """Convert several small arrays whose column specifiers contain spaces.

    For each case the preprocessed text is scanned for a still-spaced
    specifier, then the formula is converted with ``convert_to_formats``.

    Returns:
        bool: True only when every case produced both MathML and MML and no
        spaced specifier survived preprocessing.
    """
    print("\n" + "=" * 80)
    print("Testing Simple Arrays")
    print("=" * 80)

    converter = Converter()

    test_cases = [
        ("2x2 array", r"\begin{array}{c c} a & b \\ c & d \end{array}"),
        ("3x3 array", r"\begin{array}{c c c} 1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9 \end{array}"),
        ("Array with pipes", r"\left| \begin{array}{c c} a & b \\ c & d \end{array} \right|"),
        ("Mixed alignment", r"\begin{array}{l r c} left & right & center \end{array}"),
    ]

    # Spaced specifiers that must not survive preprocessing.
    spaced_specs = ("{c c}", "{c c c}", "{l r c}")

    all_passed = True

    for name, latex in test_cases:
        print(f"\n{name}")
        print("-" * 40)
        print(f"LaTeX: {latex}")

        # Check preprocessing
        preprocessed = converter._preprocess_formula_for_conversion(latex)
        has_spaces = any(spec in preprocessed for spec in spaced_specs)

        try:
            result = converter.convert_to_formats(f"${latex}$")

            if not (result.mathml and result.mml):
                print("✗ Conversion failed")
                all_passed = False
                continue

            status = "✗" if has_spaces else "✓"
            print(f"{status} MathML: {len(result.mathml)} chars, MML: {len(result.mml)} chars")

            if has_spaces:
                print(" ✗ Array specifiers still have spaces")
                all_passed = False
            else:
                print(" ✓ Array specifiers fixed")

        except Exception as exc:
            print(f"✗ Error: {exc}")
            all_passed = False

    return all_passed
|
||||
|
||||
|
||||
def test_conversion_consistency():
    """Check one formula mixing array, vmatrix and cases on all paths.

    The preprocessed text is inspected with three substring checks, then the
    formula is converted through ``convert_to_formats`` and
    ``convert_to_omml``.

    Returns:
        bool: True when neither conversion raised and every preprocessing
        check held.
    """
    rule = "-" * 80

    print("\n" + "=" * 80)
    print("Testing Conversion Consistency")
    print("=" * 80)

    converter = Converter()

    # Test formula with multiple issues
    latex = r"""
\left\{ \begin{array}{l c}
\begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\
\begin{cases} x & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{sign}
\end{array} \right.
""".strip()

    print("\nComplex formula with:")
    print(" - array with spaces: {l c}")
    print(" - vmatrix environment")
    print(" - cases environment")

    print("\n" + rule)
    print("Preprocessing check:")
    print(rule)

    preprocessed = converter._preprocess_formula_for_conversion(latex)

    checks = {
        "Array spaces removed": '{l c}' not in preprocessed and '{lc}' in preprocessed,
        "vmatrix converted": 'vmatrix' not in preprocessed,
        "cases converted": 'cases' not in preprocessed and 'array' in preprocessed,
    }

    for label in checks:
        mark = "✓" if checks[label] else "✗"
        print(f"{mark} {label}")

    print("\n" + rule)
    print("Conversion paths:")
    print(rule)

    conversions_ok = True

    # Test MathML
    try:
        result = converter.convert_to_formats(f"$${latex}$$")
        print(f"✓ MathML: {len(result.mathml)} chars")
        print(f"✓ MML: {len(result.mml)} chars")
    except Exception as exc:
        print(f"✗ MathML failed: {exc}")
        conversions_ok = False

    # Test OMML
    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ OMML: {len(omml)} chars")
    except Exception as exc:
        print(f"✗ OMML failed: {exc}")
        conversions_ok = False

    return conversions_ok and all(checks.values())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the three suites and print a summary table.
    print("=" * 80)
    print("COMPREHENSIVE ARRAY FIX TEST SUITE")
    print("Testing all conversion paths with preprocessing")
    print("=" * 80)

    try:
        test1 = test_problematic_array()
        test2 = test_simple_arrays()
        test3 = test_conversion_consistency()

        print("\n" + "=" * 80)
        print("FINAL SUMMARY")
        print("=" * 80)

        results = [
            ("Problematic array fix", test1),
            ("Simple arrays", test2),
            ("Conversion consistency", test3),
        ]

        for name, passed in results:
            print(f"{'✓ PASS' if passed else '✗ FAIL'}: {name}")

        all_passed = all(passed for _, passed in results)

        print("\n" + "-" * 80)

        if not all_passed:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
        else:
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nThe array column specifier fix is working in ALL conversion paths:")
            print(" • MathML conversion (for Word paste)")
            print(" • MML conversion (namespaced MathML)")
            print(" • OMML conversion (Word native)")

        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Last-resort boundary: report the error with its traceback.
        print(f"\n\nTest error: {e}")
        import traceback
        traceback.print_exc()
|
||||
@@ -1,57 +0,0 @@
|
||||
"""Test script for converter functionality."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_latex_only_conversion():
    """Print the conversion results for four representative inputs.

    Covers display math, inline math, a longer formula and a markdown
    document with embedded math. Nothing is asserted; the output is meant
    for manual inspection.
    """
    converter = Converter()

    # Test case 1: Display math with $$...$$
    latex_input = "$$E = mc^2$$"
    result = converter.convert_to_formats(latex_input)

    print("Test 1: Display math ($$...$$)")
    print(f"Input: {latex_input}")
    print(f"LaTeX: {result.latex}")
    print(f"MathML: {result.mathml[:100]}...")
    print(f"MML: {result.mml[:100]}...")
    print(f"OMML: {result.omml[:100] if result.omml else 'Empty'}...")
    print()

    # Test case 2: Inline math with $...$
    latex_input2 = "$\\frac{a}{b}$"
    result2 = converter.convert_to_formats(latex_input2)

    print("Test 2: Inline math ($...$)")
    print(f"Input: {latex_input2}")
    print(f"LaTeX: {result2.latex}")
    print(f"MathML: {result2.mathml[:100]}...")
    print()

    # Test case 3: Complex formula
    latex_input3 = "$$\\int_{0}^{\\infty} e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}$$"
    result3 = converter.convert_to_formats(latex_input3)

    print("Test 3: Complex formula")
    print(f"Input: {latex_input3}")
    print(f"LaTeX: {result3.latex}")
    print(f"MathML: {result3.mathml[:150]}...")
    # Test 1 already treats omml as possibly empty/None; guard here too so a
    # missing OMML reports length 0 instead of raising TypeError.
    print(f"OMML length: {len(result3.omml) if result3.omml else 0}")
    print()

    # Test case 4: Regular markdown (not LaTeX-only)
    markdown_input = "# Hello\n\nThis is a test with math: $x = 2$"
    result4 = converter.convert_to_formats(markdown_input)

    print("Test 4: Regular markdown")
    print(f"Input: {markdown_input}")
    print(f"LaTeX: {result4.latex[:100]}...")
    print(f"MathML: {result4.mathml[:100]}...")
    print(f"MML: {result4.mml}")
    print(f"OMML: {result4.omml}")
    print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: print the conversion report.
    test_latex_only_conversion()
|
||||
@@ -1,95 +0,0 @@
|
||||
"""对比测试:展示 MathML 简化前后的差异."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def compare_simplification():
    """Print a before/after comparison of MathML simplification.

    A hard-coded sample of verbose MathML is shown next to the converter's
    current output for the same formula, followed by the relative size
    reduction, a fixed list of removed redundancies and a few more examples.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    # Sample MathML before simplification (typical Pandoc output)
    before_example = '''<math display="inline" xmlns="http://www.w3.org/1998/Math/MathML">
<semantics>
<mrow>
<mi>γ</mi>
<mo form="infix">=</mo>
<mn>22</mn>
<mo form="infix">.</mo>
<mn>2</mn>
<mo form="infix" separator="true">,</mo>
<mi>c</mi>
<mo form="infix">=</mo>
<mn>30</mn>
<mo form="infix">.</mo>
<mn>4</mn>
</mrow>
<annotation encoding="application/x-tex">\\gamma = 22.2, c = 30.4</annotation>
</semantics>
</math>'''

    # Run the real conversion
    converter = Converter()
    result = converter.convert_to_formats(r"$\gamma = 22.2, c = 30.4$")

    print(heavy_rule)
    print("MathML 简化效果对比")
    print(heavy_rule)

    print("\n【简化前(典型 Pandoc 输出)】")
    print(f"长度: {len(before_example)} 字符")
    print(before_example)

    print("\n" + light_rule)

    print("\n【简化后(当前输出)】")
    print(f"长度: {len(result.mathml)} 字符")
    print(result.mathml)

    print("\n" + light_rule)

    # Relative size reduction, in percent
    before_len = len(before_example)
    after_len = len(result.mathml)
    reduction = ((before_len - after_len) / before_len) * 100
    print(f"\n📊 大小减少: {reduction:.1f}%")

    # List the redundant pieces that get removed
    print("\n✅ 已移除的冗余:")
    removed = (
        "<semantics> 包装器",
        "<annotation> 元素",
        'form="infix" 属性',
        'form="prefix" 属性',
        'form="postfix" 属性',
        'separator="true" 属性',
        'stretchy="true" 属性',
        'fence="true" 属性',
        'columnalign 属性',
        'columnspacing 属性',
        '不必要的空白',
        'display="inline" → display="block"',
        'Unicode 实体 → 实际字符',
    )
    for entry in removed:
        print(f" • {entry}")

    print("\n" + heavy_rule)

    # More examples
    test_cases = [
        (r"\frac{a}{b}", "分数"),
        (r"x^{2} + y^{2} = r^{2}", "幂次"),
        (r"\sqrt{a + b}", "根号"),
        (r"\left| \frac{a}{b} \right|", "括号和分数"),
    ]

    print("\n更多示例:")
    print(heavy_rule)

    for latex, desc in test_cases:
        converted = converter.convert_to_formats(f"${latex}$")
        print(f"\n{desc}: ${latex}$")
        print(f"长度: {len(converted.mathml)} 字符")
        suffix = "..." if len(converted.mathml) > 200 else ""
        print(converted.mathml[:200] + suffix)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: print the before/after comparison.
    compare_simplification()
|
||||
@@ -1,55 +0,0 @@
|
||||
"""Test MathML simplification."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def show_current_output():
    """Print the current MathML for a few formulas and flag redundancies.

    Each formula is converted with ``convert_to_formats``; the MathML is
    printed and scanned for a handful of substrings that indicate verbose
    markup.
    """
    converter = Converter()

    test_cases = [
        (r"\gamma = 22.2", "简单公式"),
        (r"\frac{a}{b}", "分数"),
        (r"x^{2} + y^{2}", "上标"),
        (r"\sqrt{a + b}", "根号"),
    ]

    print("=" * 80)
    print("当前 MathML 输出分析")
    print("=" * 80)

    for latex, desc in test_cases:
        print(f"\n{desc}: ${latex}$")
        print("-" * 80)

        mathml = converter.convert_to_formats(f"${latex}$").mathml

        print(f"长度: {len(mathml)} 字符")
        print(f"\n{mathml}\n")

        # Look for redundant markup
        redundancies = []

        mrow_count = mathml.count('<mrow>')
        if mrow_count > 1:
            redundancies.append(f"多层 <mrow> 嵌套 ({mrow_count} 个)")

        if 'columnalign="center"' in mathml:
            redundancies.append("columnalign 属性(可能不必要)")

        if any(attr in mathml for attr in ('form="prefix"', 'form="postfix"')):
            redundancies.append("form 属性(可简化)")

        if 'stretchy="true"' in mathml:
            redundancies.append("stretchy 属性(可简化)")

        if not redundancies:
            print("✓ 已经很简洁")
            continue

        print("可能的冗余:")
        for finding in redundancies:
            print(f" • {finding}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: print the MathML analysis.
    show_current_output()
|
||||
@@ -1,236 +0,0 @@
|
||||
"""Diagnostic tool for MathML Word compatibility issues."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def diagnose_mathml(latex: str) -> dict:
    """Convert *latex* to MathML and print a Word-compatibility report.

    Args:
        latex: LaTeX formula to convert, without ``$`` delimiters (they are
            added here before calling ``convert_to_formats``).

    Returns:
        ``{"success": False, "error": <message>}`` when the conversion
        raises; otherwise a dict with the keys ``success``, ``mathml``,
        ``issues`` (list of issue tags found) and ``length``.
    """
    converter = Converter()

    print("=" * 80)
    print("MathML Word Compatibility Diagnostic")
    print("=" * 80)

    print(f"\nInput LaTeX: {latex}")

    # Convert
    try:
        result = converter.convert_to_formats(f"${latex}$")
        mathml = result.mathml

        print("\n✓ Conversion successful")
        print(f"MathML length: {len(mathml)} characters")

    except Exception as e:
        print(f"\n✗ Conversion failed: {e}")
        return {"success": False, "error": str(e)}

    # Diagnostic checks
    print("\n" + "-" * 80)
    print("Word Compatibility Checks:")
    print("-" * 80)

    issues = []

    # Check 1: Has proper namespace
    if 'xmlns="http://www.w3.org/1998/Math/MathML"' in mathml:
        print("✓ Has correct MathML namespace")
    else:
        print("✗ Missing or incorrect MathML namespace")
        issues.append("namespace")

    # Check 2: Display attribute
    if 'display="block"' in mathml:
        print("✓ Has display='block' attribute")
    elif 'display="inline"' in mathml:
        print("⚠ Has display='inline' (Word prefers 'block')")
        issues.append("display_inline")
    else:
        print("✗ Missing display attribute")
        issues.append("no_display")

    # Check 3: Check for problematic elements
    if '<semantics>' in mathml:
        print("⚠ Contains <semantics> element")
        print(" Note: Word may ignore semantics wrapper")
        issues.append("semantics")

    if '<annotation' in mathml:
        print("⚠ Contains <annotation> element")
        print(" Note: Word doesn't need annotation, may cause issues")
        issues.append("annotation")

    # Check 4: Encoded entities. The entity spellings must be matched, not
    # the raw characters: '<' and '>' occur in every tag, so testing for them
    # would report "entities" for any MathML document.
    problematic_entities = ['&#x', '&gt;', '&lt;', '&amp;']
    has_entities = any(entity in mathml for entity in problematic_entities)
    if has_entities:
        print("⚠ Contains encoded entities (Word prefers actual characters)")
        issues.append("entities")
    else:
        print("✓ No problematic entities")

    # Check 5: Root element structure
    if mathml.startswith('<math'):
        print("✓ Starts with <math> element")
    else:
        print("✗ Doesn't start with <math> element")
        issues.append("no_math_root")

    # Check 6: Attributes reported for information only (not recorded as issues)
    if 'class=' in mathml:
        print("⚠ Contains 'class' attribute (Word ignores these)")

    if 'style=' in mathml:
        print("⚠ Contains 'style' attribute (Word ignores these)")

    # Print MathML structure
    print("\n" + "-" * 80)
    print("MathML Structure:")
    print("-" * 80)

    # Show the first 500 chars, plus the tail when the document is longer
    print(mathml[:500])
    if len(mathml) > 500:
        print("...")
        print(mathml[-200:])

    # Recommendations
    print("\n" + "-" * 80)
    print("Recommendations:")
    print("-" * 80)

    if not issues:
        print("✓ MathML appears to be Word-compatible!")
        print("\nHow to paste into Word:")
        print(" 1. Copy the MathML XML")
        print(" 2. In Word: Insert → Equation → Ink Equation")
        print(" 3. Right-click the equation → 'Professional'")
        print(" 4. Right-click again → 'Save as new equation'")
        print("\nOR use Alt text method:")
        print(" 1. Insert → Equation")
        print(" 2. Type any formula")
        print(" 3. Right-click → Edit Alt Text")
        print(" 4. Paste MathML in Alt Text field")
    else:
        print("Issues found:")
        if "semantics" in issues or "annotation" in issues:
            print("\n1. Remove <semantics> and <annotation> wrappers")
            print(" Word only needs the <mrow> content inside")

        if "display_inline" in issues:
            print("\n2. Change display='inline' to display='block'")

        if "entities" in issues:
            print("\n3. Decode HTML entities to actual characters")

        if "namespace" in issues:
            print("\n4. Add xmlns='http://www.w3.org/1998/Math/MathML'")

    return {
        "success": True,
        "mathml": mathml,
        "issues": issues,
        "length": len(mathml)
    }
|
||||
|
||||
|
||||
def test_simple_formula():
    """Run the diagnostic on a plain fraction."""
    print("\nTest 1: Simple formula")
    formula = r"\frac{a}{b}"
    diagnose_mathml(formula)
|
||||
|
||||
|
||||
def test_complex_formula():
    """Run the diagnostic on a 2x2 array wrapped in vertical bars."""
    print("\n\nTest 2: Complex formula with matrix")
    formula = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"
    diagnose_mathml(formula)
|
||||
|
||||
|
||||
def test_problematic_formula():
    """Run the diagnostic on the user-reported formula (numbers already fixed)."""
    print("\n\nTest 3: User's formula (after OCR fix)")
    formula = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"
    diagnose_mathml(formula)
|
||||
|
||||
|
||||
def generate_clean_mathml():
    """Convert a sample formula and print MathML without a semantics wrapper.

    When the converter output contains ``<semantics>`` and the text between
    it and ``<annotation`` can be extracted, that content is re-wrapped in a
    fresh ``<math display="block">`` element.

    Returns:
        str: the rebuilt MathML when the wrapper was stripped, otherwise the
        converter's MathML unchanged.
    """
    print("\n" + "=" * 80)
    print("Generating Clean MathML for Word")
    print("=" * 80)

    converter = Converter()
    latex = r"\gamma = 22.2, c = 30.4, \phi = 25.4 ^ {\circ}"

    mathml = converter.convert_to_formats(f"${latex}$").mathml

    # Needed only for stripping the wrapper below.
    import re

    if '<semantics>' in mathml:
        print("\n⚠ Original has <semantics> wrapper")

        # Capture everything between <semantics> and the first <annotation.
        inner = re.search(r'<semantics>(.*?)<annotation', mathml, re.DOTALL)
        if inner is not None:
            content = inner.group(1).strip()

            # Rebuild without semantics
            clean_mathml = (
                '<math display="block" xmlns="http://www.w3.org/1998/Math/MathML">'
                + content
                + '</math>'
            )

            print("\nCleaned MathML (without semantics):")
            print("-" * 80)
            print(clean_mathml)

            print("\n✓ Try pasting this version into Word")
            return clean_mathml

    print("\nGenerated MathML:")
    print("-" * 80)
    print(mathml)

    return mathml
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the three diagnostics, then print guidance.
    print("MathML Word Compatibility Diagnostic Tool\n")

    try:
        for diagnostic in (test_simple_formula, test_complex_formula, test_problematic_formula):
            diagnostic()

        print("\n\n")
        clean = generate_clean_mathml()

        print("\n" + "=" * 80)
        print("SUMMARY")
        print("=" * 80)

        print("\nCommon reasons MathML doesn't work in Word:")
        for reason in (
            " 1. <semantics> wrapper - Word may not parse it correctly",
            " 2. <annotation> element - Word doesn't need it",
            " 3. HTML entities - Word prefers actual Unicode characters",
            " 4. Missing xmlns attribute",
            " 5. Wrong paste location in Word",
        ):
            print(reason)

        print("\nBest practice for Word:")
        for practice in (
            " • Use simple MathML without semantics wrapper",
            " • Include xmlns attribute",
            " • Use display='block'",
            " • Use actual characters, not entities",
        ):
            print(practice)

        print("\n" + "=" * 80)

    except Exception as e:
        # Last-resort boundary: report the error with its traceback.
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
|
||||
@@ -1,105 +0,0 @@
|
||||
"""Quick test to verify MinerU postprocessing is enabled."""
|
||||
|
||||
from app.services.ocr_service import _postprocess_markdown
|
||||
|
||||
|
||||
def test_mineru_postprocessing():
    """Run ``_postprocess_markdown`` on a sample with split-up numbers.

    The sample contains numbers such as ``2 2. 2``; the result is checked
    for the joined forms and for the absence of the split digit pairs.

    Returns:
        bool: True when every substring check passes.
    """
    print("=" * 80)
    print("Testing MinerU Postprocessing")
    print("=" * 80)

    # Simulate MinerU OCR output (with number errors)
    mineru_markdown = r"""$$
\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}
$$"""

    print("\nMinerU OCR Output (raw):")
    print(mineru_markdown)

    # Apply postprocessing
    fixed = _postprocess_markdown(mineru_markdown)

    print("\nAfter Postprocessing:")
    print(fixed)

    print("\n" + "-" * 80)
    print("Verification:")
    print("-" * 80)

    checks = [
        ("Has '22.2'", "22.2" in fixed),
        ("Has '30.4'", "30.4" in fixed),
        ("Has '25.4'", "25.4" in fixed),
        ("No '2 2'", "2 2" not in fixed),
        ("No '3 0'", "3 0" not in fixed),
        ("No '2 5'", "2 5" not in fixed),
    ]

    for label, ok in checks:
        mark = "✓" if ok else "✗"
        print(f"{mark} {label}")

    all_passed = all(ok for _, ok in checks)

    if all_passed:
        print("\n✓✓✓ MinerU postprocessing is working! ✓✓✓")
    else:
        print("\n✗✗✗ MinerU postprocessing has issues ✗✗✗")

    return all_passed
|
||||
|
||||
|
||||
def test_expected_api_response():
    """Print a sample markdown string before and after postprocessing.

    Nothing is asserted; the output shows the value the ``markdown`` field
    is expected to carry once ``_postprocess_markdown`` has been applied.
    """
    rule = "-" * 80

    print("\n" + "=" * 80)
    print("Expected API Response Format")
    print("=" * 80)

    ocr_output = r"$$\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}$$"
    fixed = _postprocess_markdown(ocr_output)

    print("\nBefore postprocessing:")
    print(f" markdown: {ocr_output}")

    print("\nAfter postprocessing (what API should return):")
    print(f" markdown: {fixed}")

    print("\nExpected changes:")
    for change in (
        " • '2 2. 2' → '22.2'",
        " • '3 0. 4' → '30.4'",
        " • '2 5. 4' → '25.4'",
    ):
        print(change)

    print("\n" + rule)
    print("Note: The API should return the FIXED markdown")
    print(" All other formats (latex, mathml, mml) are derived from this")
    print(rule)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the check, show the expected response, summarize.
    print("MinerU Postprocessing Verification\n")

    try:
        test1 = test_mineru_postprocessing()
        test_expected_api_response()

        print("\n" + "=" * 80)

        if not test1:
            print("✗ There may still be issues")
        else:
            print("✓ MinerU postprocessing is NOW ENABLED")
            print("\nNext steps:")
            print(" 1. Restart the server")
            print(" 2. Test with the same request")
            print(" 3. The markdown field should now have '22.2' instead of '2 2. 2'")

        print("=" * 80)

    except Exception as e:
        # Last-resort boundary: report the error with its traceback.
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
|
||||
@@ -1,294 +0,0 @@
|
||||
"""Test OCR number error fixing."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_ocr_number_errors():
    """Exercise ``Converter._fix_ocr_number_errors`` on split-up numbers.

    Each case lists substrings that must appear in the fixed text and
    substrings that must be gone; one report line is printed per check.

    Returns:
        bool: True when every expected substring is present and every
        forbidden substring is absent, across all cases.
    """
    print("=" * 80)
    print("Testing OCR Number Error Fixes")
    print("=" * 80)

    converter = Converter()

    # (name, input LaTeX, substrings that must appear, substrings that must not)
    test_cases = [
        (
            "Original error case",
            r"\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}",
            ["22.2", "30.4", "25.4"],
            ["2 2", "3 0", "2 5"],
        ),
        (
            "Simple decimal with space",
            r"x = 3. 14",
            ["3.14"],
            ["3. 14"],
        ),
        (
            "Multiple decimals",
            r"a = 1 2. 5, b = 9. 8 7",
            ["12.5", "9.87"],
            ["1 2", "9. 8"],
        ),
        (
            "Large numbers with spaces",
            r"n = 1 5 0, m = 2 0 0 0",
            ["150", "2000"],
            ["1 5", "2 0 0"],
        ),
        (
            "Don't merge across operators",
            r"2 + 3 = 5",
            ["2 + 3 = 5"],  # Should stay the same
            ["23=5"],
        ),
    ]

    all_passed = True

    for number, (name, latex, wanted, unwanted) in enumerate(test_cases, 1):
        print(f"\nTest {number}: {name}")
        print("-" * 80)
        print(f"Input: {latex}")

        # Apply fix
        fixed = converter._fix_ocr_number_errors(latex)
        print(f"Fixed: {fixed}")

        report = []

        for expected in wanted:
            if expected not in fixed:
                report.append(f"✗ Missing '{expected}'")
                all_passed = False
            else:
                report.append(f"✓ Contains '{expected}'")

        for should_not in unwanted:
            if should_not in fixed:
                report.append(f"✗ Still has '{should_not}'")
                all_passed = False
            else:
                report.append(f"✓ Removed '{should_not}'")

        for line in report:
            print(f" {line}")

    return all_passed
|
||||
|
||||
|
||||
def test_mathml_quality():
    """Test that fixed LaTeX produces better MathML.

    Converts a formula containing OCR-style split decimals and inspects the
    generated MathML for merged numbers, a namespace and a display attribute.

    Returns:
        bool: True when every quality check passes; False when any check
        fails or when the converter produced no MathML at all.
    """
    print("\n" + "=" * 80)
    print("Testing MathML Quality After OCR Fix")
    print("=" * 80)

    converter = Converter()

    # The problematic LaTeX from the error
    latex = r"\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}"

    print(f"\nOriginal LaTeX: {latex}")

    # Convert to MathML
    result = converter.convert_to_formats(f"${latex}$")
    mathml = result.mathml

    # Without this guard len()/`in` below would raise on a missing result
    # instead of reporting a clean failure.
    if not mathml:
        print("✗ No MathML generated")
        return False

    print(f"\nMathML length: {len(mathml)} chars")

    # Check quality indicators
    print("\nQuality checks:")
    print("-" * 80)

    # A substring test for the bare number already covers the "<mn>…</mn>"
    # form, so one membership check per number is sufficient.
    checks = {
        "No separate digits for decimals": "22.2" in mathml,
        "No dot as identifier": "<mi>.</mi>" not in mathml,
        "Properly formatted numbers": "30.4" in mathml,
        "Has namespace": 'xmlns=' in mathml,
        "Display block": 'display="block"' in mathml,
    }

    for check, passed in checks.items():
        status = "✓" if passed else "✗"
        print(f"{status} {check}")

    all_passed = all(checks.values())

    # Show a preview
    print("\n" + "-" * 80)
    print("MathML preview:")
    print("-" * 80)
    print(mathml[:400])
    if len(mathml) > 400:
        print("...")

    return all_passed
|
||||
|
||||
|
||||
def test_edge_cases():
    """Test edge cases for OCR number fixing.

    Each case feeds one LaTeX snippet through
    ``Converter._fix_ocr_number_errors`` and checks the output against one of
    three expectations:

    * ``should_stay``   - output must equal this string exactly;
    * ``should_become`` - output must contain this substring;
    * ``should_have``   - output must contain every listed substring
      (a single string is treated as a one-element list).

    Returns:
        bool: True when every expectation holds, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Testing Edge Cases")
    print("=" * 80)

    converter = Converter()

    test_cases = [
        {
            "name": "Should NOT merge: arithmetic",
            "input": r"2 + 3 = 5",
            "should_stay": "2 + 3 = 5",
        },
        {
            "name": "Should NOT merge: multiplication",
            "input": r"2 \times 3",
            "should_stay": r"2 \times 3",
        },
        {
            "name": "Should merge: decimal at end",
            "input": r"x = 1 2. 5",
            "should_become": "12.5",
        },
        {
            "name": "Should merge: multiple spaces",
            "input": r"n = 1 2 . 3 4",
            "should_have": "12.34",
        },
        {
            "name": "Complex: mixed scenarios",
            "input": r"a = 1 2. 3 + 4 5. 6 - 7",
            "should_have": ["12.3", "45.6", "- 7"],
        },
    ]

    all_passed = True

    for test in test_cases:
        print(f"\n{test['name']}")
        print(f" Input: {test['input']}")

        fixed = converter._fix_ocr_number_errors(test['input'])
        print(f" Output: {fixed}")

        if 'should_stay' in test:
            if fixed == test['should_stay']:
                print(f" ✓ Correctly unchanged")
            else:
                print(f" ✗ Should stay '{test['should_stay']}' but got '{fixed}'")
                all_passed = False

        if 'should_become' in test:
            if test['should_become'] in fixed:
                print(f" ✓ Contains '{test['should_become']}'")
            else:
                print(f" ✗ Should contain '{test['should_become']}'")
                all_passed = False

        if 'should_have' in test:
            expected_items = test['should_have']
            # A bare string would otherwise be iterated character by
            # character, making the check pass for almost any output.
            if isinstance(expected_items, str):
                expected_items = [expected_items]

            for expected in expected_items:
                if expected in fixed:
                    print(f" ✓ Contains '{expected}'")
                else:
                    print(f" ✗ Should contain '{expected}'")
                    all_passed = False

    return all_passed
|
||||
|
||||
|
||||
def compare_before_after():
    """Compare MathML before and after OCR fix.

    Converts an OCR-damaged formula and its hand-corrected counterpart, then
    reports whether each MathML output contains the merged decimal ``22.2``
    and whether it contains a stray ``<mi>.</mi>`` element.

    Returns:
        bool: True when the OCR-damaged input yields the merged decimal and
        no stray dot identifier, False otherwise.
    """
    banner = "=" * 80
    rule = "-" * 80

    print("\n" + banner)
    print("Before/After Comparison")
    print(banner)

    converter = Converter()

    # Simulate OCR error
    ocr_latex = r"\gamma = 2 2. 2, c = 3 0. 4"
    correct_latex = r"\gamma = 22.2, c = 30.4"

    print(f"\nOCR LaTeX: {ocr_latex}")
    print(f"Correct LaTeX: {correct_latex}")

    # Convert both
    ocr_mathml = converter.convert_to_formats(f"${ocr_latex}$").mathml
    reference_mathml = converter.convert_to_formats(f"${correct_latex}$").mathml

    print("\n" + rule)
    print("MathML comparison:")
    print(rule)

    # Check if they produce similar quality output
    decimal, stray_dot = "22.2", "<mi>.</mi>"
    ocr_has_decimal = decimal in ocr_mathml
    correct_has_decimal = decimal in reference_mathml
    ocr_has_dot_error = stray_dot in ocr_mathml
    correct_has_dot_error = stray_dot in reference_mathml

    def tick(flag):
        return '✓' if flag else '✗'

    def dot_report(flag):
        return '✗ Yes' if flag else '✓ No'

    print(f"OCR output has proper decimals: {tick(ocr_has_decimal)}")
    print(f"Correct output has proper decimals: {tick(correct_has_decimal)}")
    print(f"OCR output has dot errors: {dot_report(ocr_has_dot_error)}")
    print(f"Correct output has dot errors: {dot_report(correct_has_dot_error)}")

    if not ocr_has_decimal or ocr_has_dot_error:
        print("\n✗ OCR fix may need improvement.")
        return False

    print("\n✓ OCR fix is working! Output quality matches correct input.")
    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the four OCR number-fix suites in order and
    # print a pass/fail summary.
    print("OCR Number Error Fix Test Suite\n")

    try:
        suites = (
            ("OCR error fixes", test_ocr_number_errors),
            ("MathML quality", test_mathml_quality),
            ("Edge cases", test_edge_cases),
            ("Before/after comparison", compare_before_after),
        )
        # Every suite runs to completion before the summary is printed.
        results = [(label, run()) for label, run in suites]

        print("\n" + "=" * 80)
        print("SUMMARY")
        print("=" * 80)

        for label, outcome in results:
            verdict = "✓ PASS" if outcome else "✗ FAIL"
            print(f"{verdict}: {label}")

        print("\n" + "-" * 80)

        if all(outcome for _, outcome in results):
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nOCR number errors are being fixed automatically!")
            print("Examples:")
            for example in (
                " • '2 2. 2' → '22.2'",
                " • '3 0. 4' → '30.4'",
                " • '1 5 0' → '150'",
            ):
                print(example)
        else:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")

        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Top-level boundary: report the failure with a full traceback.
        print(f"\n\nTest error: {e}")
        import traceback
        traceback.print_exc()
|
||||
@@ -1,265 +0,0 @@
|
||||
"""Test OCR number error fixing in the complete pipeline."""
|
||||
|
||||
from app.services.ocr_service import _postprocess_markdown
|
||||
|
||||
|
||||
def test_ocr_postprocessing():
    """Test that OCR postprocessing fixes number errors.

    Runs ``_postprocess_markdown`` on Markdown snippets with embedded LaTeX
    and checks that expected substrings appear, unwanted substrings are gone,
    or (for ``should_stay`` cases) that the text is returned unchanged.

    Returns:
        bool: True when every check on every case passes, False otherwise.
    """
    print("=" * 80)
    print("Testing OCR Postprocessing Pipeline")
    print("=" * 80)

    # Simulate OCR output with common errors
    test_cases = [
        {
            "name": "Inline formula with decimal errors",
            "input": r"The value is $\gamma = 2 2. 2$ and $c = 3 0. 4$.",
            "should_have": ["22.2", "30.4"],
            "should_not_have": ["2 2", "3 0"],
        },
        {
            "name": "Display formula with decimal errors",
            "input": r"$$\phi = 2 5. 4 ^ {\circ}$$",
            "should_have": ["25.4"],
            "should_not_have": ["2 5"],
        },
        {
            "name": "Multiple formulas",
            "input": r"$a = 1 2. 5$, $b = 9. 8 7$, and $c = 1 5 0$",
            "should_have": ["12.5", "9.87", "150"],
            "should_not_have": ["1 2", "9. 8", "1 5"],
        },
        {
            "name": "Mixed content (text + formulas)",
            "input": r"The equation $x = 3. 14$ is approximately pi. Then $y = 2 7. 3$.",
            "should_have": ["3.14", "27.3"],
            "should_not_have": ["3. 14", "2 7"],
        },
        {
            "name": "Normal arithmetic (should not be affected)",
            "input": r"$2 + 3 = 5$ and $10 - 7 = 3$",
            "should_stay": True,
        },
    ]

    failures = 0

    for index, case in enumerate(test_cases, 1):
        source = case['input']
        print(f"\nTest {index}: {case['name']}")
        print("-" * 80)
        print(f"Input: {source}")

        # Apply postprocessing
        output = _postprocess_markdown(source)
        print(f"Output: {output}")

        # Substrings that must be present after the fix
        for wanted in case.get('should_have', ()):
            if wanted in output:
                print(f" ✓ Contains '{wanted}'")
            else:
                print(f" ✗ Missing '{wanted}'")
                failures += 1

        # Substrings that must have disappeared
        for unwanted in case.get('should_not_have', ()):
            if unwanted in output:
                print(f" ✗ Still has '{unwanted}'")
                failures += 1
            else:
                print(f" ✓ Removed '{unwanted}'")

        # Inputs that must pass through untouched
        if case.get('should_stay'):
            if source == output:
                print(f" ✓ Correctly unchanged")
            else:
                print(f" ✗ Should not change but did")
                failures += 1

    return failures == 0
|
||||
|
||||
|
||||
def test_real_world_case():
    """Test the exact case from the error report.

    Passes the reported display formula through ``_postprocess_markdown`` and
    verifies that the three decimals are merged and the split digit pairs
    no longer occur.

    Returns:
        bool: True when all six checks pass, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Testing Real-World Error Case")
    print("=" * 80)

    # The exact input from the error report
    ocr_output = r"$$\gamma = 2 2. 2, c = 3 0. 4, \phi = 2 5. 4 ^ {\circ}$$"

    print(f"\nOCR Output (with errors):")
    print(f" {ocr_output}")

    # Apply postprocessing
    fixed = _postprocess_markdown(ocr_output)

    print(f"\nAfter Postprocessing:")
    print(f" {fixed}")

    # Check if fixed: merged numbers first, then the split forms that must be gone
    checks = {}
    for number in ("22.2", "30.4", "25.4"):
        checks[f"Has {number}"] = number in fixed
    for fragment in ("2 2", "3 0", "2 5"):
        checks[f"No '{fragment}'"] = fragment not in fixed

    print("\nQuality Checks:")
    print("-" * 80)

    for label, ok in checks.items():
        mark = "✓" if ok else "✗"
        print(f"{mark} {label}")

    all_passed = all(checks.values())

    if all_passed:
        print("\n✓ Real-world case fixed successfully!")
    else:
        print("\n✗ Real-world case still has issues")

    return all_passed
|
||||
|
||||
|
||||
def test_edge_cases():
    """Test edge cases to ensure we don't break valid formulas.

    Every input here is already valid; after ``_postprocess_markdown`` the
    text must be identical once all spaces are removed.

    Returns:
        bool: True when every formula keeps its structure, False otherwise.
    """
    print("\n" + "=" * 80)
    print("Testing Edge Cases")
    print("=" * 80)

    test_cases = [
        {
            "name": "Arithmetic operations",
            "input": r"$2 + 3 = 5$ and $10 - 7 = 3$",
            "should_stay": True,
        },
        {
            "name": "Multiplication",
            "input": r"$2 \times 3 = 6$",
            "should_stay": True,
        },
        {
            "name": "Exponents",
            "input": r"$x ^ 2 + y ^ 2 = r ^ 2$",
            "should_stay": True,
        },
        {
            "name": "Fractions",
            "input": r"$\frac{1}{2} + \frac{3}{4}$",
            "should_stay": True,
        },
        {
            "name": "Subscripts",
            "input": r"$x _ 1 + x _ 2$",
            "should_stay": True,
        },
    ]

    def squash(text):
        # Whitespace changes are tolerated; only the structure must survive.
        return text.replace(" ", "")

    all_passed = True

    for case in test_cases:
        original = case['input']
        print(f"\n{case['name']}")
        print(f" Input: {original}")

        output = _postprocess_markdown(original)
        print(f" Output: {output}")

        if not case.get('should_stay'):
            continue

        if squash(output) == squash(original):
            print(f" ✓ Structure preserved")
        else:
            print(f" ✗ Structure changed unexpectedly")
            all_passed = False

    return all_passed
|
||||
|
||||
|
||||
def test_performance():
    """Test performance with large content.

    Builds 100 lines of Markdown (two inline formulas each) and times a
    single ``_postprocess_markdown`` call over the whole text.

    Returns:
        bool: True when processing takes less than one second, False
        otherwise.
    """
    import time

    print("\n" + "=" * 80)
    print("Testing Performance")
    print("=" * 80)

    # Create a large markdown with many formulas; join() avoids repeated
    # string reallocation from += in a loop.
    large_content = "".join(
        f"Formula {i}: $x = {i} {i}. {i}$ and $y = {i*2} {i*2}. {i*2}$\n"
        for i in range(100)
    )

    print(f"\nContent size: {len(large_content)} characters")
    print("Number of formulas: ~200")

    # perf_counter() is monotonic and high-resolution, so the measurement is
    # not skewed by system clock adjustments the way time.time() can be.
    start = time.perf_counter()
    _postprocess_markdown(large_content)
    elapsed = time.perf_counter() - start

    print(f"Processing time: {elapsed*1000:.2f}ms")

    if elapsed < 1.0:
        print("✓ Performance is acceptable (< 1s)")
        return True

    print("✗ Performance may need optimization")
    return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the pipeline integration suites in order and
    # print a pass/fail summary.
    print("OCR Pipeline Integration Test Suite\n")

    try:
        suites = (
            ("OCR postprocessing", test_ocr_postprocessing),
            ("Real-world case", test_real_world_case),
            ("Edge cases", test_edge_cases),
            ("Performance", test_performance),
        )
        # Every suite runs to completion before the summary is printed.
        results = [(label, run()) for label, run in suites]

        print("\n" + "=" * 80)
        print("SUMMARY")
        print("=" * 80)

        for label, outcome in results:
            verdict = "✓ PASS" if outcome else "✗ FAIL"
            print(f"{verdict}: {label}")

        print("\n" + "-" * 80)

        if all(outcome for _, outcome in results):
            for line in (
                "✓✓✓ ALL TESTS PASSED ✓✓✓",
                "\nOCR number error fixing is integrated into the pipeline!",
                "\nFlow:",
                " 1. OCR recognizes image → produces Markdown with LaTeX",
                " 2. _postprocess_markdown() fixes number errors",
                " 3. Clean LaTeX is used for all conversions",
                "\nBenefits:",
                " • Fixed once at the source",
                " • All output formats benefit (MathML, MML, OMML)",
                " • Better performance (no repeated fixes)",
            ):
                print(line)
        else:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")

        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Top-level boundary: report the failure with a full traceback.
        print(f"\n\nTest error: {e}")
        import traceback
        traceback.print_exc()
|
||||
112
test_omml_api.py
112
test_omml_api.py
@@ -1,112 +0,0 @@
|
||||
"""Test script for OMML conversion API endpoint."""
|
||||
|
||||
import requests
|
||||
import json
|
||||
|
||||
|
||||
def test_latex_to_omml():
    """Test the /convert/latex-to-omml endpoint.

    Posts several LaTeX formulas to the locally running API and prints the
    HTTP status plus a preview of the returned OMML. Failures are reported
    on stdout; nothing is returned or raised for a failed request.
    """
    # Test cases
    test_cases = [
        {
            "name": "Simple fraction",
            "latex": "\\frac{a}{b}",
        },
        {
            "name": "Quadratic formula",
            "latex": "x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}",
        },
        {
            "name": "Integral",
            "latex": "\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}",
        },
        {
            "name": "Matrix",
            "latex": "\\begin{matrix} a & b \\\\ c & d \\end{matrix}",
        },
    ]

    base_url = "http://localhost:8000/api/v1/convert/latex-to-omml"
    json_headers = {"Content-Type": "application/json"}

    print("Testing OMML Conversion API")
    print("=" * 80)

    for number, case in enumerate(test_cases, 1):
        formula = case["latex"]
        print(f"\nTest {number}: {case['name']}")
        print("-" * 80)
        print(f"LaTeX: {formula}")

        try:
            response = requests.post(
                base_url,
                json={"latex": formula},
                headers=json_headers,
                timeout=10,
            )

            if response.status_code != 200:
                print(f"✗ Status: {response.status_code}")
                print(f"Error: {response.text}")
                continue

            omml = response.json().get("omml", "")

            print(f"✓ Status: {response.status_code}")
            print(f"OMML length: {len(omml)} characters")
            print(f"OMML preview: {omml[:150]}...")

        except requests.exceptions.RequestException as e:
            # Connection, timeout and other transport-level problems
            print(f"✗ Request failed: {e}")
        except Exception as e:
            print(f"✗ Error: {e}")

    print("\n" + "=" * 80)
|
||||
|
||||
|
||||
def test_invalid_input():
    """Test error handling with invalid input.

    Sends two malformed requests (empty ``latex`` and missing ``latex``
    field) to the conversion endpoint and prints the status code and JSON
    body of each response.

    Raises:
        requests.exceptions.RequestException: if a request fails or times
            out; the caller in ``__main__`` reports it.
    """
    print("\nTesting Error Handling")
    print("=" * 80)

    base_url = "http://localhost:8000/api/v1/convert/latex-to-omml"

    scenarios = [
        ("Empty LaTeX", {"latex": ""}),
        ("Missing LaTeX field", {}),
    ]

    for label, payload in scenarios:
        print(f"\nTest: {label}")
        response = requests.post(
            base_url,
            json=payload,
            headers={"Content-Type": "application/json"},
            # Same limit as test_latex_to_omml; without a timeout requests
            # waits indefinitely on an unresponsive server.
            timeout=10,
        )
        print(f"Status: {response.status_code}")
        print(f"Response: {response.json()}")

    print("\n" + "=" * 80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: needs the API server to be running locally.
    for line in (
        "OMML API Test Suite",
        "Make sure the API server is running on http://localhost:8000",
    ):
        print(line)
    print()

    try:
        for suite in (test_latex_to_omml, test_invalid_input):
            suite()
        print("\n✓ All tests completed!")

    except KeyboardInterrupt:
        print("\n\n✗ Tests interrupted by user")
    except Exception as e:
        # Top-level boundary: report anything the suites did not handle.
        print(f"\n✗ Test suite failed: {e}")
|
||||
@@ -1,218 +0,0 @@
|
||||
"""Comprehensive test for OMML conversion with preprocessing."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_case_1_array_with_spaces():
    """Test: Array with spaces in column specifier (the original issue).

    Returns:
        bool: True when the conversion and the follow-up preprocessing call
        complete without raising, False when any exception occurs.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("Test 1: Array with spaces in column specifier")
    print(separator)

    converter = Converter()

    # The problematic LaTeX from the error
    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""

    print(f"LaTeX length: {len(latex)} chars")
    print(f"Preview: {latex[:100]}...")

    try:
        omml = converter.convert_to_omml(latex)
        print(f"\n✓ SUCCESS: Converted to OMML")
        print(f"OMML length: {len(omml)} chars")

        if "oMath" in omml:
            print("✓ Valid OMML structure detected")

        # Check preprocessing worked
        preprocessed = converter._preprocess_formula_for_omml(latex)
        spaced_gone = "{c c c c}" not in preprocessed
        compact_present = "{cccc}" in preprocessed
        if spaced_gone and compact_present:
            print("✓ Array column specifiers fixed: '{c c c c}' → '{cccc}'")

        return True

    except Exception as e:
        print(f"\n✗ FAILED: {e}")
        return False
|
||||
|
||||
|
||||
def test_case_2_vmatrix():
    """Test: vmatrix environment conversion.

    Returns:
        bool: True when conversion and preprocessing run without raising,
        False when any exception occurs.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("Test 2: vmatrix environment")
    print(separator)

    converter = Converter()

    latex = r"\begin{vmatrix} a & b \\ c & d \end{vmatrix}"
    print(f"LaTeX: {latex}")

    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        # Check if vmatrix was converted
        preprocessed = converter._preprocess_formula_for_omml(latex)
        env_removed = "vmatrix" not in preprocessed
        if env_removed and r"\left|" in preprocessed:
            print("✓ vmatrix converted to \\left| ... \\right|")

        return True

    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
|
||||
|
||||
|
||||
def test_case_3_cases_environment():
    """Test: cases environment conversion.

    Returns:
        bool: True when conversion and preprocessing run without raising,
        False when any exception occurs.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("Test 3: cases environment")
    print(separator)

    converter = Converter()

    latex = r"f(x) = \begin{cases} x^2 & x \geq 0 \\ -x & x < 0 \end{cases}"
    print(f"LaTeX: {latex}")

    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        # Check if cases was converted to array
        preprocessed = converter._preprocess_formula_for_omml(latex)
        env_removed = "cases" not in preprocessed
        if env_removed and "array" in preprocessed:
            print("✓ cases converted to array environment")

        return True

    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
|
||||
|
||||
|
||||
def test_case_4_aligned_environment():
    """Test: aligned environment conversion.

    Returns:
        bool: True when conversion and preprocessing run without raising,
        False when any exception occurs.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("Test 4: aligned environment")
    print(separator)

    converter = Converter()

    latex = r"\begin{aligned} x + y &= 5 \\ 2x - y &= 1 \end{aligned}"
    print(f"LaTeX: {latex}")

    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        # Check if aligned was converted
        preprocessed = converter._preprocess_formula_for_omml(latex)
        env_removed = "aligned" not in preprocessed
        if env_removed and "array" in preprocessed:
            print("✓ aligned converted to array environment")

        # Either no ampersands remain or fewer than the input had
        fewer_markers = preprocessed.count("&") < latex.count("&")
        if "&" not in preprocessed or fewer_markers:
            print("✓ Alignment markers removed")

        return True

    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
|
||||
|
||||
|
||||
def test_case_5_simple_formula():
    """Test: Simple formula (should work without preprocessing).

    Returns:
        bool: True when the conversion completes without raising, False
        when any exception occurs.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("Test 5: Simple formula")
    print(separator)

    converter = Converter()

    latex = r"x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}"
    print(f"LaTeX: {latex}")

    try:
        converted = converter.convert_to_omml(latex)
        size = len(converted)
        print(f"✓ SUCCESS: Converted to OMML ({size} chars)")
        return True

    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
|
||||
|
||||
|
||||
def test_case_6_nested_structures():
    """Test: Nested structures with multiple issues.

    Returns:
        bool: True when conversion and preprocessing run without raising,
        False when any exception occurs.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("Test 6: Nested structures")
    print(separator)

    converter = Converter()

    latex = r"\left\{ \begin{array}{l c} \begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\ f(x) = \begin{cases} 1 & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{step function} \end{array} \right."
    print(f"LaTeX: {latex}")

    try:
        omml = converter.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        preprocessed = converter._preprocess_formula_for_omml(latex)
        print("\nPreprocessing applied:")

        # Report each environment that no longer appears after preprocessing
        for environment in ("vmatrix", "cases"):
            if environment not in preprocessed:
                print(f" ✓ {environment} converted")

        specifier_fixed = "{l c}" not in preprocessed and "{lc}" in preprocessed
        if specifier_fixed:
            print(" ✓ Array specifiers fixed")

        return True

    except Exception as e:
        print(f"✗ FAILED: {e}")
        return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the six OMML conversion cases and summarise.
    heavy_rule = "=" * 80
    print(heavy_rule)
    print("OMML CONVERSION TEST SUITE")
    print("Testing preprocessing and conversion")
    print(heavy_rule)

    results = []

    try:
        # The simple formula runs first, then the preprocessing-heavy cases.
        for label, case in (
            ("Simple formula", test_case_5_simple_formula),
            ("Array with spaces", test_case_1_array_with_spaces),
            ("vmatrix", test_case_2_vmatrix),
            ("cases", test_case_3_cases_environment),
            ("aligned", test_case_4_aligned_environment),
            ("Nested structures", test_case_6_nested_structures),
        ):
            results.append((label, case()))

        # Summary
        print("\n" + heavy_rule)
        print("TEST SUMMARY")
        print(heavy_rule)

        total = len(results)
        passed = len([outcome for _, outcome in results if outcome])

        for label, outcome in results:
            verdict = "✓ PASS" if outcome else "✗ FAIL"
            print(f"{verdict}: {label}")

        print("\n" + "-" * 80)
        print(f"Total: {passed}/{total} tests passed")

        if passed != total:
            print(f"\n✗✗✗ {total - passed} TESTS FAILED ✗✗✗")
        else:
            print("\n✓✓✓ ALL TESTS PASSED ✓✓✓")

        print(heavy_rule)

    except KeyboardInterrupt:
        print("\n\nTests interrupted by user")
    except Exception as e:
        # Top-level boundary: report the failure with a full traceback.
        print(f"\n\nTest suite error: {e}")
        import traceback
        traceback.print_exc()
|
||||
@@ -1,202 +0,0 @@
|
||||
"""Test Word-compatible MathML generation."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_mathml_word_compatibility():
    """Test that generated MathML is Word-compatible.

    Converts a determinant-style matrix and prints a report on the display
    attribute, numeric character entities, fence elements and namespace of
    the resulting MathML.

    Returns:
        bool: False when no MathML is produced, True otherwise. Individual
        check failures are printed but do not change the return value.
    """
    converter = Converter()

    print("=" * 80)
    print("Testing Word-Compatible MathML Generation")
    print("=" * 80)

    # Test case: Matrix with determinant (the problematic example)
    latex = r"""\left| \begin{array}{cccc} a_{11} & a_{12} & \dots & a_{1n} \\ \vdots & \vdots & & \vdots \\ a_{i1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a_{n1} & a_{n2} & \dots & a_{nn} \end{array} \right|"""

    print(f"\nLaTeX: {latex[:80]}...")
    print("\n" + "-" * 80)

    # Convert to formats
    result = converter.convert_to_formats(f"$${latex}$$")

    if not result.mathml:
        print("✗ No MathML generated")
        return False

    mathml = result.mathml

    print("Checking Word compatibility features:")
    print("-" * 80)

    # Check 1: Display attribute
    if 'display="block"' in mathml:
        print("✓ Has display='block' attribute")
    else:
        print("✗ Missing or wrong display attribute")
        print(f" Found: {mathml[:100]}...")

    # Check 2: No Unicode entities for common symbols.
    # The check must look for the numeric character references themselves
    # (+, …, ⋮, =, |). Searching for the literal characters would always
    # match, because "=" occurs in every MathML attribute.
    problematic_entities = [
        '&#x0002B;',
        '&#x02026;',
        '&#x022EE;',
        '&#x0003D;',
        '&#x0007C;',
    ]
    unicode_issues = [
        entity for entity in problematic_entities if entity in mathml
    ]

    if unicode_issues:
        print(f"✗ Contains Unicode entities: {unicode_issues}")
    else:
        print("✓ No problematic Unicode entities")

    # Check 3: Uses mfenced for brackets (Word-friendly)
    if '<mfenced' in mathml or '<mo fence="true"' in mathml or 'stretchy="true"' in mathml:
        print("✓ Uses fence elements")
    else:
        print("? No fence elements found (might be OK)")

    # Check 4: Has proper namespace
    if 'xmlns="http://www.w3.org/1998/Math/MathML"' in mathml:
        print("✓ Has MathML namespace")
    else:
        print("✗ Missing MathML namespace")

    # Show preview
    print("\n" + "-" * 80)
    print("MathML Preview (first 500 chars):")
    print("-" * 80)
    print(mathml[:500])
    if len(mathml) > 500:
        print("...")

    print("\n" + "-" * 80)
    print(f"Total length: {len(mathml)} characters")

    # Check if this looks like Pandoc-generated MathML
    if 'mfenced' in mathml or 'columnalign' in mathml:
        print("✓ Appears to be Pandoc-generated (good for Word)")
    elif 'stretchy' in mathml and 'fence' in mathml:
        print("✓ Uses standard fence attributes")
    else:
        print("? MathML structure unclear")

    return True
|
||||
|
||||
|
||||
def test_simple_formulas():
    """Test simple formulas for Word compatibility.

    Converts a handful of short formulas and checks each MathML result for
    a block display attribute, the absence of numeric character entities
    for "+" and "=", and a namespace declaration.

    Returns:
        bool: True when every formula passes every check, False when any
        check fails or a conversion raises.
    """
    converter = Converter()

    print("\n" + "=" * 80)
    print("Testing Simple Formulas")
    print("=" * 80)

    test_cases = [
        ("Fraction", r"\frac{a}{b}"),
        ("Square root", r"\sqrt{x^2 + y^2}"),
        ("Summation", r"\sum_{i=1}^{n} i"),
        ("Equation", r"E = mc^2"),
        ("Matrix", r"\begin{pmatrix} a & b \\ c & d \end{pmatrix}"),
    ]

    all_passed = True

    for name, latex in test_cases:
        print(f"\n{name}: ${latex}$")

        try:
            result = converter.convert_to_formats(f"${latex}$")
            mathml = result.mathml

            # Quick checks. The entity checks look for the numeric character
            # references; testing for a literal "=" would contradict the
            # 'xmlns=' requirement and fail on every valid document.
            checks = [
                ('display="block"' in mathml, "display=block"),
                ('&#x0002B;' not in mathml, "no +entity"),
                ('&#x0003D;' not in mathml, "no =entity"),
                ('xmlns=' in mathml, "namespace"),
            ]

            failed_checks = [label for ok, label in checks if not ok]
            status = "✗" if failed_checks else "✓"

            print(f" {status} Length: {len(mathml)} chars", end="")
            if failed_checks:
                print(f" | Issues: {', '.join(failed_checks)}")
                all_passed = False
            else:
                print(" | All checks passed")

        except Exception as e:
            # A failed conversion counts as a failed formula, not a crash.
            print(f" ✗ Error: {e}")
            all_passed = False

    return all_passed
|
||||
|
||||
|
||||
def compare_with_reference():
    """Compare our MathML with reference Word-compatible MathML.

    Converts a small 2x2 determinant and reports which structural features
    appear in the MathML. Every feature is expected to be present except
    "unicode entities", which is expected to be absent.

    Returns:
        bool: True when no numeric entity for "+", "=" or "|" is found.
    """
    print("\n" + "=" * 80)
    print("Comparison with Reference MathML")
    print("=" * 80)

    converter = Converter()

    # Simple matrix example
    latex = r"\left| \begin{array}{cc} a & b \\ c & d \end{array} \right|"

    our_mathml = converter.convert_to_formats(f"$${latex}$$").mathml

    print("\nOur MathML structure:")
    print("-" * 80)

    # Analyze structure
    has_fence_mo = '<mo fence="' in our_mathml or '<mo stretchy="true"' in our_mathml
    has_entities = any(
        f"&#x{x};" in our_mathml for x in ["0002B", "0003D", "0007C"]
    )
    features = {
        "mfenced": "<mfenced" in our_mathml,
        "mo fence": has_fence_mo,
        "mtable": "<mtable" in our_mathml,
        "display block": 'display="block"' in our_mathml,
        "unicode entities": has_entities,
    }

    print("Features:")
    for feature, present in features.items():
        # "unicode entities" is the one feature that should be absent
        unwanted = feature == "unicode entities"
        status = "✓" if present != unwanted else "✗"
        print(f" {status} {feature}: {present}")

    print(f"\nLength: {len(our_mathml)} characters")
    print(f"Preview:\n{our_mathml[:300]}...")

    return not features["unicode entities"]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the three Word-compatibility suites and
    # print an overall verdict.
    print("Word-Compatible MathML Test Suite\n")

    try:
        # All three suites run before the verdict is evaluated.
        outcomes = [
            test_mathml_word_compatibility(),
            test_simple_formulas(),
            compare_with_reference(),
        ]

        print("\n" + "=" * 80)
        print("SUMMARY")
        print("=" * 80)

        if all(outcomes):
            print("✓✓✓ ALL TESTS PASSED ✓✓✓")
            print("\nMathML should be Word-compatible!")
            print("Try copying the mathml output and pasting into Word.")
        else:
            print("✗✗✗ SOME TESTS FAILED ✗✗✗")
            print("\nMathML may not be fully Word-compatible.")

        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\nTests interrupted")
    except Exception as e:
        # Top-level boundary: report the failure with a full traceback.
        print(f"\n\nTest error: {e}")
        import traceback
        traceback.print_exc()
|
||||
Reference in New Issue
Block a user