fix: add preprocess
This commit is contained in:
@@ -217,6 +217,9 @@ class Converter:
|
||||
This is a separate method due to the performance overhead of OMML conversion,
|
||||
which requires creating a temporary DOCX file.
|
||||
|
||||
The formula is preprocessed using the same logic as export_to_file to ensure
|
||||
proper conversion.
|
||||
|
||||
Args:
|
||||
latex_formula: Pure LaTeX formula (without delimiters like $ or $$).
|
||||
|
||||
@@ -230,7 +233,37 @@ class Converter:
|
||||
if not latex_formula or not latex_formula.strip():
|
||||
raise ValueError("LaTeX formula cannot be empty")
|
||||
|
||||
return self._latex_to_omml(latex_formula.strip())
|
||||
# Preprocess formula using the same preprocessing as export
|
||||
preprocessed = self._preprocess_formula_for_omml(latex_formula.strip())
|
||||
|
||||
return self._latex_to_omml(preprocessed)
|
||||
|
||||
def _preprocess_formula_for_omml(self, latex_formula: str) -> str:
    """Rewrite a LaTeX formula before it is handed to the OMML converter.

    The formula is passed through four helper methods of this class, each
    consuming the previous helper's output, in this fixed order:

    1. ``_convert_matrix_environments``
    2. ``_fix_array_column_specifiers``
    3. ``_fix_brace_spacing``
    4. ``_convert_special_environments``

    NOTE(review): this is intended to mirror the preprocessing done for
    export (``preprocess_for_export``); that method is not visible here,
    so confirm the two stay in sync.

    Args:
        latex_formula: Pure LaTeX formula.

    Returns:
        The formula after all four rewriting passes.
    """
    # Pass 1: matrix environments.
    with_matrices = self._convert_matrix_environments(latex_formula)

    # Pass 2: array column specifiers (e.g. spaces between column letters).
    with_specifiers = self._fix_array_column_specifiers(with_matrices)

    # Pass 3: brace spacing.
    with_braces = self._fix_brace_spacing(with_specifiers)

    # Pass 4: special environments such as cases / aligned.
    return self._convert_special_environments(with_braces)
|
||||
|
||||
def _extract_latex_formula(self, text: str) -> str:
|
||||
"""Extract LaTeX formula from text by removing delimiters.
|
||||
|
||||
102
test_array_fix.py
Normal file
102
test_array_fix.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""Test script for array column specifier fix."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_array_specifier_fix():
    """Check the array column specifier fix on a nested determinant formula.

    Step 1 runs ``Converter._preprocess_formula_for_omml`` and reports
    whether the ``{c c c c}`` column specifiers still appear in the output.
    Step 2 runs ``Converter.convert_to_omml`` on the original formula and
    reports the length and a preview of the result. Progress is printed to
    stdout.

    Returns:
        True when the OMML conversion succeeds and the preprocessed formula
        no longer contains ``{c c c c}``; False when the conversion raises
        or the spaced specifier survives preprocessing. An inconclusive
        preprocessing result (neither ``{c c c c}`` nor ``{cccc}`` found)
        is reported but not counted as a failure.
    """
    converter = Converter()
    spaced_spec = "{c c c c}"

    # The problematic LaTeX from the error
    latex_formula = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""

    print("Testing array column specifier fix")
    print("=" * 80)
    print(f"\nOriginal LaTeX (first 200 chars):\n{latex_formula[:200]}...")

    # Test preprocessing
    print("\n" + "-" * 80)
    print("Step 1: Preprocessing")
    preprocessed = converter._preprocess_formula_for_omml(latex_formula)

    # Check if spaces were removed from array specifiers. The outcome is
    # remembered so a failed check cannot end in "All tests passed!".
    preprocessing_ok = True
    spec_index = preprocessed.find(spaced_spec)
    if spec_index != -1:
        preprocessing_ok = False
        print("✗ FAILED: Spaces not removed from array specifiers")
        print(f"Found: {preprocessed[spec_index:spec_index + 10]}")
    elif "{cccc}" in preprocessed:
        print("✓ SUCCESS: Spaces removed from array specifiers")
        # Plain string: a backslash inside an f-string replacement field is
        # a SyntaxError before Python 3.12.
        print("Changed '{c c c c}' → '{cccc}'")
    else:
        print("? Could not find array specifier in preprocessed output")

    # Test OMML conversion
    print("\n" + "-" * 80)
    print("Step 2: OMML Conversion")
    try:
        omml = converter.convert_to_omml(latex_formula)
        print("✓ SUCCESS: OMML conversion completed")
        print(f"OMML length: {len(omml)} characters")
        print(f"OMML preview (first 300 chars):\n{omml[:300]}...")

        # Check if it contains oMath element
        if "oMath" in omml:
            print("\n✓ Valid OMML: Contains oMath element")
        else:
            print("\n✗ WARNING: OMML might be incomplete (no oMath element found)")

    except Exception as e:
        # Deliberately broad: any conversion error is reported as a failed
        # check rather than aborting the script.
        print("✗ FAILED: OMML conversion error")
        print(f"Error: {e}")
        return False

    print("\n" + "=" * 80)
    if not preprocessing_ok:
        print("✗ FAILED: Preprocessing left spaces in array specifiers")
        return False

    print("✓ All tests passed!")
    return True
|
||||
|
||||
|
||||
def test_simple_array():
    """Convert a small array whose column specifier contains spaces.

    Calls ``Converter.convert_to_omml`` on a two-row ``array`` with the
    specifier ``{c c c}`` and prints the outcome to stdout.

    Returns:
        True if the conversion completed, False if it raised.
    """
    conv = Converter()

    print("\nTesting simple array")
    print("=" * 80)

    # Simple array with spaces in column specifier
    latex_formula = r"\begin{array}{c c c} a & b & c \\ d & e & f \end{array}"
    print("LaTeX: " + latex_formula)

    try:
        omml = conv.convert_to_omml(latex_formula)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
        print(f"Preview: {omml[:200]}...")
    except Exception as exc:
        # Any error during conversion or reporting counts as a failure.
        print(f"✗ FAILED: {exc}")
        return False

    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run both checks and print an overall verdict.
    print("Array Column Specifier Fix Test Suite\n")
    rule = "=" * 80

    try:
        # Both checks always run so that each prints its own report.
        simple_ok = test_simple_array()
        specifier_ok = test_array_specifier_fix()

        if simple_ok and specifier_ok:
            verdict = "✓✓✓ ALL TESTS PASSED ✓✓✓"
        else:
            verdict = "✗✗✗ SOME TESTS FAILED ✗✗✗"

        print("\n" + rule)
        print(verdict)
        print(rule)

    except KeyboardInterrupt:
        print("\n\nTests interrupted by user")
    except Exception as exc:
        # Anything a check did not handle itself: report it with a traceback.
        print(f"\n\nTest suite error: {exc}")
        import traceback

        traceback.print_exc()
|
||||
218
test_omml_preprocessing.py
Normal file
218
test_omml_preprocessing.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""Comprehensive test for OMML conversion with preprocessing."""
|
||||
|
||||
from app.services.converter import Converter
|
||||
|
||||
|
||||
def test_case_1_array_with_spaces():
    """Test 1: nested arrays whose column specifiers contain spaces.

    Converts the determinant formula from the original bug report with
    ``Converter.convert_to_omml``, then prints informational notes about the
    OMML output and about what ``_preprocess_formula_for_omml`` did to the
    ``{c c c c}`` specifiers. The notes do not affect the result.

    Returns:
        True if nothing raised, False otherwise.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 1: Array with spaces in column specifier")
    print(rule)

    conv = Converter()

    # The problematic LaTeX from the error
    latex = r"""\begin{array}{l} D = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} + 0 + \dots + 0 & 0 + a _ {i 2} + \dots + 0 & \dots & 0 + \dots + 0 + a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ = \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ a _ {i 1} & 0 & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & a _ {i 2} & \dots & 0 \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right| \\ + \dots + \left| \begin{array}{c c c c} a _ {1 1} & a _ {1 2} & \dots & a _ {1 n} \\ \vdots & \vdots & & \vdots \\ 0 & 0 & \dots & a _ {i n} \\ \vdots & \vdots & & \vdots \\ a _ {n 1} & a _ {n 2} & \dots & a _ {n n} \end{array} \right|, \\ \end{array}"""

    print("LaTeX length: {} chars".format(len(latex)))
    print("Preview: " + latex[:100] + "...")

    try:
        omml = conv.convert_to_omml(latex)
        print("\n✓ SUCCESS: Converted to OMML")
        print(f"OMML length: {len(omml)} chars")

        if "oMath" in omml:
            print("✓ Valid OMML structure detected")

        # Check preprocessing worked
        cleaned = conv._preprocess_formula_for_omml(latex)
        spaces_gone = "{c c c c}" not in cleaned
        if spaces_gone and "{cccc}" in cleaned:
            print("✓ Array column specifiers fixed: '{c c c c}' → '{cccc}'")

        return True

    except Exception as exc:
        print(f"\n✗ FAILED: {exc}")
        return False
|
||||
|
||||
|
||||
def test_case_2_vmatrix():
    """Test 2: a ``vmatrix`` environment.

    Converts a 2x2 ``vmatrix`` with ``Converter.convert_to_omml`` and prints
    an informational note when the preprocessed formula no longer mentions
    ``vmatrix`` and contains ``\\left|``. The note does not affect the result.

    Returns:
        True if nothing raised, False otherwise.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 2: vmatrix environment")
    print(rule)

    conv = Converter()

    latex = r"\begin{vmatrix} a & b \\ c & d \end{vmatrix}"
    print("LaTeX: " + latex)

    try:
        omml = conv.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        # Check if vmatrix was converted
        cleaned = conv._preprocess_formula_for_omml(latex)
        vmatrix_gone = "vmatrix" not in cleaned
        if vmatrix_gone and r"\left|" in cleaned:
            print("✓ vmatrix converted to \\left| ... \\right|")

        return True

    except Exception as exc:
        print(f"✗ FAILED: {exc}")
        return False
|
||||
|
||||
|
||||
def test_case_3_cases_environment():
    """Test 3: a ``cases`` environment.

    Converts a piecewise definition with ``Converter.convert_to_omml`` and
    prints an informational note when the preprocessed formula contains
    ``array`` and no longer mentions ``cases``. The note does not affect the
    result.

    Returns:
        True if nothing raised, False otherwise.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 3: cases environment")
    print(rule)

    conv = Converter()

    latex = r"f(x) = \begin{cases} x^2 & x \geq 0 \\ -x & x < 0 \end{cases}"
    print("LaTeX: " + latex)

    try:
        omml = conv.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        # Check if cases was converted to array
        cleaned = conv._preprocess_formula_for_omml(latex)
        cases_gone = "cases" not in cleaned
        if cases_gone and "array" in cleaned:
            print("✓ cases converted to array environment")

        return True

    except Exception as exc:
        print(f"✗ FAILED: {exc}")
        return False
|
||||
|
||||
|
||||
def test_case_4_aligned_environment():
    """Test 4: an ``aligned`` environment.

    Converts a two-line aligned system with ``Converter.convert_to_omml``.
    If the preprocessed formula contains ``array`` and no longer mentions
    ``aligned``, prints a note, plus a second note when it has fewer ``&``
    characters than the input. The notes do not affect the result.

    Returns:
        True if nothing raised, False otherwise.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 4: aligned environment")
    print(rule)

    conv = Converter()

    latex = r"\begin{aligned} x + y &= 5 \\ 2x - y &= 1 \end{aligned}"
    print("LaTeX: " + latex)

    try:
        omml = conv.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        # Check if aligned was converted
        cleaned = conv._preprocess_formula_for_omml(latex)
        aligned_gone = "aligned" not in cleaned
        if aligned_gone and "array" in cleaned:
            print("✓ aligned converted to array environment")
            fewer_markers = (
                "&" not in cleaned or cleaned.count("&") < latex.count("&")
            )
            if fewer_markers:
                print("✓ Alignment markers removed")

        return True

    except Exception as exc:
        print(f"✗ FAILED: {exc}")
        return False
|
||||
|
||||
|
||||
def test_case_5_simple_formula():
    """Test 5: a simple formula with no environments.

    Converts the quadratic formula with ``Converter.convert_to_omml`` and
    prints the outcome.

    Returns:
        True if the conversion completed, False if it raised.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 5: Simple formula")
    print(rule)

    conv = Converter()

    latex = r"x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}"
    print("LaTeX: " + latex)

    try:
        omml = conv.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")
    except Exception as exc:
        print(f"✗ FAILED: {exc}")
        return False

    return True
|
||||
|
||||
|
||||
def test_case_6_nested_structures():
    """Test 6: an array that nests ``vmatrix`` and ``cases`` environments.

    Converts the formula with ``Converter.convert_to_omml``, then prints
    which rewrites are visible in the preprocessed formula: ``vmatrix``
    gone, ``cases`` gone, and ``{l c}`` replaced by ``{lc}``. These notes do
    not affect the result.

    Returns:
        True if nothing raised, False otherwise.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("Test 6: Nested structures")
    print(rule)

    conv = Converter()

    latex = r"\left\{ \begin{array}{l c} \begin{vmatrix} a & b \\ c & d \end{vmatrix} & = ad - bc \\ f(x) = \begin{cases} 1 & x > 0 \\ 0 & x \leq 0 \end{cases} & \text{step function} \end{array} \right."
    print("LaTeX: " + latex)

    try:
        omml = conv.convert_to_omml(latex)
        print(f"✓ SUCCESS: Converted to OMML ({len(omml)} chars)")

        cleaned = conv._preprocess_formula_for_omml(latex)
        print("\nPreprocessing applied:")
        if "vmatrix" not in cleaned:
            print("  ✓ vmatrix converted")
        if "cases" not in cleaned:
            print("  ✓ cases converted")
        spec_spaces_gone = "{l c}" not in cleaned
        if spec_spaces_gone and "{lc}" in cleaned:
            print("  ✓ Array specifiers fixed")

        return True

    except Exception as exc:
        print(f"✗ FAILED: {exc}")
        return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run every case, then print a pass/fail summary.
    heavy_rule = "=" * 80
    print(heavy_rule)
    print("OMML CONVERSION TEST SUITE")
    print("Testing preprocessing and conversion")
    print(heavy_rule)

    results = []

    try:
        # The simple formula runs first, then the environment-specific cases.
        cases = (
            ("Simple formula", test_case_5_simple_formula),
            ("Array with spaces", test_case_1_array_with_spaces),
            ("vmatrix", test_case_2_vmatrix),
            ("cases", test_case_3_cases_environment),
            ("aligned", test_case_4_aligned_environment),
            ("Nested structures", test_case_6_nested_structures),
        )
        for label, case in cases:
            results.append((label, case()))

        # Summary
        print("\n" + heavy_rule)
        print("TEST SUMMARY")
        print(heavy_rule)

        total = len(results)
        passed = len([outcome for _, outcome in results if outcome])

        for label, outcome in results:
            marker = "✓ PASS" if outcome else "✗ FAIL"
            print(marker + ": " + label)

        print("\n" + "-" * 80)
        print(f"Total: {passed}/{total} tests passed")

        if passed != total:
            print(f"\n✗✗✗ {total - passed} TESTS FAILED ✗✗✗")
        else:
            print("\n✓✓✓ ALL TESTS PASSED ✓✓✓")

        print(heavy_rule)

    except KeyboardInterrupt:
        print("\n\nTests interrupted by user")
    except Exception as exc:
        # Anything a case did not handle itself: report it with a traceback.
        print(f"\n\nTest suite error: {exc}")
        import traceback

        traceback.print_exc()
|
||||
Reference in New Issue
Block a user