diff --git a/app/services/converter.py b/app/services/converter.py index 4cf73a8..e18abd3 100644 --- a/app/services/converter.py +++ b/app/services/converter.py @@ -122,12 +122,18 @@ class Converter: # Convert matrix environments for better Word rendering cleaned_md = self._convert_matrix_environments(cleaned_md) + # Fix array environment column specifiers (remove spaces) + cleaned_md = self._fix_array_column_specifiers(cleaned_md) + # Fix brace spacing for equation systems cleaned_md = self._fix_brace_spacing(cleaned_md) # Convert cases and aligned environments cleaned_md = self._convert_special_environments(cleaned_md) + # Handle LaTeX \tag{} commands for equation numbering + cleaned_md = self._convert_tag_commands(cleaned_md) + return cleaned_md def _convert_matrix_environments(self, md_text: str) -> str: @@ -153,6 +159,37 @@ class Converter: return md_text + def _fix_array_column_specifiers(self, md_text: str) -> str: + """Fix array environment column specifiers by removing spaces. + + Pandoc's OMML converter doesn't accept spaces between column alignment + specifiers in array environments. This converts patterns like + {c c c c} to {cccc}. + + Args: + md_text: Markdown text with LaTeX formulas. + + Returns: + Markdown text with fixed array column specifiers. + """ + + def remove_spaces_in_specifier(match: re.Match) -> str: + """Remove spaces from column specifier.""" + specifier = match.group(1) + # Remove all spaces from the specifier + specifier_no_spaces = re.sub(r"\s+", "", specifier) + return f"\\begin{{array}}{{{specifier_no_spaces}}}" + + # Match \begin{array}{...} and remove spaces in the column specifier + # Pattern: \begin{array}{c c c ...} -> \begin{array}{ccc...} + md_text = re.sub( + r"\\begin\{array\}\{([^}]+)\}", + remove_spaces_in_specifier, + md_text, + ) + + return md_text + def _fix_brace_spacing(self, md_text: str) -> str: """Fix spacing issues with braces in equation systems. @@ -218,6 +255,41 @@ class Converter: return md_text + def _convert_tag_commands(self, md_text: str) -> str: + """Convert LaTeX \\tag{} commands to Word-compatible format. + + The \\tag{} command is not supported in Word OMML format, so we convert it to + use simple spacing (\quad) to push the equation number to the right side. + The tag remains inside the formula for better compatibility. + + Args: + md_text: Markdown text containing LaTeX formulas with \\tag{}. + + Returns: + Markdown text with \\tag{} commands converted to spacing format. + """ + + def convert_tag(match: re.Match) -> str: + """Convert a single \\tag{} command within a formula.""" + formula_content = match.group(1) + tag_content = match.group(2) + + # Replace \tag{...} with \quad (...) to push the number to the right + # Keep it inside the formula for better Word compatibility + return f"$${formula_content} \\quad ({tag_content})$$" + + # Match display formulas ($$...$$) containing \\tag{...} + # Pattern: $$...content...\\tag {?...}...$$ + # Allow optional space between \tag and { + md_text = re.sub( + r"\$\$(.*?)\\tag\s*\{([^}]+)\}\s*\$\$", + convert_tag, + md_text, + flags=re.DOTALL, + ) + + return md_text + def export_to_file(self, md_text: str, export_type: ExportType = "docx") -> bytes: """Export markdown to docx or pdf file.