import re from ..utils.latex import change_all from .format import format_latex def _rm_dollar_surr(content): pattern = re.compile(r'\\[a-zA-Z]+\$.*?\$|\$.*?\$') matches = pattern.findall(content) for match in matches: if not re.match(r'\\[a-zA-Z]+', match): new_match = match.strip('$') content = content.replace(match, ' ' + new_match + ' ') return content def to_katex(formula: str) -> str: """ Convert LaTeX formula to KaTeX-compatible format. This function processes a LaTeX formula string and converts it to a format that is compatible with KaTeX rendering. It removes unsupported commands and structures, simplifies LaTeX environments, and optimizes the formula for web display. Args: formula: LaTeX formula string to convert Returns: KaTeX-compatible formula string """ res = formula # remove mbox surrounding res = change_all(res, r'\mbox ', r' ', r'{', r'}', r'', r'') res = change_all(res, r'\mbox', r' ', r'{', r'}', r'', r'') # remove hbox surrounding res = re.sub(r'\\hbox to ?-? ?\d+\.\d+(pt)?\{', r'\\hbox{', res) res = change_all(res, r'\hbox', r' ', r'{', r'}', r'', r' ') # remove raise surrounding res = re.sub(r'\\raise ?-? ?\d+\.\d+(pt)?', r' ', res) # remove makebox res = re.sub(r'\\makebox ?\[\d+\.\d+(pt)?\]\{', r'\\makebox{', res) res = change_all(res, r'\makebox', r' ', r'{', r'}', r'', r' ') # remove vbox surrounding, scalebox surrounding res = re.sub(r'\\raisebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\raisebox{', res) res = re.sub(r'\\scalebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\scalebox{', res) res = change_all(res, r'\scalebox', r' ', r'{', r'}', r'', r' ') res = change_all(res, r'\raisebox', r' ', r'{', r'}', r'', r' ') res = change_all(res, r'\vbox', r' ', r'{', r'}', r'', r' ') origin_instructions = [ r'\Huge', r'\huge', r'\LARGE', r'\Large', r'\large', r'\normalsize', r'\small', r'\footnotesize', r'\tiny', ] for old_ins, new_ins in zip(origin_instructions, origin_instructions): res = change_all(res, old_ins, new_ins, r'$', r'$', '{', '}') res = change_all(res, r'\mathbf', r'\bm', r'{', r'}', r'{', r'}') res = change_all(res, r'\boldmath ', r'\bm', r'{', r'}', r'{', r'}') res = change_all(res, r'\boldmath', r'\bm', r'{', r'}', r'{', r'}') res = change_all(res, r'\boldmath ', r'\bm', r'$', r'$', r'{', r'}') res = change_all(res, r'\boldmath', r'\bm', r'$', r'$', r'{', r'}') res = change_all(res, r'\scriptsize', r'\scriptsize', r'$', r'$', r'{', r'}') res = change_all(res, r'\emph', r'\textit', r'{', r'}', r'{', r'}') res = change_all(res, r'\emph ', r'\textit', r'{', r'}', r'{', r'}') # remove bold command res = change_all(res, r'\bm', r' ', r'{', r'}', r'', r'') origin_instructions = [ r'\left', r'\middle', r'\right', r'\big', r'\Big', r'\bigg', r'\Bigg', r'\bigl', r'\Bigl', r'\biggl', r'\Biggl', r'\bigm', r'\Bigm', r'\biggm', r'\Biggm', r'\bigr', r'\Bigr', r'\biggr', r'\Biggr', ] for origin_ins in origin_instructions: res = change_all(res, origin_ins, origin_ins, r'{', r'}', r'', r'') res = re.sub(r'\\\[(.*?)\\\]', r'\1\\newline', res) if res.endswith(r'\newline'): res = res[:-8] # remove multiple spaces res = re.sub(r'(\\,){1,}', ' ', res) res = re.sub(r'(\\!){1,}', ' ', res) res = re.sub(r'(\\;){1,}', ' ', res) res = re.sub(r'(\\:){1,}', ' ', res) res = re.sub(r'\\vspace\{.*?}', '', res) # merge consecutive text def merge_texts(match): texts = match.group(0) merged_content = ''.join(re.findall(r'\\text\{([^}]*)\}', texts)) return f'\\text{{{merged_content}}}' res = re.sub(r'(\\text\{[^}]*\}\s*){2,}', merge_texts, res) res = res.replace(r'\bf ', '') res = _rm_dollar_surr(res) # remove extra spaces (keeping only one) res = re.sub(r' +', ' ', res) # format latex res = res.strip() res = format_latex(res) return res