2024-04-21 00:05:14 +08:00
|
|
|
import re
|
|
|
|
|
|
2025-04-16 14:23:02 +00:00
|
|
|
from ..utils.latex import change_all
|
|
|
|
|
from .format import format_latex
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _rm_dollar_surr(content):
    r"""Strip the ``$`` delimiters from inline ``$...$`` spans in *content*.

    Each ``$...$`` span is replaced by its inner text padded with one space on
    either side. A span that directly follows a LaTeX command name (for
    example ``\Huge$x$``) is left exactly as it is.

    Args:
        content: LaTeX source text to process.

    Returns:
        The text with every unprotected ``$...$`` span unwrapped.
    """
    # The first alternative consumes command-prefixed spans as a whole, so the
    # second alternative can never match the ``$...$`` that belongs to them.
    pattern = re.compile(r'\\[a-zA-Z]+\$.*?\$|\$.*?\$')

    def _unwrap(found):
        span = found.group(0)
        if span.startswith('\\'):
            # Command-prefixed span: keep verbatim.
            return span
        return ' ' + span.strip('$') + ' '

    # Substitute occurrence by occurrence. A global ``str.replace`` of the
    # matched text would also rewrite an identical ``$...$`` that sits behind
    # a command elsewhere in the string, defeating the protection above.
    return pattern.sub(_unwrap, content)
|
|
|
|
|
|
|
|
|
|
|
2024-04-21 00:05:14 +08:00
|
|
|
def to_katex(formula: str) -> str:
    r"""Normalize a LaTeX formula so it is suitable for KaTeX rendering.

    The input is pushed through a fixed pipeline of ``change_all`` and regex
    passes, in this order:

    1. Box commands (``\mbox``, ``\hbox``, ``\raise``, ``\makebox``,
       ``\raisebox``, ``\scalebox``, ``\vbox``): decimal dimension arguments
       are removed by regex, then the commands go through ``change_all``.
    2. Font-size, bold and emphasis commands go through ``change_all``;
       bold variants are first mapped to ``\bm`` and ``\bm`` is then
       processed again.
    3. Delimiter-sizing commands (``\left``, ``\big``, ...) go through
       ``change_all``.
    4. ``\[...\]`` becomes its content followed by ``\newline``; a
       ``\newline`` at the very end of the string is dropped.
    5. Runs of ``\,`` ``\!`` ``\;`` ``\:`` become one space and
       ``\vspace{...}`` is deleted.
    6. Adjacent ``\text{...}`` groups are merged, ``\bf `` is deleted and
       ``_rm_dollar_surr`` is applied.
    7. Repeated spaces are collapsed, the string is stripped and handed to
       ``format_latex``.

    Args:
        formula: LaTeX formula string to convert.

    Returns:
        The processed formula, as returned by ``format_latex``.
    """
    # NOTE(review): the comments below describe intent; the exact effect of
    # each change_all call depends on utils.latex.change_all - confirm there.
    text = formula

    # --- box commands ------------------------------------------------------
    # \mbox: the spelling with a trailing space is processed first.
    for box_cmd in (r'\mbox ', r'\mbox'):
        text = change_all(text, box_cmd, r' ', r'{', r'}', r'', r'')

    # \hbox: drop a "to <decimal>[pt]" width before processing the command.
    text = re.sub(r'\\hbox to ?-? ?\d+\.\d+(pt)?\{', r'\\hbox{', text)
    text = change_all(text, r'\hbox', r' ', r'{', r'}', r'', r' ')

    # \raise <decimal>[pt] is replaced by a single space.
    text = re.sub(r'\\raise ?-? ?\d+\.\d+(pt)?', r' ', text)

    # \makebox: drop the optional [<decimal>[pt]] width first.
    text = re.sub(r'\\makebox ?\[\d+\.\d+(pt)?\]\{', r'\\makebox{', text)
    text = change_all(text, r'\makebox', r' ', r'{', r'}', r'', r' ')

    # \raisebox / \scalebox: drop the leading {<decimal>[pt]} argument, then
    # process them together with \vbox.
    text = re.sub(r'\\raisebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\raisebox{', text)
    text = re.sub(r'\\scalebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\scalebox{', text)
    for box_cmd in (r'\scalebox', r'\raisebox', r'\vbox'):
        text = change_all(text, box_cmd, r' ', r'{', r'}', r'', r' ')

    # --- font size / weight / emphasis -------------------------------------
    size_cmds = (
        r'\Huge', r'\huge', r'\LARGE', r'\Large', r'\large',
        r'\normalsize', r'\small', r'\footnotesize', r'\tiny',
    )
    # Each size command keeps its name; its $...$ argument is passed to
    # change_all with {...} as the new delimiters.
    for size_cmd in size_cmds:
        text = change_all(text, size_cmd, size_cmd, r'$', r'$', '{', '}')

    # (old command, new command, old left delimiter, old right delimiter);
    # the new delimiters are always braces. Order is significant.
    brace_rewrites = (
        (r'\mathbf', r'\bm', r'{', r'}'),
        (r'\boldmath ', r'\bm', r'{', r'}'),
        (r'\boldmath', r'\bm', r'{', r'}'),
        (r'\boldmath ', r'\bm', r'$', r'$'),
        (r'\boldmath', r'\bm', r'$', r'$'),
        (r'\scriptsize', r'\scriptsize', r'$', r'$'),
        (r'\emph', r'\textit', r'{', r'}'),
        (r'\emph ', r'\textit', r'{', r'}'),
    )
    for old_cmd, new_cmd, old_left, old_right in brace_rewrites:
        text = change_all(text, old_cmd, new_cmd, old_left, old_right, r'{', r'}')

    # Bold is not kept: every \bm (including those produced above) is
    # processed once more with a blank replacement.
    text = change_all(text, r'\bm', r' ', r'{', r'}', r'', r'')

    # --- delimiter sizing --------------------------------------------------
    delimiter_cmds = (
        r'\left', r'\middle', r'\right',
        r'\big', r'\Big', r'\bigg', r'\Bigg',
        r'\bigl', r'\Bigl', r'\biggl', r'\Biggl',
        r'\bigm', r'\Bigm', r'\biggm', r'\Biggm',
        r'\bigr', r'\Bigr', r'\biggr', r'\Biggr',
    )
    # The command name is preserved; only a braced argument is affected.
    for delim_cmd in delimiter_cmds:
        text = change_all(text, delim_cmd, delim_cmd, r'{', r'}', r'', r'')

    # --- display math ------------------------------------------------------
    # \[ ... \] -> "... \newline"; a \newline ending the string is dropped.
    text = re.sub(r'\\\[(.*?)\\\]', r'\1\\newline', text)
    newline_cmd = r'\newline'
    if text.endswith(newline_cmd):
        text = text[:-len(newline_cmd)]

    # --- spacing -----------------------------------------------------------
    # Any run of one spacing macro (\, \! \; \:) becomes one plain space.
    for spacing_run in (r'(\\,){1,}', r'(\\!){1,}', r'(\\;){1,}', r'(\\:){1,}'):
        text = re.sub(spacing_run, ' ', text)
    text = re.sub(r'\\vspace\{.*?}', '', text)

    # --- text groups -------------------------------------------------------
    def _join_text_run(run):
        # Concatenate the contents of every \text{...} in the matched run.
        pieces = re.findall(r'\\text\{([^}]*)\}', run.group(0))
        return '\\text{' + ''.join(pieces) + '}'

    # Two or more \text{...} groups separated only by whitespace are merged.
    text = re.sub(r'(\\text\{[^}]*\}\s*){2,}', _join_text_run, text)

    text = text.replace(r'\bf ', '')
    text = _rm_dollar_surr(text)

    # --- final cleanup -----------------------------------------------------
    # Collapse runs of spaces to one, trim, and let format_latex finish.
    text = re.sub(r' +', ' ', text)
    return format_latex(text.strip())
|