Bugfix: to_katex.py
1. Added `change_all` function to fix a bug where some LaTeX formulas with the same wrapper were causing issues. 2. Removed some unnecessary formatting commands. Bugfix: to_katex.py
This commit is contained in:
@@ -54,16 +54,58 @@ def change(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, ne
|
|||||||
result += input_str[i:start]
|
result += input_str[i:start]
|
||||||
i = start
|
i = start
|
||||||
|
|
||||||
if old_inst != new_inst and old_inst in result:
|
if old_inst != new_inst and (old_inst + old_surr_l) in result:
|
||||||
return change(result, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r)
|
return change(result, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r)
|
||||||
else:
|
else:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def find_substring_positions(string, substring):
|
||||||
|
positions = [match.start() for match in re.finditer(re.escape(substring), string)]
|
||||||
|
return positions
|
||||||
|
|
||||||
|
|
||||||
|
def rm_dollar_surr(content):
|
||||||
|
pattern = re.compile(r'\\[a-zA-Z]+\$.*?\$|\$.*?\$')
|
||||||
|
matches = pattern.findall(content)
|
||||||
|
|
||||||
|
for match in matches:
|
||||||
|
if not re.match(r'\\[a-zA-Z]+', match):
|
||||||
|
new_match = match.strip('$')
|
||||||
|
content = content.replace(match, ' ' + new_match + ' ')
|
||||||
|
|
||||||
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def change_all(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r):
|
||||||
|
pos = find_substring_positions(input_str, old_inst + old_surr_l)
|
||||||
|
res = list(input_str)
|
||||||
|
for p in pos[::-1]:
|
||||||
|
res[p:] = list(change(''.join(res[p:]), old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r))
|
||||||
|
res = ''.join(res)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
def to_katex(formula: str) -> str:
|
def to_katex(formula: str) -> str:
|
||||||
res = formula
|
res = formula
|
||||||
res = change(res, r'\mbox ', r'', r'{', r'}', r'', r'')
|
# remove mbox surrounding
|
||||||
res = change(res, r'\mbox', r'', r'{', r'}', r'', r'')
|
res = change_all(res, r'\mbox ', r' ', r'{', r'}', r'', r'')
|
||||||
|
res = change_all(res, r'\mbox', r' ', r'{', r'}', r'', r'')
|
||||||
|
# remove hbox surrounding
|
||||||
|
res = re.sub(r'\\hbox to ?-? ?\d+\.\d+(pt)?\{', r'\\hbox{', res)
|
||||||
|
res = change_all(res, r'\hbox', r' ', r'{', r'}', r'', r' ')
|
||||||
|
# remove raise surrounding
|
||||||
|
res = re.sub(r'\\raise ?-? ?\d+\.\d+(pt)?', r' ', res)
|
||||||
|
# remove makebox
|
||||||
|
res = re.sub(r'\\makebox ?\[\d+\.\d+(pt)?\]\{', r'\\makebox{', res)
|
||||||
|
res = change_all(res, r'\makebox', r' ', r'{', r'}', r'', r' ')
|
||||||
|
# remove vbox surrounding, scalebox surrounding
|
||||||
|
res = re.sub(r'\\raisebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\raisebox{', res)
|
||||||
|
res = re.sub(r'\\scalebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\scalebox{', res)
|
||||||
|
res = change_all(res, r'\scalebox', r' ', r'{', r'}', r'', r' ')
|
||||||
|
res = change_all(res, r'\raisebox', r' ', r'{', r'}', r'', r' ')
|
||||||
|
res = change_all(res, r'\vbox', r' ', r'{', r'}', r'', r' ')
|
||||||
|
|
||||||
|
|
||||||
origin_instructions = [
|
origin_instructions = [
|
||||||
r'\Huge',
|
r'\Huge',
|
||||||
@@ -77,10 +119,14 @@ def to_katex(formula: str) -> str:
|
|||||||
r'\tiny'
|
r'\tiny'
|
||||||
]
|
]
|
||||||
for (old_ins, new_ins) in zip(origin_instructions, origin_instructions):
|
for (old_ins, new_ins) in zip(origin_instructions, origin_instructions):
|
||||||
res = change(res, old_ins, new_ins, r'$', r'$', '{', '}')
|
res = change_all(res, old_ins, new_ins, r'$', r'$', '{', '}')
|
||||||
res = change(res, r'\boldmath ', r'\bm', r'$', r'$', r'{', r'}')
|
res = change_all(res, r'\boldmath ', r'\bm', r'{', r'}', r'{', r'}')
|
||||||
res = change(res, r'\boldmath', r'\bm', r'$', r'$', r'{', r'}')
|
res = change_all(res, r'\boldmath', r'\bm', r'{', r'}', r'{', r'}')
|
||||||
res = change(res, r'\scriptsize', r'\scriptsize', r'$', r'$', r'{', r'}')
|
res = change_all(res, r'\boldmath ', r'\bm', r'$', r'$', r'{', r'}')
|
||||||
|
res = change_all(res, r'\boldmath', r'\bm', r'$', r'$', r'{', r'}')
|
||||||
|
res = change_all(res, r'\scriptsize', r'\scriptsize', r'$', r'$', r'{', r'}')
|
||||||
|
res = change_all(res, r'\emph', r'\textit', r'{', r'}', r'{', r'}')
|
||||||
|
res = change_all(res, r'\emph ', r'\textit', r'{', r'}', r'{', r'}')
|
||||||
|
|
||||||
origin_instructions = [
|
origin_instructions = [
|
||||||
r'\left',
|
r'\left',
|
||||||
@@ -104,10 +150,31 @@ def to_katex(formula: str) -> str:
|
|||||||
r'\Biggr'
|
r'\Biggr'
|
||||||
]
|
]
|
||||||
for origin_ins in origin_instructions:
|
for origin_ins in origin_instructions:
|
||||||
res = change(res, origin_ins, origin_ins, r'{', r'}', r'', r'')
|
res = change_all(res, origin_ins, origin_ins, r'{', r'}', r'', r'')
|
||||||
|
|
||||||
res = re.sub(r'\\\[(.*?)\\\]', r'\1\\newline', res)
|
res = re.sub(r'\\\[(.*?)\\\]', r'\1\\newline', res)
|
||||||
|
|
||||||
if res.endswith(r'\newline'):
|
if res.endswith(r'\newline'):
|
||||||
res = res[:-8]
|
res = res[:-8]
|
||||||
return res
|
|
||||||
|
# remove multiple spaces
|
||||||
|
res = re.sub(r'(\\,){1,}', ' ', res)
|
||||||
|
res = re.sub(r'(\\!){1,}', ' ', res)
|
||||||
|
res = re.sub(r'(\\;){1,}', ' ', res)
|
||||||
|
res = re.sub(r'(\\:){1,}', ' ', res)
|
||||||
|
res = re.sub(r'\\vspace\{.*?}', '', res)
|
||||||
|
|
||||||
|
# merge consecutive text
|
||||||
|
def merge_texts(match):
|
||||||
|
texts = match.group(0)
|
||||||
|
merged_content = ''.join(re.findall(r'\\text\{([^}]*)\}', texts))
|
||||||
|
return f'\\text{{{merged_content}}}'
|
||||||
|
res = re.sub(r'(\\text\{[^}]*\}\s*){2,}', merge_texts, res)
|
||||||
|
|
||||||
|
res = res.replace(r'\bf ', '')
|
||||||
|
res = rm_dollar_surr(res)
|
||||||
|
|
||||||
|
# remove extra spaces (keeping only one)
|
||||||
|
res = re.sub(r' +', ' ', res)
|
||||||
|
|
||||||
|
return res.strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user