web demo支持katex, 不再需要本地安装xelatex渲染器
This commit is contained in:
124
src/web.py
124
src/web.py
@@ -2,14 +2,11 @@ import os
|
||||
import io
|
||||
import base64
|
||||
import tempfile
|
||||
import time
|
||||
import subprocess
|
||||
import shutil
|
||||
import streamlit as st
|
||||
import re
|
||||
|
||||
from PIL import Image, ImageChops
|
||||
from pathlib import Path
|
||||
from pdf2image import convert_from_path
|
||||
from PIL import Image
|
||||
from models.ocr_model.utils.inference import inference
|
||||
from models.ocr_model.model.TexTeller import TexTeller
|
||||
|
||||
@@ -69,6 +66,19 @@ def get_model():
|
||||
def get_tokenizer():
|
||||
return TexTeller.get_tokenizer(os.environ['TOKENIZER_DIR'])
|
||||
|
||||
def to_katex(formula: str) -> str:
|
||||
res = formula
|
||||
res = re.sub(r'\\mbox\{([^}]*)\}', r'\1', res)
|
||||
res = re.sub(r'boldmath\$(.*?)\$', r'bm{\1}', res)
|
||||
res = re.sub(r'\\\[(.*?)\\\]', r'\1\\newline', res)
|
||||
|
||||
pattern = r'(\\(?:left|middle|right|big|Big|bigg|Bigg|bigl|Bigl|biggl|Biggl|bigm|Bigm|biggm|Biggm|bigr|Bigr|biggr|Biggr))\{([^}]*)\}'
|
||||
replacement = r'\1\2'
|
||||
res = re.sub(pattern, replacement, res)
|
||||
if res.endswith(r'\newline'):
|
||||
res = res[:-8]
|
||||
return res
|
||||
|
||||
def get_image_base64(img_file):
|
||||
buffered = io.BytesIO()
|
||||
img_file.seek(0)
|
||||
@@ -76,55 +86,12 @@ def get_image_base64(img_file):
|
||||
img.save(buffered, format="PNG")
|
||||
return base64.b64encode(buffered.getvalue()).decode()
|
||||
|
||||
def rendering(formula: str, out_img_path: Path) -> bool:
|
||||
build_dir = out_img_path / 'build'
|
||||
build_dir.mkdir(exist_ok=True, parents=True)
|
||||
f = build_dir / 'formula.tex'
|
||||
f.touch(exist_ok=True)
|
||||
f.write_text(tex.format(formula=formula))
|
||||
|
||||
p = subprocess.Popen([
|
||||
'xelatex',
|
||||
f'-output-directory={build_dir}',
|
||||
'-interaction=nonstopmode',
|
||||
'-halt-on-error',
|
||||
f'{f}'
|
||||
])
|
||||
p.communicate()
|
||||
return p.returncode == 0
|
||||
|
||||
def pdf_to_pngbytes(pdf_path):
|
||||
images = convert_from_path(pdf_path, dpi=400,first_page=1, last_page=1)
|
||||
trimmed_images = trim(images[0])
|
||||
png_image_bytes = io.BytesIO()
|
||||
trimmed_images.save(png_image_bytes, format='PNG')
|
||||
png_image_bytes.seek(0)
|
||||
return png_image_bytes
|
||||
|
||||
def trim(im):
|
||||
bg = Image.new(im.mode, im.size, im.getpixel((0,0)))
|
||||
diff = ImageChops.difference(im, bg)
|
||||
diff = ImageChops.add(diff, diff, 2.0, -100)
|
||||
bbox = diff.getbbox()
|
||||
if bbox:
|
||||
return im.crop(bbox)
|
||||
return im
|
||||
|
||||
|
||||
model = get_model()
|
||||
tokenizer = get_tokenizer()
|
||||
# check if xelatex is installed
|
||||
xelatex_installed = os.system('which xelatex > /dev/null 2>&1') == 0
|
||||
|
||||
if "start" not in st.session_state:
|
||||
st.session_state["start"] = 1
|
||||
|
||||
if xelatex_installed:
|
||||
st.toast('Hooray!', icon='🎉')
|
||||
time.sleep(0.5)
|
||||
st.toast("Xelatex have been detected.", icon='✅')
|
||||
else:
|
||||
st.error('xelatex is not installed. Please install it before using TexTeller.')
|
||||
st.toast('Hooray!', icon='🎉')
|
||||
|
||||
|
||||
# ============================ pages =============================== #
|
||||
@@ -133,11 +100,6 @@ st.markdown(html_string, unsafe_allow_html=True)
|
||||
|
||||
uploaded_file = st.file_uploader("",type=['jpg', 'png', 'pdf'])
|
||||
|
||||
if xelatex_installed:
|
||||
st.caption('🥳 Xelatex have been detected, rendered image will be displayed in the web page.')
|
||||
else:
|
||||
st.caption('😭 Xelatex is not detected, please check the resulting latex code by yourself, or check ... to have your xelatex setup ready.')
|
||||
|
||||
if uploaded_file:
|
||||
img = Image.open(uploaded_file)
|
||||
|
||||
@@ -170,60 +132,18 @@ if uploaded_file:
|
||||
|
||||
with st.spinner("Predicting..."):
|
||||
uploaded_file.seek(0)
|
||||
TeXTeller_result = inference(
|
||||
TexTeller_result = inference(
|
||||
model,
|
||||
tokenizer,
|
||||
[png_file_path],
|
||||
True if os.environ['USE_CUDA'] == 'True' else False,
|
||||
int(os.environ['NUM_BEAM'])
|
||||
)[0]
|
||||
if not xelatex_installed:
|
||||
st.markdown(fail_gif_html, unsafe_allow_html=True)
|
||||
st.warning('Unable to find xelatex to render image. Please check the prediction results yourself.', icon="🤡")
|
||||
txt = st.text_area(
|
||||
":red[Predicted formula]",
|
||||
TeXTeller_result,
|
||||
height=150,
|
||||
)
|
||||
else:
|
||||
is_successed = rendering(TeXTeller_result, Path(temp_dir))
|
||||
if is_successed:
|
||||
# st.code(TeXTeller_result, language='latex')
|
||||
|
||||
img_base64 = get_image_base64(pdf_to_pngbytes(Path(temp_dir) / 'build' / 'formula.pdf'))
|
||||
st.markdown(suc_gif_html, unsafe_allow_html=True)
|
||||
st.success('Successfully rendered!', icon="✅")
|
||||
txt = st.text_area(
|
||||
":red[Predicted formula]",
|
||||
TeXTeller_result,
|
||||
height=150,
|
||||
)
|
||||
# st.latex(TeXTeller_result)
|
||||
st.markdown(f"""
|
||||
<style>
|
||||
.centered-container {{
|
||||
text-align: center;
|
||||
}}
|
||||
.centered-image {{
|
||||
display: block;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
max-width: 500px;
|
||||
max-height: 500px;
|
||||
}}
|
||||
</style>
|
||||
<div class="centered-container">
|
||||
<img src="data:image/png;base64,{img_base64}" class="centered-image" alt="Input image">
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
else:
|
||||
st.markdown(fail_gif_html, unsafe_allow_html=True)
|
||||
st.error('Rendering failed. You can try using a higher resolution image or splitting the multi line formula into a single line for better results.', icon="❌")
|
||||
txt = st.text_area(
|
||||
":red[Predicted formula]",
|
||||
TeXTeller_result,
|
||||
height=150,
|
||||
)
|
||||
st.success('Completed!', icon="✅")
|
||||
st.markdown(suc_gif_html, unsafe_allow_html=True)
|
||||
katex_res = to_katex(TexTeller_result)
|
||||
st.text_area(":red[Predicted formula]", katex_res, height=150)
|
||||
st.latex(katex_res)
|
||||
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user