2024-02-11 08:06:50 +00:00
|
|
|
|
import os
|
|
|
|
|
|
import io
|
|
|
|
|
|
import base64
|
|
|
|
|
|
import tempfile
|
2024-03-18 15:48:04 +00:00
|
|
|
|
import shutil
|
2024-02-11 08:06:50 +00:00
|
|
|
|
import streamlit as st
|
2024-04-06 07:27:27 +00:00
|
|
|
|
import re
|
2024-02-11 08:06:50 +00:00
|
|
|
|
|
2024-04-06 07:27:27 +00:00
|
|
|
|
from PIL import Image
|
2024-02-11 08:06:50 +00:00
|
|
|
|
from models.ocr_model.utils.inference import inference
|
|
|
|
|
|
from models.ocr_model.model.TexTeller import TexTeller
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-03-18 15:48:04 +00:00
|
|
|
|
# Page banner: the "TexTeller" title between two emoji images, centred in an
# <h1>. Passed to st.markdown with unsafe_allow_html=True further down.
html_string = '''
<h1 style="color: black; text-align: center;">
<img src="https://slackmojis.com/emojis/429-troll/download" width="50">
TexTeller
<img src="https://slackmojis.com/emojis/429-troll/download" width="50">
</h1>
'''
|
|
|
|
|
|
|
|
|
|
|
|
# Success banner: three copies of the same emoji image, centred in an <h1>.
# Rendered with st.markdown(..., unsafe_allow_html=True) after a prediction.
suc_gif_html = '''
<h1 style="color: black; text-align: center;">
<img src="https://slackmojis.com/emojis/90621-clapclap-e/download" width="50">
<img src="https://slackmojis.com/emojis/90621-clapclap-e/download" width="50">
<img src="https://slackmojis.com/emojis/90621-clapclap-e/download" width="50">
</h1>
'''
|
|
|
|
|
|
|
|
|
|
|
|
# Failure banner: three copies of the same emoji image, centred in an <h1>.
# Unlike the other banners, the <img> tags carry no explicit width.
fail_gif_html = '''
<h1 style="color: black; text-align: center;">
<img src="https://slackmojis.com/emojis/51439-allthethings_intensifies/download" >
<img src="https://slackmojis.com/emojis/51439-allthethings_intensifies/download" >
<img src="https://slackmojis.com/emojis/51439-allthethings_intensifies/download" >
</h1>
'''
|
|
|
|
|
|
|
|
|
|
|
|
# LaTeX document template. Literal braces are doubled and {formula} is the
# only single-brace field, so the template is presumably meant to be filled
# with tex.format(formula=...) -- confirm against the caller, none is visible
# in this file.
tex = r'''
\documentclass{{article}}
\usepackage[
left=1in, % 左边距
right=1in, % 右边距
top=1in, % 上边距
bottom=1in,% 下边距
paperwidth=40cm, % 页面宽度
paperheight=40cm % 页面高度,这里以A4纸为例
]{{geometry}}

\usepackage[utf8]{{inputenc}}
\usepackage{{multirow,multicol,amsmath,amsfonts,amssymb,mathtools,bm,mathrsfs,wasysym,amsbsy,upgreek,mathalfa,stmaryrd,mathrsfs,dsfont,amsthm,amsmath,multirow}}

\begin{{document}}

{formula}

\pagenumbering{{gobble}}
\end{{document}}
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-02-11 08:06:50 +00:00
|
|
|
|
@st.cache_resource
def get_model():
    """Load the TexTeller model from the checkpoint named by the environment.

    The checkpoint location is read from the ``CHECKPOINT_DIR`` environment
    variable; a missing variable raises ``KeyError``. The loader is wrapped
    in ``st.cache_resource``.
    """
    checkpoint_dir = os.environ['CHECKPOINT_DIR']
    return TexTeller.from_pretrained(checkpoint_dir)
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def get_tokenizer():
    """Load the TexTeller tokenizer from the path named by the environment.

    The tokenizer location is read from the ``TOKENIZER_DIR`` environment
    variable; a missing variable raises ``KeyError``. The loader is wrapped
    in ``st.cache_resource``.
    """
    tokenizer_dir = os.environ['TOKENIZER_DIR']
    return TexTeller.get_tokenizer(tokenizer_dir)
|
|
|
|
|
|
|
2024-04-06 07:27:27 +00:00
|
|
|
|
def to_katex(formula: str) -> str:
    """Rewrite a predicted LaTeX string before it is shown with ``st.latex``.

    The following rewrites are applied in order:

    1. ``\\mbox{...}`` is replaced by its contents.
    2. ``boldmath$...$`` becomes ``bm{...}`` (a preceding backslash is kept,
       so ``\\boldmath$x$`` turns into ``\\bm{x}``).
    3. Each ``\\[ ... \\]`` display block is unwrapped and followed by
       ``\\newline`` plus a space. The space keeps the command from fusing
       with whatever text follows (``\\newlineb`` would be a different,
       undefined control sequence).
    4. Braces around the argument of a sizing/delimiter command are dropped,
       e.g. ``\\left{(}`` becomes ``\\left(``.
    5. A single trailing ``\\newline`` (with or without the separator space)
       is removed.

    Args:
        formula: LaTeX source to normalise.

    Returns:
        The rewritten LaTeX string.
    """
    newline = r'\newline'

    res = formula
    res = re.sub(r'\\mbox\{([^}]*)\}', r'\1', res)
    res = re.sub(r'boldmath\$(.*?)\$', r'bm{\1}', res)
    res = re.sub(r'\\\[(.*?)\\\]', r'\1\\newline ', res)

    pattern = r'(\\(?:left|middle|right|big|Big|bigg|Bigg|bigl|Bigl|biggl|Biggl|bigm|Bigm|biggm|Biggm|bigr|Bigr|biggr|Biggr))\{([^}]*)\}'
    replacement = r'\1\2'
    res = re.sub(pattern, replacement, res)

    # Drop one trailing line break; the longer suffix is tried first so the
    # separator space added above is removed together with the command.
    for suffix in (newline + ' ', newline):
        if res.endswith(suffix):
            return res[:-len(suffix)]
    return res
|
|
|
|
|
|
|
2024-03-18 15:48:04 +00:00
|
|
|
|
def get_image_base64(img_file):
    """Return the image in *img_file* re-encoded as PNG, as a base64 string.

    Args:
        img_file: Seekable binary file object holding an image that
            ``PIL.Image.open`` can read. It is rewound to the start before
            being read.

    Returns:
        The base64 text of the PNG-encoded image.
    """
    img_file.seek(0)
    picture = Image.open(img_file)

    png_buffer = io.BytesIO()
    picture.save(png_buffer, format="PNG")

    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode()
|
|
|
|
|
|
|
2024-02-11 08:06:50 +00:00
|
|
|
|
# Module-level handles used by the inference call below; both loaders are
# wrapped in st.cache_resource.
model, tokenizer = get_model(), get_tokenizer()

# The "start" key marks that this session has already been greeted, so the
# toast is shown only while the key is still absent from session state.
if "start" not in st.session_state:
    st.session_state["start"] = 1
    st.toast('Hooray!', icon='🎉')
|
2024-02-11 08:06:50 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================ pages =============================== #
|
2024-03-18 15:48:04 +00:00
|
|
|
|
|
2024-02-11 08:06:50 +00:00
|
|
|
|
# Title banner at the top of the page.
st.markdown(html_string, unsafe_allow_html=True)

# Streamlit advises against an empty widget label; give the uploader a real
# label for assistive technology and hide it visually instead.
uploaded_file = st.file_uploader(
    "Upload an image of a formula",
    type=['jpg', 'png', 'pdf'],
    label_visibility="collapsed",
)
|
2024-02-11 08:06:50 +00:00
|
|
|
|
|
|
|
|
|
|
if uploaded_file:
    # TemporaryDirectory removes the scratch folder on exit even when decoding
    # or inference raises, so a failed run no longer leaks a directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        png_file_path = os.path.join(temp_dir, 'image.png')

        # The uploader also accepts files Pillow cannot decode (e.g. PDF) or
        # cannot write as PNG; Pillow reports both as OSError subclasses.
        try:
            img = Image.open(uploaded_file)
            img.save(png_file_path, 'PNG')
        except OSError:
            img = None

        if img is None:
            st.error('Could not read the uploaded file as an image.', icon="🚨")
            st.markdown(fail_gif_html, unsafe_allow_html=True)
        else:
            img_base64 = get_image_base64(uploaded_file)

            # Preview of the input, centred and capped at 500x500 px, with
            # its dimensions shown underneath.
            st.markdown(f"""
            <style>
            .centered-container {{
            text-align: center;
            }}
            .centered-image {{
            display: block;
            margin-left: auto;
            margin-right: auto;
            max-width: 500px;
            max-height: 500px;
            }}
            </style>
            <div class="centered-container">
            <img src="data:image/png;base64,{img_base64}" class="centered-image" alt="Input image">
            <p style="color:gray;">Input image ({img.height}✖️{img.width})</p>
            </div>
            """, unsafe_allow_html=True)
            st.write("")

            with st.spinner("Predicting..."):
                uploaded_file.seek(0)
                # inference receives a list of image paths; only one image is
                # submitted, so the first result is taken.
                TexTeller_result = inference(
                    model,
                    tokenizer,
                    [png_file_path],
                    os.environ['USE_CUDA'] == 'True',
                    int(os.environ['NUM_BEAM'])
                )[0]
                st.success('Completed!', icon="✅")
                st.markdown(suc_gif_html, unsafe_allow_html=True)
                katex_res = to_katex(TexTeller_result)
                st.text_area(":red[Predicted formula]", katex_res, height=150)
                st.latex(katex_res)
|
2024-02-11 08:06:50 +00:00
|
|
|
|
|
|
|
|
|
|
# ============================ pages =============================== #
|