TexTeller v2

This commit is contained in:
三洋三洋
2024-03-25 06:54:22 +00:00
parent 74341c7e8a
commit ef218d67f6
7 changed files with 28 additions and 22 deletions

View File

@@ -17,7 +17,7 @@ from transformers import (
class TexTeller(VisionEncoderDecoderModel):
-REPO_NAME = '/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv2/checkpoint-356000'
+REPO_NAME = '/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv2/checkpoint-588000'
def __init__(self, decoder_path=None, tokenizer_path=None):
encoder = ViTModel(ViTConfig(
image_size=FIXED_IMG_SIZE,

View File

@@ -3,7 +3,8 @@ set -exu
export CHECKPOINT_DIR="default"
export TOKENIZER_DIR="default"
-export USE_CUDA=False # True or False (case-sensitive)
-export NUM_BEAM=1
+# export USE_CUDA=False # True or False (case-sensitive)
+export USE_CUDA=True # True or False (case-sensitive)
+export NUM_BEAM=10
streamlit run web.py

View File

@@ -95,7 +95,7 @@ def rendering(formula: str, out_img_path: Path) -> bool:
return p.returncode == 0
def pdf_to_pngbytes(pdf_path):
-images = convert_from_path(pdf_path, first_page=1, last_page=1)
+images = convert_from_path(pdf_path, dpi=400,first_page=1, last_page=1)
trimmed_images = trim(images[0])
png_image_bytes = io.BytesIO()
trimmed_images.save(png_image_bytes, format='PNG')