From 5cbbfb38d648f4eb2a6728377dab8ccd781fcee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=89=E6=B4=8B=E4=B8=89=E6=B4=8B?= <1258009915@qq.com> Date: Sun, 21 Apr 2024 12:09:26 +0000 Subject: [PATCH 1/2] =?UTF-8?q?1)=20=E4=BF=AE=E5=A4=8D=E4=BA=86to=5Fkatex.?= =?UTF-8?q?py=E7=9A=84bug;=202)=E6=8A=8ABox.py=E4=B8=AD=E7=9A=84=E8=BD=AC?= =?UTF-8?q?=E5=8C=96=E7=BB=93=E6=9E=9C=E5=86=99=E5=9C=A8logs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/models/det_model/Bbox.py | 8 +++++++- src/models/ocr_model/train/train.py | 4 ++-- src/models/ocr_model/utils/to_katex.py | 1 + src/start_web.sh | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/models/det_model/Bbox.py b/src/models/det_model/Bbox.py index 93f4723..9784541 100644 --- a/src/models/det_model/Bbox.py +++ b/src/models/det_model/Bbox.py @@ -1,5 +1,8 @@ +import os + from PIL import Image, ImageDraw from typing import List +from pathlib import Path class Point: @@ -63,6 +66,9 @@ class Bbox: def draw_bboxes(img: Image.Image, bboxes: List[Bbox], name="annotated_image.png"): + curr_work_dir = Path(os.getcwd()) + log_dir = curr_work_dir / "logs" + log_dir.mkdir(exist_ok=True) drawer = ImageDraw.Draw(img) for bbox in bboxes: # Calculate the coordinates for the rectangle to be drawn @@ -82,4 +88,4 @@ def draw_bboxes(img: Image.Image, bboxes: List[Bbox], name="annotated_image.png" drawer.text((left, bottom - 10), bbox.content[:10], fill="red") # Save the image with drawn rectangles - img.save(name) \ No newline at end of file + img.save(log_dir / name) \ No newline at end of file diff --git a/src/models/ocr_model/train/train.py b/src/models/ocr_model/train/train.py index 2a4e2dd..840b123 100644 --- a/src/models/ocr_model/train/train.py +++ b/src/models/ocr_model/train/train.py @@ -38,7 +38,7 @@ def train(model, tokenizer, train_dataset, eval_dataset, collate_fn_with_tokeniz ) # trainer.train(resume_from_checkpoint=None) - trainer.train(resume_from_checkpoint='/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-644000') + trainer.train(resume_from_checkpoint='/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-788000') def evaluate(model, tokenizer, eval_dataset, collate_fn): @@ -96,7 +96,7 @@ if __name__ == '__main__': collate_fn_with_tokenizer = partial(collate_fn, tokenizer=tokenizer) # model = TexTeller() - model = TexTeller.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-644000') + model = TexTeller.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-788000') # ================= debug ======================= # foo = train_dataset[:50] diff --git a/src/models/ocr_model/utils/to_katex.py b/src/models/ocr_model/utils/to_katex.py index 5a0a95b..98a6acf 100644 --- a/src/models/ocr_model/utils/to_katex.py +++ b/src/models/ocr_model/utils/to_katex.py @@ -51,6 +51,7 @@ def change(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, ne i = start + 1 continue else: + result += input_str[i:start] i = start if old_inst != new_inst and old_inst in result: diff --git a/src/start_web.sh b/src/start_web.sh index 6ec8f7b..ef96338 100755 --- a/src/start_web.sh +++ b/src/start_web.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -exu -export CHECKPOINT_DIR="default" +export CHECKPOINT_DIR="/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-788000" export TOKENIZER_DIR="default" streamlit run web.py From 9018c62f664582d67d9173c1dde9cc659aecf171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=89=E6=B4=8B=E4=B8=89=E6=B4=8B?= <1258009915@qq.com> Date: Sun, 21 Apr 2024 13:06:01 +0000 Subject: [PATCH 2/2] Update README.md --- README.md | 5 +---- assets/README_zh.md | 27 +++++++++++---------------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 0a79ce2..fb2b5a2 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,6 @@ TexTeller was trained with ~~550K~~7.5M image-formula pairs (dataset available [ * 📮[2024-04-12] Trained a **formula detection model**, thereby enhancing the capability to detect and recognize formulas in entire documents (whole-image inference)! - ## 🔑 Prerequisites python=3.10 @@ -85,9 +84,7 @@ TexTeller also supports **formula detection and recognition** on full images, al ### Download Weights -English documentation formula detection [[link](https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco_trained_on_IBEM_en_papers.onnx?download=true)]: Trained on 8272 images from the [IBEM dataset](https://zenodo.org/records/4757865). - -Chinese documentation formula detection [[link](https://huggingface.co/TonyLee1256/texteller_det/blob/main/rtdetr_r50vd_6x_coco_trained_on_cn_textbook.onnx)]: Trained on 2560 Chinese textbook images (100+ layouts). +Chinese-English documentation formula detection [[link](https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco.onnx?download=true)]: Trained on 3415 Chinese textbook images (130+ layouts). ### Formula Detection diff --git a/assets/README_zh.md b/assets/README_zh.md index b946d49..1b44cef 100644 --- a/assets/README_zh.md +++ b/assets/README_zh.md @@ -1,4 +1,4 @@ -📄 English | 中文 +📄 ``English`` | 中文