From 5cbbfb38d648f4eb2a6728377dab8ccd781fcee5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=89=E6=B4=8B=E4=B8=89=E6=B4=8B?= <1258009915@qq.com>
Date: Sun, 21 Apr 2024 12:09:26 +0000
Subject: [PATCH 1/2] =?UTF-8?q?1)=20=E4=BF=AE=E5=A4=8D=E4=BA=86to=5Fkatex.?=
 =?UTF-8?q?py=E7=9A=84bug;=202)=E6=8A=8ABox.py=E4=B8=AD=E7=9A=84=E8=BD=AC?=
 =?UTF-8?q?=E5=8C=96=E7=BB=93=E6=9E=9C=E5=86=99=E5=9C=A8logs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/models/det_model/Bbox.py           | 8 +++++++-
 src/models/ocr_model/train/train.py    | 4 ++--
 src/models/ocr_model/utils/to_katex.py | 1 +
 src/start_web.sh                       | 2 +-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/models/det_model/Bbox.py b/src/models/det_model/Bbox.py
index 93f4723..9784541 100644
--- a/src/models/det_model/Bbox.py
+++ b/src/models/det_model/Bbox.py
@@ -1,5 +1,8 @@
+import os
+
 from PIL import Image, ImageDraw
 from typing import List
+from pathlib import Path
 
 
 class Point:
@@ -63,6 +66,9 @@ class Bbox:
 
 
 def draw_bboxes(img: Image.Image, bboxes: List[Bbox], name="annotated_image.png"):
+    curr_work_dir = Path(os.getcwd())
+    log_dir = curr_work_dir / "logs"
+    log_dir.mkdir(exist_ok=True)
     drawer = ImageDraw.Draw(img)
     for bbox in bboxes:
         # Calculate the coordinates for the rectangle to be drawn
@@ -82,4 +88,4 @@ def draw_bboxes(img: Image.Image, bboxes: List[Bbox], name="annotated_image.png"
             drawer.text((left, bottom - 10), bbox.content[:10], fill="red")
 
     # Save the image with drawn rectangles
-    img.save(name)
\ No newline at end of file
+    img.save(log_dir / name)
\ No newline at end of file
diff --git a/src/models/ocr_model/train/train.py b/src/models/ocr_model/train/train.py
index 2a4e2dd..840b123 100644
--- a/src/models/ocr_model/train/train.py
+++ b/src/models/ocr_model/train/train.py
@@ -38,7 +38,7 @@ def train(model, tokenizer, train_dataset, eval_dataset, collate_fn_with_tokeniz
     )
 
     # trainer.train(resume_from_checkpoint=None)
-    trainer.train(resume_from_checkpoint='/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-644000')
+    trainer.train(resume_from_checkpoint='/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-788000')
 
 
 def evaluate(model, tokenizer, eval_dataset, collate_fn):
@@ -96,7 +96,7 @@ if __name__ == '__main__':
 
     collate_fn_with_tokenizer = partial(collate_fn, tokenizer=tokenizer)
     # model = TexTeller()
-    model = TexTeller.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-644000')
+    model = TexTeller.from_pretrained('/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-788000')
 
     # =================  debug  =======================
     # foo = train_dataset[:50]
diff --git a/src/models/ocr_model/utils/to_katex.py b/src/models/ocr_model/utils/to_katex.py
index 5a0a95b..98a6acf 100644
--- a/src/models/ocr_model/utils/to_katex.py
+++ b/src/models/ocr_model/utils/to_katex.py
@@ -51,6 +51,7 @@ def change(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, ne
                 i = start + 1
                 continue
         else:
+            result += input_str[i:start]
             i = start
     
     if old_inst != new_inst and old_inst in result:
diff --git a/src/start_web.sh b/src/start_web.sh
index 6ec8f7b..ef96338 100755
--- a/src/start_web.sh
+++ b/src/start_web.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -exu
 
-export CHECKPOINT_DIR="default"
+export CHECKPOINT_DIR="/home/lhy/code/TexTeller/src/models/ocr_model/train/train_result/TexTellerv3/checkpoint-788000"
 export TOKENIZER_DIR="default"
 
 streamlit run web.py

From 9018c62f664582d67d9173c1dde9cc659aecf171 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=89=E6=B4=8B=E4=B8=89=E6=B4=8B?= <1258009915@qq.com>
Date: Sun, 21 Apr 2024 13:06:01 +0000
Subject: [PATCH 2/2] Update README.md

---
 README.md           |  5 +----
 assets/README_zh.md | 27 +++++++++++----------------
 2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 0a79ce2..fb2b5a2 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,6 @@ TexTeller was trained with ~~550K~~7.5M image-formula pairs (dataset available [
 
 * 📮[2024-04-12] Trained a **formula detection model**, thereby enhancing the capability to detect and recognize formulas in entire documents (whole-image inference)!
 
-
 ## 🔑 Prerequisites
 
 python=3.10
@@ -85,9 +84,7 @@ TexTeller also supports **formula detection and recognition** on full images, al
 
 ### Download Weights
 
-English documentation formula detection [[link](https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco_trained_on_IBEM_en_papers.onnx?download=true)]: Trained on 8272 images from the [IBEM dataset](https://zenodo.org/records/4757865).
-
-Chinese documentation formula detection [[link](https://huggingface.co/TonyLee1256/texteller_det/blob/main/rtdetr_r50vd_6x_coco_trained_on_cn_textbook.onnx)]: Trained on 2560 Chinese textbook images (100+ layouts).
+Formula detection for Chinese and English documents [[link](https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco.onnx?download=true)]: Trained on 3415 Chinese textbook images (130+ layouts).
 
 ### Formula Detection
 
diff --git a/assets/README_zh.md b/assets/README_zh.md
index b946d49..1b44cef 100644
--- a/assets/README_zh.md
+++ b/assets/README_zh.md
@@ -1,4 +1,4 @@
-📄 <a href="../README.md">English</a> | 中文
+📄 <a href="../README.md">English</a> | 中文
 
 <div align="center">
     <h1>
@@ -48,6 +48,7 @@ python=3.10
    ```
 
 2. [安装pytorch](https://pytorch.org/get-started/locally/#start-locally)
+
 3. 安装本项目的依赖包:
 
    ```bash
@@ -112,9 +113,7 @@ TexTeller还支持对整张图片进行**公式检测+公式识别**，从而对
 
 ### 下载权重
 
-英文文档公式检测 [[link](https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco_trained_on_IBEM_en_papers.onnx?download=true)]：在8272张[IBEM数据集](https://zenodo.org/records/4757865)上训练得到
-
-中文文档公式检测 [[link](https://huggingface.co/TonyLee1256/texteller_det/blob/main/rtdetr_r50vd_6x_coco_trained_on_cn_textbook.onnx)]：在2560张中文教材数据(100+版式)上训练得到
+中英文文档公式检测 [[link](https://huggingface.co/TonyLee1256/texteller_det/resolve/main/rtdetr_r50vd_6x_coco.onnx?download=true)]：在3415张中文教材数据(130+版式)上训练得到
 
 ### 公式检测
 
@@ -149,14 +148,14 @@ python server.py
 ```
 
 | 参数 | 描述 |
-| - | - |
-| `-ckpt` | 权重文件的路径，*默认为TexTeller的预训练权重*。 |
-| `-tknz` | 分词器的路径，*默认为TexTeller的分词器*。 |
-| `-port` | 服务器的服务端口，*默认是8000*。 |
-| `--inference-mode`| 是否使用GPU(cuda或mps)推理，*默认为CPU*。 |
-| `--num_beams` | beam search的beam数量，*默认是1*。 |
-| `--num_replicas`| 在服务器上运行的服务副本数量，*默认1个副本*。你可以使用更多的副本来获取更大的吞吐量。 |
-| `--ncpu_per_replica` | 每个服务副本所用的CPU核心数，*默认为1*。 |
+| --- | --- |
+| `-ckpt` | 权重文件的路径，*默认为TexTeller的预训练权重*。|
+| `-tknz` | 分词器的路径，*默认为TexTeller的分词器*。|
+| `-port` | 服务器的服务端口，*默认是8000*。|
+| `--inference-mode` | 是否使用GPU(cuda或mps)推理，*默认为CPU*。|
+| `--num_beams` | beam search的beam数量，*默认是1*。|
+| `--num_replicas` | 在服务器上运行的服务副本数量，*默认1个副本*。你可以使用更多的副本来获取更大的吞吐量。|
+| `--ncpu_per_replica` | 每个服务副本所用的CPU核心数，*默认为1*。|
 | `--ngpu_per_replica` | 每个服务副本所用的GPU数量，*默认为1*。你可以把这个值设置成 0~1之间的数，这样会在一个GPU上运行多个服务副本来共享GPU，从而提高GPU的利用率。(注意，如果 --num_replicas 2, --ngpu_per_replica 0.7, 那么就必须要有2个GPU可用) |
 
 > [!NOTE]
@@ -207,13 +206,9 @@ python -m models.ocr_model.train.train
 ## 📅 计划
 
 - [X] ~~使用更大的数据集来训练模型(7.5M样本，即将发布)~~
-
 - [ ] 扫描图片识别
-
 - [ ] PDF文档识别 + 中英文场景支持
-
 - [ ] 推理加速
-
 - [ ] ...
 
 ## ⭐️ 观星曲线