完成了load1) er.py, 以 2) 部分代码的loader加载路径的更改
This commit is contained in:
@@ -5,7 +5,7 @@ from ...globals import VOCAB_SIZE
|
||||
|
||||
if __name__ == '__main__':
|
||||
tokenizer = TexTeller.get_tokenizer('/home/lhy/code/TeXify/src/models/tokenizer/roberta-tokenizer-raw')
|
||||
dataset = load_dataset("/home/lhy/code/TeXify/src/models/ocr_model/train/dataset/latex-formulas/latex-formulas.py", "cleaned_formulas")['train']
|
||||
dataset = load_dataset("/home/lhy/code/TexTeller/src/models/ocr_model/train/data/loader.py")['train']
|
||||
new_tokenizer = tokenizer.train_new_from_iterator(text_iterator=dataset['latex_formula'], vocab_size=VOCAB_SIZE)
|
||||
new_tokenizer.save_pretrained('/home/lhy/code/TeXify/src/models/tokenizer/roberta-tokenizer-550Kformulas')
|
||||
pause = 1
|
||||
|
||||
Reference in New Issue
Block a user