修改了 functional.py 中的 tokenize_fn(图片以 PNG 形式而非数组形式保存,防止占用过多硬盘空间)

This commit is contained in:
三洋三洋
2024-03-04 05:38:30 +00:00
parent 38877d90b8
commit 04a4bddd97

View File

@@ -21,7 +21,7 @@ def left_move(x: torch.Tensor, pad_val):
def tokenize_fn(samples: Dict[str, List[Any]], tokenizer=None) -> Dict[str, List[Any]]:
    """Tokenize a batch of LaTeX formulas and attach the batch's images.

    Args:
        samples: Batch in column form. Only ``samples['latex_formula']`` and
            ``samples['image']`` are read.
        tokenizer: Callable invoked as
            ``tokenizer(formulas, return_special_tokens_mask=True)``.
            Must not be ``None``.

    Returns:
        The object returned by ``tokenizer`` with an extra ``'pixel_values'``
        entry that holds ``samples['image']`` unchanged.

    Raises:
        AssertionError: If ``tokenizer`` is ``None``.
    """
    assert tokenizer is not None, 'tokenizer should not be None'
    tokenized_formula = tokenizer(samples['latex_formula'], return_special_tokens_mask=True)
    # Pass the images through as-is instead of converting each one with
    # np.array(): the converted list was overwritten immediately anyway, and
    # (per the accompanying change note) storing raw arrays takes far more
    # disk space than keeping the images in their encoded form.
    tokenized_formula['pixel_values'] = samples['image']
    return tokenized_formula