Skip to content

Commit

Permalink
no remove
Browse files: browse the repository at this point in the history
markus583 committed Apr 29, 2024
1 parent 2d4f8ed commit 96fa39c
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions wtpsplit/train/train_xlmr.py
Original file line number | Diff line number | Diff line change
@@ -332,13 +332,13 @@ def drop_some_non_punctuation_samples(examples):
if args.pack_samples:
assert not args.one_sample_per_line

if split == "train" and args.use_subwords:
with training_args.main_process_first():
for root, dirs, files in os.walk(os.environ.get("HF_DATASETS_CACHE")):
for file in files:
if file.startswith("m_c4-test-train"):
logger.warning(f"Removing {os.path.join(root, file)}")
os.remove(os.path.join(root, file))
# if split == "train" and args.use_subwords:
# with training_args.main_process_first():
# for root, dirs, files in os.walk(os.environ.get("HF_DATASETS_CACHE")):
# for file in files:
# if file.startswith("m_c4-test-train"):
# logger.warning(f"Removing {os.path.join(root, file)}")
# os.remove(os.path.join(root, file))

if not args.one_sample_per_line:
with training_args.main_process_first():

0 comments on commit 96fa39c

Please sign in to comment.