diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py
index c4e46079f64..0f199bc81ff 100644
--- a/src/datasets/arrow_dataset.py
+++ b/src/datasets/arrow_dataset.py
@@ -1443,7 +1443,7 @@ def save_to_disk(
                 Path (e.g. `dataset/train`) or remote URI (e.g. `s3://my-bucket/dataset/train`)
                 of the dataset directory where the dataset will be saved to.
             max_shard_size (`int` or `str`, *optional*, defaults to `"500MB"`):
-                The maximum size of the dataset shards to be uploaded to the hub. If expressed as a string, needs to be digits followed by a unit
+                The maximum size of the dataset shards to be saved to the filesystem. If expressed as a string, needs to be digits followed by a unit
                 (like `"50MB"`).
             num_shards (`int`, *optional*):
                 Number of shards to write. By default the number of shards depends on `max_shard_size` and `num_proc`.
diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py
index 5e1946cf0f7..e83b8dfd982 100644
--- a/src/datasets/dataset_dict.py
+++ b/src/datasets/dataset_dict.py
@@ -1300,7 +1300,7 @@ def save_to_disk(
                 Path (e.g. `dataset/train`) or remote URI (e.g. `s3://my-bucket/dataset/train`)
                 of the dataset dict directory where the dataset dict will be saved to.
             max_shard_size (`int` or `str`, *optional*, defaults to `"500MB"`):
-                The maximum size of the dataset shards to be uploaded to the hub. If expressed as a string, needs to be digits followed by a unit
+                The maximum size of the dataset shards to be saved to the filesystem. If expressed as a string, needs to be digits followed by a unit
                 (like `"50MB"`).
             num_shards (`Dict[str, int]`, *optional*):
                 Number of shards to write. By default the number of shards depends on `max_shard_size` and `num_proc`.
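
For context, a minimal usage sketch of the parameters whose docstrings are touched here (the dataset contents and paths below are made up for illustration, not taken from this PR): `max_shard_size` caps the size of each shard that `save_to_disk` writes to the local or remote filesystem, and `num_shards` fixes the shard count directly.

```python
# Illustrative sketch only; assumes a datasets version where save_to_disk
# accepts max_shard_size and num_shards, as in the docstrings above.
from datasets import Dataset, load_from_disk

# Small in-memory dataset purely for demonstration.
ds = Dataset.from_dict({"text": [f"example {i}" for i in range(10_000)]})

# Shards are written to the target filesystem (local path or remote URI);
# string sizes are digits followed by a unit, e.g. "50MB".
ds.save_to_disk("dataset/train", max_shard_size="50MB")

# Alternatively, request an explicit number of shards instead of a size cap.
ds.save_to_disk("dataset/train_8_shards", num_shards=8)

reloaded = load_from_disk("dataset/train")
print(reloaded)
```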