File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1149,6 +1149,14 @@ def load_dataset_builder(
11491149 dataset_name = builder_kwargs .pop ("dataset_name" , None )
11501150 info = dataset_module .dataset_infos .get (config_name ) if dataset_module .dataset_infos else None
11511151
1152+ # Avoid passing duplicate keyword arguments to the builder.
1153+ # `builder_kwargs` can contain keys like `base_path`, and users may also pass them via `config_kwargs`.
1154+ # In that case, Python raises: "TypeError: got multiple values for keyword argument ...".
1155+ # Keep the user-provided values (config_kwargs) by dropping overlaps from builder_kwargs.
1156+ if config_kwargs :
1157+ for key in set (builder_kwargs ).intersection (config_kwargs ):
1158+ builder_kwargs .pop (key , None )
1159+
11521160 if (
11531161 path in _PACKAGED_DATASETS_MODULES
11541162 and data_files is None
Original file line number Diff line number Diff line change 1+ import pytest
2+
3+ from datasets .load import load_dataset_builder
4+
5+
def test_load_dataset_builder_does_not_fail_with_duplicate_builder_kwargs(tmp_path):
    """Regression test for https://github.com/huggingface/datasets/issues/4910.

    Some module factories provide `base_path` in `builder_kwargs`, and users
    can also pass `base_path` via `config_kwargs`, which used to raise:

        TypeError: ... got multiple values for keyword argument 'base_path'

    The call must now succeed and the user-provided value must win.
    """
    # Minimal one-column CSV so the packaged "csv" builder has a data file.
    train_csv = tmp_path / "train.csv"
    train_csv.write_text("col\n1\n", encoding="utf-8")

    # Pass `base_path` explicitly so it overlaps with the value supplied
    # internally through `builder_kwargs`.
    custom_base_path = str(tmp_path / "custom_base_path")
    builder = load_dataset_builder("csv", data_files=str(train_csv), base_path=custom_base_path)
    assert builder.base_path == custom_base_path
17+
You can’t perform that action at this time.
0 commit comments