Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dealing with derivatives #45

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions bids2openminds/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,11 @@


def convert(input_path, output_path=None, multiple_files=False, include_empty_properties=False):
if not (os.path.isdir(input_path)):
raise NotADirectoryError(
f"The input directory is not valid, you have specified {input_path} which is not a directory."
)
# if not(BIDSValidator().is_bids(input_path)):
# raise NotADirectoryError(f"The input directory is not valid, you have specified {input_path} which is not a BIDS directory.")

collection = Collection()
bids_layout = BIDSLayout(input_path)

layout_df = bids_layout.to_df()
[bids_layout, layout_df, dataset_description] = main.read_bids_directory(
input_path)

subjects_id = bids_layout.get_subjects()

Expand Down
56 changes: 54 additions & 2 deletions bids2openminds/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,70 @@
import os
import pathlib
from warnings import warn

from bids import BIDSLayout
import pandas as pd
from nameparser import HumanName

import openminds.latest.core as omcore
import openminds.latest.controlled_terms as controlled_terms
from openminds import IRI

from .utility import table_filter, pd_table_value, file_hash, file_storage_size
from .utility import table_filter, pd_table_value, file_hash, file_storage_size, read_json
from .mapping import bids2openminds_instance


def read_bids_directory(input_path):
"""
Reads the content of the dataset_description.json file and determines whether the dataset is raw, derivative, or contains derivatives. Then, it calls the appropriate BIDSLayout function.
Returns three Python objects.

Parameters:
- input_path: The path to the BIDS folder.

Returns:
- bids_layout: An instance of the Layout class from pybids for this dataset.
- layout_df: A pandas DataFrame containing all the files and corresponding information.
- dataset_description: A Python dictionary containing the content of the dataset_description JSON file.
"""

if not os.path.isdir(input_path):
raise NotADirectoryError(
f"The input path is not valid. You specified '{input_path}', which is not a directory.")

# if not(BIDSValidator().is_bids(input_path)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete the commented-out lines, or add a TODO / FIXME note stating what is still missing for them.

# raise NotADirectoryError(f"The input directory is not valid, you have specified {input_path} which is not a BIDS directory.")
dataset_description_path = os.path.join(
input_path, "dataset_description.json")
dataset_description = read_json(dataset_description_path)

if not dataset_description:
bids_layout = BIDSLayout(input_path)
layout_df = bids_layout.to_df()
dataset_description_path = table_filter(layout_df, "description")
dataset_description = read_json(dataset_description_path.iat[0, 0])
if not dataset_description:
raise FileNotFoundError(
"There was no dataset_description.json file. Every dataset MUST include this file."
)
return bids_layout, layout_df, dataset_description

if ("DatasetType" in dataset_description) and (dataset_description["DatasetType"] == "derivative"):
Copy link
Member

@lzehl lzehl May 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since dataset_description["DatasetType"] is only RECOMMENDED, this is not a reliable way to check for derivatives.

I would suggest relying only on lines 57-66 to determine whether derivatives are present. However, this only works for derivatives that are provided in a subdirectory of a BIDS raw dataset. If only derivative data are shared as a BIDS dataset, there is no requirement to store those data in a "derivatives" subdirectory (if I understand the specification correctly).

However, you can make use of the REQUIREMENT that the dataset_description.json for derivative data MUST have a "GeneratedBy" key specified.

Also note that if derived datasets are stored in a subdirectory of a raw dataset in BIDS, there are two dataset_description.json files: one for the raw data and one for (each) derived dataset.

bids_layout = BIDSLayout(input_path, is_derivative=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this only loading derivatives or all (raw + derivatives)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume all

layout_df = bids_layout.to_df()
return bids_layout, layout_df, dataset_description

derivatives_path = os.path.join(
input_path, "derivatives")
if os.path.isdir(derivatives_path):
bids_layout = BIDSLayout(input_path, derivatives=True)
layout_df = bids_layout.to_df()
return bids_layout, layout_df, dataset_description

bids_layout = BIDSLayout(input_path)
Copy link
Member

@lzehl lzehl May 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see why the lines above are needed. Based on the specification, would it not be sufficient to write

has_derived_data = (
    os.path.isdir(os.path.join(input_path, "derivatives"))
    or "GeneratedBy" in dataset_description
)

bids_layout = BIDSLayout(input_path, is_derivative=has_derived_data)
layout_df = bids_layout.to_df()

?? (please double check the code, not sure I wrote this correctly)

Note that the data types also need to be set in openMINDS (can be an array). Not sure if you do this later somewhere?

layout_df = bids_layout.to_df()
return bids_layout, layout_df, dataset_description


def create_openminds_person(full_name):
# Regex for detecting any unwanted characters.
name_regex = re.compile(
Expand Down
3 changes: 2 additions & 1 deletion test/test_bids_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import bids2openminds.converter

example_dataset = [("ds003", 13), ("ds000247", 6),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment explaining what the numbers in the example dataset tuples are?

("eeg_cbm", 20), ("asl001", 1), ("eeg_rest_fmri", 3)]
("eeg_cbm", 20), ("asl001", 1),
("eeg_rest_fmri", 3), ("ds000001-fmriprep", 4)]


@pytest.mark.parametrize("dataset_label,dataset_number_subject", example_dataset)
Expand Down