Skip to content

Commit 3257c88

Browse files
authored
Merge pull request #619 from hubmapconsortium/dataset_sample_and_donor_endpoints
Dataset sample and donor endpoints
2 parents 59dc58a + b7af360 commit 3257c88

File tree

3 files changed

+232
-3
lines changed

3 files changed

+232
-3
lines changed

entity-api-spec.yaml

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,6 +1962,93 @@ paths:
19621962
description: The target dataset could not be found
19631963
'500':
19641964
description: Internal error
1965+
'/datasets/{id}/organs':
1966+
get:
1967+
summary: Retrieve a list of all of the samples that are organs that are associated with the dataset id
1968+
parameters:
1969+
- name: id
1970+
in: path
1971+
description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
1972+
required: true
1973+
schema:
1974+
type: string
1975+
responses:
1976+
'200':
1977+
description: A list of entity_type == Sample with sample_category == organ associated with the dataset id
1978+
content:
1979+
application/json:
1980+
schema:
1981+
type: array
1982+
items:
1983+
$ref: '#/components/schemas/Sample'
1984+
'400':
1985+
description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
1986+
'401':
1987+
description: The user's token has expired or the user did not supply a valid token
1988+
'403':
1989+
description: The user is not authorized to view the organs associated with the given dataset.
1990+
'404':
1991+
description: The target dataset could not be found
1992+
'500':
1993+
description: Internal error
1994+
'/datasets/{id}/samples':
1995+
get:
1996+
summary: Retrieve a list of all of the samples that are not organs that are associated with the dataset id
1997+
parameters:
1998+
- name: id
1999+
in: path
2000+
description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
2001+
required: true
2002+
schema:
2003+
type: string
2004+
responses:
2005+
'200':
2006+
description: A list of entity_type == Sample with sample_category != organ associated with the dataset id
2007+
content:
2008+
application/json:
2009+
schema:
2010+
type: array
2011+
items:
2012+
$ref: '#/components/schemas/Sample'
2013+
'400':
2014+
description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
2015+
'401':
2016+
description: The user's token has expired or the user did not supply a valid token
2017+
'403':
2018+
description: The user is not authorized to view the samples associated with the given dataset.
2019+
'404':
2020+
description: The target dataset could not be found
2021+
'500':
2022+
description: Internal error
2023+
'/datasets/{id}/donors':
2024+
get:
2025+
summary: Retrieve a list of all of the donors that are associated with the dataset id
2026+
parameters:
2027+
- name: id
2028+
in: path
2029+
description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
2030+
required: true
2031+
schema:
2032+
type: string
2033+
responses:
2034+
'200':
2035+
description: A list of entity_type == Donor that are associated with the dataset id
2036+
content:
2037+
application/json:
2038+
schema:
2039+
type: array
2040+
items:
2041+
$ref: '#/components/schemas/Donor'
2042+
'400':
2043+
description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
2044+
'401':
2045+
description: The user's token has expired or the user did not supply a valid token
2046+
'403':
2047+
description: The user is not authorized to view the donors associated with the given dataset.
2048+
'404':
2049+
description: The target dataset could not be found
2050+
'500':
2051+
description: Internal error
19652052
'/datasets/{id}/retract':
19662053
put:
19672054
summary: 'Retracts a dataset after it has been published. Requires a json body with a single field {retraction_reason: string}. The dataset for the given id is modified to include this new retraction_reason field and sets the dataset property sub_status to Retracted. The complete modified dataset is returned. Requires that the dataset being retracted has already been published (dataset.status == Published). Requires a user token with membership in the HuBMAP-Data-Admin group, otherwise a 403 will be returned.'

src/app.py

Lines changed: 107 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2704,8 +2704,7 @@ def get_associated_organs_from_dataset(id):
27042704
# we need to tell the client with a 401 error
27052705
validate_token_if_auth_header_exists(request)
27062706

2707-
# Use the internal token to query the target entity
2708-
# since public entities don't require user token
2707+
# Use the internal token to query the target entity since public entities don't require user token
27092708
token = get_internal_token()
27102709

27112710
# Query target entity against uuid-api and neo4j and return as a dict if exists
@@ -2725,7 +2724,7 @@ def get_associated_organs_from_dataset(id):
27252724
# the user token has the correct access level
27262725
associated_organs = app_neo4j_queries.get_associated_organs_from_dataset(neo4j_driver_instance, entity_dict['uuid'])
27272726

2728-
# If there are zero items in the list associated organs, then there are no associated
2727+
# If there are zero items in the list associated_organs, then there are no associated
27292728
# Organs and a 404 will be returned.
27302729
if len(associated_organs) < 1:
27312730
not_found_error("the dataset does not have any associated organs")
@@ -2737,6 +2736,111 @@ def get_associated_organs_from_dataset(id):
27372736

27382737
return jsonify(final_result)
27392738

2739+
"""
2740+
Get all samples (excluding organs) associated with a given dataset
2741+
2742+
The gateway treats this endpoint as public accessible
2743+
2744+
Parameters
2745+
----------
2746+
id : str
2747+
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
2748+
2749+
Returns
2750+
-------
2751+
json
2752+
a list of all the samples associated with the target dataset
2753+
"""
2754+
@app.route('/datasets/<id>/samples', methods=['GET'])
def get_associated_samples_from_dataset(id):
    # A token is optional for this endpoint; when an Authorization header is
    # present it still gets validated so a bad token is reported to the client
    validate_token_if_auth_header_exists(request)

    # Start out with the internal token: looking up a public entity
    # must work even when the caller supplied no token at all
    token = get_internal_token()

    # Resolve the given id (via uuid-api and neo4j) to the entity dict
    entity_dict = query_target_entity(id, token)

    # Reject anything that is not a Dataset according to the schema manager
    if not schema_manager.entity_type_instanceof(entity_dict['entity_type'], 'Dataset'):
        bad_request_error("The entity of given id is not a Dataset or Publication")

    # Only a published dataset may be read without a user token
    is_published = entity_dict['status'].lower() == DATASET_STATUS_PUBLISHED
    if not is_published:
        # Swap in the caller's own token, requesting non-public access
        token = get_user_token(request, non_public_access_required=True)

    # At this point the dataset is either public or the user token passed the access check
    sample_nodes = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance, entity_dict['uuid'])

    # An empty result is reported as not found rather than as an empty list
    if len(sample_nodes) == 0:
        not_found_error("the dataset does not have any associated samples")

    # Expand each sample into its complete entity, then normalize for the response
    complete_samples = schema_manager.get_complete_entities_list(token, sample_nodes)
    normalized_samples = schema_manager.normalize_entities_list_for_response(complete_samples)

    return jsonify(normalized_samples)
2791+
2792+
"""
2793+
Get all donors associated with a given dataset
2794+
2795+
The gateway treats this endpoint as public accessible
2796+
2797+
Parameters
2798+
----------
2799+
id : str
2800+
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
2801+
2802+
Returns
2803+
-------
2804+
json
2805+
a list of all the donors associated with the target dataset
2806+
"""
2807+
@app.route('/datasets/<id>/donors', methods=['GET'])
def get_associated_donors_from_dataset(id):
    # A token is optional for this endpoint; when an Authorization header is
    # present it still gets validated so a bad token is reported to the client
    validate_token_if_auth_header_exists(request)

    # Start out with the internal token: looking up a public entity
    # must work even when the caller supplied no token at all
    token = get_internal_token()

    # Resolve the given id (via uuid-api and neo4j) to the entity dict
    entity_dict = query_target_entity(id, token)

    # Reject anything that is not a Dataset according to the schema manager
    if not schema_manager.entity_type_instanceof(entity_dict['entity_type'], 'Dataset'):
        bad_request_error("The entity of given id is not a Dataset or Publication")

    # Only a published dataset may be read without a user token
    is_published = entity_dict['status'].lower() == DATASET_STATUS_PUBLISHED
    if not is_published:
        # Swap in the caller's own token, requesting non-public access
        token = get_user_token(request, non_public_access_required=True)

    # At this point the dataset is either public or the user token passed the access check
    donor_nodes = app_neo4j_queries.get_associated_donors_from_dataset(neo4j_driver_instance, entity_dict['uuid'])

    # An empty result is reported as not found rather than as an empty list
    if len(donor_nodes) == 0:
        not_found_error("the dataset does not have any associated donors")

    # Expand each donor into its complete entity, then normalize for the response
    complete_donors = schema_manager.get_complete_entities_list(token, donor_nodes)
    normalized_donors = schema_manager.normalize_entities_list_for_response(complete_donors)

    return jsonify(normalized_donors)
27402844

27412845
"""
27422846
Get the complete provenance info for all datasets

src/app_neo4j_queries.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,44 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid):
670670

671671
return results
672672

673+
"""
Get the non-organ Sample nodes that have a path leading to the given dataset

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
dataset_uuid : str
    The uuid of the target dataset, matched against ds.uuid in the query

Returns
-------
list
    The output of schema_neo4j_queries.nodes_to_dicts() for the collected
    Sample nodes, or an empty list when the query yields nothing
"""
def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid):
    # specimen_type -> sample_category 12/15/2022
    # Any Sample connected to the dataset through a path of any length,
    # leaving out those whose sample_category is 'organ'
    query = (f"MATCH (ds:Dataset)<-[*]-(sample:Sample) "
             f"WHERE ds.uuid='{dataset_uuid}' AND NOT sample.sample_category = 'organ' "
             f"RETURN apoc.coll.toSet(COLLECT(sample)) AS {record_field_name}")

    logger.info("======get_associated_samples_from_dataset() query======")
    logger.info(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)

        # No record, or an empty collection under the record field: nothing to convert
        if not record or not record[record_field_name]:
            return []

        return schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
691+
692+
"""
Get the Donor nodes that have a path leading to the given dataset

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
dataset_uuid : str
    The uuid of the target dataset, matched against ds.uuid in the query

Returns
-------
list
    The output of schema_neo4j_queries.nodes_to_dicts() for the collected
    Donor nodes, or an empty list when the query yields nothing
"""
def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid):
    results = []

    # Any Donor connected to the dataset through a path of any length.
    # Each fragment ends with a space so the clauses stay separated once
    # the adjacent string literals are concatenated (WHERE ... RETURN ...)
    query = (f"MATCH (ds:Dataset)<-[*]-(donor:Donor) "
             f"WHERE ds.uuid='{dataset_uuid}' "
             f"RETURN apoc.coll.toSet(COLLECT(donor)) AS {record_field_name}")

    logger.info("======get_associated_donors_from_dataset() query======")
    logger.info(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)

        # Only convert when the query produced a non-empty collection
        if record and record[record_field_name]:
            results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])

    return results
710+
673711
"""
674712
Retrieve all the provenance information about each dataset. Each dataset's prov-info is given by a dictionary.
675713
Certain fields such as first sample where there can be multiple nearest datasets in the provenance above a given

0 commit comments

Comments
 (0)