Skip to content

Commit 99b5cd5

Browse files
authored
Merge pull request #297 from hubmapconsortium/test-release
v2.0.24 release
2 parents cfc710e + 672d31f commit 99b5cd5

File tree

9 files changed

+117
-63
lines changed

9 files changed

+117
-63
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.0.23
1+
2.0.24

docker/docker-compose.dev.yml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,9 @@ services:
88
# since nginx is running under non-root user hubmap
99
ports:
1010
- "3333:8080"
11-
environment:
12-
- HOST_GID=${HOST_GID:-1000}
13-
- HOST_UID=${HOST_UID:-1000}
14-
init: true
15-
restart: always
1611
volumes:
1712
# Mount the VERSION file and BUILD file
1813
- "../VERSION:/usr/src/app/VERSION"
1914
- "../BUILD:/usr/src/app/BUILD"
2015
# Mount the source code
21-
- "../src:/usr/src/app/src"
22-
# Mount conf.d on host machine to the nginx conf.d on container
23-
- "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
16+
- "../src:/usr/src/app/src"

docker/docker-compose.localhost.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,9 @@ version: "3.7"
33
services:
44

55
entity-api:
6-
environment:
7-
- HOST_GID=${HOST_GID:-1000}
8-
- HOST_UID=${HOST_UID:-1000}
96
volumes:
107
# Mount the VERSION file and BUILD file
118
- "../VERSION:/usr/src/app/VERSION"
129
- "../BUILD:/usr/src/app/BUILD"
1310
# Mount the source code to container
14-
- "../src:/usr/src/app/src"
11+
- "../src:/usr/src/app/src"

docker/docker-compose.prod.yml

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,4 @@ services:
77
# Only root can listen on ports below 1024, we use higher-numbered ports
88
# since nginx is running under non-root user hubmap
99
ports:
10-
- "3333:8080"
11-
environment:
12-
- HOST_GID=${HOST_GID:-1000}
13-
- HOST_UID=${HOST_UID:-1000}
14-
init: true
15-
restart: always
16-
volumes:
17-
# Mount conf.d on host machine to the nginx conf.d on container
18-
- "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
19-
20-
10+
- "3333:8080"

docker/docker-compose.stage.yml

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,4 @@ services:
77
# Only root can listen on ports below 1024, we use higher-numbered ports
88
# since nginx is running under non-root user hubmap
99
ports:
10-
- "3333:8080"
11-
environment:
12-
- HOST_GID=${HOST_GID:-1000}
13-
- HOST_UID=${HOST_UID:-1000}
14-
init: true
15-
restart: always
16-
volumes:
17-
# Mount conf.d on host machine to the nginx conf.d on container
18-
- "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
19-
20-
10+
- "3333:8080"

docker/docker-compose.test.yml

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,4 @@ services:
77
# Only root can listen on ports below 1024, we use higher-numbered ports
88
# since nginx is running under non-root user hubmap
99
ports:
10-
- "3333:8080"
11-
environment:
12-
- HOST_GID=${HOST_GID:-1000}
13-
- HOST_UID=${HOST_UID:-1000}
14-
init: true
15-
restart: always
16-
volumes:
17-
# Mount conf.d on host machine to the nginx conf.d on container
18-
- "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
19-
10+
- "3333:8080"

docker/docker-compose.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,20 @@ services:
2020
timeout: 10s
2121
retries: 3
2222
start_period: 40s
23+
environment:
24+
- HOST_GID=${HOST_GID:-1000}
25+
- HOST_UID=${HOST_UID:-1000}
26+
init: true
27+
restart: always
2328
volumes:
2429
# Mount the app config to container in order to keep it outside of the image
2530
- "../src/instance:/usr/src/app/src/instance"
2631
# Mount the logging to container
2732
- "../log:/usr/src/app/log"
2833
# Mount the schema yaml file
2934
- "../src/schema/provenance_schema.yaml:/usr/src/app/src/schema/provenance_schema.yaml"
35+
# Mount conf.d on host machine to the nginx conf.d on container
36+
- "./entity-api/nginx/conf.d:/etc/nginx/conf.d"
3037
networks:
3138
- gateway_hubmap
3239

src/app.py

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,9 @@ def get_entity_types():
544544
Result filtering is supported based on query string
545545
For example: /<entity_type>/entities?property=uuid
546546
547+
NOTE: this endpoint is NOT exposed via AWS API Gateway due to performance consideration
548+
It's only used by search-api, which makes internal calls during index/reindex time, bypassing AWS API Gateway
549+
547550
Parameters
548551
----------
549552
entity_type : str
@@ -2209,7 +2212,29 @@ def get_associated_organs_from_dataset(id):
22092212
"""
22102213
Get the complete provenance info for all datasets
22112214
2212-
Authorization handled by gateway. HuBMAP-Read group is required for this call.
2215+
Authentication
2216+
-------
2217+
No token is required, however if a token is given it must be valid or an error will be raised. If no token with HuBMAP
2218+
Read Group access is given, only datasets designated as "published" will be returned
2219+
2220+
Query Parameters
2221+
-------
2222+
format : string
2223+
Designates the output format of the returned data. Accepted values are "json" and "tsv". If none provided, by
2224+
default will return a tsv.
2225+
group_uuid : string
2226+
Filters returned datasets by a given group uuid.
2227+
organ : string
2228+
Filters returned datasets related to samples of the given organ. Accepts 2 character organ codes. These codes
2229+
must match the organ types yaml at https://raw.githubusercontent.com/hubmapconsortium/search-api/test-release/src/search-schema/data/definitions/enums/organ_types.yaml
2230+
or an error will be raised
2231+
has_rui_info : string
2232+
Accepts strings "true" or "false". Any other value will result in an error. If true, only datasets connected to
2233+
a sample that contains rui info will be returned. If false, only datasets that are NOT connected to samples
2234+
containing rui info will be returned. By default, no filtering is performed.
2235+
dataset_status : string
2236+
Filters results by dataset status. Accepted values are "Published", "QA", and "NEW". If a user only has access
2237+
to published datasets and enters QA or New, an error will be raised. By default, no filtering is performed
22132238
22142239
Returns
22152240
-------
@@ -2258,6 +2283,7 @@ def get_prov_info():
22582283
HEADER_PROCESSED_DATASET_PORTAL_URL = 'processed_dataset_portal_url'
22592284
ASSAY_TYPES_URL = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/master/src/search-schema/data/definitions/enums/assay_types.yaml'
22602285
ORGAN_TYPES_URL = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/master/src/search-schema/data/definitions/enums/organ_types.yaml'
2286+
HEADER_PREVIOUS_VERSION_HUBMAP_IDS = 'previous_version_hubmap_ids'
22612287

22622288
headers = [
22632289
HEADER_DATASET_UUID, HEADER_DATASET_HUBMAP_ID, HEADER_DATASET_STATUS, HEADER_DATASET_GROUP_NAME,
@@ -2270,8 +2296,16 @@ def get_prov_info():
22702296
HEADER_DONOR_GROUP_NAME, HEADER_RUI_LOCATION_HUBMAP_ID, HEADER_RUI_LOCATION_SUBMISSION_ID,
22712297
HEADER_RUI_LOCATION_UUID, HEADER_SAMPLE_METADATA_HUBMAP_ID, HEADER_SAMPLE_METADATA_SUBMISSION_ID,
22722298
HEADER_SAMPLE_METADATA_UUID, HEADER_PROCESSED_DATASET_UUID, HEADER_PROCESSED_DATASET_HUBMAP_ID,
2273-
HEADER_PROCESSED_DATASET_STATUS, HEADER_PROCESSED_DATASET_PORTAL_URL
2299+
HEADER_PROCESSED_DATASET_STATUS, HEADER_PROCESSED_DATASET_PORTAL_URL, HEADER_PREVIOUS_VERSION_HUBMAP_IDS
22742300
]
2301+
published_only = True
2302+
2303+
# Token is not required, but if an invalid token is provided,
2304+
# we need to tell the client with a 401 error
2305+
validate_token_if_auth_header_exists(request)
2306+
2307+
if user_in_hubmap_read_group(request):
2308+
published_only = False
22752309

22762310
# Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
22772311
# because that would require using a urllib request for each dataset
@@ -2328,13 +2362,19 @@ def get_prov_info():
23282362
if dataset_status is not None:
23292363
if dataset_status.lower() not in ['new', 'qa', 'published']:
23302364
bad_request_error("Invalid Dataset Status. Must be 'new', 'qa', or 'published' Case-Insensitive")
2331-
param_dict['dataset_status'] = dataset_status
2365+
if published_only and dataset_status.lower() != 'published':
2366+
bad_request_error(f"Invalid Dataset Status. No auth token given or token is not a member of HuBMAP-Read"
2367+
" Group. If no token with HuBMAP-Read Group access is given, only datasets marked "
2368+
"'Published' are available. Try again with a proper token, or change/remove "
2369+
"dataset_status")
2370+
if not published_only:
2371+
param_dict['dataset_status'] = dataset_status
23322372

23332373
# Instantiation of the list dataset_prov_list
23342374
dataset_prov_list = []
23352375

23362376
# Call to app_neo4j_queries to prepare and execute the database query
2337-
prov_info = app_neo4j_queries.get_prov_info(neo4j_driver_instance, param_dict)
2377+
prov_info = app_neo4j_queries.get_prov_info(neo4j_driver_instance, param_dict, published_only)
23382378

23392379
# Each dataset's provenance info is placed into a dictionary
23402380
for dataset in prov_info:
@@ -2497,6 +2537,15 @@ def get_prov_info():
24972537
internal_dict[HEADER_PROCESSED_DATASET_UUID] = ",".join(processed_dataset_status_list)
24982538
internal_dict[HEADER_PROCESSED_DATASET_UUID] = ",".join(processed_dataset_portal_url_list)
24992539

2540+
2541+
if dataset['previous_version_hubmap_ids'] is not None:
2542+
previous_version_hubmap_ids_list = []
2543+
for item in dataset['previous_version_hubmap_ids']:
2544+
previous_version_hubmap_ids_list.append(item)
2545+
internal_dict[HEADER_PREVIOUS_VERSION_HUBMAP_IDS] = previous_version_hubmap_ids_list
2546+
if return_json is False:
2547+
internal_dict[HEADER_PREVIOUS_VERSION_HUBMAP_IDS] = ",".join(previous_version_hubmap_ids_list)
2548+
25002549
# Each dataset's dictionary is added to the list to be returned
25012550
dataset_prov_list.append(internal_dict)
25022551

@@ -2515,6 +2564,33 @@ def get_prov_info():
25152564
output.headers['Content-Disposition'] = 'attachment; filename=prov-info.tsv'
25162565
return output
25172566

2567+
2568+
"""
2569+
Get the complete provenance info for a given dataset
2570+
2571+
Authentication
2572+
-------
2573+
No token is required, however if a token is given it must be valid or an error will be raised. If no token with HuBMAP
2574+
Read Group access is given, only datasets designated as "published" will be returned
2575+
2576+
Query Parameters
2577+
-------
2578+
format : string
2579+
Designates the output format of the returned data. Accepted values are "json" and "tsv". If none provided, by
2580+
default will return a tsv.
2581+
2582+
Path Parameters
2583+
-------
2584+
id : string
2585+
A HuBMAP_ID or UUID for a dataset. If an invalid dataset id is given, an error will be raised
2586+
2587+
Returns
2588+
-------
2589+
json
2590+
an array of each dataset's provenance info
2591+
tsv
2592+
a text file of tab separated values where each row is a dataset and the columns include all its prov info
2593+
"""
25182594
@app.route('/datasets/<id>/prov-info', methods=['GET'])
25192595
def get_prov_info_for_dataset(id):
25202596
# Token is not required, but if an invalid token provided,

src/app_neo4j_queries.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -932,50 +932,55 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid):
932932
The neo4j database connection pool
933933
param_dict : dictionary
934934
Dictionary containing any parameters desired to filter for certain results
935+
published_only : boolean
936+
If a user does not have a token with HuBMAP-Read Group access, published_only is set to true. This will cause only
937+
datasets with status = 'Published' to be included in the result.
935938
"""
936-
def get_prov_info(neo4j_driver, param_dict):
939+
def get_prov_info(neo4j_driver, param_dict, published_only):
937940
group_uuid_query_string = ''
938941
organ_query_string = 'OPTIONAL MATCH'
939942
organ_where_clause = ""
940943
rui_info_query_string = 'OPTIONAL MATCH (ds)<-[*]-(ruiSample:Sample)'
941944
rui_info_where_clause = "WHERE NOT ruiSample.rui_location IS NULL AND NOT trim(ruiSample.rui_location) = '' "
942945
dataset_status_query_string = ''
943-
first_param = True
946+
published_only_query_string = ''
944947
if 'group_uuid' in param_dict:
945-
first_param = False
946-
group_uuid_query_string = f" WHERE toUpper(ds.group_uuid) = '{param_dict['group_uuid'].upper()}'"
948+
group_uuid_query_string = f" AND toUpper(ds.group_uuid) = '{param_dict['group_uuid'].upper()}'"
947949
if 'organ' in param_dict:
948950
organ_query_string = 'MATCH'
949951
# organ_where_clause = f", organ: '{param_dict['organ'].upper()}'"
950-
organ_where_clause = f" WHERE toUPPER(organ.organ) = '{param_dict['organ'].upper()}'"
952+
organ_where_clause = f" WHERE toUpper(organ.organ) = '{param_dict['organ'].upper()}'"
951953
if 'has_rui_info' in param_dict:
952954
rui_info_query_string = 'MATCH (ds)<-[*]-(ruiSample:Sample)'
953955
if param_dict['has_rui_info'].lower() == 'false':
954956
rui_info_query_string = 'MATCH (ds:Dataset)'
955957
rui_info_where_clause = "WHERE NOT EXISTS {MATCH (ds)<-[*]-(ruiSample:Sample) WHERE NOT ruiSample.rui_location IS NULL AND NOT TRIM(ruiSample.rui_location) = ''} MATCH (ds)<-[*]-(ruiSample:Sample)"
956958
if 'dataset_status' in param_dict:
957-
if first_param:
958-
dataset_status_query_string = f" WHERE toUpper(ds.status) = '{param_dict['dataset_status'].upper()}'"
959-
else:
960-
dataset_status_query_string = f" AND toUpper(ds.status) = '{param_dict['dataset_status'].upper()}'"
959+
dataset_status_query_string = f" AND toUpper(ds.status) = '{param_dict['dataset_status'].upper()}'"
960+
if published_only:
961+
published_only_query_string = f" AND toUpper(ds.status) = 'PUBLISHED'"
961962
query = (f"MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(a)<-[:ACTIVITY_INPUT]-(firstSample:Sample)<-[*]-(donor:Donor)"
963+
f"WHERE not (ds)-[:REVISION_OF]->(:Dataset)"
962964
f"{group_uuid_query_string}"
963965
f"{dataset_status_query_string}"
966+
f"{published_only_query_string}"
964967
f" WITH ds, COLLECT(distinct donor) AS DONOR, COLLECT(distinct firstSample) AS FIRSTSAMPLE"
968+
f" OPTIONAL MATCH (ds)<-[:REVISION_OF]-(rev:Dataset)"
969+
f" WITH ds, DONOR, FIRSTSAMPLE, COLLECT(rev.hubmap_id) as REVISIONS"
965970
f" OPTIONAL MATCH (ds)<-[*]-(metaSample:Sample)"
966971
f" WHERE NOT metaSample.metadata IS NULL AND NOT TRIM(metaSample.metadata) = ''"
967-
f" WITH ds, FIRSTSAMPLE, DONOR, collect(distinct metaSample) as METASAMPLE"
972+
f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, collect(distinct metaSample) as METASAMPLE"
968973
f" {rui_info_query_string}"
969974
f" {rui_info_where_clause}"
970-
f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, collect(distinct ruiSample) as RUISAMPLE"
975+
f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, METASAMPLE, collect(distinct ruiSample) as RUISAMPLE"
971976
f" {organ_query_string} (donor)-[:ACTIVITY_INPUT]->(oa)-[:ACTIVITY_OUTPUT]->(organ:Sample {{specimen_type:'organ'}})-[*]->(ds)"
972977
f" {organ_where_clause}"
973-
f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, COLLECT(DISTINCT organ) AS ORGAN "
978+
f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, METASAMPLE, RUISAMPLE, COLLECT(DISTINCT organ) AS ORGAN "
974979
f" OPTIONAL MATCH (ds)-[:ACTIVITY_INPUT]->(a3)-[:ACTIVITY_OUTPUT]->(processed_dataset:Dataset)"
975-
f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET"
980+
f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET"
976981
f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name,"
977982
f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, "
978-
f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET")
983+
f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET, REVISIONS")
979984
logger.debug("======get_prov_info() query======")
980985
logger.debug(query)
981986
with neo4j_driver.session() as session:
@@ -1033,6 +1038,11 @@ def get_prov_info(neo4j_driver, param_dict):
10331038
node_dict = _node_to_dict(entry)
10341039
content_sixteen.append(node_dict)
10351040
record_dict['processed_dataset'] = content_sixteen
1041+
content_seventeen = []
1042+
for entry in record_contents[17]:
1043+
node_dict = _node_to_dict(entry)
1044+
content_seventeen.append(node_dict)
1045+
record_dict['previous_version_hubmap_ids'] = content_seventeen
10361046
list_of_dictionaries.append(record_dict)
10371047
return list_of_dictionaries
10381048

0 commit comments

Comments
 (0)