@@ -163,15 +163,15 @@ def http_internal_server_error(e):
163163if MEMCACHED_MODE :
164164 try :
165165 # Use client pool to maintain a pool of already-connected clients for improved performance
166- # The uwsgi config launches the app across multiple threads (2 ) inside each process (4 ), making essentially 8 processes
166+ # The uwsgi config launches the app across multiple threads (8 ) inside each process (32 ), making essentially 256 processes
167167 # Set the connect_timeout and timeout to avoid blocking the process when memcached is slow, defaults to "forever"
168168 # connect_timeout: seconds to wait for a connection to the memcached server
169169 # timeout: seconds to wait for send or receive calls on the socket connected to memcached
170170 # Use the ignore_exc flag to treat memcache/network errors as cache misses on calls to the get* methods
171171 # Set the no_delay flag to set TCP_NODELAY (disable Nagle's algorithm to improve TCP/IP networks and decrease the number of packets)
172172 # If you intend to use anything but str as a value, it is a good idea to use a serializer
173173 memcached_client_instance = PooledClient (app .config ['MEMCACHED_SERVER' ],
174- max_pool_size = 8 ,
174+ max_pool_size = 256 ,
175175 connect_timeout = 1 ,
176176 timeout = 30 ,
177177 ignore_exc = True ,
@@ -369,7 +369,7 @@ def flush_all_cache():
369369Parameters
370370----------
371371id : str
372- The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity (Donor/Dataset/Sample/Upload)
372+ The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity (Donor/Dataset/Sample/Upload/Collection/Publication )
373373
374374Returns
375375-------
@@ -381,12 +381,8 @@ def flush_cache(id):
381381 msg = ''
382382
383383 if MEMCACHED_MODE :
384- msg = f'No cache found from Memcached for entity { id } '
385- cache_key = f'{ MEMCACHED_PREFIX } { id } '
386-
387- if memcached_client_instance .get (cache_key ) is not None :
388- memcached_client_instance .delete (cache_key )
389- msg = f'The cached data has been deleted from Memcached for entity { id } '
384+ delete_cache (id )
385+ msg = f'The cached data has been deleted from Memcached for entity { id } '
390386 else :
391387 msg = 'No caching is being used because Memcached mode is not enabled at all'
392388
@@ -633,14 +629,16 @@ def get_entity_by_id(id):
633629 # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
634630 entity_dict = query_target_entity (id , token )
635631 normalized_entity_type = entity_dict ['entity_type' ]
636- # To verify if a Collection is public, it is necessary to have its Datasets, which
637- # are populated as triggered data. So pull back the complete entity for
638- # _get_entity_visibility() to check.
632+
633+ # Get the generated complete entity result from cache if exists
634+ # Otherwise re-generate on the fly
639635 complete_dict = schema_manager .get_complete_entity_result (token , entity_dict )
640636
641637 # Determine if the entity is publicly visible based on its data only.
642- entity_scope = _get_entity_visibility ( normalized_entity_type = normalized_entity_type
643- ,entity_dict = complete_dict )
638+ # To verify if a Collection is public, it is necessary to have its Datasets, which
639+ # are populated as triggered data. So pull back the complete entity for
640+ # _get_entity_visibility() to check.
641+ entity_scope = _get_entity_visibility (normalized_entity_type = normalized_entity_type , entity_dict = complete_dict )
644642
645643 # Initialize the user as authorized if the data is public. Otherwise, the
646644 # user is not authorized and credentials must be checked.
@@ -1441,34 +1439,18 @@ def update_entity(id):
14411439 if (return_all_properties is not None ) and (return_all_properties .lower () == 'true' ):
14421440 properties_to_skip = []
14431441
1444- # Generate the filtered or complete entity dict to send back
1445- complete_dict = schema_manager .get_complete_entity_result (user_token , merged_updated_dict , properties_to_skip )
1446-
1447- # Will also filter the result based on schema
1448- normalized_complete_dict = schema_manager .normalize_entity_result_for_response (complete_dict )
1449-
14501442 # Remove the cached entities if Memcached is being used
1443+ # DO NOT update the cache with new entity dict because the returned dict from PUT (some properties maybe skipped)
1444+ # can be different from the one generated by GET call
14511445 if MEMCACHED_MODE :
1452- # Delete the old cache data of this entity
1453- # DO NOT update the cache with new entity dict because the returned dict from PUT (some properties maybe skipped)
1454- # can be different from the one generated by GET call
1455- cache_key = f'{ MEMCACHED_PREFIX } { id } '
1456- memcached_client_instance .delete (cache_key )
1457-
1458- logger .info (f"Deleted cache of key: { cache_key } after entity update via PUT call" )
1459-
1460- # Also delete the cache of all the direct descendants (children)
1461- # Otherwise they'll have old cached data for the `direct_ancestor` (Sample) `direct_ancestors` (Dataset) fields
1462- # Note: must use uuid in the Neo4j query
1463- children_uuid_list = schema_neo4j_queries .get_children (neo4j_driver_instance , entity_dict ['uuid' ] , 'uuid' )
1464-
1465- logger .info (f"Also delete the cache of all the direct descendants (children) of { id } if exist" )
1446+ delete_cache (id )
14661447
1467- for child_uuid in children_uuid_list :
1468- cache_key = f' { MEMCACHED_PREFIX } { child_uuid } '
1469- memcached_client_instance . delete ( cache_key )
1448+ # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou
1449+ # # Generate the complete entity dict
1450+ # complete_dict = schema_manager.get_complete_entity_result(user_token, merged_updated_dict, properties_to_skip )
14701451
1471- logger .info (f"Deleted direct descendant cache of key: { cache_key } " )
1452+ # # Will also filter the result based on schema
1453+ # normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict)
14721454
14731455 # Also reindex the updated entity node in elasticsearch via search-api
14741456 if entity_dict ['entity_type' ] in ['Collection' ]:
@@ -1480,9 +1462,13 @@ def update_entity(id):
14801462 logger .log (logging .INFO
14811463 ,f"Re-indexing for creation of { entity_dict ['entity_type' ]} "
14821464 f" with UUID { entity_dict ['uuid' ]} " )
1465+
14831466 reindex_entity (entity_dict ['uuid' ], user_token )
14841467
1485- return jsonify (normalized_complete_dict )
1468+ # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou
1469+ # return jsonify(normalized_complete_dict)
1470+
1471+ return jsonify ({'message' : f"{ normalized_entity_type } of { id } has been updated" })
14861472
14871473
14881474"""
@@ -3372,11 +3358,9 @@ def sankey_data():
33723358 if memcached_client_instance .get (cache_key ) is not None :
33733359 dataset_sankey_list = memcached_client_instance .get (cache_key )
33743360
3375- current_datetime = datetime .now ()
3376-
33773361 if not dataset_sankey_list :
33783362 if MEMCACHED_MODE :
3379- logger .info (f'Sankey data cache not found or expired. Making a new data fetch at time { current_datetime } ' )
3363+ logger .info (f'Sankey data cache not found or expired. Making a new data fetch at time { datetime . now () } ' )
33803364
33813365 # Call to app_neo4j_queries to prepare and execute the database query
33823366 sankey_info = app_neo4j_queries .get_sankey_info (neo4j_driver_instance )
@@ -3415,7 +3399,7 @@ def sankey_data():
34153399 # Cache the result
34163400 memcached_client_instance .set (cache_key , dataset_sankey_list , expire = SchemaConstants .MEMCACHED_TTL )
34173401 else :
3418- logger .info (f'Using the cached sankey data at time { current_datetime } ' )
3402+ logger .info (f'Using the cached sankey data at time { datetime . now () } ' )
34193403
34203404 return jsonify (dataset_sankey_list )
34213405
@@ -4409,7 +4393,7 @@ def after_update(normalized_entity_type, user_token, entity_dict):
44094393
44104394
44114395"""
4412- Get target entity dict for the given id
4396+ Get target entity dict from Neo4j query for the given id
44134397
44144398Parameters
44154399----------
@@ -4425,20 +4409,20 @@ def after_update(normalized_entity_type, user_token, entity_dict):
44254409"""
44264410def query_target_entity (id , user_token ):
44274411 entity_dict = None
4428-
4429- cache_key = f'{ MEMCACHED_PREFIX } { id } '
4412+ cache_result = None
44304413
4431- if MEMCACHED_MODE :
4414+ if MEMCACHED_MODE and MEMCACHED_PREFIX and memcached_client_instance :
4415+ # If this id is hubmap_id rather than uuid, there won't be a cache
4416+ # Only uuid is used in the cache key
4417+ cache_key = f'{ MEMCACHED_PREFIX } _neo4j_{ id } '
44324418 # Memcached returns None if no cached data or expired
4433- entity_dict = memcached_client_instance .get (cache_key )
4419+ cache_result = memcached_client_instance .get (cache_key )
44344420
4435- current_datetime = datetime .now ()
4436-
4437- # Use the cached data if found and still valid
4438- # Otherwise, make a fresh query and add to cache
4439- if entity_dict is None :
4440- if MEMCACHED_MODE :
4441- logger .info (f'Cache not found or expired. Making a new query to retrieve { id } at time { current_datetime } ' )
4421+ # Use the cached data if the id is an uuid and we found a valid cache
4422+ # Otherwise, either the id is a hubmap_id or we don't have a cache for it even if it's uuid
4423+ if cache_result is None :
4424+ if MEMCACHED_MODE and MEMCACHED_PREFIX and memcached_client_instance :
4425+ logger .info (f'Neo4j entity cache of { id } not found or expired at time { datetime .now ()} ' )
44424426
44434427 try :
44444428 """
@@ -4462,15 +4446,31 @@ def query_target_entity(id, user_token):
44624446
44634447 # Get the target uuid if all good
44644448 uuid = hubmap_ids ['hm_uuid' ]
4465- entity_dict = schema_neo4j_queries .get_entity (neo4j_driver_instance , uuid )
44664449
4467- # The uuid exists via uuid-api doesn't mean it's also in Neo4j
4468- if not entity_dict :
4469- not_found_error (f"Entity of id: { id } not found in Neo4j" )
4470-
4471- if MEMCACHED_MODE :
4472- # Cache the result
4473- memcached_client_instance .set (cache_key , entity_dict , expire = SchemaConstants .MEMCACHED_TTL )
4450+ # Look up the cache again by the uuid since we only use uuid in the cache key
4451+ if MEMCACHED_MODE and MEMCACHED_PREFIX and memcached_client_instance :
4452+ cache_key = f'{ MEMCACHED_PREFIX } _neo4j_{ uuid } '
4453+ cache_result = memcached_client_instance .get (cache_key )
4454+
4455+ if cache_result is None :
4456+ logger .info (f'Neo4j entity cache of { uuid } not found or expired at time { datetime .now ()} ' )
4457+
4458+ # Make a new query against neo4j
4459+ entity_dict = schema_neo4j_queries .get_entity (neo4j_driver_instance , uuid )
4460+
4461+ # The uuid exists via uuid-api doesn't mean it also exists in Neo4j
4462+ if not entity_dict :
4463+ not_found_error (f"Entity of id: { uuid } not found in Neo4j" )
4464+
4465+ logger .info (f'Creating neo4j entity result cache of { uuid } at time { datetime .now ()} ' )
4466+
4467+ cache_key = f'{ MEMCACHED_PREFIX } _neo4j_{ uuid } '
4468+ memcached_client_instance .set (cache_key , entity_dict , expire = SchemaConstants .MEMCACHED_TTL )
4469+ else :
4470+ logger .info (f'Using neo4j entity cache of { uuid } at time { datetime .now ()} ' )
4471+ logger .debug (entity_dict )
4472+
4473+ entity_dict = cache_result
44744474 except requests .exceptions .RequestException as e :
44754475 # Due to the use of response.raise_for_status() in schema_manager.get_hubmap_ids()
44764476 # we can access the status codes from the exception
@@ -4483,10 +4483,12 @@ def query_target_entity(id, user_token):
44834483 else :
44844484 internal_server_error (e .response .text )
44854485 else :
4486- logger .info (f'Using the cache data of entity { id } at time { current_datetime } ' )
4486+ logger .info (f'Using neo4j entity cache of { id } at time { datetime . now () } ' )
44874487 logger .debug (entity_dict )
44884488
4489- # Final return
4489+ entity_dict = cache_result
4490+
4491+ # One final return
44904492 return entity_dict
44914493
44924494
@@ -4501,6 +4503,42 @@ def require_json(request):
45014503 bad_request_error ("A json body and appropriate Content-Type header are required" )
45024504
45034505
4506+
4507+ """
4508+ Delete the cached data of all possible keys used for the given entity id
4509+
4510+ Parameters
4511+ ----------
4512+ id : str
4513+ The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity (Donor/Dataset/Sample/Upload/Collection/Publication)
4514+ """
def delete_cache(id):
    # Delete the cached data of the target entity and of every entity whose
    # cached result may embed it (children, collection/upload members, and the
    # linked Collection(s)/Upload). Returns None; does nothing when Memcached
    # mode is disabled.
    if not MEMCACHED_MODE:
        return

    # Resolve the given id (HuBMAP ID or UUID) to the entity uuid first,
    # because only uuid is used in the cache key across all the cache types.
    # Note: this is a lookup only, nothing is deleted at this point.
    entity_dict = query_target_entity(id, get_internal_token())
    entity_uuid = entity_dict['uuid']

    # If the target entity is Sample (`direct_ancestor`) or Dataset/Publication (`direct_ancestors`)
    # Delete the cache of all the direct descendants (children)
    child_uuids = schema_neo4j_queries.get_children(neo4j_driver_instance, entity_uuid, 'uuid')

    # If the target entity is Collection, delete the cache for each of its associated
    # Datasets and Publications (via [:IN_COLLECTION] relationship) as well as just Publications (via [:USES_DATA] relationship)
    collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid, 'uuid')

    # If the target entity is Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD] relationship)
    upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid, 'uuid')

    # If the target entity is Dataset/Publication, delete the associated Collections cache, Upload cache
    collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid, 'uuid')
    collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid)
    upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid)

    uuids_list = [entity_uuid] + child_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids

    # Most entities have no linked Collection or Upload, so only add those
    # uuids when the lookup actually found something. Indexing ['uuid']
    # unconditionally would raise for every entity without both links.
    # NOTE(review): assumes the two lookups return an empty dict (or None)
    # when nothing is linked - confirm against schema_neo4j_queries.
    if collection_dict:
        uuids_list.append(collection_dict['uuid'])

    if upload_dict:
        uuids_list.append(upload_dict['uuid'])

    schema_manager.delete_memcached_cache(uuids_list)
4540+
4541+
45044542"""
45054543Make a call to each search-api instance to reindex this entity node in elasticsearch
45064544
0 commit comments