Skip to content

Commit 4ea9ffa

Browse files
stephprincepre-commit-ci[bot]rly
authored
Update warning about ignoring cached namespaces (#1258)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly <[email protected]>
1 parent 6f138ec commit 4ea9ffa

File tree

9 files changed

+319
-79
lines changed

9 files changed

+319
-79
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
### Enhancements
66
- Optimized `get` within `VectorIndex` to be more efficient when retrieving a dataset of references. @mavaylon1 [#1248](https://github.com/hdmf-dev/hdmf/pull/1248)
7+
- Enhanced warnings about ignoring cached namespaces. @stephprince [#1258](https://github.com/hdmf-dev/hdmf/pull/1258)
78
- Added support in append for same dimensional args for numpy arrays. @mavaylon1 [#1261](https://github.com/hdmf-dev/hdmf/pull/1261)
89
- Improved error messages when optional requirements are not installed. @rly [#1263](https://github.com/hdmf-dev/hdmf/pull/1263)
910

src/hdmf/backends/hdf5/h5tools.py

+2-44
Original file line numberDiff line numberDiff line change
@@ -202,24 +202,13 @@ def __load_namespaces(cls, namespace_catalog, namespaces, file_obj):
202202
namespaces = list(spec_group.keys())
203203

204204
readers = dict()
205-
deps = dict()
206205
for ns in namespaces:
207206
latest_version = namespace_versions[ns]
208207
ns_group = spec_group[ns][latest_version]
209208
reader = H5SpecReader(ns_group)
210209
readers[ns] = reader
211-
# for each namespace in the 'namespace' dataset, track all included namespaces (dependencies)
212-
for spec_ns in reader.read_namespace(cls.__ns_spec_path):
213-
deps[ns] = list()
214-
for s in spec_ns['schema']:
215-
dep = s.get('namespace')
216-
if dep is not None:
217-
deps[ns].append(dep)
218-
219-
order = cls._order_deps(deps)
220-
for ns in order:
221-
reader = readers[ns]
222-
d.update(namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=reader))
210+
211+
d.update(namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=readers))
223212

224213
return d
225214

@@ -285,37 +274,6 @@ def __get_namespaces(cls, file_obj):
285274

286275
return used_version_names
287276

288-
@classmethod
289-
def _order_deps(cls, deps):
290-
"""
291-
Order namespaces according to dependency for loading into a NamespaceCatalog
292-
293-
Args:
294-
deps (dict): a dictionary that maps a namespace name to a list of name of
295-
the namespaces on which the namespace is directly dependent
296-
Example: {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
297-
Expected output: ['d', 'b', 'c', 'a']
298-
"""
299-
order = list()
300-
keys = list(deps.keys())
301-
deps = dict(deps)
302-
for k in keys:
303-
if k in deps:
304-
cls.__order_deps_aux(order, deps, k)
305-
return order
306-
307-
@classmethod
308-
def __order_deps_aux(cls, order, deps, key):
309-
"""
310-
A recursive helper function for _order_deps
311-
"""
312-
if key not in deps:
313-
return
314-
subdeps = deps.pop(key)
315-
for subk in subdeps:
316-
cls.__order_deps_aux(order, deps, subk)
317-
order.append(key)
318-
319277
@classmethod
320278
@docval({'name': 'source_filename', 'type': str, 'doc': 'the path to the HDF5 file to copy'},
321279
{'name': 'dest_filename', 'type': str, 'doc': 'the name of the destination file'},

src/hdmf/common/__init__.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,13 @@ def get_hdf5io(**kwargs):
233233
# load the hdmf-common namespace
234234
__resources = __get_resources()
235235
if os.path.exists(__resources['namespace_path']):
236-
__TYPE_MAP = TypeMap(NamespaceCatalog())
236+
# NOTE: even though HDMF does not guarantee backwards compatibility with schema
237+
# using an older version of the experimental namespace, in practice, this has not been
238+
# an issue, and it is costly to determine whether there is an incompatibility before issuing
239+
# a warning. so, we ignore the experimental namespace warning by default by specifying it
240+
# as a "core_namespace" in the NamespaceCatalog.
241+
# see https://github.com/hdmf-dev/hdmf/pull/1258
242+
__TYPE_MAP = TypeMap(NamespaceCatalog(core_namespaces=[CORE_NAMESPACE, EXP_NAMESPACE],))
237243

238244
load_namespaces(__resources['namespace_path'])
239245

src/hdmf/spec/namespace.py

+124-24
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .catalog import SpecCatalog
1111
from .spec import DatasetSpec, GroupSpec
12-
from ..utils import docval, getargs, popargs, get_docval
12+
from ..utils import docval, getargs, popargs, get_docval, is_newer_version
1313

1414
_namespace_args = [
1515
{'name': 'doc', 'type': str, 'doc': 'a description about what this namespace represents'},
@@ -229,13 +229,19 @@ class NamespaceCatalog:
229229
{'name': 'dataset_spec_cls', 'type': type,
230230
'doc': 'the class to use for dataset specifications', 'default': DatasetSpec},
231231
{'name': 'spec_namespace_cls', 'type': type,
232-
'doc': 'the class to use for specification namespaces', 'default': SpecNamespace})
232+
'doc': 'the class to use for specification namespaces', 'default': SpecNamespace},
233+
{'name': 'core_namespaces', 'type': list,
234+
'doc': 'the names of the core namespaces', 'default': list()})
233235
def __init__(self, **kwargs):
234236
"""Create a catalog for storing multiple Namespaces"""
235237
self.__namespaces = OrderedDict()
236238
self.__dataset_spec_cls = getargs('dataset_spec_cls', kwargs)
237239
self.__group_spec_cls = getargs('group_spec_cls', kwargs)
238240
self.__spec_namespace_cls = getargs('spec_namespace_cls', kwargs)
241+
242+
core_namespaces = getargs('core_namespaces', kwargs)
243+
self.__core_namespaces = core_namespaces
244+
239245
# keep track of all spec objects ever loaded, so we don't have
240246
# multiple object instances of a spec
241247
self.__loaded_specs = dict()
@@ -248,6 +254,7 @@ def __copy__(self):
248254
ret = NamespaceCatalog(self.__group_spec_cls,
249255
self.__dataset_spec_cls,
250256
self.__spec_namespace_cls)
257+
ret.__core_namespaces = copy(self.__core_namespaces)
251258
ret.__namespaces = copy(self.__namespaces)
252259
ret.__loaded_specs = copy(self.__loaded_specs)
253260
ret.__included_specs = copy(self.__included_specs)
@@ -258,6 +265,8 @@ def merge(self, ns_catalog):
258265
for name, namespace in ns_catalog.__namespaces.items():
259266
self.add_namespace(name, namespace)
260267

268+
self.__core_namespaces.extend(ns_catalog.__core_namespaces)
269+
261270
@property
262271
@docval(returns='a tuple of the available namespaces', rtype=tuple)
263272
def namespaces(self):
@@ -279,6 +288,11 @@ def spec_namespace_cls(self):
279288
"""The SpecNamespace class used in this NamespaceCatalog"""
280289
return self.__spec_namespace_cls
281290

291+
@property
292+
def core_namespaces(self):
293+
"""The core namespaces used in this NamespaceCatalog"""
294+
return self.__core_namespaces
295+
282296
@docval({'name': 'name', 'type': str, 'doc': 'the name of this namespace'},
283297
{'name': 'namespace', 'type': SpecNamespace, 'doc': 'the SpecNamespace object'})
284298
def add_namespace(self, **kwargs):
@@ -508,36 +522,122 @@ def __register_dependent_types_helper(spec, inc_ns, catalog, registered_types):
508522
'type': bool,
509523
'doc': 'whether or not to include objects from included/parent spec objects', 'default': True},
510524
{'name': 'reader',
511-
'type': SpecReader,
512-
'doc': 'the class to user for reading specifications', 'default': None},
525+
'type': (SpecReader, dict),
526+
'doc': 'the SpecReader or dict of SpecReader classes to use for reading specifications',
527+
'default': None},
513528
returns='a dictionary describing the dependencies of loaded namespaces', rtype=dict)
514529
def load_namespaces(self, **kwargs):
515530
"""Load the namespaces in the given file"""
516531
namespace_path, resolve, reader = getargs('namespace_path', 'resolve', 'reader', kwargs)
532+
533+
# determine which readers and order of readers to use for loading specs
517534
if reader is None:
518535
# load namespace definition from file
519536
if not os.path.exists(namespace_path):
520537
msg = "namespace file '%s' not found" % namespace_path
521538
raise IOError(msg)
522-
reader = YAMLSpecReader(indir=os.path.dirname(namespace_path))
523-
ns_path_key = os.path.join(reader.source, os.path.basename(namespace_path))
524-
ret = self.__included_specs.get(ns_path_key)
525-
if ret is None:
526-
ret = dict()
539+
ordered_readers = [YAMLSpecReader(indir=os.path.dirname(namespace_path))]
540+
elif isinstance(reader, SpecReader):
541+
ordered_readers = [reader] # only one reader
527542
else:
528-
return ret
529-
namespaces = reader.read_namespace(namespace_path)
530-
to_load = list()
531-
for ns in namespaces:
532-
if ns['name'] in self.__namespaces:
533-
if ns['version'] != self.__namespaces.get(ns['name'])['version']:
534-
# warn if the cached namespace differs from the already loaded namespace
535-
warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
536-
% (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']))
537-
else:
538-
to_load.append(ns)
539-
# now load specs into namespace
540-
for ns in to_load:
541-
ret[ns['name']] = self.__load_namespace(ns, reader, resolve=resolve)
542-
self.__included_specs[ns_path_key] = ret
543+
deps = dict() # for each namespace, track all included namespaces (dependencies)
544+
for ns, r in reader.items():
545+
for spec_ns in r.read_namespace(namespace_path):
546+
deps[ns] = list()
547+
for s in spec_ns['schema']:
548+
dep = s.get('namespace')
549+
if dep is not None:
550+
deps[ns].append(dep)
551+
order = self._order_deps(deps)
552+
ordered_readers = [reader[ns] for ns in order]
553+
554+
# determine which namespaces to load and which to ignore
555+
ignored_namespaces = list()
556+
ret = dict()
557+
for r in ordered_readers:
558+
# continue to next reader if spec is already included
559+
ns_path_key = os.path.join(r.source, os.path.basename(namespace_path))
560+
included_specs = self.__included_specs.get(ns_path_key)
561+
if included_specs is not None:
562+
ret.update(included_specs)
563+
continue # continue to next reader if spec is already included
564+
565+
to_load = list()
566+
namespaces = r.read_namespace(namespace_path)
567+
for ns in namespaces:
568+
if ns['name'] in self.__namespaces:
569+
if ns['version'] != self.__namespaces.get(ns['name'])['version']:
570+
cached_version = ns['version']
571+
loaded_version = self.__namespaces.get(ns['name'])['version']
572+
ignored_namespaces.append((ns['name'], cached_version, loaded_version))
573+
else:
574+
to_load.append(ns)
575+
576+
# now load specs into namespace
577+
for ns in to_load:
578+
ret[ns['name']] = self.__load_namespace(ns, r, resolve=resolve)
579+
self.__included_specs[ns_path_key] = ret
580+
581+
# warn if there are any ignored namespaces
582+
if ignored_namespaces:
583+
self.warn_for_ignored_namespaces(ignored_namespaces)
584+
543585
return ret
586+
587+
def warn_for_ignored_namespaces(self, ignored_namespaces):
588+
"""Warning if namespaces were ignored where a different version was already loaded
589+
590+
Args:
591+
ignored_namespaces (list): name, cached version, and loaded version of the namespace
592+
"""
593+
core_warnings = list()
594+
other_warnings = list()
595+
warning_msg = list()
596+
for name, cached_version, loaded_version in ignored_namespaces:
597+
version_info = f"{name} - cached version: {cached_version}, loaded version: {loaded_version}"
598+
if name in self.__core_namespaces and is_newer_version(cached_version, loaded_version):
599+
core_warnings.append(version_info) # for core namespaces, warn if the cached version is newer
600+
elif name not in self.__core_namespaces:
601+
other_warnings.append(version_info) # for all other namespaces, issue a warning for compatibility
602+
603+
if core_warnings:
604+
joined_warnings = "\n".join(core_warnings)
605+
warning_msg.append(f'{joined_warnings}\nPlease update to the latest package versions.')
606+
if other_warnings:
607+
joined_warnings = "\n".join(other_warnings)
608+
warning_msg.append(f'{joined_warnings}\nThe loaded extension(s) may not be compatible with the cached '
609+
'extension(s) in the file. Please check the extension documentation and ignore this '
610+
'warning if these versions are compatible.')
611+
if warning_msg:
612+
joined_warnings = "\n".join(warning_msg)
613+
warn(f'Ignoring the following cached namespace(s) because another version is already loaded:\n'
614+
f'{joined_warnings}', category=UserWarning, stacklevel=2)
615+
616+
def _order_deps(self, deps):
617+
"""
618+
Order namespaces according to dependency for loading into a NamespaceCatalog
619+
620+
Args:
621+
deps (dict): a dictionary that maps a namespace name to a list of name of
622+
the namespaces on which the namespace is directly dependent
623+
Example: {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
624+
Expected output: ['d', 'b', 'c', 'a']
625+
"""
626+
order = list()
627+
keys = list(deps.keys())
628+
deps = dict(deps)
629+
for k in keys:
630+
if k in deps:
631+
self.__order_deps_aux(order, deps, k)
632+
return order
633+
634+
def __order_deps_aux(self, order, deps, key):
635+
"""
636+
A recursive helper function for _order_deps
637+
"""
638+
if key not in deps:
639+
return
640+
subdeps = deps.pop(key)
641+
for subk in subdeps:
642+
self.__order_deps_aux(order, deps, subk)
643+
order.append(key)

src/hdmf/utils.py

+17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import collections
22
import copy as _copy
3+
import re
34
import types
45
import warnings
56
from abc import ABCMeta
@@ -876,6 +877,22 @@ def is_ragged(data):
876877

877878
return False
878879

880+
def is_newer_version(version_a: str, version_b: str) -> bool:
881+
# this method could be replaced by packaging.version if packaging is added as a dependency
882+
version_a_match = re.match(r"(\d+\.\d+\.\d+)", version_a)[0] # trim off any non-numeric symbols at end
883+
version_a_list = [int(i) for i in version_a_match.split(".")]
884+
885+
version_b_match = re.match(r"(\d+\.\d+\.\d+)", version_b)[0] # trim off any non-numeric symbols at end
886+
version_b_list = [int(i) for i in version_b_match.split(".")]
887+
888+
for a, b in zip(version_a_list, version_b_list):
889+
if a > b:
890+
return True
891+
elif a < b:
892+
return False
893+
894+
return False
895+
879896
def get_basic_array_info(array):
880897
def convert_bytes_to_str(bytes_size):
881898
suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']

0 commit comments

Comments
 (0)