Skip to content

Update warning about ignoring cached namespaces #1258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4c70b23
add util function for version comparison
stephprince Apr 7, 2025
cfed7d3
update warning and refactor namespace loading
stephprince Apr 7, 2025
aba88a6
add tests for new namespace warnings
stephprince Apr 7, 2025
91b0c42
remove old warning filtering
stephprince Apr 7, 2025
8762e01
remove order_deps functions from h5tools
stephprince Apr 7, 2025
583848d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 7, 2025
c6a2c3a
update CHANGELOG
stephprince Apr 7, 2025
d8d7e29
remove backslashes from f-strings for older python versions
stephprince Apr 7, 2025
b3a2f3a
fix spelling in comment
stephprince Apr 7, 2025
2a6c21d
Merge branch 'dev' into update-namespace-warning
stephprince Apr 8, 2025
bd97a44
add hdmf-experimental to core_namespace list
stephprince Apr 8, 2025
ffe9384
Merge branch 'dev' into update-namespace-warning
rly Apr 13, 2025
0ea3597
Update src/hdmf/spec/namespace.py
rly Apr 13, 2025
03dbb66
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 13, 2025
373ce06
update version comparison function
stephprince Apr 17, 2025
c2a0a9e
move core_namespace list definition to hdmf common init
stephprince Apr 17, 2025
d43e076
add warning comparison to tests
stephprince Apr 17, 2025
2e87ec7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 17, 2025
62528a3
update warning message for extension namespace compatibility
stephprince May 6, 2025
0e84263
Merge branch 'dev' into update-namespace-warning
stephprince May 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Enhancements
- Optimized `get` within `VectorIndex` to be more efficient when retrieving a dataset of references. @mavaylon1 [#1248](https://github.com/hdmf-dev/hdmf/pull/1248)
- Enhanced warnings about ignoring cached namespaces. @stephprince [#1258](https://github.com/hdmf-dev/hdmf/pull/1258)
- Added support in append for same dimensional args for numpy arrays. @mavaylon1 [#1261](https://github.com/hdmf-dev/hdmf/pull/1261)
- Improved error messages when optional requirements are not installed. @rly [#1263](https://github.com/hdmf-dev/hdmf/pull/1263)

Expand Down
46 changes: 2 additions & 44 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,24 +202,13 @@ def __load_namespaces(cls, namespace_catalog, namespaces, file_obj):
namespaces = list(spec_group.keys())

readers = dict()
deps = dict()
for ns in namespaces:
latest_version = namespace_versions[ns]
ns_group = spec_group[ns][latest_version]
reader = H5SpecReader(ns_group)
readers[ns] = reader
# for each namespace in the 'namespace' dataset, track all included namespaces (dependencies)
for spec_ns in reader.read_namespace(cls.__ns_spec_path):
deps[ns] = list()
for s in spec_ns['schema']:
dep = s.get('namespace')
if dep is not None:
deps[ns].append(dep)

order = cls._order_deps(deps)
for ns in order:
reader = readers[ns]
d.update(namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=reader))

d.update(namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=readers))

return d

Expand Down Expand Up @@ -285,37 +274,6 @@ def __get_namespaces(cls, file_obj):

return used_version_names

@classmethod
def _order_deps(cls, deps):
    """
    Return namespace names sorted so that every namespace follows its dependencies.

    The result is the order in which namespaces should be loaded into a
    NamespaceCatalog: dependencies first, dependents afterwards.

    Args:
        deps (dict): maps each namespace name to the list of namespace names it
            directly depends on.
            Example: {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
            Expected output: ['d', 'b', 'c', 'a']

    Returns:
        list: the namespace names in dependency-first order.
    """
    # Work on a shallow copy: the helper pops entries as it visits them,
    # and the caller's mapping must stay untouched.
    pending = dict(deps)
    ordered = []
    for ns_name in list(deps):
        # Entries already consumed as someone else's dependency are skipped.
        if ns_name in pending:
            cls.__order_deps_aux(ordered, pending, ns_name)
    return ordered

@classmethod
def __order_deps_aux(cls, order, deps, key):
    """
    Recursive helper for _order_deps: append ``key`` to ``order`` after its dependencies.

    Args:
        order (list): accumulator that receives namespace names in load order.
        deps (dict): namespaces not yet visited, mapped to their direct
            dependencies; visited entries are removed from it.
        key (str): the namespace to visit.
    """
    # Removing the entry before recursing marks the namespace as visited, so a
    # namespace shared by several dependents is emitted only once.
    try:
        children = deps.pop(key)
    except KeyError:
        # Already visited, or not one of the tracked namespaces.
        return
    for child in children:
        cls.__order_deps_aux(order, deps, child)
    order.append(key)

@classmethod
@docval({'name': 'source_filename', 'type': str, 'doc': 'the path to the HDF5 file to copy'},
{'name': 'dest_filename', 'type': str, 'doc': 'the name of the destination file'},
Expand Down
8 changes: 7 additions & 1 deletion src/hdmf/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,13 @@ def get_hdf5io(**kwargs):
# load the hdmf-common namespace
__resources = __get_resources()
if os.path.exists(__resources['namespace_path']):
__TYPE_MAP = TypeMap(NamespaceCatalog())
# NOTE: HDMF does not guarantee backwards compatibility with schema that use an older
# version of the experimental namespace. In practice, however, this has not been an issue,
# and it is costly to determine whether there is an incompatibility before issuing a
# warning. So, we ignore the experimental namespace warning by default by listing the
# experimental namespace in the "core_namespaces" of the NamespaceCatalog.
# See https://github.com/hdmf-dev/hdmf/pull/1258
__TYPE_MAP = TypeMap(NamespaceCatalog(core_namespaces=[CORE_NAMESPACE, EXP_NAMESPACE],))

load_namespaces(__resources['namespace_path'])

Expand Down
148 changes: 124 additions & 24 deletions src/hdmf/spec/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from .catalog import SpecCatalog
from .spec import DatasetSpec, GroupSpec
from ..utils import docval, getargs, popargs, get_docval
from ..utils import docval, getargs, popargs, get_docval, is_newer_version

_namespace_args = [
{'name': 'doc', 'type': str, 'doc': 'a description about what this namespace represents'},
Expand Down Expand Up @@ -229,13 +229,19 @@
{'name': 'dataset_spec_cls', 'type': type,
'doc': 'the class to use for dataset specifications', 'default': DatasetSpec},
{'name': 'spec_namespace_cls', 'type': type,
'doc': 'the class to use for specification namespaces', 'default': SpecNamespace})
'doc': 'the class to use for specification namespaces', 'default': SpecNamespace},
{'name': 'core_namespaces', 'type': list,
'doc': 'the names of the core namespaces', 'default': list()})
def __init__(self, **kwargs):
"""Create a catalog for storing multiple Namespaces"""
self.__namespaces = OrderedDict()
self.__dataset_spec_cls = getargs('dataset_spec_cls', kwargs)
self.__group_spec_cls = getargs('group_spec_cls', kwargs)
self.__spec_namespace_cls = getargs('spec_namespace_cls', kwargs)

core_namespaces = getargs('core_namespaces', kwargs)
self.__core_namespaces = core_namespaces

# keep track of all spec objects ever loaded, so we don't have
# multiple object instances of a spec
self.__loaded_specs = dict()
Expand All @@ -248,6 +254,7 @@
ret = NamespaceCatalog(self.__group_spec_cls,
self.__dataset_spec_cls,
self.__spec_namespace_cls)
ret.__core_namespaces = copy(self.__core_namespaces)
ret.__namespaces = copy(self.__namespaces)
ret.__loaded_specs = copy(self.__loaded_specs)
ret.__included_specs = copy(self.__included_specs)
Expand All @@ -258,6 +265,8 @@
for name, namespace in ns_catalog.__namespaces.items():
self.add_namespace(name, namespace)

self.__core_namespaces.extend(ns_catalog.__core_namespaces)

@property
@docval(returns='a tuple of the available namespaces', rtype=tuple)
def namespaces(self):
Expand All @@ -279,6 +288,11 @@
"""The SpecNamespace class used in this NamespaceCatalog"""
return self.__spec_namespace_cls

@property
def core_namespaces(self):
    """The names of the namespaces this NamespaceCatalog treats as core namespaces"""
    # The stored list itself is returned (not a copy).
    names = self.__core_namespaces
    return names

@docval({'name': 'name', 'type': str, 'doc': 'the name of this namespace'},
{'name': 'namespace', 'type': SpecNamespace, 'doc': 'the SpecNamespace object'})
def add_namespace(self, **kwargs):
Expand Down Expand Up @@ -508,36 +522,122 @@
'type': bool,
'doc': 'whether or not to include objects from included/parent spec objects', 'default': True},
{'name': 'reader',
'type': SpecReader,
'doc': 'the class to user for reading specifications', 'default': None},
'type': (SpecReader, dict),
'doc': 'the SpecReader or dict of SpecReader classes to use for reading specifications',
'default': None},
returns='a dictionary describing the dependencies of loaded namespaces', rtype=dict)
def load_namespaces(self, **kwargs):
"""Load the namespaces in the given file"""
namespace_path, resolve, reader = getargs('namespace_path', 'resolve', 'reader', kwargs)

# determine which readers and order of readers to use for loading specs
if reader is None:
# load namespace definition from file
if not os.path.exists(namespace_path):
msg = "namespace file '%s' not found" % namespace_path
raise IOError(msg)
reader = YAMLSpecReader(indir=os.path.dirname(namespace_path))
ns_path_key = os.path.join(reader.source, os.path.basename(namespace_path))
ret = self.__included_specs.get(ns_path_key)
if ret is None:
ret = dict()
ordered_readers = [YAMLSpecReader(indir=os.path.dirname(namespace_path))]
elif isinstance(reader, SpecReader):
ordered_readers = [reader] # only one reader

Check warning on line 541 in src/hdmf/spec/namespace.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/spec/namespace.py#L541

Added line #L541 was not covered by tests
else:
return ret
namespaces = reader.read_namespace(namespace_path)
to_load = list()
for ns in namespaces:
if ns['name'] in self.__namespaces:
if ns['version'] != self.__namespaces.get(ns['name'])['version']:
# warn if the cached namespace differs from the already loaded namespace
warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
% (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']))
else:
to_load.append(ns)
# now load specs into namespace
for ns in to_load:
ret[ns['name']] = self.__load_namespace(ns, reader, resolve=resolve)
self.__included_specs[ns_path_key] = ret
deps = dict() # for each namespace, track all included namespaces (dependencies)
for ns, r in reader.items():
for spec_ns in r.read_namespace(namespace_path):
deps[ns] = list()
for s in spec_ns['schema']:
dep = s.get('namespace')
if dep is not None:
deps[ns].append(dep)
order = self._order_deps(deps)
ordered_readers = [reader[ns] for ns in order]

# determine which namespaces to load and which to ignore
ignored_namespaces = list()
ret = dict()
for r in ordered_readers:
# continue to next reader if spec is already included
ns_path_key = os.path.join(r.source, os.path.basename(namespace_path))
included_specs = self.__included_specs.get(ns_path_key)
if included_specs is not None:
ret.update(included_specs)
continue # continue to next reader if spec is already included

to_load = list()
namespaces = r.read_namespace(namespace_path)
for ns in namespaces:
if ns['name'] in self.__namespaces:
if ns['version'] != self.__namespaces.get(ns['name'])['version']:
cached_version = ns['version']
loaded_version = self.__namespaces.get(ns['name'])['version']
ignored_namespaces.append((ns['name'], cached_version, loaded_version))
else:
to_load.append(ns)

# now load specs into namespace
for ns in to_load:
ret[ns['name']] = self.__load_namespace(ns, r, resolve=resolve)
self.__included_specs[ns_path_key] = ret

# warn if there are any ignored namespaces
if ignored_namespaces:
self.warn_for_ignored_namespaces(ignored_namespaces)

return ret

def warn_for_ignored_namespaces(self, ignored_namespaces):
    """Issue one combined warning for cached namespaces that were not loaded.

    A cached namespace is ignored when a different version of it is already
    loaded. Core namespaces are reported only when the cached version is newer
    than the loaded one; every other namespace is always reported.

    Args:
        ignored_namespaces (list): tuples of (name, cached version, loaded version),
            one per ignored namespace
    """
    core_lines = []
    extension_lines = []
    for ns_name, cached, loaded in ignored_namespaces:
        entry = f"{ns_name} - cached version: {cached}, loaded version: {loaded}"
        if ns_name not in self.__core_namespaces:
            # non-core namespaces: always flag a possible incompatibility
            extension_lines.append(entry)
        elif is_newer_version(cached, loaded):
            # core namespaces: only flag when the file is ahead of the installed package
            core_lines.append(entry)

    sections = []
    if core_lines:
        core_block = "\n".join(core_lines)
        sections.append(f'{core_block}\nPlease update to the latest package versions.')
    if extension_lines:
        extension_block = "\n".join(extension_lines)
        sections.append(
            f'{extension_block}\nThe loaded extension(s) may not be compatible with the cached '
            'extension(s) in the file. Please check the extension documentation and ignore this '
            'warning if these versions are compatible.'
        )
    if not sections:
        return  # nothing worth reporting

    details = "\n".join(sections)
    warn(f'Ignoring the following cached namespace(s) because another version is already loaded:\n'
         f'{details}', category=UserWarning, stacklevel=2)

def _order_deps(self, deps):
    """
    Sort namespaces so that each one comes after the namespaces it depends on.

    This is the order in which namespaces must be loaded into a NamespaceCatalog.

    Args:
        deps (dict): a dictionary that maps a namespace name to the list of names
            of the namespaces on which it directly depends
            Example: {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
            Expected output: ['d', 'b', 'c', 'a']

    Returns:
        list: namespace names, dependencies before dependents
    """
    # The helper consumes entries as it visits them, so hand it a shallow copy
    # and leave the caller's dictionary intact.
    unvisited = dict(deps)
    load_order = []
    for name in list(deps):
        if name not in unvisited:
            continue  # already emitted as a dependency of an earlier namespace
        self.__order_deps_aux(load_order, unvisited, name)
    return load_order

def __order_deps_aux(self, order, deps, key):
    """
    A recursive helper function for _order_deps.

    Visits ``key`` depth-first: its dependencies are appended to ``order`` before
    ``key`` itself, and each visited namespace is removed from ``deps``.
    """
    # Popping before the recursion marks the namespace as visited, so it is
    # emitted at most once.
    try:
        dependencies = deps.pop(key)
    except KeyError:
        return  # visited earlier, or not a tracked namespace
    for dependency in dependencies:
        self.__order_deps_aux(order, deps, dependency)
    order.append(key)
17 changes: 17 additions & 0 deletions src/hdmf/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import collections
import copy as _copy
import re
import types
import warnings
from abc import ABCMeta
Expand Down Expand Up @@ -876,6 +877,22 @@ def is_ragged(data):

return False

def _parse_release(version: str) -> tuple:
    """Return the leading numeric release of ``version`` as a ``(major, minor, patch)`` tuple.

    Only the first one to three dot-separated numeric components at the start of
    the string are used; anything after them (e.g. ``-alpha``, ``.post1``) is
    ignored. Missing components are treated as zero, so ``"1.2"`` -> ``(1, 2, 0)``.

    :raises ValueError: if ``version`` does not start with a number
    """
    match = re.match(r"\d+(?:\.\d+){0,2}", version)
    if match is None:
        raise ValueError(f"Cannot parse a numeric version from {version!r}")
    parts = [int(part) for part in match.group().split(".")]
    parts.extend([0] * (3 - len(parts)))  # pad "1" / "1.2" to three components
    return tuple(parts)


def is_newer_version(version_a: str, version_b: str) -> bool:
    """Return True if ``version_a`` is strictly newer than ``version_b``.

    Versions are compared numerically on their leading ``major.minor.patch``
    release; trailing non-numeric qualifiers are ignored, so ``"2.0.0-alpha"``
    and ``"2.0.0"`` compare as equal. Versions with fewer than three components
    (e.g. ``"1.0"``) are padded with zeros.

    :param version_a: the version string to test
    :param version_b: the version string to compare against
    :returns: True if ``version_a`` is newer than ``version_b``, False if it is
              older or the same
    :raises ValueError: if either version string does not start with a number
    """
    # this method could be replaced by packaging.version if packaging is added as a dependency
    # tuples compare element by element, i.e. major first, then minor, then patch
    return _parse_release(version_a) > _parse_release(version_b)

def get_basic_array_info(array):
def convert_bytes_to_str(bytes_size):
suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
Expand Down
Loading
Loading