hdmf-dev · rly · May 6, 2025 · Apr 7, 2025 · Apr 7, 2025 · Apr 7, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### Enhancements
 - Optimized `get` within `VectorIndex` to be more efficient when retrieving a dataset of references. @mavaylon1 [#1248](https://github.com/hdmf-dev/hdmf/pull/1248)
+- Enhanced warnings about ignoring cached namespaces. @stephprince [#1258](https://github.com/hdmf-dev/hdmf/pull/1258)
 - Added support in append for same dimensional args for numpy arrays. @mavaylon1 [#1261](https://github.com/hdmf-dev/hdmf/pull/1261)
 - Improved error messages when optional requirements are not installed. @rly [#1263](https://github.com/hdmf-dev/hdmf/pull/1263)
 

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
@@ -202,24 +202,13 @@ def __load_namespaces(cls, namespace_catalog, namespaces, file_obj):
             namespaces = list(spec_group.keys())
 
         readers = dict()
-        deps = dict()
         for ns in namespaces:
             latest_version = namespace_versions[ns]
             ns_group = spec_group[ns][latest_version]
             reader = H5SpecReader(ns_group)
             readers[ns] = reader
-            # for each namespace in the 'namespace' dataset, track all included namespaces (dependencies)
-            for spec_ns in reader.read_namespace(cls.__ns_spec_path):
-                deps[ns] = list()
-                for s in spec_ns['schema']:
-                    dep = s.get('namespace')
-                    if dep is not None:
-                        deps[ns].append(dep)
-
-        order = cls._order_deps(deps)
-        for ns in order:
-            reader = readers[ns]
-            d.update(namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=reader))
+
+        d.update(namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=readers))
 
         return d
 
@@ -285,37 +274,6 @@ def __get_namespaces(cls, file_obj):
 
         return used_version_names
 
-    @classmethod
-    def _order_deps(cls, deps):
-        """
-        Order namespaces according to dependency for loading into a NamespaceCatalog
-
-        Args:
-            deps (dict): a dictionary that maps a namespace name to a list of name of
-                         the namespaces on which the namespace is directly dependent
-                         Example: {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
-                         Expected output: ['d', 'b', 'c', 'a']
-        """
-        order = list()
-        keys = list(deps.keys())
-        deps = dict(deps)
-        for k in keys:
-            if k in deps:
-                cls.__order_deps_aux(order, deps, k)
-        return order
-
-    @classmethod
-    def __order_deps_aux(cls, order, deps, key):
-        """
-        A recursive helper function for _order_deps
-        """
-        if key not in deps:
-            return
-        subdeps = deps.pop(key)
-        for subk in subdeps:
-            cls.__order_deps_aux(order, deps, subk)
-        order.append(key)
-
     @classmethod
     @docval({'name': 'source_filename', 'type': str, 'doc': 'the path to the HDF5 file to copy'},
             {'name': 'dest_filename', 'type': str, 'doc': 'the name of the destination file'},

diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py
@@ -233,7 +233,13 @@ def get_hdf5io(**kwargs):
 # load the hdmf-common namespace
 __resources = __get_resources()
 if os.path.exists(__resources['namespace_path']):
-    __TYPE_MAP = TypeMap(NamespaceCatalog())
+    # NOTE: even though HDMF does not guarantee backwards compatibility with schema
+    # using an older version of the experimental namespace, in practice, this has not been
+    # an issue, and it is costly to determine whether there is an incompatibility before issuing
+    # a warning. So, by default we skip the experimental namespace warning (unless the cached
+    # version is newer) by passing it in "core_namespaces" to the NamespaceCatalog.
+    # see https://github.com/hdmf-dev/hdmf/pull/1258
+    __TYPE_MAP = TypeMap(NamespaceCatalog(core_namespaces=[CORE_NAMESPACE, EXP_NAMESPACE],))
 
     load_namespaces(__resources['namespace_path'])
 

diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py
@@ -9,7 +9,7 @@
 
 from .catalog import SpecCatalog
 from .spec import DatasetSpec, GroupSpec
-from ..utils import docval, getargs, popargs, get_docval
+from ..utils import docval, getargs, popargs, get_docval, is_newer_version
 
 _namespace_args = [
     {'name': 'doc', 'type': str, 'doc': 'a description about what this namespace represents'},
@@ -229,13 +229,19 @@
             {'name': 'dataset_spec_cls', 'type': type,
              'doc': 'the class to use for dataset specifications', 'default': DatasetSpec},
             {'name': 'spec_namespace_cls', 'type': type,
-             'doc': 'the class to use for specification namespaces', 'default': SpecNamespace})
+             'doc': 'the class to use for specification namespaces', 'default': SpecNamespace},
+            {'name': 'core_namespaces', 'type': list,
+             'doc': 'the names of the core namespaces', 'default': list()})
     def __init__(self, **kwargs):
         """Create a catalog for storing  multiple Namespaces"""
         self.__namespaces = OrderedDict()
         self.__dataset_spec_cls = getargs('dataset_spec_cls', kwargs)
         self.__group_spec_cls = getargs('group_spec_cls', kwargs)
         self.__spec_namespace_cls = getargs('spec_namespace_cls', kwargs)
+
+        core_namespaces = getargs('core_namespaces', kwargs)
+        self.__core_namespaces = core_namespaces
+
         # keep track of all spec objects ever loaded, so we don't have
         # multiple object instances of a spec
         self.__loaded_specs = dict()
@@ -248,6 +254,7 @@
         ret = NamespaceCatalog(self.__group_spec_cls,
                                self.__dataset_spec_cls,
                                self.__spec_namespace_cls)
+        ret.__core_namespaces = copy(self.__core_namespaces)
         ret.__namespaces = copy(self.__namespaces)
         ret.__loaded_specs = copy(self.__loaded_specs)
         ret.__included_specs = copy(self.__included_specs)
@@ -258,6 +265,8 @@
         for name, namespace in ns_catalog.__namespaces.items():
             self.add_namespace(name, namespace)
 
+        self.__core_namespaces.extend(ns_catalog.__core_namespaces)
+
     @property
     @docval(returns='a tuple of the available namespaces', rtype=tuple)
     def namespaces(self):
@@ -279,6 +288,11 @@
         """The SpecNamespace class used in this NamespaceCatalog"""
         return self.__spec_namespace_cls
 
+    @property
+    def core_namespaces(self):
+        """The core namespaces used in this NamespaceCatalog"""
+        return self.__core_namespaces
+
     @docval({'name': 'name', 'type': str, 'doc': 'the name of this namespace'},
             {'name': 'namespace', 'type': SpecNamespace, 'doc': 'the SpecNamespace object'})
     def add_namespace(self, **kwargs):
@@ -508,36 +522,122 @@
              'type': bool,
              'doc': 'whether or not to include objects from included/parent spec objects', 'default': True},
             {'name': 'reader',
-             'type': SpecReader,
-             'doc': 'the class to user for reading specifications', 'default': None},
+             'type': (SpecReader, dict),
+             'doc': 'the SpecReader or dict of SpecReader classes to use for reading specifications',
+             'default': None},
             returns='a dictionary describing the dependencies of loaded namespaces', rtype=dict)
     def load_namespaces(self, **kwargs):
         """Load the namespaces in the given file"""
         namespace_path, resolve, reader = getargs('namespace_path', 'resolve', 'reader', kwargs)
+
+        # determine which readers and order of readers to use for loading specs
         if reader is None:
             # load namespace definition from file
             if not os.path.exists(namespace_path):
                 msg = "namespace file '%s' not found" % namespace_path
                 raise IOError(msg)
-            reader = YAMLSpecReader(indir=os.path.dirname(namespace_path))
-        ns_path_key = os.path.join(reader.source, os.path.basename(namespace_path))
-        ret = self.__included_specs.get(ns_path_key)
-        if ret is None:
-            ret = dict()
+            ordered_readers = [YAMLSpecReader(indir=os.path.dirname(namespace_path))]
+        elif isinstance(reader, SpecReader):
+            ordered_readers = [reader]  # only one reader
         else:
-            return ret
-        namespaces = reader.read_namespace(namespace_path)
-        to_load = list()
-        for ns in namespaces:
-            if ns['name'] in self.__namespaces:
-                if ns['version'] != self.__namespaces.get(ns['name'])['version']:
-                    # warn if the cached namespace differs from the already loaded namespace
-                    warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
-                         % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']))
-            else:
-                to_load.append(ns)
-        # now load specs into namespace
-        for ns in to_load:
-            ret[ns['name']] = self.__load_namespace(ns, reader, resolve=resolve)
-        self.__included_specs[ns_path_key] = ret
+            deps = dict()  # for each namespace, track all included namespaces (dependencies)
+            for ns, r in reader.items():
+                for spec_ns in r.read_namespace(namespace_path):
+                    deps[ns] = list()
+                    for s in spec_ns['schema']:
+                        dep = s.get('namespace')
+                        if dep is not None:
+                            deps[ns].append(dep)
+            order = self._order_deps(deps)
+            ordered_readers = [reader[ns] for ns in order]
+
+        # determine which namespaces to load and which to ignore
+        ignored_namespaces = list()
+        ret = dict()
+        for r in ordered_readers:
+            # reuse the previously stored result if this reader's spec was already included
+            ns_path_key = os.path.join(r.source, os.path.basename(namespace_path))
+            included_specs = self.__included_specs.get(ns_path_key)
+            if included_specs is not None:
+                ret.update(included_specs)
+                continue  # continue to next reader if spec is already included
+
+            to_load = list()
+            namespaces = r.read_namespace(namespace_path)
+            for ns in namespaces:
+                if ns['name'] in self.__namespaces:
+                    if ns['version'] != self.__namespaces.get(ns['name'])['version']:
+                        cached_version = ns['version']
+                        loaded_version = self.__namespaces.get(ns['name'])['version']
+                        ignored_namespaces.append((ns['name'], cached_version, loaded_version))
+                else:
+                    to_load.append(ns)
+
+            # now load specs into namespace
+            for ns in to_load:
+                ret[ns['name']] = self.__load_namespace(ns, r, resolve=resolve)
+            self.__included_specs[ns_path_key] = ret
+
+        # warn if there are any ignored namespaces
+        if ignored_namespaces:
+            self.warn_for_ignored_namespaces(ignored_namespaces)
+
         return ret
+
+    def warn_for_ignored_namespaces(self, ignored_namespaces):
+        """Warn about cached namespaces that were ignored because a different version was already loaded
+
+        Args:
+            ignored_namespaces (list): (name, cached version, loaded version) tuples, one per ignored namespace
+        """
+        core_warnings = list()
+        other_warnings = list()
+        warning_msg = list()
+        for name, cached_version, loaded_version in ignored_namespaces:
+            version_info = f"{name} - cached version: {cached_version}, loaded version: {loaded_version}"
+            if name in self.__core_namespaces and is_newer_version(cached_version, loaded_version):
+                core_warnings.append(version_info)  # for core namespaces, warn if the cached version is newer
+            elif name not in self.__core_namespaces:
+                other_warnings.append(version_info)  # for all other namespaces, issue a warning for compatibility
+
+        if core_warnings:
+            joined_warnings = "\n".join(core_warnings)
+            warning_msg.append(f'{joined_warnings}\nPlease update to the latest package versions.')
+        if other_warnings:
+            joined_warnings = "\n".join(other_warnings)
+            warning_msg.append(f'{joined_warnings}\nThe loaded extension(s) may not be compatible with the cached '
+                               'extension(s) in the file. Please check the extension documentation and ignore this '
+                               'warning if these versions are compatible.')
+        if warning_msg:
+            joined_warnings = "\n".join(warning_msg)
+            warn(f'Ignoring the following cached namespace(s) because another version is already loaded:\n'
+                 f'{joined_warnings}', category=UserWarning, stacklevel=2)
+
+    def _order_deps(self, deps):
+        """
+        Order namespaces according to dependency for loading into a NamespaceCatalog
+
+        Args:
+            deps (dict): a dictionary that maps a namespace name to a list of the names of
+                         the namespaces on which that namespace directly depends
+                         Example: {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
+                         Expected output: ['d', 'b', 'c', 'a']
+        """
+        order = list()
+        keys = list(deps.keys())
+        deps = dict(deps)
+        for k in keys:
+            if k in deps:
+                self.__order_deps_aux(order, deps, k)
+        return order
+
+    def __order_deps_aux(self, order, deps, key):
+        """
+        A recursive helper function for _order_deps
+        """
+        if key not in deps:
+            return
+        subdeps = deps.pop(key)
+        for subk in subdeps:
+            self.__order_deps_aux(order, deps, subk)
+        order.append(key)
diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py
@@ -1,5 +1,6 @@
 import collections
 import copy as _copy
+import re
 import types
 import warnings
 from abc import ABCMeta
@@ -876,6 +877,22 @@ def is_ragged(data):
 
     return False
 
+def is_newer_version(version_a: str, version_b: str) -> bool:
+    # this function could be replaced by packaging.version if packaging is added as a dependency
+    version_a_match = re.match(r"(\d+\.\d+\.\d+)", version_a)[0]  # trim off any non-numeric symbols at end
+    version_a_list = [int(i) for i in version_a_match.split(".")]
+
+    version_b_match = re.match(r"(\d+\.\d+\.\d+)", version_b)[0]  # trim off any non-numeric symbols at end
+    version_b_list = [int(i) for i in version_b_match.split(".")]
+
+    for a, b in zip(version_a_list, version_b_list):
+        if a > b:
+            return True
+        elif a < b:
+            return False
+
+    return False
+
 def get_basic_array_info(array):
     def convert_bytes_to_str(bytes_size):
         suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']