Skip to content

Commit 3a5cd45

Browse files
committed
group variable references
1 parent 77ccf56 commit 3a5cd45

File tree

4 files changed

+189
-101
lines changed

4 files changed

+189
-101
lines changed

lib/bald/__init__.py

Lines changed: 104 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -402,6 +402,8 @@ def unpack_predicate(self, astring):
402402
return result
403403

404404
def unpack_rdfobject(self, astring, predicate):
405+
# if astring == 'x_wind':
406+
# import pdb; pdb.set_trace()
405407
result = astring
406408
if isinstance(astring, six.string_types) and self._prefix_suffix.match(astring):
407409
prefix, suffix = self._prefix_suffix.match(astring).groups()
@@ -431,6 +433,8 @@ def unpack_rdfobject(self, astring, predicate):
431433
result = str(results[0][0])
432434
except pyparsing.ParseException:
433435
pass
436+
except ValueError:
437+
pass
434438
return result
435439

436440
# def unpack_uri(self, astring):
@@ -900,7 +904,9 @@ def _prefixes_and_aliases(fhandle, identity, alias_dict, cache):
900904
return prefixes, aliases, aliasgraph, prefix_var_name
901905

902906

903-
def _load_netcdf_group(agroup, baseuri, gk, root_container, prefixes, prefix_group_name, aliases, aliasgraph, cache):
907+
def _load_netcdf_group(fhandle, agroup, baseuri, gk, root_container, file_variables, prefixes, prefix_group_name, aliases, aliasgraph, cache):
908+
file_variables = file_variables.copy()
909+
904910
gattrs = {}
905911
for k in agroup.ncattrs():
906912
gattrs[k] = getattr(agroup, k)
@@ -912,47 +918,47 @@ def _load_netcdf_group(agroup, baseuri, gk, root_container, prefixes, prefix_gro
912918

913919
gcontainer.attrs['bald__contains'] = set()
914920

915-
_load_netcdf_group_vars(agroup, gcontainer, gidentity, gattrs, prefixes, prefix_group_name, aliases, aliasgraph, cache)
921+
_load_netcdf_group_vars(fhandle, agroup, gcontainer, gidentity, gattrs, file_variables, prefixes, prefix_group_name, aliases, aliasgraph, cache)
916922
if 'bald__contains' not in root_container.attrs:
917923
root_container.attrs['bald__contains'] = set()
918924
root_container.attrs['bald__contains'].add(gcontainer)
919925
for gk in agroup.groups:
920926

921-
_load_netcdf_group(agroup.groups[gk], baseuri, gk, gcontainer,
927+
_load_netcdf_group(fhandle, agroup.groups[gk], baseuri, gk, gcontainer, file_variables,
922928
prefixes, prefix_group_name, aliases, aliasgraph, cache)
923929

924930

925931

926-
def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
932+
def _load_netcdf_group_vars(fhandle, agroup, root_container, baseuri, attrs, file_variables, prefixes,
927933
prefix_var_name, aliases, aliasgraph, cache):
928-
file_variables = {}
929-
for name in fhandle.variables:
934+
935+
for name in agroup.variables:
930936
if name == prefix_var_name:
931937
continue
932938

933-
sattrs = fhandle.variables[name].__dict__.copy()
939+
sattrs = agroup.variables[name].__dict__.copy()
934940

935941
identity = name
936942
if baseuri is not None:
937943
identity = baseuri + name
938944

939945
# netCDF coordinate variable special case
940-
if (len(fhandle.variables[name].dimensions) == 1 and
941-
fhandle.variables[name].dimensions[0] == name and
942-
len(fhandle.variables[name]) > 0):
946+
if (len(agroup.variables[name].dimensions) == 1 and
947+
agroup.variables[name].dimensions[0] == name and
948+
len(agroup.variables[name]) > 0):
943949

944-
if not isinstance(fhandle.variables[name][0], np.ma.core.MaskedConstant):
945-
sattrs['bald__first_value'] = fhandle.variables[name][0]
950+
if not isinstance(agroup.variables[name][0], np.ma.core.MaskedConstant):
951+
sattrs['bald__first_value'] = agroup.variables[name][0]
946952
if isinstance(sattrs['bald__first_value'], str):
947953
pass
948954

949955
elif np.issubdtype(sattrs['bald__first_value'], np.integer):
950956
sattrs['bald__first_value'] = int(sattrs['bald__first_value'])
951957
elif np.issubdtype(sattrs['bald__first_value'], np.floating):
952958
sattrs['bald__first_value'] = float(sattrs['bald__first_value'])
953-
if (len(fhandle.variables[name]) > 1 and
954-
not isinstance(fhandle.variables[name][-1], np.ma.core.MaskedConstant)):
955-
sattrs['bald__last_value'] = fhandle.variables[name][-1]
959+
if (len(agroup.variables[name]) > 1 and
960+
not isinstance(agroup.variables[name][-1], np.ma.core.MaskedConstant)):
961+
sattrs['bald__last_value'] = agroup.variables[name][-1]
956962
if isinstance(sattrs['bald__last_value'], str):
957963
pass
958964
elif np.issubdtype(sattrs['bald__last_value'], np.integer):
@@ -961,8 +967,8 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
961967
sattrs['bald__last_value'] = float(sattrs['bald__last_value'])
962968

963969
# datetime special case
964-
if 'units' in fhandle.variables[name].ncattrs():
965-
ustr = fhandle.variables[name].getncattr('units')
970+
if 'units' in agroup.variables[name].ncattrs():
971+
ustr = agroup.variables[name].getncattr('units')
966972
pattern = '^([a-z]+) since ([0-9T:\\. -]+)'
967973

968974
amatch = re.match(pattern, ustr)
@@ -973,15 +979,15 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
973979
tog = datetime.parse_datetime(origin,
974980
calendar=ig)
975981
if tog is not None:
976-
dtype = '{}{}'.format(fhandle.variables[name].dtype.kind,
977-
fhandle.variables[name].dtype.itemsize)
982+
dtype = '{}{}'.format(agroup.variables[name].dtype.kind,
983+
agroup.variables[name].dtype.itemsize)
978984
fv = netCDF4.default_fillvals.get(dtype)
979985
first = None
980-
if fhandle.variables[name][0] == fv:
981-
first = np.ma.MaskedArray(fhandle.variables[name][0],
986+
if agroup.variables[name][0] == fv:
987+
first = np.ma.MaskedArray(agroup.variables[name][0],
982988
mask=True)
983989
else:
984-
first = fhandle.variables[name][0]
990+
first = agroup.variables[name][0]
985991
if first is not None:
986992
try:
987993
first = int(first)
@@ -992,12 +998,12 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
992998
epoch=tog)
993999
if first is not np.ma.masked:
9941000
sattrs['bald__first_value'] = edate_first
995-
if len(fhandle.variables[name]) > 1:
996-
if fhandle.variables[name][0] == fv:
997-
last = np.ma.MaskedArray(fhandle.variables[name][-1],
1001+
if len(agroup.variables[name]) > 1:
1002+
if agroup.variables[name][0] == fv:
1003+
last = np.ma.MaskedArray(agroup.variables[name][-1],
9981004
mask=True)
9991005
else:
1000-
last = fhandle.variables[name][-1]
1006+
last = agroup.variables[name][-1]
10011007
if last:
10021008
try:
10031009
last = round(last)
@@ -1013,8 +1019,8 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
10131019

10141020

10151021

1016-
if fhandle.variables[name].shape:
1017-
sattrs['bald__shape'] = list(fhandle.variables[name].shape)
1022+
if agroup.variables[name].shape:
1023+
sattrs['bald__shape'] = list(agroup.variables[name].shape)
10181024
var = Array(baseuri, name, sattrs, prefixes=prefixes,
10191025
aliases=aliases, alias_graph=aliasgraph)
10201026
else:
@@ -1024,8 +1030,22 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
10241030

10251031
file_variables[name] = var
10261032

1033+
for prefix in prefixes:
1034+
if prefixes[prefix].startswith('http'):
1035+
# print('parsing: {}'.format(prefixes[prefix][:-1]))
1036+
try:
1037+
aliasgraph.parse(data=cache[prefixes[prefix][:-1]].text, format='xml')
1038+
# print('parsed: {}'.format(prefixes[prefix][:-1]))
1039+
except Exception:
1040+
try:
1041+
aliasgraph.parse(data=cache[prefixes[prefix][:-1]].text, format='n3')
1042+
# print('parsed: {} (n3)'.format(prefixes[prefix][:-1]))
1043+
except Exception:
1044+
pass
1045+
10271046
reference_prefixes = dict()
1028-
reference_graph = copy.copy(aliasgraph)
1047+
# reference_graph = copy.copy(aliasgraph)
1048+
reference_graph = aliasgraph
10291049

10301050
response = cache['https://www.opengis.net/def/binary-array-ld']
10311051
reference_graph.parse(data=response.text, format='n3')
@@ -1081,18 +1101,19 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
10811101
ref_prefs = [str(ref[0]) for ref in list(refs)]
10821102

10831103
# cycle again and find references
1084-
for name in fhandle.variables:
1104+
for name in agroup.variables:
1105+
10851106
if name == prefix_var_name:
10861107
continue
10871108

10881109
var = file_variables[name]
1089-
sattrs = fhandle.variables[name].__dict__.copy()
1110+
sattrs = agroup.variables[name].__dict__.copy()
10901111

10911112
# coordinate variables are bald__references too
10921113
if 'bald__Reference' not in var.rdf__type:
1093-
for dim in fhandle.variables[name].dimensions:
1114+
for dim in agroup.variables[name].dimensions:
10941115
if file_variables.get(dim) and name != dim:
1095-
_make_ref_entities(var, fhandle, dim, name,
1116+
_make_ref_entities(var, fhandle, agroup, dim, name,
10961117
baseuri, root_container,
10971118
file_variables, prefixes,
10981119
aliases, aliasgraph)
@@ -1111,7 +1132,7 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
11111132
var.attrs[sattr] = [file_variables.get(pref)
11121133
for pref in potrefs_list]
11131134
for pref in potrefs_list:
1114-
_make_ref_entities(var, fhandle,
1135+
_make_ref_entities(var, fhandle, agroup,
11151136
pref, name, baseuri,
11161137
root_container,
11171138
file_variables, prefixes,
@@ -1126,8 +1147,8 @@ def _load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
11261147
for pref in potrefs_set])
11271148
for pref in potrefs_set:
11281149
# coordinate variables already handled
1129-
if pref not in fhandle.variables[name].dimensions:
1130-
_make_ref_entities(var, fhandle,
1150+
if pref not in agroup.variables[name].dimensions:
1151+
_make_ref_entities(var, fhandle, agroup,
11311152
pref, name, baseuri,
11321153
root_container,
11331154
file_variables, prefixes,
@@ -1166,45 +1187,73 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None, file_locat
11661187
file_resource=True, file_locator=file_locator)
11671188

11681189
root_container.attrs['bald__contains'] = set()
1169-
1170-
_load_netcdf_group_vars(fhandle, root_container, baseuri, attrs, prefixes,
1190+
1191+
file_variables = {}
1192+
_load_netcdf_group_vars(fhandle, fhandle, root_container, baseuri, attrs, file_variables, prefixes,
11711193
prefix_group_name, aliases, aliasgraph, cache)
11721194

11731195
for gk in fhandle.groups:
11741196
if gk == prefix_group_name:
11751197
continue
11761198

1177-
_load_netcdf_group(fhandle.groups[gk], baseuri, gk, root_container,
1199+
_load_netcdf_group(fhandle, fhandle.groups[gk], baseuri, gk, root_container, file_variables,
11781200
prefixes, prefix_group_name, aliases, aliasgraph, cache)
11791201
# _create_references(root_container,
11801202
# prefixes, prefix_group_name, aliases, aliasgraph, cache)
11811203

11821204
return root_container
11831205

1184-
def _make_ref_entities(var, fhandle, pref, name, baseuri,
1206+
def _make_ref_entities(var, fhandle, variables, pref, name, baseuri,
11851207
root_container, file_variables,
11861208
prefixes, aliases, aliasgraph):
1187-
shapematch = (fhandle.variables[name].shape ==
1188-
fhandle.variables[pref].shape)
1209+
namevar = None
1210+
prefvar = None
1211+
try:
1212+
prefvar = variables[pref]
1213+
except IndexError:
1214+
try:
1215+
if not pref.startswith('/'):
1216+
ppref = '/' + pref
1217+
prefvar = fhandle[ppref]
1218+
except IndexError:
1219+
pass
1220+
try:
1221+
namevar = variables[name]
1222+
except IndexError:
1223+
try:
1224+
if not name.startswith('/'):
1225+
nname = '/' + name
1226+
namevar = fhandle[nname]
1227+
except IndexError:
1228+
pass
11891229

1190-
if (fhandle.variables[name].shape and not shapematch and
1191-
fhandle.variables[pref].shape):
1230+
# if pref in variables:
1231+
# prefvar = variables[pref]
1232+
# elif pref in fhandle:
1233+
# prefvar = fhandle[pref]
1234+
# if name in variables:
1235+
# namevar = variables[name]
1236+
# elif name in fhandle:
1237+
# namevar = fhandle[name]
1238+
shapematch = (namevar.shape == prefvar.shape)
1239+
1240+
if (namevar is not None and prefvar is not None and namevar.shape and not shapematch and
1241+
prefvar.shape):
11921242
try:
11931243
refset = var.attrs.get('bald__references', set())
11941244
if not isinstance(refset, set):
11951245
refset = set((refset,))
11961246
identity = None
11971247
rattrs = {}
11981248

1199-
reshapes = netcdf_shared_dimensions(fhandle.variables[name],
1200-
fhandle.variables[pref])
1249+
reshapes = netcdf_shared_dimensions(namevar, prefvar)
12011250

1202-
rattrs['bald__targetShape'] = list(fhandle.variables[pref].shape)
1251+
rattrs['bald__targetShape'] = list(prefvar.shape)
12031252
sourceReshape = [i[1] for i in reshapes['sourceReshape'].items()]
1204-
if sourceReshape != list(fhandle.variables[name].shape):
1253+
if sourceReshape != list(namevar.shape):
12051254
rattrs['bald__sourceReshape'] = sourceReshape
12061255
targetReshape = [i[1] for i in reshapes['targetReshape'].items()]
1207-
if targetReshape != list(fhandle.variables[pref].shape):
1256+
if targetReshape != list(prefvar.shape):
12081257
rattrs['bald__targetReshape'] = targetReshape
12091258
rattrs['bald__target'] = set((file_variables.get(pref),))
12101259
ref_node = Reference(baseuri, identity, rattrs,
@@ -1214,13 +1263,14 @@ def _make_ref_entities(var, fhandle, pref, name, baseuri,
12141263

12151264
refset.add(ref_node)
12161265
var.attrs['bald__references'] = refset
1266+
# Indexing and dimension identification can fail, especially
1267+
# with unexpectedly formatted files. Fail silently on load, so
1268+
# that a partial graph may be returned. Issues like this are
1269+
# deferred to validation.
12171270
except ValueError:
1218-
# Indexing and dimension identification can fail, especially
1219-
# with unexpectedly formatted files. Fail silently on load, so
1220-
# that a partial graph may be returned. Issues like this are
1221-
# deferred to validation.
12221271
pass
1223-
1272+
# except IndexError:
1273+
# pass
12241274

12251275
def validate_netcdf(afilepath, baseuri=None, cache=None, uris_resolve=False):
12261276
"""

lib/bald/tests/integration/CDL/group_array_geo.cdl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
netcdf tmpMwXy8U {
22
dimensions:
33
x_t = 144 ;
4-
y_t = 90 ;
4+
y_t = 91 ;
55
x_q = 144 ;
6-
y_q = 91 ;
6+
y_q = 90 ;
77
time = 4 ;
88
variables:
99
float x_t(x_t) ;

0 commit comments

Comments
 (0)