Skip to content

Commit aff4ad9

Browse files
authored
Is prefixed by remote (#96)
* prefix groups * remote prefix * CDL group order
1 parent 1f1f431 commit aff4ad9

File tree

8 files changed

+138
-41
lines changed

8 files changed

+138
-41
lines changed

lib/bald/__init__.py

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -802,31 +802,62 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None, file_locat
802802
cache = HttpCache()
803803

804804
with load(afilepath) as fhandle:
805-
# ensure that baseuri always temrinates in a '/'
805+
# ensure that baseuri always terminates in a '/'
806806
if baseuri is None:
807807
baseuri = 'file://{}/'.format(afilepath)
808808
elif type(baseuri) == str and not baseuri.endswith('/'):
809809
baseuri = '{}/'.format(baseuri)
810810

811811
identity = baseuri
812+
813+
# prefixes are defined as group attributes in a dedicated group, and/or
814+
# by external resources
812815
prefix_var_name = None
813816
if hasattr(fhandle, 'bald__isPrefixedBy'):
814817
prefix_var_name = fhandle.bald__isPrefixedBy
815818

816-
prefix_var = (fhandle[fhandle.bald__isPrefixedBy] if
817-
hasattr(fhandle, 'bald__isPrefixedBy') else {})
819+
prefix_ids = (fhandle.bald__isPrefixedBy if
820+
hasattr(fhandle, 'bald__isPrefixedBy') else '')
821+
prefix_urls = []
822+
prefix_groups = []
823+
for pid in prefix_ids.split(' '):
824+
if pid in fhandle.groups:
825+
prefix_groups.append(fhandle.groups[pid])
826+
elif pid.startswith('http://') or pid.startswith('https://'):
827+
prefix_urls.append(pid)
818828
prefixes = {}
819829

820830
skipped_variables = []
821-
if prefix_var != {}:
822-
prefixes = (dict([(prefix, getattr(prefix_var, prefix)) for
823-
prefix in prefix_var.ncattrs()]))
824-
if isinstance(prefix_var, netCDF4._netCDF4.Variable):
825-
skipped_variables.append(prefix_var.name)
826-
else:
827-
for k in fhandle.ncattrs():
828-
if k.endswith('__'):
829-
prefixes[k] = getattr(fhandle, k)
831+
for prefix_group in prefix_groups:
832+
if prefix_group != {}:
833+
prefixes = (dict([(prefix, getattr(prefix_group, prefix)) for
834+
prefix in prefix_group.ncattrs() if prefix.endswith('__')]))
835+
if isinstance(prefix_group, netCDF4._netCDF4.Variable):
836+
skipped_variables.append(prefix_var.name)
837+
# else:
838+
# for k in fhandle.ncattrs():
839+
# if k.endswith('__'):
840+
# prefixes[k] = getattr(fhandle, k)
841+
842+
prefix_graph = rdflib.Graph()
843+
for prefix_url in prefix_urls:
844+
res = cache[prefix_url]
845+
try:
846+
prefix_graph.parse(data=res.text, format='xml')
847+
except Exception:
848+
print('Failed to parse: {} for prefixes.'.format(prefix_url))
849+
850+
qres = prefix_graph.query("select ?prefix ?uri where \n"
851+
"{\n"
852+
"?s <http://purl.org/vocab/vann/preferredNamespacePrefix> ?prefix ;\n"
853+
"<http://purl.org/vocab/vann/preferredNamespaceUri> ?uri . \n"
854+
"}")
855+
for res in qres:
856+
key, value = (str(res[0]), str(res[1]))
857+
if key in prefixes and value !=prefixes[key]:
858+
prefixes.pop(key)
859+
else:
860+
prefixes[key] = value
830861

831862
# check that default set is handled, i.e. bald__ and rdf__
832863
if 'bald__' not in prefixes:

lib/bald/tests/integration/CDL/array_geo.cdl

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,6 @@ dimensions:
33
pdim0 = 11 ;
44
pdim1 = 17 ;
55
variables:
6-
int prefix_list ;
7-
prefix_list:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
8-
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
9-
prefix_list:rdfs__ = "http://www.w3.org/2000/01/rdf-schema#" ;
10-
prefix_list:cf__ = "http://def.scitools.org.uk/CFTerms/" ;
11-
prefix_list:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
12-
prefix_list:geo__ = "http://www.opengis.net/ont/geosparql#" ;
13-
prefix_list:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
14-
156
int temp(pdim0, pdim1) ;
167
temp:cf__standard_name = "air_temperature" ;
178
temp:nc__long_name = "Air temperature obs example at point" ;
@@ -23,11 +14,23 @@ variables:
2314
pressure:nc__long_name = "Air pressure at UCAR Centre Green" ;
2415
pressure:rdfs__label = "Air pressure at UCAR Centre Green" ;
2516
pressure:geo__asWKT = "POINT(-105.24584700000003 40.0315278)" ;
26-
2717
// global attributes:
2818
:_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ;
2919
:rdf__type = "bald__Container" ;
3020
:bald__isPrefixedBy = "prefix_list" ;
3121
data:
3222

23+
// prefix group
24+
group: prefix_list {
25+
:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
26+
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
27+
:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
28+
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
29+
:rdfs__ = "http://www.w3.org/2000/01/rdf-schema#" ;
30+
:cf__ = "http://def.scitools.org.uk/CFTerms/" ;
31+
:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
32+
:geo__ = "http://www.opengis.net/ont/geosparql#" ;
33+
:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
34+
}
35+
3336
}

lib/bald/tests/integration/CDL/array_multitypes.cdl

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,6 @@ dimensions:
33
pdim0 = 11 ;
44
pdim1 = 17 ;
55
variables:
6-
int prefix_list ;
7-
prefix_list:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
8-
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
9-
prefix_list:rdfs__ = "http://www.w3.org/2000/01/rdf-schema#" ;
10-
prefix_list:cf__ = "http://def.scitools.org.uk/CFTerms/" ;
11-
prefix_list:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
12-
prefix_list:geo__ = "http://www.opengis.net/ont/geosparql#" ;
13-
prefix_list:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
14-
156
int temp(pdim0, pdim1) ;
167
temp:cf__standard_name = "air_temperature" ;
178
temp:nc__long_name = "Air temperature obs example at point" ;
@@ -31,4 +22,15 @@ variables:
3122
:bald__isPrefixedBy = "prefix_list" ;
3223
data:
3324

25+
// prefix group
26+
group: prefix_list {
27+
:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
28+
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
29+
:rdfs__ = "http://www.w3.org/2000/01/rdf-schema#" ;
30+
:cf__ = "http://def.scitools.org.uk/CFTerms/" ;
31+
:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
32+
:geo__ = "http://www.opengis.net/ont/geosparql#" ;
33+
:nc__ = "http://def.scitools.org.uk/NetCDF/" ;
34+
}
35+
3436
}

lib/bald/tests/integration/CDL/array_reference.cdl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ variables:
88
parent_variable:bald__references = "child_variable" ;
99
int child_variable(pdim0, pdim1) ;
1010
child_variable:rdf__type = "bald__Reference" ;
11-
int prefix_list ;
12-
prefix_list:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
13-
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
14-
11+
// prefix group
12+
group: prefix_list {
13+
:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
14+
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
15+
}
1516
// global attributes:
1617
:_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ;
1718
:rdf__type = "bald__Container" ;
lib/bald/tests/integration/CDL/array_reference_external_prefix.cdl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
netcdf tmpMwXy8U {
2+
dimensions:
3+
pdim0 = 11 ;
4+
pdim1 = 17 ;
5+
variables:
6+
int parent_variable(pdim0, pdim1) ;
7+
parent_variable:rdf__type = "bald__Array" ;
8+
parent_variable:bald__references = "child_variable" ;
9+
int child_variable(pdim0, pdim1) ;
10+
child_variable:rdf__type = "bald__Reference" ;
11+
12+
// global attributes:
13+
:_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ;
14+
:rdf__type = "bald__Container" ;
15+
:bald__isPrefixedBy = "http://def.binary-array-ld.net/prefixes" ;
16+
}

lib/bald/tests/integration/CDL/multi_array_reference.cdl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,6 @@ dimensions:
33
pdim0 = 11 ;
44
pdim1 = 17 ;
55
variables:
6-
int prefix_list ;
7-
prefix_list:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
8-
prefix_list:metce__ = "http://codes.wmo.int/common/observation-type/METCE/2013/" ;
9-
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
10-
116
int data_variable1(pdim0, pdim1) ;
127
data_variable1:bald__references = "location_variable" ;
138
data_variable1:long_name = "Gerald";
@@ -34,7 +29,12 @@ variables:
3429
int list_collection ;
3530
list_collection:bald__references = "( data_variable1 data_variable2 )" ;
3631

37-
32+
// prefix group
33+
group: prefix_list {
34+
:bald__ = "https://www.opengis.net/def/binary-array-ld/" ;
35+
:metce__ = "http://codes.wmo.int/common/observation-type/METCE/2013/" ;
36+
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
37+
}
3838
// global attributes:
3939
:_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ;
4040
:bald__isPrefixedBy = "prefix_list" ;
array_reference_external_prefix.ttl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
@prefix bald: <https://www.opengis.net/def/binary-array-ld/> .
2+
@prefix dcat: <http://www.w3.org/ns/dcat#> .
3+
@prefix dct: <http://purl.org/dc/terms/> .
4+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
5+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
6+
@prefix this: <file://CDL/array_reference_external_prefix.cdl/> .
7+
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
8+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
9+
10+
this: a bald:Container ;
11+
dct:format [ a dct:MediaType ;
12+
dct:identifier <http://vocab.nerc.ac.uk/collection/M01/current/NC/> ] ;
13+
dcat:distribution [ a dcat:Distribution ;
14+
dcat:mediaType [ a dcat:MediaType ;
15+
dct:identifier "application/x-netcdf" ] ] ;
16+
bald:contains this:child_variable,
17+
this:parent_variable ;
18+
bald:isPrefixedBy <http://def.binary-array-ld.net/prefixes> .
19+
20+
this:parent_variable a bald:Array ;
21+
bald:references this:child_variable ;
22+
bald:shape ( 11 17 ) .
23+
24+
this:child_variable a bald:Array,
25+
bald:Reference ;
26+
bald:shape ( 11 17 ) .
27+

lib/bald/tests/integration/test_cdl_rdfgraph.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,23 @@ def test_array_reference_with_baseuri(self):
6868
expected_rdfgraph.parse(sf, format='n3')
6969
self.check_result(rdfgraph, expected_rdfgraph)
7070

71+
def test_array_reference_external_prefix(self):
    """Check the RDF graph built from the external-prefix CDL fixture.

    Generates a netCDF file from ``array_reference_external_prefix.cdl``
    with ``ncgen``, loads it through ``bald.load_netcdf`` using the test
    case's cache, and compares the resulting graph with the stored
    ``array_reference_external_prefix.ttl`` expectation.

    When the ``bald_update_results`` environment variable is set, the
    stored expectation is rewritten from the current output before the
    comparison is made.
    """
    cdl_name = 'array_reference_external_prefix.cdl'
    with self.temp_filename('.nc') as nc_path:
        # Compile the CDL text fixture into a real netCDF file.
        ncgen_cmd = ['ncgen', '-o', nc_path,
                     os.path.join(self.cdl_path, cdl_name)]
        subprocess.check_call(ncgen_cmd)

        # A fixed base URI keeps the generated identifiers independent of
        # the temporary file's location.
        container = bald.load_netcdf(nc_path,
                                     baseuri='file://CDL/{}'.format(cdl_name),
                                     cache=self.acache)
        actual_graph = container.rdfgraph()
        serialised = actual_graph.serialize(format='n3').decode("utf-8")

        regenerate = os.environ.get('bald_update_results') is not None
        if regenerate:
            with open(os.path.join(self.ttl_path, 'array_reference_external_prefix.ttl'), 'w') as out:
                out.write(serialised)

        with open(os.path.join(self.ttl_path, 'array_reference_external_prefix.ttl'), 'r') as stored:
            expected_graph = rdflib.Graph()
            expected_graph.parse(stored, format='n3')
        self.check_result(actual_graph, expected_graph)
87+
7188
def test_multi_array_reference(self):
7289
with self.temp_filename('.nc') as tfile:
7390
cdlname = 'multi_array_reference.cdl'

0 commit comments

Comments
 (0)