forked from linked-statistics/COOS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoos.html
643 lines (560 loc) · 36.3 KB
/
coos.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
<!DOCTYPE html>
<html>
<head>
<title>Core Ontology for Official Statistics</title>
<meta http-equiv='Content-Type' content='text/html;charset=utf-8'/>
<script src='https://www.w3.org/Tools/respec/respec-w3c' class='remove' defer></script>
<script class='remove'>
var respecConfig = {
specStatus: "CG-DRAFT",
shortName: "COOS",
subtitle: "A Core Ontology for Official Statistics",
editors: [
{ name: "Franck Cotton", company: "INSEE", companyURL: "http://www.insee.fr/" }
],
authors: [
{ name: "Flavio Rizzolo", company: "Statistics Canada", companyURL: "https://www.statcan.gc.ca/" },
{ name: "Daniel Gillman", company: "Bureau of Labor Statistics", companyURL: "https://www.bls.gov/" }
],
wg: "Semantic Statistics Community Group",
wgURI: "https://www.w3.org/community/semstats/",
wgPatentURI: "http://creativecommons.org/licenses/by/4.0/",
processVersion: 2018,
edDraftURI: "https://github.com/linked-statistics/COOS",
github: "https://github.com/linked-statistics/COOS",
localBiblio: {
"CSDA": {
"date": "November 2018",
"href": "https://statswiki.unece.org/display/DA",
"publisher": "Unece",
"title": "Common Statistical Data Architecture (CSDA)"
},
"DDI-CDI": {
"date": "July 2021",
"href": "https://ddialliance.org/announcement/public-review-ddi-cross-domain-integration-ddi-cdi",
"publisher": "DDI Alliance",
"title": "DDI - Cross Domain Integration"
},
"GAMSO": {
"date": "January 2019",
"href": "https://statswiki.unece.org/display/gamso",
"publisher": "Unece",
"title": "Generic Activity Model for Statistical Organizations, version 1.2"
},
"GSBPM": {
"date": "January 2019",
"href": "https://statswiki.unece.org/display/gsbpm",
"publisher": "Unece",
"title": "Generic Statistical Business Process Model, version 5.1"
},
"GSBPM-LM": {
"authors": [
"Franck Cotton",
"Daniel Gillman"
],
"date": "October 2015",
"href": "http://ceur-ws.org/Vol-1551/article-06.pdf",
"title": "Modeling the Statistical Process with Linked Metadata"
},
"GSIM": {
"href": "https://statswiki.unece.org/display/gsim",
"publisher": "Unece",
"title": "Generic Statistical Information Model, version 1.2"
},
"GSIM-LM": {
"authors": [
"Monica Scannapieco",
"Laura Tosco",
"Daniel Gillman",
"Antoine Dreyer",
"Guillaume Duffes"
],
"date": "October 2016",
"href": "http://ceur-ws.org/Vol-1654/article-03.pdf",
"title": "An OWL Ontology for the Generic Statistical Information Model (GSIM): Design and Implementation"
},
"PAV": {
"authors": [
"Paolo Ciccarese",
"Stian Soiland-Reyes"
],
"date": "16 March 2015",
"href": "https://pav-ontology.github.io/pav/",
"title": "PAV ontology: provenance, authoring and versioning"
},
"PROV-RL":{
"authors": [
"Franck Cotton",
"Guillaume Duffes",
"Flavio Rizzolo"
],
"date": "October 2019",
"href": "http://ceur-ws.org/Vol-2549/article-08.pdf",
"title": "Using PROV-O to Represent Lineage in Statistical Processes: A Record Linkage Example"
}
}
};
</script>
</head>
<body>
<section id='abstract'>
<p>The abstract will go here.</p>
</section>
<section id='sotd'>
<p>This is a draft document and may be updated, replaced or obsoleted by other documents at any time. It is inappropriate to cite this document as other than work in progress.</p>
</section>
<section>
<h2>Background and Motivation</h2>
<p>The Official Statistics community has been collaborating for some years in order to define business models aiming at building common representations of its processes and information. These works, mostly conducted under the auspices of the UNECE High-Level Group for the Modernisation of Official Statistics (ModernStats), include the Generic Statistical Business Process Model (GSBPM), the Generic Statistical Information Model (GSIM), the Generic Activity Model for Statistical Organizations (GAMSO), and the Common Statistical Data Architecture (CSDA).</p>
<p>GSBPM provides a framework to describe the building blocks of statistical production in terms of sub-processes. Its main goal is to help statistical organizations standardize their statistical production processes. It was the first ModernStats model to be published, back in 2008, and has been widely used by national and international statistical agencies since then.</p>
<p>GAMSO provides a framework to describe the building blocks of statistical production in terms of activities. It complements the GSBPM in two ways: (i) by covering areas beyond the scope of GSBPM, and (ii) by providing a business capability view of statistical production itself.</p>
<p>GSIM complements both GSBPM and GAMSO by providing a catalogue of information objects to describe statistical data and metadata. It functions as a reference framework consisting of a set of standardised information objects to be used in statistical production.</p>
<p>CSDA provides a capability framework cataloguing the major abilities a statistical organization has to use, produce, share and manage data and metadata. CSDA integrates with the GSBPM and GAMSO by enabling processes and activities related to the lifecycle management of GSIM information objects.</p>
<p>As more and more statistical offices turn to semantic standards in order to formalize their data and metadata, it is time to build on our international core models to establish common foundations on which the different works can develop in a coherent way. This should be done using a formal framework that allows interoperability, activation and globally unique identification.</p>
<p>This paper proposes a first try at a base OWL vocabulary for Official Statistics. It suggests formal representations for the core concepts used in our domain. It builds on a set of well-known OWL vocabularies, namely SKOS, PROV, ORG, DCAT and the Dublin Core.</p>
<p>In the following sections, we stay at a very general level and deal with the following domains: statistical activities, products and actors. For more precise definitions like those that can be found in the GSIM or in CSPA, we refer to previous works: [[GSBPM-LM]] and [[GSIM-LM]] respectively.</p>
</section>
<section id="intro">
<h2>Introduction</h2>
<p>The ontology covers statistical activities, organizations and products.</p>
<section id="objectives">
<h3>Objectives and scope</h3>
<p>The ontology does not aim to cover the domains exhaustively. Inclusion criteria for a concept are:</p>
<ul>
<li>The concept helps clarify and harmonize the terminology</li>
<li>The concept helps bridge constructs from different models</li>
<li>The concept helps connect statistical models to other well-known vocabularies</li>
</ul>
<p>The ontology also defines a large number of individuals (class instances) in order to standardize their naming and identification.</p>
</section>
</section>
<section id="overview">
<h2>Overview</h2>
<p>TBD.</p>
<div class="ednote" title="Governance">
<p>Include:</p>
<ul>
<li>Reference to governance policy of Unece MOS "stuff"</li>
<li>Specific policy elements that apply to COOS</li>
</ul>
</div>
</section>
<section id="nsvoc">
<h2>COOS Namespace and Vocabulary</h2>
<p>The COOS namespace URI is:</p>
<ul>
<li><strong>http://id.unece.org/def/coos#</strong></li>
</ul>
<p>The prefix <code>coos</code> is associated with this namespace throughout this document.</p>
<p>The COOS vocabulary is a set of URIs, given in the left-hand column in the table below. The right-hand column indicates in which section below the corresponding term is explained in more detail.</p>
<table class="simple">
<caption>Table 1. COOS Vocabulary</caption>
<thead>
<tr>
<th>URI</th>
<th>Definition</th>
</tr>
</thead>
<tbody>
<tr>
<td>coos:Activity</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:ActivityArea</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalActivity</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalProductionActivity</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:OverarchingActivity</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:Phase</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:SubProcess</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalProgram</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalProgramCycle</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalSupportProgram</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:Task</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:Capability</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:CoreCapability</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:CrossCuttingCapability</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:uses</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:supports</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:enables</td>
<td><a href="#activities">Section 5. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalOrganization</td>
<td><a href="#organizations">Section 6. Organizations</a></td>
</tr>
<tr>
<td>coos:NationalStatisticalInstitute</td>
<td><a href="#organizations">Section 6. Organizations</a></td>
</tr>
<tr>
<td>coos:InternationalAgency</td>
<td><a href="#organizations">Section 6. Organizations</a></td>
</tr>
<tr>
<td>coos:InformationObject</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:StatisticalInformationObject</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:StatisticalEntity</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:StatisticalProduct</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:StatisticalDataset</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:DimensionalDataset</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:GraphDataset</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:KeyValueDataset</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:RectangularDataset</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:TransposedDataset</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:ProductPresentation</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:ProductContent</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:metadataFor</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:content</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
<tr>
<td>coos:presentation</td>
<td><a href="#products">Section 7. Products</a></td>
</tr>
</tbody>
</table>
<p>COOS also defines a number of individuals which are mostly instances of the classes above: those are described in more detail in the relevant sections.</p>
<p>Other vocabularies used in this document or in the RDF specification are listed in the table below, with their namespaces and associated prefixes.</p>
<table class="simple">
<caption>Table 2. Other vocabularies used in this document</caption>
<thead>
<tr>
<th>Prefix</th>
<th>URI</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>cc</td>
<td>http://creativecommons.org/ns#</td>
<td>Describing Copyright in RDF: Creative Commons Rights Expression Language ([[!CC-ABOUT]])</td>
</tr>
<tr>
<td>dc</td>
<td>http://purl.org/dc/elements/1.1/</td>
<td>Dublin Core Metadata Element Set, Version 1.1 ([[!DC11]])</td>
</tr>
<tr>
<td>dcat</td>
<td>http://www.w3.org/ns/dcat#</td>
<td>Data Catalog Vocabulary ([[!vocab-dcat-2]])</td>
</tr>
<tr>
<td>dcterms</td>
<td>http://purl.org/dc/terms/</td>
<td>Dublin Core Metadata Initiative Metadata Terms ([[!DCTERMS]])</td>
</tr>
<tr>
<td>foaf</td>
<td>http://xmlns.com/foaf/0.1/ </td>
<td>FOAF Vocabulary Specification 0.99 ([[!FOAF]])</td>
</tr>
<tr>
<td>org</td>
<td>http://www.w3.org/ns/org#</td>
<td>The Organization Ontology ([[!vocab-org]])</td>
</tr>
<tr>
<td>pav</td>
<td>http://purl.org/pav/</td>
<td>PAV - Provenance, Authoring and Versioning ([[!PAV]])</td>
</tr>
<tr>
<td>prov</td>
<td>http://www.w3.org/ns/prov#</td>
<td>PROV-O: The PROV Ontology ([[!prov-o]])</td>
</tr>
<tr>
<td>skos</td>
<td>http://www.w3.org/2004/02/skos/core#</td>
<td>SKOS Simple Knowledge Organization System Reference ([[!skos-reference]])</td>
</tr>
<tr>
<td>vann</td>
<td>http://purl.org/vocab/vann/</td>
<td>VANN: A vocabulary for annotating vocabulary descriptions ([[!vann]])</td>
</tr>
<tr>
<td>voaf</td>
<td>http://purl.org/vocommons/voaf#</td>
<td>Vocabulary of a Friend (VOAF) ([[!voaf]])</td>
</tr>
</tbody>
</table>
<p>RDF, RDFS, OWL and XSD vocabularies or namespaces are also used, with their usual URIs and prefixes.</p>
<p>The RDF examples are expressed with the Terse RDF Triple language (Turtle) [[turtle]]. Unless otherwise specified, these examples use the http://example.org/ns/ namespace, which is represented by the <code>ex:</code> prefix. Note however that individual resource names used as examples are entirely fictitious.</p>
<p class="ednote" title="Naming policy">Include or reference the naming policy</p>
</section>
<section id="activities">
<h2>Activities</h2>
<section id="activities-gsbpm">
<h3>Base GSBPM model</h3>
<p>A first approach to modeling statistical activities, based on the GSBPM, is presented in [[GSBPM-LM]]. Basically, three classes are defined in this GSBPM ontology, which are depicted in the figure at the end of this section.</p>
<p><code>StatisticalProductionActivity</code> is a subclass of both <code>prov:Activity</code> and <code>skos:Concept</code>. Using SKOS allows us to organize the activities in schemes, like GSBPM or GAMSO. Using PROV allows us to specify who undertakes an activity and what entities it consumes or produces.</p>
<pre class="example" title="Example of statistical production activity">
ex:produce-study a coos:StatisticalProductionActivity ;
rdfs:label "Produce study"@en .
</pre>
<p><code>Phase</code> and <code>SubProcess</code> are both subclasses of <code>StatisticalProductionActivity</code> and represent respectively the concepts of a GSBPM phase and GSBPM sub-process.</p>
<p>The ontology also defines an individual for each GSBPM phase or subprocess, as well as two specific individuals: the <code>GSBPM</code> itself, which is viewed as a <code>skos:ConceptScheme</code> (the scheme of all the GSBPM phases and subprocesses), and <code>statisticalProductionProcess</code>, which is an instance of <code>StatisticalProductionActivity</code> that represents the statistical production process as a whole.</p>
<p>The links between the different individuals are represented by the usual SKOS properties: all the sub-processes of a phase have a <code>skos:broader</code> link to the phase, and each phase has a <code>skos:broader</code> link to the <code>statisticalProductionProcess</code> individual.</p>
<pre class="example nohighlight" title="Example of GSBPM phase and sub-process">
&lt;http://id.unece.org/activities/phase/2&gt; a coos:Phase ;
skos:notation "2" ;
skos:prefLabel "Design"@en ;
skos:narrower &lt;http://id.unece.org/activities/subProcess/2.3&gt; .
&lt;http://id.unece.org/activities/subProcess/2.3&gt; a coos:SubProcess ;
skos:notation "2.3" ;
skos:prefLabel "Design collection"@en ;
skos:broader &lt;http://id.unece.org/activities/phase/2&gt; .
</pre>
<p>The following figure summarizes the vocabulary terms defined until now.</p>
<figure id="image-gsbpm">
<img src="img/coos-gsbpm.png" alt="GSBPM vocabulary overview" title="GSBPM vocabulary overview"/>
<figcaption>GSBPM vocabulary overview</figcaption>
</figure>
</section>
<section id="activities-gamso">
<h3>Adding GAMSO</h3>
<p>Introducing GAMSO in this framework is not very difficult, but raises a few interesting questions. If we look at the terminology used in GAMSO, we find that the main concepts used are activity and activity area.</p>
<p>For the concept of activity, the simplest idea is to introduce a <code>StatisticalActivity</code> class that would be a subclass of <code>prov:Activity</code> and a superclass of <code>StatisticalProductionActivity</code>. All second-level GAMSO activities would be instances of this <code>StatisticalActivity</code> class.</p>
<p>For the activity areas, the question is a bit more tricky: are these really activities, or just boxes where we classify the second-level activities? In the first case, activity areas will be themselves instances of <code>StatisticalActivity</code>, in the second case (represented below) they will just be instances of <code>skos:Concept</code> or of a specific <code>StatisticalActivityArea</code> sub-class. The answer can in fact be different for each GAMSO activity area: that does not change the global structure of the ontology.</p>
<figure id="image-gamso">
<img src="img/coos-gamso.png" alt="GAMSO vocabulary overview" title="GAMSO vocabulary overview"/>
<figcaption>GAMSO vocabulary overview</figcaption>
</figure>
<p>As previously for the GSBPM, individuals would be defined for each activity and activity area, and the GAMSO itself would be a <code>skos:ConceptScheme</code> with all four activity areas having <code>skos:broader</code> links to it.</p>
<p>In addition, we need two properties to describe how an <code>Activity</code> relates to a <code>StatisticalProductionActivity</code>:</p>
<ul>
<li><code>supports</code>: it means to enable to perform or function. We say that an <code>Activity</code>, e.g. GAMSO Manage Statistical Methods, <code>supports</code> a <code>StatisticalProductionActivity</code>, e.g. a GSBPM Phase.</li>
<li><code>uses</code>: it means to require to perform or function. We say that a <code>StatisticalProductionActivity</code>, e.g. GSBPM Overarching Processes, <code>uses</code> an <code>Activity</code>, e.g. Manage IT.</li>
</ul>
<p>Properties <code>uses</code> and <code>supports</code> are inverses of each other. They are weaker (looser) forms of dependencies than partitive relationships: <code>uses/supports</code> are about function whereas <code>part/whole</code> are about constitution.</p>
<p>A few points should be studied further:</p>
<ul>
<li>The notes of the second-level GAMSO activities are very well structured, so it would be possible to replace (or double) them with lists of individual concepts for third-level activities. For example, activity 2.1.1 ('Identify disruptions and capability improvements') would be added in the figure above with a <code>skos:broader</code> link to the activity 2.1.</li>
<li>We could refine the model by introducing more specific classes, for example <code>StatisticalSupportActivity</code>, <code>StatisticalStrategicActivity</code>, etc. That should be justified by well-defined use cases.</li>
<li>The relation between the GSBPM as a concept scheme and as an activity area has to be clarified. For now we can use a simple <code>rdfs:seeAlso</code> property, but in a totally integrated view, all GSBPM and GAMSO artefacts would go in a single <code>skos:ConceptScheme</code> (the GAMSO) and the GSBPM would not be a concept scheme anymore: it would be an instance of both <code>StatisticalArea</code> and <code>StatisticalProductionActivity</code>.</li>
<li>There is a remaining question about the over-arching processes of the GSBPM. They probably should be viewed as components of the GSBPM concept scheme, and instances of class <code>StatisticalProductionActivity</code> which are not phases or sub-processes; an additional <code>OverArchingProcess</code> (or <code>TransverseActivity</code>, etc.) class could be created.</li>
<li>Some clarification should be made on the relations between similar GSBPM over-arching processes and GAMSO activities, notably 'Quality Management' (GSBPM) and 'Manage Quality' (GAMSO 3.10).</li>
</ul>
</section>
<section id="activities-other">
<h3>Adding GSIM and CSDA</h3>
<p>Adding the Generic Statistical Information Model (GSIM) in this framework allows us to refine <code>StatisticalActivity</code> even further. Parallel to <code>StatisticalProductionActivity</code> we can define three additional sub-classes of <code>StatisticalActivity</code>:</p>
<ul>
<li><code>StatisticalProgram</code>: it is essentially a set of activities carried out to produce statistics. These statistics are about the set of units in scope for the program, e.g. “All persons with a university degree”, within a given subject field, e.g. income statistics, tourism, etc.</li>
<li><code>StatisticalProgramCycle</code>: statistical program activities are often repeated over time in iterations called cycles. A <code>StatisticalProgramCycle</code> is one of those iterations for a specific time and geography.</li>
<li><code>StatisticalSupportProgram</code>: these are all centralized, corporate-level activities that allow statistical programs to exist and operate. These span from requirements elicitation, design and implementation (GSBPM Phases 1-3) to data, metadata and quality management (GSBPM overarching processes) and the statistical portion of capability development and corporate support (GAMSO).</li>
</ul>
<p>Another dimension to consider is that of information capabilities. A capability is an ability a statistical organization possesses to undertake a specific activity. It is only achieved through the integration of all relevant capability elements (e.g. methods, processes, standards and frameworks, IT systems and people skills). The notion of capability is often used in enterprise architecture approaches like TOGAF or, in the statistical domain, the Common Statistical Data Architecture [[CSDA]]. Capabilities can be mapped to strategic goals and objectives and provide a useful starting point to map lower level elements such as business process and functions, applications and technology assets.</p>
<ul>
<li><code>Core capabilities: </code> capabilities the organization needs to execute its core business, i.e. the production of statistics. They generally map to phases in GSBPM.</li>
<li><code>Cross-cutting capabilities: </code>capabilities used to formulate and implement the policies that the organization chooses for its internal operations. They generally map to corporate support in GAMSO and overarching processes in GSBPM.</li>
</ul>
<pre class="example" title="Example of capability">
ex:csda-data-integration a coos:Capability ;
rdfs:label "Data integration"@en ;
skos:definition "The ability to combine, link, relate and/or align different data sets in order to create an integrated information set."@en .
</pre>
</section>
<section id="activities-task">
<h3>More detailed activities</h3>
<p>The previous definitions stay in the framework of the GAMSO/GSBPM/GSIM/CSDA, but more precise notions will be needed by the NSIs for the more detailed modelling of their statistical activities. COOS users can adopt different names for these more specific activities that are scoped by a given GSBPM sub-process or overarching activity, but COOS defines the generic <code>Task</code> class as a common term for better interoperability. Tasks can be more or less fine-grained and form hierarchies.</p>
<pre class="example" title="Example of task">
ex:calculate-mean-wages a coos:Task ;
rdfs:label "Calculate mean wages"@en ;
skos:broader &lt;http://id.unece.org/activities/subProcess/5.7&gt; .
ex:produce-study prov:wasInformedBy ex:calculate-mean-wages .
</pre>
<p>We see in this example how we can use PROV to represent links between activities: here we suppose that the "Produce study" activity uses the results of the "Calculate mean wages" task.</p>
<figure id="image-ex1">
<img src="img/coos-ex1.png" alt="Using PROV with COOS constructs" title="Using PROV with COOS constructs"/>
<figcaption>Using PROV with COOS constructs</figcaption>
</figure>
<p>As mentioned previously, the <code>Task</code> class can be used by NSIs to create instances for their own needs. Two NSIs can create their own "Calculate mean wages" task, and only their labels would indicate that they do similar things. In certain cases however, it could be useful to define standard sub-classes of <code>Task</code> for some widely used and specific types of tasks, for example record linkage or hot-deck imputation. This would make it possible, for example, to refer to the relevant methodology.</p>
</section>
</section>
<section id="organizations">
<h2>Organizations</h2>
<p>The following classes are defined:</p>
<figure id="image-org">
<img src="img/coos-org.png" alt="Organizations vocabulary overview" title="Organizations vocabulary overview"/>
<figcaption>Organizations vocabulary overview</figcaption>
</figure>
<ul>
<li><code>StatisticalOrganization</code>: Organization, or unit within an organization, whose primary role is the production of official statistics.</li>
<li><code>NationalStatisticalInstitute</code>: The main producer of official statistics in a country and/or the organization responsible for coordinating all activities related to the development, production, and dissemination of official statistics in the national statistical system. The actual name given to the national statistical office in a country may be National Statistical Institute (NSI), National Bureau of Statistics (NBS), Central Bureau of Statistics (CBS), National Statistical Agency (NSA), Central Statistical Agency (CSA), Central Statistics Agency (CSA), etc.</li>
<li><code>InternationalAgency</code>: A body with an international membership, scope, or presence whose primary role is the production of official statistics. There are many types of international organizations. One way of categorizing them is to distinguish between intergovernmental organizations, supranational organizations and international non-governmental organizations.</li>
</ul>
<p>The use of <code>org:Organization</code> as a superclass makes it possible to benefit from all the constructs of the ORG ontology, for example the <code>org:hasUnit</code>/<code>org:unitOf</code> properties for the representation of the hierarchical links between organizations. The use of <code>prov:Organization</code> makes it possible to capture the relations between organizations, activities and products, for example <code>prov:wasAssociatedWith</code> can link a statistical activity to the NSI that conducts it.</p>
<p>It would also be useful to include in the base ontology instances of <code>StatisticalOrganization</code> representing the existing national statistical institutes and international statistical organizations. This would provide in particular a shared global identifier for each of these organizations.</p>
<div class="issue">
<p>Is there an "official" list of international agencies and NSIs?</p>
</div>
</section>
<section id="products">
<h2>Products</h2>
<p class="ednote" title="Information objects">Introduce <code>InformationObject</code>, <code>StatisticalInformationObject</code> and <code>StatisticalEntity</code> here, even if they cover more than products.</p>
<p>The COOS ontology defines two base classes in the "Products" domain: <code>StatisticalProduct</code> and <code>StatisticalDataset</code>.</p>
<div class="issue">
<p>Provide textual definition of Statistical Product and Statistical Dataset.</p>
</div>
<p>Here again, the PROV vocabulary is useful, in particular to provide provenance information on products and to link them to activities and organizations (for example <code>prov:wasGeneratedBy</code> from a product to an activity).</p>
<p>The <code>StatisticalDataset</code> class is also declared as a subclass of <code>dcat:Dataset</code>, which makes it possible to reuse the DCAT (or StatDCAT-AP) constructs, in particular to document datasets and their different distributions, group them in catalogues, etc.</p>
<figure id="image-prod">
<img src="img/coos-prod.png" alt="Products vocabulary main classes" title="Products vocabulary main classes"/>
<figcaption>Products vocabulary main classes</figcaption>
</figure>
<div class="issue">
<ul>
<li>Review the following text (reproduced from Dan's comment in issue #15)</li>
<li>Add comment on renaming "long" to "transposed"</li>
<li>Add metadataFor property</li>
<li>Add constructs about product content and presentation</li>
<li>Rescale figure</li>
</ul>
</div>
<p>In DDI-CDI ([[DDI-CDI]]), four basic structural types of organizing data sets have been defined: rectangular, event history, key-value pair, and dimensional. Here we add the 'graph' type, which covers also tree-like data. Several of the types could be used to structure the same data. There is not a canonical structure in all cases, though some data is much more amenable to one structure over the others.</p>
<p>The types are defined roughly as follows:</p>
<ol>
<li>rectangular (or wide) - rows are units and columns are variables</li>
<li>event history (or tall or long) - rows are based on the value for each variable, one unit at a time - and this could be visualized as rows are variables and columns are units</li>
<li>dimensional - a pre-defined set of cells defined by the combination of categories, one from each of a set of dimensions (category sets), used to handle the value of some measure (variable) restricted to the cell</li>
<li>key-value - a set of values, each associated with some key</li>
<li>graph - datapoints are nodes and relationships between them are edges in a graph structure</li>
</ol>
<p>Dimensional data are usually associated with aggregates. Key-value data are often taken from scraping the web. Event history is used to describe events over some time period.</p>
<p>The nominal, ordinal, interval, and ratio categories are not used to differentiate datasets. Rather, they are families of datatypes used to describe variables. Nominal data are those conforming to a finite set of categories with no other conditions (sex categories). Ordinal data are those conforming to an ordered finite set of categories, but the difference between adjacent categories is not necessarily uniform (Likert scale measures of satisfaction). Interval data are numeric with no zero (absence of quantity) defined (Celsius temperature). Ratio data are numeric with a defined zero (Kelvin temperature). These apply to any kind of statistical data.</p>
<p>The distinction between aggregate and unit data is based on the definition of the variables in the dataset. A dataset can contain both unit and aggregate data.</p>
<p>Access restrictions on data (e.g., public, restricted, private) are assigned by the business and can change over the life-cycle of the dataset.</p>
<p>The domain for a dataset is defined by the subject field that the data apply to. However, some datasets are merged from others, so a merged set can have the combination of the domains of its constituents. There seems to be no restriction on the number of subject fields.</p>
<p>Mode of transmission is not definitional for a dataset, as a single dataset can be obtained multiple ways. The phases of GSBPM may not be useful, as a single dataset can pass through a phase without change. Further, the phases impose a usage criterion (data for collection; data for editing; etc.) that seems arbitrary and would be useless in another domain (outside statistics).</p>
<p>Similarly, the explorative, temporary, and organizational categorization is based on intent, rather than the data per se. Plus, the categorization could change without any change to the data. If we change the organizational structure described above (rectangular, etc.), then we should call that a new dataset.</p>
<figure id="image-ds">
<img src="img/coos-ds.png" alt="Dataset types" title="Dataset types"/>
<figcaption>Dataset types</figcaption>
</figure>
</section>
<section id="conclusion">
<h2>Conclusion</h2>
<p>This paper proposes a collection of core concepts for official statistics, expressed as OWL constructs backed by elements of well-known standard vocabularies. Work still needs to be done on the precise definition of these concepts, on their properties and on their identification.</p>
<p>More effort is also needed to articulate this base ontology with more sectorial and in-depth works existing on GSIM, CSPA or the SIMS model for quality metadata.</p>
<p>The management and governance of these different initiatives also need to be established.</p>
</section>
<section class='appendix'>
<h2>Acknowledgements</h2>
<p>This is a placeholder for now.</p>
</section>
<section class="appendix">
<h2>Full copyright</h2>
<p>Copyright © 2021 UNECE, <i>All Rights Reserved</i><br/><a href="http://www.unece.org/">http://www.unece.org/</a></p>
<p>Content of this document is licensed under a Creative Commons License:<br/>Attribution 4.0 International (CC BY 4.0)</p>
<p>This is a human-readable summary of the Legal Code (the full license).<br/><a href="http://creativecommons.org/licenses/by/4.0/">http://creativecommons.org/licenses/by/4.0/</a></p>
<p>You are free to:</p>
<ul>
<li>Share - copy and redistribute the material in any medium or format</li>
<li>Remix - remix, transform, and build upon the material</li>
</ul>
<p>for any purpose, even commercially.</p>
<p>The licensor cannot revoke these freedoms as long as you follow the license terms.</p>
<p>Under the following terms:</p>
<ul>
<li>Attribution. You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.</li>
<li>No additional restrictions. You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.</li>
</ul>
<div style="font-size:80%; margin-top:40px; margin-bottom:40px">
<p>Disclaimer</p>
<p>This deed highlights only some of the key features and terms of the actual license. It is not a license and has no legal value. You should carefully review all of the terms and conditions of the actual license before using the licensed material.</p>
<p>Creative Commons is not a law firm and does not provide legal services. Distributing, displaying, or linking to this deed or the license that it summarizes does not create a lawyer-client or any other relationship.</p>
</div>
<p>Legal Code:<br/><a href="http://creativecommons.org/licenses/by/4.0/legalcode">http://creativecommons.org/licenses/by/4.0/legalcode</a></p>
</section>
</body>
</html>