Skip to content
This repository was archived by the owner on Oct 16, 2025. It is now read-only.

Commit 53d8637

Browse files
committed
provenance diff tools
1 parent 3e4691d commit 53d8637

File tree

5 files changed

+158
-0
lines changed

5 files changed

+158
-0
lines changed

src-colladmin/actions/storage_action.rb

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,49 @@ def perform_action
228228

229229
end
230230

231+
if @path == 'storage-get-provenance-yaml'
232+
srvc = get_storage_service
233+
endpoint = "/content/#{nodenum}/#{CGI.escape(ark)}/0/#{CGI.escape('system/provenance_manifest.xml')}"
234+
return '<message>Storage service undefined</message>' if srvc.empty?
235+
return '<message>Empty Ark</message>' if ark.empty?
236+
237+
begin
238+
qxml = HttpGetXml.new(srvc, endpoint)
239+
return "<message>Status #{qxml.status} for #{endpoint}</message>" unless qxml.status == 200
240+
if qxml.body.length > 5_000_000
241+
return { error: "Provenance Manifest is too large to download: use curl: #{srvc}#{endpoint}" }.to_json
242+
end
243+
244+
return ManifestToYaml.new.load_xml(qxml.body)
245+
rescue StandardError => e
246+
log(e.message)
247+
log(e.backtrace)
248+
return { error: "#{e.message} for #{endpoint}" }.to_json
249+
end
250+
251+
end
252+
253+
# storage-get-provenance-diff: fetch the current storage manifest and the
# provenance manifest for the object from the storage service, and return
# the differences computed by ManifestToYaml#load_xml_diff as pretty JSON.
if @path == 'storage-get-provenance-diff'
  srvc = get_storage_service
  currendpoint = "/manifest/#{nodenum}/#{CGI.escape(ark)}"
  oldendpoint = "/content/#{nodenum}/#{CGI.escape(ark)}/0/#{CGI.escape('system/provenance_manifest.xml')}"
  return '<message>Storage service undefined</message>' if srvc.empty?
  return '<message>Empty Ark</message>' if ark.empty?

  begin
    # NOTE(review): unlike storage-get-provenance-yaml, the HTTP status of
    # these two responses is not checked before their bodies are parsed --
    # confirm whether a non-200 response should short-circuit here.
    curr = HttpGetXml.new(srvc, currendpoint)
    old = HttpGetXml.new(srvc, oldendpoint)

    diff = ManifestToYaml.new.load_xml_diff(old.body, curr.body)
    return JSON.pretty_generate(diff)
  rescue StandardError => e
    log(e.message)
    log(e.backtrace)
    # This branch never assigns `endpoint`; report the two endpoints it
    # actually requested so the error message identifies the failing URLs.
    return { error: "#{e.message} for #{oldendpoint} or #{currendpoint}" }.to_json
  end
end
273+
231274
if @path == 'storage-get-ingest-checkm'
232275
ver = @myparams.fetch('ver', '1').to_i
233276
srvc = get_storage_service

src-colladmin/config/actions.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,40 @@ storage-get-manifest-yaml:
433433
Convert the downloaded xml file to a user-friendly yaml format.
434434
documentation: |
435435
Storage: GET /manifest/{nodenum}/{ark}
436+
storage-get-provenance-yaml:
437+
link-title: Get Storage Provenance for an Object from Cloud Storage as a Yaml file
438+
class: StorageAction
439+
category: Storage Manifest
440+
sensitivity: readonly
441+
testing: automated
442+
format: xml
443+
test_params:
444+
ark: ark:/99999/fk4dv31c0b
445+
nodenum: 9502
446+
description: |
447+
This will fail if the provenance file does not exist.
448+
449+
Download the XML *provenance manifest* (`system/provenance_manifest.xml`) of an object from a specific storage node. This request will be forwarded to the storage service.
450+
Convert the downloaded xml file to a user-friendly yaml format.
451+
documentation: |
452+
Storage: GET /content/{nodenum}/{ark}/0/system/provenance_manifest.xml
453+
storage-get-provenance-diff:
454+
link-title: Get Differences between the Storage Provenance Manifest and the Current Storage Manifest for an Object
455+
class: StorageAction
456+
category: Storage Manifest
457+
sensitivity: readonly
458+
testing: automated
459+
format: xml
460+
test_params:
461+
ark: ark:/99999/fk4dv31c0b
462+
nodenum: 9502
463+
description: |
464+
This will fail if the provenance file does not exist.
465+
466+
Download both the XML *provenance manifest* (`system/provenance_manifest.xml`) and the current XML *storage manifest* of an object from a specific storage node. These requests will be forwarded to the storage service.
467+
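Compare the provenance manifest with the current storage manifest and return, as JSON keyed by digest, the file paths whose digest differs or that appear in only one of them.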
468+
documentation: |
469+
Storage: GET /manifest/{nodenum}/{ark} and GET /content/{nodenum}/{ark}/0/system/provenance_manifest.xml
436470
storage-get-ingest-checkm:
437471
link-title: Generate Ingest Checkm Manifest Using Content from Cloud Storage
438472
class: StorageAction

src-colladmin/lib/manifest_to_yaml.rb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,43 @@ def load_xml(xmlbody)
6666
manifest = JSON.parse(manifest.to_json)
6767
"# See https://github.com/CDLUC3/merritt-tinker/tree/main/yaml-manifest for more info\n#{YAML.dump(manifest)}"
6868
end
69+
70+
71+
# Build a lookup of file digests from a manifest XML document.
#
# The XML is parsed with namespaces stripped. The object id is read from
# /objectInfo/object (the last such element wins; empty string when none
# is present). Every manifest/file element under each
# /objectInfo/versions/version contributes one entry.
#
# @param xmlbody [String] manifest XML text
# @return [Hash{String => String}] "<ark>/<version id as integer>/<file id>"
#   mapped to the text of that file's digest element
def load_paths(xmlbody)
  document = Nokogiri::XML(xmlbody)
  document.remove_namespaces!

  object_node = document.xpath('/objectInfo/object').last
  ark = object_node ? object_node['id'] : ''

  document.xpath('/objectInfo/versions/version').each_with_object({}) do |version, digests|
    vernum = version['id'].to_i
    version.xpath('manifest/file').each do |file|
      digests["#{ark}/#{vernum}/#{file['id']}"] = file.xpath('digest').text
    end
  end
end
88+
89+
# Compare two manifest XML documents and report the paths that differ.
#
# Both documents are reduced to path => digest maps by load_paths. A path
# is reported when its digest is not the same on both sides, which includes
# paths present on only one side.
#
# @param old [String] XML of the older manifest
# @param curr [String] XML of the current manifest
# @return [Hash] digest => { oldpath: [...], newpath: [...] }; :oldpath lists
#   paths from +old+ carrying that digest, :newpath lists paths from +curr+
def load_xml_diff(old, curr)
  pathsold = load_paths(old)
  pathscurr = load_paths(curr)

  diff = {}
  # Create the per-digest bucket on first use, then append the path to it.
  record = lambda do |digest, side, path|
    (diff[digest] ||= { oldpath: [], newpath: [] })[side] << path
  end

  pathsold.each do |path, digest|
    record.call(digest, :oldpath, path) unless pathscurr[path] == digest
  end
  pathscurr.each do |path, digest|
    record.call(digest, :newpath, path) unless pathsold[path] == digest
  end
  diff
end
69108
end

src-colladmin/template/storage.js

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,32 @@ function init() {
250250
invoke_text(params, fname);
251251
});
252252

253+
$("button.storage-get-provenance-yaml").on("click", function(){
254+
var ark = $(this).attr("data-ark");
255+
var nodenum = $(this).attr("data-node-num");
256+
params = {
257+
path: 'storage-get-provenance-yaml',
258+
ark: ark,
259+
nodenum: nodenum
260+
}
261+
const RE=/[\/:]+/g;
262+
fname = "provenance_manifest." + ark.replaceAll(RE, '_') + ".yml";
263+
invoke_text(params, fname);
264+
});
265+
266+
$("button.storage-get-provenance-diff").on("click", function(){
267+
var ark = $(this).attr("data-ark");
268+
var nodenum = $(this).attr("data-node-num");
269+
params = {
270+
path: 'storage-get-provenance-diff',
271+
ark: ark,
272+
nodenum: nodenum
273+
}
274+
const RE=/[\/:]+/g;
275+
fname = "provenance_manifest_diff." + ark.replaceAll(RE, '_') + ".txt";
276+
invoke_text(params, fname);
277+
});
278+
253279
$("button.storage-get-augmented-manifest").on("click", function(){
254280
var ark = $(this).attr("data-ark");
255281
var nodenum = $(this).attr("data-node-num");

src-colladmin/web/storeObjectNodes.html

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,22 @@ <h2>Object Node Actions</h2>
166166
class="colladmin storage-get-manifest-yaml"
167167
>Get Storage Manifest Yaml</button>
168168

169+
<button
170+
data-ark="{{ark}}"
171+
data-node-num="{{number}}"
172+
data-version="{{version}}"
173+
title="Get Provenance Yaml"
174+
class="colladmin storage-get-provenance-yaml"
175+
>Get Storage Provenance Yaml</button>
176+
177+
<button
178+
data-ark="{{ark}}"
179+
data-node-num="{{number}}"
180+
data-version="{{version}}"
181+
title="Get Provenance Diff"
182+
class="colladmin storage-get-provenance-diff"
183+
>Get Storage Provenance Diff</button>
184+
169185
<button
170186
data-ark="{{ark}}"
171187
data-node-num="{{number}}"

0 commit comments

Comments
 (0)