From 91b77aabc8d95eb81ffe92c62b783be79a134ddd Mon Sep 17 00:00:00 2001 From: Gal Moyal Date: Mon, 25 Nov 2024 11:06:51 +0200 Subject: [PATCH 1/5] added ml bom Signed-off-by: Gal Moyal --- MLBOM/Dataset/bom.json | 388 ++++++++++++++++++++++++++ MLBOM/Model/FoundationModels/bom.json | 26 ++ MLBOM/Model/OpenSource/bom.json | 63 +++++ MLBOM/README.md | 28 ++ 4 files changed, 505 insertions(+) create mode 100644 MLBOM/Dataset/bom.json create mode 100644 MLBOM/Model/FoundationModels/bom.json create mode 100644 MLBOM/Model/OpenSource/bom.json create mode 100644 MLBOM/README.md diff --git a/MLBOM/Dataset/bom.json b/MLBOM/Dataset/bom.json new file mode 100644 index 0000000..4fb556a --- /dev/null +++ b/MLBOM/Dataset/bom.json @@ -0,0 +1,388 @@ +{ + "$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:75de3b9b-9e53-4421-a259-11f18afc22bf", + "version": 1, + "metadata": { + "timestamp": "2024-11-24T13:10:49Z" + }, + "components": [ + { + "type": "data", + "supplier": { + "name": "Wikimedia" + }, + "manufacturer": { + "name": "Wikimedia" + }, + "publisher": "Hugging Face Inc", + "name": "wikipedia", + "version": "b04c8d1ceb2f5cd4588862100d08de323dccfbaa", + "licenses": [ + { + "license": { + "id": "CC-BY-SA-3.0", + "name": "Creative Commons Attribution Share Alike 3.0", + "url": "https://spdx.org/licenses/CC-BY-SA-3.0.html" + } + }, + { + "license": { + "id": "GFDL-1.3", + "name": "GNU Free Documentation License family", + "url": "https://www.gnu.org/licenses/fdl-1.3.en.html" + } + } + ], + "externalReferences": [ + { + "type": "website", + "url": "https://huggingface.co/datasets/wikimedia/wikipedia" + } + ], + "hashes": [ + { + "alg": "SHA-1", + "content": "b04c8d1ceb2f5cd4588862100d08de323dccfbaa" + } + ], + "tags": [ + "task_categories:text-generation", + "task_categories:fill-mask", + "task_ids:language-modeling", + "task_ids:masked-language-modeling", + "language:ab", + "language:ace", 
+ "language:ady", + "language:af", + "language:alt", + "language:am", + "language:ami", + "language:an", + "language:ang", + "language:anp", + "language:ar", + "language:arc", + "language:ary", + "language:arz", + "language:as", + "language:ast", + "language:atj", + "language:av", + "language:avk", + "language:awa", + "language:ay", + "language:az", + "language:azb", + "language:ba", + "language:ban", + "language:bar", + "language:bbc", + "language:bcl", + "language:be", + "language:bg", + "language:bh", + "language:bi", + "language:bjn", + "language:blk", + "language:bm", + "language:bn", + "language:bo", + "language:bpy", + "language:br", + "language:bs", + "language:bug", + "language:bxr", + "language:ca", + "language:cbk", + "language:cdo", + "language:ce", + "language:ceb", + "language:ch", + "language:chr", + "language:chy", + "language:ckb", + "language:co", + "language:cr", + "language:crh", + "language:cs", + "language:csb", + "language:cu", + "language:cv", + "language:cy", + "language:da", + "language:dag", + "language:de", + "language:dga", + "language:din", + "language:diq", + "language:dsb", + "language:dty", + "language:dv", + "language:dz", + "language:ee", + "language:el", + "language:eml", + "language:en", + "language:eo", + "language:es", + "language:et", + "language:eu", + "language:ext", + "language:fa", + "language:fat", + "language:ff", + "language:fi", + "language:fj", + "language:fo", + "language:fon", + "language:fr", + "language:frp", + "language:frr", + "language:fur", + "language:fy", + "language:ga", + "language:gag", + "language:gan", + "language:gcr", + "language:gd", + "language:gl", + "language:glk", + "language:gn", + "language:gom", + "language:gor", + "language:got", + "language:gpe", + "language:gsw", + "language:gu", + "language:guc", + "language:gur", + "language:guw", + "language:gv", + "language:ha", + "language:hak", + "language:haw", + "language:hbs", + "language:he", + "language:hi", + "language:hif", + "language:hr", + 
"language:hsb", + "language:ht", + "language:hu", + "language:hy", + "language:hyw", + "language:ia", + "language:id", + "language:ie", + "language:ig", + "language:ik", + "language:ilo", + "language:inh", + "language:io", + "language:is", + "language:it", + "language:iu", + "language:ja", + "language:jam", + "language:jbo", + "language:jv", + "language:ka", + "language:kaa", + "language:kab", + "language:kbd", + "language:kbp", + "language:kcg", + "language:kg", + "language:ki", + "language:kk", + "language:kl", + "language:km", + "language:kn", + "language:ko", + "language:koi", + "language:krc", + "language:ks", + "language:ksh", + "language:ku", + "language:kv", + "language:kw", + "language:ky", + "language:la", + "language:lad", + "language:lb", + "language:lbe", + "language:lez", + "language:lfn", + "language:lg", + "language:li", + "language:lij", + "language:lld", + "language:lmo", + "language:ln", + "language:lo", + "language:lt", + "language:ltg", + "language:lv", + "language:lzh", + "language:mad", + "language:mai", + "language:map", + "language:mdf", + "language:mg", + "language:mhr", + "language:mi", + "language:min", + "language:mk", + "language:ml", + "language:mn", + "language:mni", + "language:mnw", + "language:mr", + "language:mrj", + "language:ms", + "language:mt", + "language:mwl", + "language:my", + "language:myv", + "language:mzn", + "language:nah", + "language:nan", + "language:nap", + "language:nds", + "language:ne", + "language:new", + "language:nia", + "language:nl", + "language:nn", + "language:no", + "language:nov", + "language:nqo", + "language:nrf", + "language:nso", + "language:nv", + "language:ny", + "language:oc", + "language:olo", + "language:om", + "language:or", + "language:os", + "language:pa", + "language:pag", + "language:pam", + "language:pap", + "language:pcd", + "language:pcm", + "language:pdc", + "language:pfl", + "language:pi", + "language:pih", + "language:pl", + "language:pms", + "language:pnb", + "language:pnt", + 
"language:ps", + "language:pt", + "language:pwn", + "language:qu", + "language:rm", + "language:rmy", + "language:rn", + "language:ro", + "language:ru", + "language:rue", + "language:rup", + "language:rw", + "language:sa", + "language:sah", + "language:sat", + "language:sc", + "language:scn", + "language:sco", + "language:sd", + "language:se", + "language:sg", + "language:sgs", + "language:shi", + "language:shn", + "language:si", + "language:sk", + "language:skr", + "language:sl", + "language:sm", + "language:smn", + "language:sn", + "language:so", + "language:sq", + "language:sr", + "language:srn", + "language:ss", + "language:st", + "language:stq", + "language:su", + "language:sv", + "language:sw", + "language:szl", + "language:szy", + "language:ta", + "language:tay", + "language:tcy", + "language:te", + "language:tet", + "language:tg", + "language:th", + "language:ti", + "language:tk", + "language:tl", + "language:tly", + "language:tn", + "language:to", + "language:tpi", + "language:tr", + "language:trv", + "language:ts", + "language:tt", + "language:tum", + "language:tw", + "language:ty", + "language:tyv", + "language:udm", + "language:ug", + "language:uk", + "language:ur", + "language:uz", + "language:ve", + "language:vec", + "language:vep", + "language:vi", + "language:vls", + "language:vo", + "language:vro", + "language:wa", + "language:war", + "language:wo", + "language:wuu", + "language:xal", + "language:xh", + "language:xmf", + "language:yi", + "language:yo", + "language:yue", + "language:za", + "language:zea", + "language:zgh", + "language:zh", + "language:zu", + "size_categories:10M Date: Wed, 4 Dec 2024 10:32:53 +0200 Subject: [PATCH 2/5] Removed all languages to avoid clutter Signed-off-by: Gal Moyal --- MLBOM/Dataset/bom.json | 320 ----------------------------------------- 1 file changed, 320 deletions(-) diff --git a/MLBOM/Dataset/bom.json b/MLBOM/Dataset/bom.json index 4fb556a..6062a61 100644 --- a/MLBOM/Dataset/bom.json +++ 
b/MLBOM/Dataset/bom.json @@ -52,328 +52,8 @@ "task_categories:fill-mask", "task_ids:language-modeling", "task_ids:masked-language-modeling", - "language:ab", - "language:ace", - "language:ady", - "language:af", - "language:alt", - "language:am", - "language:ami", - "language:an", - "language:ang", - "language:anp", - "language:ar", - "language:arc", - "language:ary", - "language:arz", - "language:as", - "language:ast", - "language:atj", - "language:av", - "language:avk", - "language:awa", - "language:ay", - "language:az", - "language:azb", - "language:ba", - "language:ban", - "language:bar", - "language:bbc", - "language:bcl", - "language:be", - "language:bg", - "language:bh", - "language:bi", - "language:bjn", - "language:blk", - "language:bm", - "language:bn", - "language:bo", - "language:bpy", - "language:br", - "language:bs", - "language:bug", - "language:bxr", - "language:ca", - "language:cbk", - "language:cdo", - "language:ce", - "language:ceb", - "language:ch", - "language:chr", - "language:chy", - "language:ckb", - "language:co", - "language:cr", - "language:crh", - "language:cs", - "language:csb", - "language:cu", - "language:cv", - "language:cy", - "language:da", - "language:dag", - "language:de", - "language:dga", - "language:din", - "language:diq", - "language:dsb", - "language:dty", - "language:dv", - "language:dz", - "language:ee", - "language:el", - "language:eml", "language:en", - "language:eo", "language:es", - "language:et", - "language:eu", - "language:ext", - "language:fa", - "language:fat", - "language:ff", - "language:fi", - "language:fj", - "language:fo", - "language:fon", - "language:fr", - "language:frp", - "language:frr", - "language:fur", - "language:fy", - "language:ga", - "language:gag", - "language:gan", - "language:gcr", - "language:gd", - "language:gl", - "language:glk", - "language:gn", - "language:gom", - "language:gor", - "language:got", - "language:gpe", - "language:gsw", - "language:gu", - "language:guc", - "language:gur", - 
"language:guw", - "language:gv", - "language:ha", - "language:hak", - "language:haw", - "language:hbs", - "language:he", - "language:hi", - "language:hif", - "language:hr", - "language:hsb", - "language:ht", - "language:hu", - "language:hy", - "language:hyw", - "language:ia", - "language:id", - "language:ie", - "language:ig", - "language:ik", - "language:ilo", - "language:inh", - "language:io", - "language:is", - "language:it", - "language:iu", - "language:ja", - "language:jam", - "language:jbo", - "language:jv", - "language:ka", - "language:kaa", - "language:kab", - "language:kbd", - "language:kbp", - "language:kcg", - "language:kg", - "language:ki", - "language:kk", - "language:kl", - "language:km", - "language:kn", - "language:ko", - "language:koi", - "language:krc", - "language:ks", - "language:ksh", - "language:ku", - "language:kv", - "language:kw", - "language:ky", - "language:la", - "language:lad", - "language:lb", - "language:lbe", - "language:lez", - "language:lfn", - "language:lg", - "language:li", - "language:lij", - "language:lld", - "language:lmo", - "language:ln", - "language:lo", - "language:lt", - "language:ltg", - "language:lv", - "language:lzh", - "language:mad", - "language:mai", - "language:map", - "language:mdf", - "language:mg", - "language:mhr", - "language:mi", - "language:min", - "language:mk", - "language:ml", - "language:mn", - "language:mni", - "language:mnw", - "language:mr", - "language:mrj", - "language:ms", - "language:mt", - "language:mwl", - "language:my", - "language:myv", - "language:mzn", - "language:nah", - "language:nan", - "language:nap", - "language:nds", - "language:ne", - "language:new", - "language:nia", - "language:nl", - "language:nn", - "language:no", - "language:nov", - "language:nqo", - "language:nrf", - "language:nso", - "language:nv", - "language:ny", - "language:oc", - "language:olo", - "language:om", - "language:or", - "language:os", - "language:pa", - "language:pag", - "language:pam", - "language:pap", - 
"language:pcd", - "language:pcm", - "language:pdc", - "language:pfl", - "language:pi", - "language:pih", - "language:pl", - "language:pms", - "language:pnb", - "language:pnt", - "language:ps", - "language:pt", - "language:pwn", - "language:qu", - "language:rm", - "language:rmy", - "language:rn", - "language:ro", - "language:ru", - "language:rue", - "language:rup", - "language:rw", - "language:sa", - "language:sah", - "language:sat", - "language:sc", - "language:scn", - "language:sco", - "language:sd", - "language:se", - "language:sg", - "language:sgs", - "language:shi", - "language:shn", - "language:si", - "language:sk", - "language:skr", - "language:sl", - "language:sm", - "language:smn", - "language:sn", - "language:so", - "language:sq", - "language:sr", - "language:srn", - "language:ss", - "language:st", - "language:stq", - "language:su", - "language:sv", - "language:sw", - "language:szl", - "language:szy", - "language:ta", - "language:tay", - "language:tcy", - "language:te", - "language:tet", - "language:tg", - "language:th", - "language:ti", - "language:tk", - "language:tl", - "language:tly", - "language:tn", - "language:to", - "language:tpi", - "language:tr", - "language:trv", - "language:ts", - "language:tt", - "language:tum", - "language:tw", - "language:ty", - "language:tyv", - "language:udm", - "language:ug", - "language:uk", - "language:ur", - "language:uz", - "language:ve", - "language:vec", - "language:vep", - "language:vi", - "language:vls", - "language:vo", - "language:vro", - "language:wa", - "language:war", - "language:wo", - "language:wuu", - "language:xal", - "language:xh", - "language:xmf", - "language:yi", - "language:yo", - "language:yue", - "language:za", - "language:zea", - "language:zgh", - "language:zh", - "language:zu", "size_categories:10M Date: Wed, 4 Dec 2024 11:40:40 +0200 Subject: [PATCH 3/5] Added reference in the readme Signed-off-by: Gal Moyal --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md 
index 759c207..2f19262 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ are categorized by different BOM types including: | [SBOM](SBOM) | Software Bill of Materials | | [VDR](VDR) | Vulnerability Disclosure Report | | [VEX](VEX) | Vulnerability Exploitability eXchange | +| [MLBOM](MLBOM) | Machine Learning Bill of Materials | When possible, the BOMs conform to the latest version of the CycloneDX specification. From cb2791f116f3db259d57cdec4758c33fe96dda8c Mon Sep 17 00:00:00 2001 From: Gal Moyal Date: Wed, 4 Dec 2024 13:15:40 +0200 Subject: [PATCH 4/5] Changed the XBOM to be in alphabetical order Signed-off-by: Gal Moyal --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f19262..7389f87 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,12 @@ are categorized by different BOM types including: |--------------------|-----------------------------------------| | [CBOM](CBOM) | Cryptography Bill of Materials | | [HBOM](HBOM) | Hardware Bill of Materials | +| [MLBOM](MLBOM) | Machine Learning Bill of Materials | | [OBOM](OBOM) | Operations Bill of Materials | | [SaaSBOM](SaaSBOM) | Software-as-a-Service Bill of Materials | | [SBOM](SBOM) | Software Bill of Materials | | [VDR](VDR) | Vulnerability Disclosure Report | | [VEX](VEX) | Vulnerability Exploitability eXchange | -| [MLBOM](MLBOM) | Machine Learning Bill of Materials | When possible, the BOMs conform to the latest version of the CycloneDX specification. 
From 64378bacab33b7dca43d1fc8205a30bbf146a44e Mon Sep 17 00:00:00 2001 From: Gal Moyal Date: Tue, 10 Dec 2024 18:43:44 +0200 Subject: [PATCH 5/5] Implemented CR Notes Signed-off-by: Gal Moyal --- MLBOM/Dataset/bom.json | 81 ++++++++++++++++++++++----- MLBOM/Model/FoundationModels/bom.json | 23 +++++++- MLBOM/Model/OpenSource/bom.json | 34 +++++++++-- 3 files changed, 117 insertions(+), 21 deletions(-) diff --git a/MLBOM/Dataset/bom.json b/MLBOM/Dataset/bom.json index 6062a61..904afb2 100644 --- a/MLBOM/Dataset/bom.json +++ b/MLBOM/Dataset/bom.json @@ -19,6 +19,15 @@ "publisher": "Hugging Face Inc", "name": "wikipedia", "version": "b04c8d1ceb2f5cd4588862100d08de323dccfbaa", + "data": [ + { + "type": "dataset", + "name": "wikipedia", + "contents": { + "url": "https://huggingface.co/datasets/wikimedia/wikipedia" + } + } + ], "licenses": [ { "license": { @@ -47,21 +56,63 @@ "content": "b04c8d1ceb2f5cd4588862100d08de323dccfbaa" } ], - "tags": [ - "task_categories:text-generation", - "task_categories:fill-mask", - "task_ids:language-modeling", - "task_ids:masked-language-modeling", - "language:en", - "language:es", - "size_categories:10M