Add script to regenerate catalog_upload.json files

Add helper script to regenerate the catalog_upload.json file(s) from the *.yaml files that are found inside the locally cloned katalog repo.

Regenerated files:
- bootstrapper/catalog_upload.json
- quickstart/catalog_upload.json

Current caveats:
- no documentation yet
- no command parameters, no command line help
- the user must have cloned the katalog repo and the mlx repo in the same parent folder (in the future those paths should be configurable as parameters)
- not every YAML file in the katalog repo should necessarily be included in the catalog upload, so after the files get regenerated, the user (MLX maintainer) needs to exercise some judgment
- assets that live outside the katalog repo will not be found and have to be re-added (or not removed) using a Git enabled IDE or git diff tool
- this initial commit reorders some of the assets (based on filename)

Signed-off-by: Christian Kadner <[email protected]>
machine-learning-exchange · Sep 2, 2021 · 9201347 · 9201347
1 parent 7022f7d
commit 9201347
Show file tree

Hide file tree

Showing 3 changed files with 189 additions and 94 deletions.
diff --git a/bootstrapper/catalog_upload.json b/bootstrapper/catalog_upload.json
@@ -1,5 +1,9 @@
 {
   "components": [
+    {
+      "name": "Create Secret - Kubernetes Cluster",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/create-secret/component.yaml"
+    },
     {
       "name": "Generate Dataset Metadata",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dax-to-dlf/component.yaml"
@@ -8,10 +12,6 @@
       "name": "Create Dataset Volume",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dlf/component.yaml"
     },
-    {
-      "name": "Create Secret - Kubernetes Cluster",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/create-secret/component.yaml"
-    },
     {
       "name": "Echo Sample",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/echo/component.yaml"
@@ -23,20 +23,12 @@
     {
       "name": "Create Model Config",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/model-config/component.yaml"
-    },
-    {
-      "name": "Model Fairness Check",
-      "url": "https://raw.githubusercontent.com/Trusted-AI/AIF360/master/mlops/kubeflow/bias_detector_pytorch/component.yaml"
-    },
-    {
-      "name": "Adversarial Robustness Evaluation",
-      "url": "https://raw.githubusercontent.com/Trusted-AI/adversarial-robustness-toolbox/main/utils/mlops/kubeflow/robustness_evaluation_fgsm_pytorch/component.yaml"
     }
   ],
   "datasets": [
     {
-      "name": "Thematic Clustering",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering/thematic_clustering.yaml"
+      "name": "Project CodeNet",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet/codenet.yaml"
     },
     {
       "name": "Finance Proposition Bank",
@@ -50,10 +42,6 @@
       "name": "NOAA Weather Data",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/jfk/jfk.yaml"
     },
-    {
-      "name": "Project CodeNet",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet/codenet.yaml"
-    },
     {
       "name": "PubLayNet",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/publaynet/publaynet.yaml"
@@ -62,6 +50,10 @@
       "name": "PubTabNet",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/pubtabnet/pubtabnet.yaml"
     },
+    {
+      "name": "Thematic Clustering",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering/thematic_clustering.yaml"
+    },
     {
       "name": "TensorFlow Speech Commands",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/tsc/tsc.yaml"
@@ -73,20 +65,20 @@
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-human-pose-estimator.yaml"
     },
     {
-      "name": "MAX Image Resolution Enhancer",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-resolution-enhancer.yaml"
+      "name": "MAX Image Caption Generator",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator.yaml"
     },
     {
-      "name": "MAX Optical Character Recognition",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-ocr.yaml"
+      "name": "MAX Image Resolution Enhancer",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-resolution-enhancer.yaml"
     },
     {
-      "name": "MAX Image Caption Generator",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator.yaml"
+      "name": "MAX Object Detector",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector.yaml"
     },
     {
-      "name": "MAX Toxic Comment Classifier",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-toxic-comment-classifier.yaml"
+      "name": "MAX Optical Character Recognition",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-ocr.yaml"
     },
     {
       "name": "MAX Question Answering",
@@ -96,35 +88,35 @@
       "name": "MAX Recommender System",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-recommender.yaml"
     },
-    {
-      "name": "MAX Object Detector",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector.yaml"
-    },
     {
       "name": "MAX Text Sentiment Classifier",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-text-sentiment-classifier.yaml"
     },
+    {
+      "name": "MAX Toxic Comment Classifier",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-toxic-comment-classifier.yaml"
+    },
     {
       "name": "MAX Weather Forecaster",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-weather-forecaster.yaml"
     }
   ],
   "notebooks": [
     {
-      "name": "ART detector model",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-detector.yaml"
-    },
-    {
-      "name": "ART poisoning attack",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-poison.yaml"
+      "name": "JFK Airport Analysis",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/JFK-airport.yaml"
     },
     {
       "name": "AIF360 Bias detection example",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/aif-bias.yaml"
     },
     {
-      "name": "JFK Airport Analysis",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/JFK-airport.yaml"
+      "name": "ART detector model",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-detector.yaml"
+    },
+    {
+      "name": "ART poisoning attack",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-poison.yaml"
     },
     {
       "name": "Project CodeNet Language Classification",
@@ -148,14 +140,6 @@
       "name": "ResourceOp Basic",
       "url": "https://github.com/kubeflow/kfp-tekton/blob/master/sdk/python/tests/compiler/testdata/resourceop_basic.yaml"
     },
-    {
-      "name": "Trusted AI Pipeline",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/trusted-ai-pipeline.yaml"
-    },
-    {
-      "name": "Watson Machine Learning",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/wml-pipeline.yaml"
-    },
     {
       "name": "Calculation Pipeline",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/calculation-pipeline.yaml"
@@ -167,6 +151,14 @@
     {
       "name": "Nested Pipeline",
       "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/nested-pipeline.yaml"
+    },
+    {
+      "name": "Trusted AI Pipeline",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/trusted-ai-pipeline.yaml"
+    },
+    {
+      "name": "Watson Machine Learning",
+      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/wml-pipeline.yaml"
     }
   ]
 }
diff --git a/hack/regenerate_catalog_upload_json.py b/hack/regenerate_catalog_upload_json.py
@@ -0,0 +1,111 @@
+# Copyright 2021 IBM Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import json
+import yaml
+
+from glob import glob
+from os.path import abspath, dirname, relpath
+
+
+# Singular asset type names. Each one is used to build the
+# "<asset_type>-samples" glob pattern and the "<asset_type>s" key of the
+# generated dictionary.
+asset_types = [
+    "component",
+    "dataset",
+    "model",
+    "notebook",
+    "pipeline",
+]
+
+# Absolute path of the directory containing this script, and its parent
+# directory, which is treated as the project directory.
+script_path = abspath(dirname(__file__))
+project_dir = dirname(script_path)
+
+# Location of the katalog clone (a sibling of the project directory) and the
+# base URL that is prefixed to each asset's path relative to katalog_dir.
+katalog_dir = f"{project_dir}/../katalog"  # TODO: don't assume user cloned katalog and mlx repos into same parent folder
+katalog_url = "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/"
+
+# Files that get overwritten with the regenerated catalog.
+catalog_upload_json_files = [
+    f"{project_dir}/bootstrapper/catalog_upload.json",
+    f"{project_dir}/quickstart/catalog_upload.json",
+]
+
+
+def get_list_of_yaml_files_in_katalog(asset_type: str):
+    """Return the sorted asset YAML files of one asset type in the katalog clone.
+
+    Recursively collects every ``*.yaml`` file below
+    ``<katalog_dir>/<asset_type>-samples`` and drops the files whose path
+    inside the katalog repo contains "template", "test" or "src".
+
+    :param asset_type: singular asset type, e.g. "component" or "dataset"
+    :return: sorted list of the matching file paths as returned by ``glob``
+    """
+    excluded_markers = ("template", "test", "src")
+
+    yaml_files = glob(f"{katalog_dir}/{asset_type}-samples/**/*.yaml", recursive=True)
+
+    def is_catalog_asset(yaml_file: str) -> bool:
+        # Match the markers against the path relative to the katalog repo only.
+        # Matching against the full path would discard every file as soon as
+        # the checkout location itself contains a marker (e.g. "~/src/mlx").
+        path_in_katalog = relpath(yaml_file, katalog_dir)
+        return not any(marker in path_in_katalog for marker in excluded_markers)
+
+    # Filtering preserves order, so one sort is enough.
+    return sorted(filter(is_catalog_asset, yaml_files))
+
+
+def generate_katalog_dict() -> dict:
+    """Build the catalog dictionary from the YAML files in the katalog clone.
+
+    For every entry in ``asset_types`` the matching YAML files are loaded and
+    turned into ``{"name": ..., "url": ...}`` items. The name is the top-level
+    ``name`` field or, if that is missing, ``metadata.name`` with dashes
+    replaced by spaces and title-cased; it is an empty string when neither is
+    present. The URL is ``katalog_url`` plus the file path relative to
+    ``katalog_dir``.
+
+    :return: dict mapping "<asset_type>s" to the list of asset items
+    """
+    from os.path import sep  # local import: only needed to normalize URL paths
+
+    katalog_dict = dict()
+
+    for asset_type in asset_types:
+
+        katalog_asset_list = []
+
+        for yaml_file in get_list_of_yaml_files_in_katalog(asset_type):
+
+            # Read as UTF-8 explicitly so the result does not depend on the
+            # platform's default encoding.
+            with open(yaml_file, encoding="utf-8") as f:
+                # NOTE(review): FullLoader can construct more than plain data
+                # types; consider yaml.safe_load if the katalog files only
+                # contain plain mappings - confirm before switching.
+                yaml_dict = yaml.load(f, Loader=yaml.FullLoader)
+
+            # An empty file loads as None and a list document has no .get();
+            # treat both like a mapping without any fields.
+            if not isinstance(yaml_dict, dict):
+                yaml_dict = {}
+
+            # "metadata:" without a value loads as None.
+            metadata = yaml_dict.get("metadata")
+            if not isinstance(metadata, dict):
+                metadata = {}
+
+            asset_name = yaml_dict.get("name") or \
+                (metadata.get("name") or "").replace("-", " ").title()
+
+            # URLs always use forward slashes, whatever the OS path separator.
+            asset_url = katalog_url + relpath(yaml_file, katalog_dir).replace(sep, "/")
+
+            katalog_asset_list.append({
+                "name": asset_name,
+                "url": asset_url
+            })
+
+        katalog_dict[asset_type + "s"] = katalog_asset_list
+
+    return katalog_dict
+
+
+def rewrite_catalog_upload_json_files(katalog: dict):
+    """Overwrite every file in ``catalog_upload_json_files`` with ``katalog``.
+
+    The dictionary is serialized as JSON with a two-space indent, keeping the
+    key order, followed by a trailing newline. The path of each file, relative
+    to ``project_dir``, is printed as it is written.
+
+    :param katalog: catalog dictionary to serialize
+    """
+    for target_path in catalog_upload_json_files:
+        with open(target_path, "w") as target:
+            shown_path = relpath(target_path, project_dir)
+            print(" - " + shown_path)
+            target.write(json.dumps(katalog, sort_keys=False, indent=2) + "\n")
+
+
+def main():
+    """Regenerate the catalog_upload.json files and print progress messages."""
+    print("Regenerating catalog_upload.json files:")
+    rewrite_catalog_upload_json_files(generate_katalog_dict())
+    print("Done. Use git diff to evaluate if and which changes are desired!")
+
+
+# Script entry point: run the regeneration only when this file is executed
+# directly, not when it is imported as a module.
+if __name__ == '__main__':
+
+    main()
+