Skip to content

Commit

Permalink
Add script to regenerate catalog_upload.json files
Browse files Browse the repository at this point in the history
Add helper script to regenerate the catalog_upload.json
file(s) from the *.yaml files that are found inside the
locally cloned katalog repo.

Regenerated files:
- bootstrapper/catalog_upload.json
- quickstart/catalog_upload.json

Current caveats:
- no documentation yet
- no command parameters, no command line help
- the user must have cloned the katalog repo and the mlx
  repo in the same parent folder (in the future those paths
  should be configurable as parameters)
- not every YAML file in the katalog repo should necessarily
  be included in the catalog upload, so after the files
  get regenerated, the user (MLX maintainer) needs to
  exercise some judgment
- assets that live outside the katalog repo will not be
  found and have to be re-added (or not removed) using a
  Git enabled IDE or git diff tool
- this initial commit reorders some of the assets (based
  on filename)

Signed-off-by: Christian Kadner <[email protected]>
  • Loading branch information
ckadner committed Sep 2, 2021
1 parent 7022f7d commit 9201347
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 94 deletions.
84 changes: 38 additions & 46 deletions bootstrapper/catalog_upload.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"components": [
{
"name": "Create Secret - Kubernetes Cluster",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/create-secret/component.yaml"
},
{
"name": "Generate Dataset Metadata",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dax-to-dlf/component.yaml"
Expand All @@ -8,10 +12,6 @@
"name": "Create Dataset Volume",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dlf/component.yaml"
},
{
"name": "Create Secret - Kubernetes Cluster",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/create-secret/component.yaml"
},
{
"name": "Echo Sample",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/echo/component.yaml"
Expand All @@ -23,20 +23,12 @@
{
"name": "Create Model Config",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/model-config/component.yaml"
},
{
"name": "Model Fairness Check",
"url": "https://raw.githubusercontent.com/Trusted-AI/AIF360/master/mlops/kubeflow/bias_detector_pytorch/component.yaml"
},
{
"name": "Adversarial Robustness Evaluation",
"url": "https://raw.githubusercontent.com/Trusted-AI/adversarial-robustness-toolbox/main/utils/mlops/kubeflow/robustness_evaluation_fgsm_pytorch/component.yaml"
}
],
"datasets": [
{
"name": "Thematic Clustering",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering/thematic_clustering.yaml"
"name": "Project CodeNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet/codenet.yaml"
},
{
"name": "Finance Proposition Bank",
Expand All @@ -50,10 +42,6 @@
"name": "NOAA Weather Data",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/jfk/jfk.yaml"
},
{
"name": "Project CodeNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet/codenet.yaml"
},
{
"name": "PubLayNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/publaynet/publaynet.yaml"
Expand All @@ -62,6 +50,10 @@
"name": "PubTabNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/pubtabnet/pubtabnet.yaml"
},
{
"name": "Thematic Clustering",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering/thematic_clustering.yaml"
},
{
"name": "TensorFlow Speech Commands",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/tsc/tsc.yaml"
Expand All @@ -73,20 +65,20 @@
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-human-pose-estimator.yaml"
},
{
"name": "MAX Image Resolution Enhancer",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-resolution-enhancer.yaml"
"name": "MAX Image Caption Generator",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator.yaml"
},
{
"name": "MAX Optical Character Recognition",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-ocr.yaml"
"name": "MAX Image Resolution Enhancer",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-resolution-enhancer.yaml"
},
{
"name": "MAX Image Caption Generator",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator.yaml"
"name": "MAX Object Detector",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector.yaml"
},
{
"name": "MAX Toxic Comment Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-toxic-comment-classifier.yaml"
"name": "MAX Optical Character Recognition",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-ocr.yaml"
},
{
"name": "MAX Question Answering",
Expand All @@ -96,35 +88,35 @@
"name": "MAX Recommender System",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-recommender.yaml"
},
{
"name": "MAX Object Detector",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector.yaml"
},
{
"name": "MAX Text Sentiment Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-text-sentiment-classifier.yaml"
},
{
"name": "MAX Toxic Comment Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-toxic-comment-classifier.yaml"
},
{
"name": "MAX Weather Forecaster",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-weather-forecaster.yaml"
}
],
"notebooks": [
{
"name": "ART detector model",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-detector.yaml"
},
{
"name": "ART poisoning attack",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-poison.yaml"
"name": "JFK Airport Analysis",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/JFK-airport.yaml"
},
{
"name": "AIF360 Bias detection example",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/aif-bias.yaml"
},
{
"name": "JFK Airport Analysis",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/JFK-airport.yaml"
"name": "ART detector model",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-detector.yaml"
},
{
"name": "ART poisoning attack",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-poison.yaml"
},
{
"name": "Project CodeNet Language Classification",
Expand All @@ -148,14 +140,6 @@
"name": "ResourceOp Basic",
"url": "https://github.com/kubeflow/kfp-tekton/blob/master/sdk/python/tests/compiler/testdata/resourceop_basic.yaml"
},
{
"name": "Trusted AI Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/trusted-ai-pipeline.yaml"
},
{
"name": "Watson Machine Learning",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/wml-pipeline.yaml"
},
{
"name": "Calculation Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/calculation-pipeline.yaml"
Expand All @@ -167,6 +151,14 @@
{
"name": "Nested Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/nested-pipeline.yaml"
},
{
"name": "Trusted AI Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/trusted-ai-pipeline.yaml"
},
{
"name": "Watson Machine Learning",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/wml-pipeline.yaml"
}
]
}
111 changes: 111 additions & 0 deletions hack/regenerate_catalog_upload_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Copyright 2021 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import json
import yaml

from glob import glob
from os.path import abspath, dirname, relpath


# Asset categories handled by this script. Each entry is combined with
# "-samples" to find the source folder in the katalog clone and with "s" to
# form the key in the generated JSON.
asset_types = ["component", "dataset", "model", "notebook", "pipeline"]

# Folder that contains this script, and its parent (the project root).
script_path = abspath(dirname(__file__))
project_dir = dirname(script_path)

# TODO: don't assume user cloned katalog and mlx repos into same parent folder
katalog_dir = f"{project_dir}/../katalog"

# Prefix prepended to each file's katalog-relative path to form its URL.
katalog_url = "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/"

# Files that get overwritten with the regenerated catalog.
catalog_upload_json_files = [
    f"{project_dir}/bootstrapper/catalog_upload.json",
    f"{project_dir}/quickstart/catalog_upload.json",
]


def get_list_of_yaml_files_in_katalog(asset_type: str, katalog_root=None):
    """Return the sorted YAML files of one asset type found in the katalog clone.

    Searches ``<katalog root>/<asset_type>-samples`` recursively for
    ``*.yaml`` files and drops every file whose path *inside the katalog
    repo* contains ``template``, ``test`` or ``src``.

    The exclusion check looks only at the path relative to the katalog root.
    Matching against the full path would discard every file as soon as the
    clone itself lives below a folder such as ``~/src`` or ``/tests``.

    Args:
        asset_type: Asset type name, e.g. ``"model"``.
        katalog_root: Root folder of the katalog clone. When ``None`` (the
            default) the module-level ``katalog_dir`` is used.

    Returns:
        Sorted list of the matching file paths.
    """
    root = katalog_dir if katalog_root is None else katalog_root
    excluded_markers = ("template", "test", "src")

    def is_wanted(yaml_file):
        # Only the part of the path that belongs to the katalog repo counts.
        path_in_repo = relpath(yaml_file, root)
        return not any(marker in path_in_repo for marker in excluded_markers)

    candidates = glob(f"{root}/{asset_type}-samples/**/*.yaml", recursive=True)

    # A single sort after filtering is enough; filtering keeps the order.
    return sorted(filter(is_wanted, candidates))


def generate_katalog_dict() -> dict:
    """Build the catalog dictionary from the YAML files in the katalog clone.

    For every entry in ``asset_types`` the matching YAML files are loaded and
    turned into ``{"name": ..., "url": ...}`` items. The name is the
    top-level ``name`` field; when that is missing or empty, ``metadata.name``
    is used with dashes replaced by spaces and title-cased. If neither is
    present the name is an empty string. The URL is ``katalog_url`` followed
    by the file's path relative to ``katalog_dir``.

    Returns:
        Dict mapping ``"<asset_type>s"`` to the list of asset items, in the
        order the files were returned by
        ``get_list_of_yaml_files_in_katalog``.
    """
    from os.path import sep as path_sep

    katalog_dict = dict()

    for asset_type in asset_types:

        katalog_asset_list = []

        for yaml_file in get_list_of_yaml_files_in_katalog(asset_type):

            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's locale encoding.
            with open(yaml_file, encoding="utf-8") as f:
                # NOTE(review): FullLoader can construct more than plain
                # data; consider yaml.safe_load if these files are ever
                # taken from an untrusted source.
                yaml_dict = yaml.load(f, Loader=yaml.FullLoader)

            # An empty file loads as None and a file may hold a non-mapping
            # document; treat both as "no fields" instead of crashing.
            if not isinstance(yaml_dict, dict):
                yaml_dict = {}

            metadata = yaml_dict.get("metadata")
            if not isinstance(metadata, dict):
                metadata = {}

            asset_name = yaml_dict.get("name") or \
                str(metadata.get("name") or "").replace("-", " ").title()

            # URLs always use forward slashes, whatever the local separator.
            path_in_repo = relpath(yaml_file, katalog_dir)
            asset_url = katalog_url + "/".join(path_in_repo.split(path_sep))

            katalog_asset_list.append({
                "name": asset_name,
                "url": asset_url
            })

        katalog_dict[asset_type + "s"] = katalog_asset_list

    return katalog_dict


def rewrite_catalog_upload_json_files(katalog: dict):
    """Overwrite every catalog_upload.json file with the given catalog.

    Each path in ``catalog_upload_json_files`` is opened for writing, its
    path relative to ``project_dir`` is printed as a progress line, and the
    catalog is written as JSON indented by 2 spaces (keys in insertion
    order) followed by a trailing newline.

    Args:
        katalog: The catalog dictionary to serialize.
    """
    for target in catalog_upload_json_files:
        with open(target, "w") as out:
            print(" - " + relpath(target, project_dir))
            serialized = json.dumps(katalog, sort_keys=False, indent=2)
            out.write(serialized + "\n")


def main():
    """Regenerate the catalog_upload.json files and report progress.

    Prints a start message, builds the catalog dictionary, writes it to the
    target files, and finally prints a reminder to review the result.
    """
    print("Regenerating catalog_upload.json files:")
    rewrite_catalog_upload_json_files(generate_katalog_dict())
    print("Done. Use git diff to evaluate if and which changes are desired!")


if __name__ == '__main__':
    main()

Loading

0 comments on commit 9201347

Please sign in to comment.