Skip to content

Commit

Permalink
Add script to regenerate catalog_upload.json files (#200)
Browse files Browse the repository at this point in the history
* Add script to regenerate catalog_upload.json files

Add helper script to regenerate the catalog_upload.json
file(s) from the *.yaml files that are found inside the
locally cloned katalog repo.

Regenerate files:
- bootstrapper/catalog_upload.json
- quickstart/catalog_upload.json

Current caveats:
- no documentation yet
- no command parameters, no command line help
- the user must have cloned the katalog repo and the mlx
  repo in the same parent folder (in the future those paths
  should be configurable as parameters)
- not all of the YAML files in the katalog repo may want
  to be included in the catalog upload, so after the files
  get regenerated, the user (MLX maintainer) needs to
  exercise some judgment
- assets that live outside the katalog repo will not be
  found and have to be re-added (or not removed) using a
  Git enabled IDE or git diff tool
- this initial commit reorders some of the assets (based
  on filename)

Signed-off-by: Christian Kadner <[email protected]>

* Filter out "template", "src", "test"

Signed-off-by: Christian Kadner <[email protected]>

* Update after Qiskit, CodeNet, MAX katalog changes

Signed-off-by: Christian Kadner <[email protected]>
  • Loading branch information
ckadner authored Sep 23, 2021
1 parent 025a979 commit 3129492
Show file tree
Hide file tree
Showing 3 changed files with 245 additions and 84 deletions.
106 changes: 65 additions & 41 deletions bootstrapper/catalog_upload.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"components": [
{
"name": "Create Secret - Kubernetes Cluster",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/create-secret/component.yaml"
},
{
"name": "Generate Dataset Metadata",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dax-to-dlf/component.yaml"
Expand All @@ -8,10 +12,6 @@
"name": "Create Dataset Volume",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dlf/component.yaml"
},
{
"name": "Create Secret - Kubernetes Cluster",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/create-secret/component.yaml"
},
{
"name": "Echo Sample",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/echo/component.yaml"
Expand All @@ -35,8 +35,16 @@
],
"datasets": [
{
"name": "Thematic Clustering",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering/thematic_clustering.yaml"
"name": "Project CodeNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet/codenet.yaml"
},
{
"name": "Project CodeNet - Language Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet_langclass/codenet_langclass.yaml"
},
{
"name": "Project CodeNet - MLM",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet_mlm/codenet_mlm.yaml"
},
{
"name": "Finance Proposition Bank",
Expand All @@ -50,10 +58,6 @@
"name": "NOAA Weather Data",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/jfk/jfk.yaml"
},
{
"name": "Project CodeNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet/codenet.yaml"
},
{
"name": "PubLayNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/publaynet/publaynet.yaml"
Expand All @@ -62,54 +66,74 @@
"name": "PubTabNet",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/pubtabnet/pubtabnet.yaml"
},
{
"name": "Thematic Clustering",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering/thematic_clustering.yaml"
},
{
"name": "TensorFlow Speech Commands",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/tsc/tsc.yaml"
}
],
"models": [
{
"name": "CodeNet Language Classification",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/codenet-language-classification/codenet-language-classification.yaml"
},
{
"name": "MAX Human Pose Estimator",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-human-pose-estimator.yaml"
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-human-pose-estimator/max-human-pose-estimator.yaml"
},
{
"name": "MAX Image Caption Generator",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator/max-image-caption-generator.yaml"
},
{
"name": "MAX Image Resolution Enhancer",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-resolution-enhancer.yaml"
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-resolution-enhancer/max-image-resolution-enhancer.yaml"
},
{
"name": "MAX Optical Character Recognition",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-ocr.yaml"
"name": "MAX Named Entity Tagger",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-named-entity-tagger/max-named-entity-tagger.yaml"
},
{
"name": "MAX Image Caption Generator",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator.yaml"
"name": "MAX Object Detector",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector/max-object-detector.yaml"
},
{
"name": "MAX Toxic Comment Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-toxic-comment-classifier.yaml"
"name": "MAX Optical Character Recognition",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-ocr/max-ocr.yaml"
},
{
"name": "MAX Question Answering",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-question-answering.yaml"
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-question-answering/max-question-answering.yaml"
},
{
"name": "MAX Recommender System",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-recommender.yaml"
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-recommender/max-recommender.yaml"
},
{
"name": "MAX Object Detector",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector.yaml"
"name": "MAX Text Sentiment Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-text-sentiment-classifier/max-text-sentiment-classifier.yaml"
},
{
"name": "MAX Text Sentiment Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-text-sentiment-classifier.yaml"
"name": "MAX Toxic Comment Classifier",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-toxic-comment-classifier/max-toxic-comment-classifier.yaml"
},
{
"name": "MAX Weather Forecaster",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-weather-forecaster.yaml"
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-weather-forecaster/max-weather-forecaster.yaml"
}
],
"notebooks": [
{
"name": "JFK Airport Analysis",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/JFK-airport.yaml"
},
{
"name": "AIF360 Bias detection example",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/aif-bias.yaml"
},
{
"name": "ART detector model",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-detector.yaml"
Expand All @@ -119,20 +143,20 @@
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-poison.yaml"
},
{
"name": "AIF360 Bias detection example",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/aif-bias.yaml"
"name": "Project CodeNet - Language Classification",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/codenet-lang.yaml"
},
{
"name": "JFK Airport Analysis",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/JFK-airport.yaml"
"name": "Project CodeNet - MLM",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/codenet-mlm.yaml"
},
{
"name": "Project CodeNet Language Classification",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/codenet-lang.yaml"
"name": "Qiskit Quantum Kernel Machine Learning",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/qiskit-ml.yaml"
},
{
"name": "Project CodeNet Masked Language Model",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/codenet-mlm.yaml"
"name": "Qiskit Neural Network Classifier and Regressor",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/qiskit-nncr.yaml"
}
],
"pipelines": [
Expand All @@ -148,14 +172,6 @@
"name": "ResourceOp Basic",
"url": "https://github.com/kubeflow/kfp-tekton/blob/master/sdk/python/tests/compiler/testdata/resourceop_basic.yaml"
},
{
"name": "Trusted AI Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/trusted-ai-pipeline.yaml"
},
{
"name": "Watson Machine Learning",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/wml-pipeline.yaml"
},
{
"name": "Calculation Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/calculation-pipeline.yaml"
Expand All @@ -167,6 +183,14 @@
{
"name": "Nested Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/nested-pipeline.yaml"
},
{
"name": "Trusted AI Pipeline",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/trusted-ai-pipeline.yaml"
},
{
"name": "Watson Machine Learning",
"url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/wml-pipeline.yaml"
}
]
}
113 changes: 113 additions & 0 deletions hack/regenerate_catalog_upload_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/usr/bin/env python3

# Copyright 2021 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import json
import yaml

from glob import glob
from os.path import abspath, dirname, relpath


# Asset categories handled by this script. Each one is looked up in a
# "<type>-samples" folder of the katalog repo and written to the JSON output
# under the key "<type>s".
asset_types = ["component", "dataset", "model", "notebook", "pipeline"]

# Prefix that turns a path relative to the katalog repo into a download URL.
katalog_url = "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/"

# Folder containing this script, and the folder one level above it.
script_path = abspath(dirname(__file__))
project_dir = dirname(script_path)

# TODO: don't assume user cloned katalog and mlx repos into same parent folder
katalog_dir = f"{project_dir}/../katalog"

# Files that get overwritten with the regenerated catalog.
catalog_upload_json_files = [
    f"{project_dir}/bootstrapper/catalog_upload.json",
    f"{project_dir}/quickstart/catalog_upload.json",
]


def get_list_of_yaml_files_in_katalog(asset_type: str, katalog_root=None) -> list:
    """Return the sorted paths of the catalog YAML files for one asset type.

    Searches ``<katalog_root>/<asset_type>-samples`` recursively for ``*.yaml``
    files and drops every file whose path contains "template", "test" or
    "src".

    The exclusion words are matched against the path relative to the katalog
    root only. Matching the full path would discard every file whenever the
    checkout itself lives below a folder such as ``~/src`` or ``/tmp/test``.

    Args:
        asset_type: Singular asset type, e.g. "model" or "notebook".
        katalog_root: Root folder of the katalog checkout. When omitted, the
            module-level ``katalog_dir`` is used.

    Returns:
        Sorted list of the matching file paths, as produced by ``glob``.
    """
    root = katalog_dir if katalog_root is None else katalog_root
    excluded_words = ("template", "test", "src")

    yaml_files = glob(f"{root}/{asset_type}-samples/**/*.yaml", recursive=True)

    return sorted(
        filepath for filepath in yaml_files
        if not any(word in relpath(filepath, root) for word in excluded_words)
    )


def _asset_display_name(yaml_dict):
    """Return the display name for one parsed asset YAML document.

    Uses the top-level "name" when it is set. Otherwise falls back to
    "metadata.name" with dashes replaced by spaces and title-cased. Returns
    an empty string when neither is available, including when the document
    is empty or not a mapping.
    """
    if not isinstance(yaml_dict, dict):
        return ""

    name = yaml_dict.get("name")
    if name:
        return name

    # "metadata" may be missing, null, or not a mapping at all.
    metadata = yaml_dict.get("metadata")
    metadata_name = metadata.get("name") if isinstance(metadata, dict) else None
    if not metadata_name:
        return ""

    return str(metadata_name).replace("-", " ").title()


def generate_katalog_dict() -> dict:
    """Build the catalog dictionary from the YAML files of the katalog repo.

    For every entry of ``asset_types`` the YAML files found by
    ``get_list_of_yaml_files_in_katalog`` are parsed, and one
    ``{"name": ..., "url": ...}`` item is created per file.

    Returns:
        Dict mapping "<asset_type>s" (e.g. "models") to the list of items
        for that asset type, in the order the files were returned.
    """
    # Local import so this function does not rely on changes to the
    # file-level import block.
    from pathlib import PurePath

    katalog_dict = dict()

    for asset_type in asset_types:
        katalog_asset_list = []

        for yaml_file in get_list_of_yaml_files_in_katalog(asset_type):
            # Explicit encoding: do not depend on the platform default.
            with open(yaml_file, encoding="utf-8") as f:
                # NOTE(review): FullLoader can construct more than plain data
                # types; consider yaml.safe_load if the katalog checkout is
                # not fully trusted.
                yaml_dict = yaml.load(f, Loader=yaml.FullLoader)

            # URLs always use forward slashes, whatever the OS separator is.
            relative_path = PurePath(relpath(yaml_file, katalog_dir)).as_posix()

            katalog_asset_list.append({
                "name": _asset_display_name(yaml_dict),
                "url": katalog_url + relative_path,
            })

        katalog_dict[asset_type + "s"] = katalog_asset_list

    return katalog_dict


def rewrite_catalog_upload_json_files(katalog: dict):
    """Overwrite every file in ``catalog_upload_json_files`` with the catalog.

    The catalog is written as JSON with an indent of 2, keys in their
    existing order, followed by a trailing newline. The path of each file,
    relative to ``project_dir``, is printed as it is written.

    Args:
        katalog: The catalog dictionary to serialize.

    Raises:
        TypeError: If ``katalog`` contains values that are not JSON
            serializable. No file has been touched when this is raised.
        OSError: If one of the target files cannot be opened or written.
    """
    # Serialize once, before any file is opened for writing: the text is the
    # same for all targets, and a serialization error can then no longer
    # leave an already truncated file behind.
    katalog_json = json.dumps(katalog, sort_keys=False, indent=2) + "\n"

    for file_path in catalog_upload_json_files:

        with open(file_path, "w", encoding="utf-8") as output_file:

            print(" - " + relpath(file_path, project_dir))

            output_file.write(katalog_json)


def main():
    """Regenerate the catalog_upload.json files and report progress on stdout."""
    print("Regenerating catalog_upload.json files:")

    # TODO: read current catalog_upload.json file(s) to capture non-katalog assets and restore later

    # Build the catalog and write it straight to the target files.
    rewrite_catalog_upload_json_files(generate_katalog_dict())

    print("Done. Use git diff to evaluate if and which changes are desired!")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading

0 comments on commit 3129492

Please sign in to comment.