opendatahub-io · Nash-123 · Apr 2, 2024 · Apr 8, 2024 · May 23, 2024 · Sep 16, 2024
diff --git a/.github/workflows/odh-build-and-publish-operator-image.yaml b/.github/workflows/odh-build-and-publish-operator-image.yaml
@@ -0,0 +1,144 @@
+# This is a copy of the publish-core-images.yaml and has been customized to
+# use the quay login credentials.
+# The unused parts of the original have been commented out on purpose.
+name: ODH
+
+on:
+  push:  # runs for pushes to the dev branch and for pushed tags
+    branches:
+      - dev
+    tags:
+      - '**'
+  pull_request:  # runs for pull requests that target dev
+    branches:
+      - dev
+
+jobs:
+    build-and-publish-operator:
+      name: Build and (or) Publish Image
+      runs-on: ubuntu-latest
+      env:
+        GOPATH: ${{ github.workspace }}/go
+        REPO_NAME: ${{ vars.QUAY_REPO_NAME || 'opendatahub' }}
+      steps:
+        - name: Environment dump  # echoes the job-level GOPATH and REPO_NAME values into the log
+          shell: bash
+          run: |
+            echo "GOPATH = ${GOPATH}"
+            echo "REPO_NAME = ${REPO_NAME}"
+
+        - name: Checkout
+          uses: actions/checkout@v4
+
+        - name: Set up Go
+          uses: actions/setup-go@v5
+          with:
+            go-version-file: go.mod  # the Go version is read from the repository's go.mod
+
+        - name: Run go mod  # downloads the module dependencies ahead of the build steps below
+          shell: bash
+          run: |
+            go mod download
+
+        # Build the operator binaries directly on the GitHub runner VM; the image build below then copies them into the images using build/images/training-operator/Dockerfile.multiarch
+        - name: Build linux/amd64 operator binary  # no CC override: uses the runner's default C compiler
+          env:
+            CGO_ENABLED: 1
+            GOOS: linux
+            GOARCH: amd64
+          shell: bash
+          run: |
+            go build -tags strictfipsruntime -a -o manager-$GOARCH cmd/training-operator.v1/main.go
+
+        - name: Build linux/arm64 operator binary  # cgo cross-compile; the output file is manager-arm64
+          env:
+            CC: aarch64-linux-gnu-gcc  # cross C compiler, installed by the apt-get line in this step
+            CGO_ENABLED: 1
+            GOOS: linux
+            GOARCH: arm64
+          shell: bash
+          run: |
+            sudo apt-get update
+            sudo apt-get install -y gcc-aarch64-linux-gnu libc6-dev-arm64-cross
+            go build -tags strictfipsruntime -a -o manager-$GOARCH cmd/training-operator.v1/main.go
+
+        - name: Build linux/s390x operator binary  # cgo cross-compile; the output file is manager-s390x
+          env:
+            CC: s390x-linux-gnu-gcc  # cross C compiler, installed by the apt-get line in this step
+            CGO_ENABLED: 1
+            GOOS: linux
+            GOARCH: s390x
+          shell: bash
+          run: |
+            sudo apt-get update
+            sudo apt-get install -y gcc-s390x-linux-gnu libc6-dev-s390x-cross
+            go build -tags strictfipsruntime -a -o manager-$GOARCH cmd/training-operator.v1/main.go
+
+        - name: Add docker tags  # produces the tags/labels outputs consumed by the "Build image" step
+          id: meta
+          uses: docker/metadata-action@v5
+          with:
+            images: quay.io/${{ env.REPO_NAME }}/training-operator  # REPO_NAME comes from the job-level env
+            tags: |
+              type=raw,latest
+              type=ref,event=pr
+              type=sha,prefix=v1-odh-
+              type=ref,enable=true,priority=600,prefix=,suffix=,event=tag
+
+        - name: Build image  # buildah build; tags and labels are taken from the "meta" step
+          id: build-image
+          uses: redhat-actions/buildah-build@v2
+          with:
+            image: quay.io/${{ env.REPO_NAME }}/training-operator
+            tags: ${{ steps.meta.outputs.tags }}
+            labels: ${{ steps.meta.outputs.labels }}
+            platforms: linux/amd64,linux/arm64,linux/s390x  # the same three architectures as the binaries built above
+            containerfiles: |
+              build/images/training-operator/Dockerfile.multiarch
+            extra-args: |
+              --pull
+
+        # Check that the image was built
+        - name: Check images created  # grep exits non-zero, failing the step, when no matching image is listed
+          shell: bash
+          run: buildah images | grep 'quay.io/${{ env.REPO_NAME }}/training-operator'
+
+        - name: Check image manifest  # NOTE(review): inspects the hard-coded :latest tag, not image-with-tag
+          shell: bash
+          run: |
+            buildah manifest inspect ${{ steps.build-image.outputs.image }}:latest
+
+
+        - name: Check image metadata  # prints the OCI title/description labels from both the OCIv1 and the Docker config
+          shell: bash
+          run: |
+              buildah inspect ${{ steps.build-image.outputs.image-with-tag }} | jq '.OCIv1.config.Labels."org.opencontainers.image.title"'
+              buildah inspect ${{ steps.build-image.outputs.image-with-tag }} | jq '.OCIv1.config.Labels."org.opencontainers.image.description"'
+              buildah inspect ${{ steps.build-image.outputs.image-with-tag }} | jq '.Docker.config.Labels."org.opencontainers.image.title"'
+              buildah inspect ${{ steps.build-image.outputs.image-with-tag }} | jq '.Docker.config.Labels."org.opencontainers.image.description"'
+
+        - name: Login to Quay.io
+          id: podman-login-quay
+          # Runs only for the dev branch, release branches (v*-branch) and tags starting with v.
+          # For any other ref (e.g. pull requests) the login is skipped, and the push/logout
+          # steps, which are gated on this step's outcome, are skipped with it.
+          if:  (github.ref == 'refs/heads/dev' || (startsWith(github.ref, 'refs/heads/v') && endsWith(github.ref, '-branch')) || startsWith(github.ref, 'refs/tags/v'))
+          shell: bash
+          env:
+            # Credentials are handed over through the environment rather than expanded into
+            # the script text, so special characters in them are never re-parsed by the shell.
+            QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
+            QUAY_TOKEN: ${{ secrets.QUAY_TOKEN }}
+          run: |
+            # --password-stdin keeps the token out of the podman command line
+            printf '%s' "$QUAY_TOKEN" | podman login --username "$QUAY_USERNAME" --password-stdin quay.io
+
+        - name: Push to Quay.io
+          if:  always() && steps.podman-login-quay.outcome == 'success'  # skipped whenever the login step was skipped or failed
+          id: push-to-quay
+          uses: redhat-actions/push-to-registry@v2
+          with:
+            image: ${{ steps.build-image.outputs.image }}
+            tags: ${{ steps.build-image.outputs.tags }}
+
+        - name: Print image url
+          if: steps.push-to-quay.outcome == 'success'  # only report when the push step succeeded
+          shell: bash
+          run: echo "Image pushed to ${{ steps.push-to-quay.outputs.registry-paths }}"
+
+        - name: Logout from Quay.io
+          if: always() && steps.podman-login-quay.outcome == 'success'  # always() lets the logout run even when the push failed
+          run: |
+            podman logout quay.io
diff --git a/.github/workflows/odh-kfto-sdk-notebooks-sync.yaml b/.github/workflows/odh-kfto-sdk-notebooks-sync.yaml
@@ -0,0 +1,165 @@
+# The aim of this GitHub workflow is to update the pipfile to sync with Kubeflow Training release.
+name: Sync ODH-notebooks with Kubeflow-Training SDK release
+on:
+  workflow_dispatch:  # manual trigger only; every input below is marked required
+    inputs:
+      upstream-notebooks-repository-organization:
+        required: true
+        description: "Owner of target upstream notebooks repository used to open a PR against"
+        default: "opendatahub-io"
+      notebooks-target-branch:
+        required: true
+        description: "Target branch of upstream repository"
+        default: "main"
+      python-version:
+        required: true
+        description: "Provide the python version to be used for the notebooks"
+        default: "3.11"
+      notebooks-repository-organization:
+        required: true
+        description: "Owner of origin notebooks repository used to open a PR"
+        default: "opendatahub-io"
+      notebooks-repository-name:
+        required: true
+        description: "Name of origin notebooks repository used to open a PR"
+        default: "training-notebooks"
+      training-sdk-release-version:  # the only input without a default value
+        required: true
+        description: "Provide version of the kubeflow-training-sdk release"
+
+# Workflow-wide environment: the workflow_dispatch inputs plus fixed settings, visible to
+# every step as shell variables.
+env:
+  BRANCH_NAME: ${{ github.event.inputs.notebooks-target-branch }}
+  PYTHON_VERSION: ${{ github.event.inputs.python-version }}
+  TRAINING_SDK_RELEASE_VERSION: ${{ github.event.inputs.training-sdk-release-version }}
+  # Unique per run, so repeated runs never collide on the branch name
+  UPDATER_BRANCH: odh-sync-updater-${{ github.run_id }}
+  UPSTREAM_OWNER: ${{ github.event.inputs.upstream-notebooks-repository-organization }}
+  UPSTREAM_REPO_NAME: notebooks
+  REPO_OWNER: ${{ github.event.inputs.notebooks-repository-organization }}
+  # NOTE(review): this value looks like a placeholder left behind by an e-mail obfuscator;
+  # restore the real committer address. It is quoted because an unquoted scalar starting
+  # with '[' is parsed as a YAML flow sequence instead of a string.
+  REPO_OWNER_USER_EMAIL: '[email protected]'
+  REPO_OWNER_USER_NAME: kubeflow-training
+  REPO_NAME: ${{ github.event.inputs.notebooks-repository-name }}
+  GITHUB_TOKEN: ${{ secrets.KUBEFLOW_TRAINING_ACCOUNT_TOKEN }} # add KUBEFLOW_TRAINING_ACCOUNT_TOKEN named secret in your notebooks repo to be used here (Rights/Scopes required : repo & workflow)
+  # Quoted so the version stays a string; an unquoted 3.10 would be read as the float 3.1
+  MINIMUM_SUPPORTED_PYTHON_VERSION: "3.9"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone repository and Sync  # clones the origin repo, rebases its target branch onto upstream and force-pushes the result back to origin
+        run: |
+          git clone https://x-access-token:${GITHUB_TOKEN}@github.com/$REPO_OWNER/$REPO_NAME.git $REPO_NAME
+          cd $REPO_NAME
+          git remote add upstream https://github.com/$UPSTREAM_OWNER/$UPSTREAM_REPO_NAME.git
+          git config --global user.email $REPO_OWNER_USER_EMAIL
+          git config --global user.name $REPO_OWNER_USER_NAME
+          git remote -v
+          git checkout $BRANCH_NAME
+          git config pull.rebase true
+          git pull upstream $BRANCH_NAME && git push -f origin $BRANCH_NAME
+
+      - name: Setup Python environment
+        # v5 is the drop-in successor of v4 (same python-version/cache inputs) and no longer
+        # runs on the deprecated Node 16 action runtime.
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+          cache: 'pipenv'
+
+      # pip-versions provides the `pip-versions list` command used by the next step
+      - name: Install pipenv and pip-versions
+        run: pip install pipenv==2024.4.0 pip-versions
+
+      - name: Update Pipfiles in accordance with Kubeflow Training latest release
+        # Finds every Pipfile that pins kubeflow-training, rewrites the pin to
+        # TRAINING_SDK_RELEASE_VERSION where the Pipfile's python version qualifies, then
+        # regenerates the lock files through the notebooks repository's Makefile.
+        run: |
+            package_name=kubeflow-training
+            available_python_versions=("$PYTHON_VERSION") # add space separated python versions according to 'python-versions' specified in 'Setup Python Environment' step
+            # Split MINIMUM_SUPPORTED_PYTHON_VERSION into major/minor once; it is the same for every directory
+            minimum_supported_python_version_major=$(echo "${MINIMUM_SUPPORTED_PYTHON_VERSION}" | awk -F '.' '{print $1}')
+            minimum_supported_python_version_minor=$(echo "${MINIMUM_SUPPORTED_PYTHON_VERSION}" | awk -F '.' '{print $2}')
+            # Pins $package_name to TRAINING_SDK_RELEASE_VERSION in the Pipfile of the current directory.
+            # Only the Pipfile is edited here; the lock files are refreshed after the loop.
+            install_package_using_pipenv(){
+                # args allow custom names for Pipfile and Pipfile.lock
+                if [ $# -eq 2 ]; then
+                    mv "${1}" Pipfile
+                    mv "${2}" Pipfile.lock
+                fi
+                # replace existing version of kubeflow-training with new version in Pipfile
+                sed -i "s/$package_name = \"==[^\"]*\"/$package_name = \"==${TRAINING_SDK_RELEASE_VERSION}\"/" Pipfile
+                # restore names as they were before
+                if [ $# -eq 2 ]; then
+                    mv Pipfile "${1}"
+                    mv Pipfile.lock "${2}"
+                fi
+            }
+            # Succeeds when python version <major> <minor> ($1 $2) is at least MINIMUM_SUPPORTED_PYTHON_VERSION.
+            # The pair is compared as (major, minor): a newer major qualifies whatever its minor is,
+            # e.g. 4.0 is accepted against a minimum of 3.9.
+            python_version_is_supported(){
+                [[ "$1" -gt "$minimum_supported_python_version_major" ]] || \
+                [[ "$1" -eq "$minimum_supported_python_version_major" && "$2" -ge "$minimum_supported_python_version_minor" ]]
+            }
+            # Get the list of available versions for the package
+            if ! versions=$(pipenv run pip-versions list $package_name);then
+                echo "Failed to retrieve versions for $package_name"
+                exit 1
+            fi
+            # Check if the desired version exists in the list
+            # (-F: match the version literally, so '.' is not treated as a regex wildcard)
+            if echo "$versions" | grep -qF -- "${TRAINING_SDK_RELEASE_VERSION}"; then
+                echo "Version ${TRAINING_SDK_RELEASE_VERSION} is available for $package_name"
+                directories+=($(grep --exclude-dir=.git --exclude-dir=.github --exclude-dir=intel --exclude-dir=tensorflow --exclude-dir=rocm-tensorflow --include="Pipfile*" -rl "$package_name = \"==[0-9.]*\"" | xargs dirname | sort | uniq))
+                counter=0
+                total=${#directories[@]}
+                echo -----------
+                for dir in "${directories[@]}"; do
+                  counter=$((counter+1))
+                  echo "--Processing directory $counter '$dir' of total $total"
+                  cd "$dir"
+                  # Reset first, so a directory with neither Pipfile nor Pipfile.cpu cannot
+                  # inherit the python version found in the previous directory
+                  pipfile_python_version=""
+                  if ! [ -f "Pipfile" ]; then
+                      if [ -f "Pipfile.cpu" ]; then
+                          pipfile_python_version=$(grep -E '^python_version' ./Pipfile.cpu | cut -d '"' -f 2) # extracted from pipfile.cpu
+                      fi
+                  else
+                      pipfile_python_version=$(grep -E '^python_version' ./Pipfile | cut -d '"' -f 2) # extracted from pipfile
+                  fi
+                  pipfile_python_version_major=$(echo "$pipfile_python_version" | awk -F '.' '{print $1}')
+                  pipfile_python_version_minor=$(echo "$pipfile_python_version" | awk -F '.' '{print $2}')
+                  # Update only Pipfiles whose python version was requested for this run and is still supported
+                  if [[ " ${available_python_versions[@]} " =~ " ${pipfile_python_version} " ]] && python_version_is_supported "$pipfile_python_version_major" "$pipfile_python_version_minor"; then
+                      if ! [ -f "Pipfile" ]; then
+                          if [ -f "Pipfile.cpu" ]; then
+                              install_package_using_pipenv Pipfile.cpu Pipfile.lock.cpu
+                          fi
+                          if [ -f "Pipfile.gpu" ]; then
+                              install_package_using_pipenv Pipfile.gpu Pipfile.lock.gpu
+                          fi
+                      else
+                          #install specified package
+                          install_package_using_pipenv
+                      fi
+                  else
+                      echo "Skipped installation of ${package_name} with version ${TRAINING_SDK_RELEASE_VERSION} in $dir"
+                  fi
+                  cd -
+                  echo "$((total-counter)) directories remaining.."
+                done
+                # Refresh Pipfile.Lock files
+                cd $REPO_NAME && make refresh-pipfilelock-files && cd -
+            else
+                versions_list=$(echo "$versions" | tr '\n' '   ' | sed 's/, $//')
+                versions="${versions_list%,}"
+                echo "Version '${TRAINING_SDK_RELEASE_VERSION}' is not available for $package_name"
+                echo "Available versions for $package_name: $versions"
+                exit 1
+            fi
+
+      - name: Push changes
+        # Commits the updated Pipfiles on a fresh updater branch and pushes it to origin.
+        # One command per line on purpose: the default `bash -e` shell does not stop on a
+        # failure in the middle of an `a && b && c` chain, so with the chain a failed
+        # `git commit` would still be followed by the push of a branch without the commit.
+        run: |
+          cd "$REPO_NAME"
+          git add .
+          git status
+          git checkout -b "$UPDATER_BRANCH"
+          git commit -am "Updated notebooks via $UPDATER_BRANCH GitHub action" --signoff
+          git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/$REPO_OWNER/$REPO_NAME.git"
+          git push origin "$UPDATER_BRANCH"
+
+      - name: Wait for commit to propagate  # fixed 15-second pause between the branch push and the PR creation
+        run: sleep 15
+
+      - name: Create Pull Request
+        # Opens a PR from <REPO_OWNER>:<UPDATER_BRANCH> against BRANCH_NAME of the upstream
+        # notebooks repository. Title and body are passed through env so their quotes,
+        # backticks and newlines reach gh unchanged.
+        run: |
+          gh pr create --repo "$UPSTREAM_OWNER/$UPSTREAM_REPO_NAME" \
+            --title "$pr_title" \
+            --body "$pr_body" \
+            --head "$REPO_OWNER:$UPDATER_BRANCH" \
+            --base "$BRANCH_NAME"
+        env:
+          pr_title: "[Kubeflow-Training Action] Update notebook's pipfile to sync with Kubeflow-Training SDK release ${{ env.TRAINING_SDK_RELEASE_VERSION }}"
+          pr_body: |
+            :rocket: This is an automated Pull Request generated by [odh-kfto-sdk-notebooks-sync.yaml](https://github.com/opendatahub-io/training-operator/tree/dev/.github/workflows/odh-kfto-sdk-notebooks-sync.yaml) workflow.
+
+            This PR updates the `Pipfile` to sync with latest Kubeflow-Training SDK release.
diff --git a/.github/workflows/odh-release.yaml b/.github/workflows/odh-release.yaml
@@ -0,0 +1,46 @@
+# This workflow will handle the release process
+
+name: ODH Release
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Tag to be used for release, i.e.: v0.0.1-odh-1'
+        required: true
+  push:  # also runs on tag pushes, where no "version" input exists
+    tags:
+      - '*'
+jobs:
+  release-odh:
+    runs-on: ubuntu-latest
+
+    # Permission required to create a release
+    permissions:
+      contents: write
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set Go
+      uses: actions/setup-go@v5
+      with:
+        go-version-file: './go.mod'  # the Go version is read from the repository's go.mod
+
+    - name: Verify that release doesn't exist yet
+      # `bash {0}` runs the script without -e: a failing `gh release view` is the expected
+      # outcome here and must not abort the step before its exit status is inspected.
+      shell: bash {0}
+      run: |
+        gh release view "$RELEASE_VERSION"
+        status=$?
+        if [[ $status -eq 0 ]]; then
+          echo "Release $RELEASE_VERSION already exists."
+          exit 1
+        fi
+      env:
+        GITHUB_TOKEN: ${{ github.TOKEN }}
+        # The "version" input exists only for manual runs; on a tag push fall back to the
+        # pushed tag name. With an empty value `gh release view` would inspect the latest
+        # release instead and report a false "already exists".
+        RELEASE_VERSION: ${{ github.event.inputs.version || github.ref_name }}
+
+    - name: Creates a release in GitHub
+      run: |
+        gh release create "$RELEASE_VERSION" --target "${{ github.ref }}"
+      env:
+        GITHUB_TOKEN: ${{ secrets.CODEFLARE_MACHINE_ACCOUNT_TOKEN }}
+        # Same manual-input-or-pushed-tag fallback as in the verification step
+        RELEASE_VERSION: ${{ github.event.inputs.version || github.ref_name }}
+      shell: bash
diff --git a/.github/workflows/publish-conformance-images.yaml b/.github/workflows/publish-conformance-images.yaml
@@ -10,7 +10,7 @@ jobs:
     uses: ./.github/workflows/build-and-publish-images.yaml
     with:
       component-name: ${{ matrix.component-name }}
-      platforms: linux/amd64,linux/arm64,linux/ppc64le
+      platforms: linux/amd64,linux/arm64,linux/ppc64le,linux/s390x
       dockerfile: ${{ matrix.dockerfile }}
     secrets:
       DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}

diff --git a/.github/workflows/template-publish-image/action.yaml b/.github/workflows/template-publish-image/action.yaml
@@ -30,7 +30,7 @@ runs:
     - name: Setup QEMU
       uses: docker/setup-qemu-action@v3
       with:
-        platforms: amd64,ppc64le,arm64
+        platforms: amd64,ppc64le,arm64,s390x
 
     - name: Set Up Docker Buildx
       uses: docker/setup-buildx-action@v3

diff --git a/Makefile b/Makefile
@@ -117,7 +117,7 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified
 
 deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
 	cd manifests/overlays/standalone && $(KUSTOMIZE) edit set image kubeflow/training-operator=${IMG}
-	$(KUSTOMIZE) build manifests/overlays/standalone | kubectl apply -f -
+	$(KUSTOMIZE) build manifests/overlays/standalone | kubectl apply --server-side -f -
 
 undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config.
 	$(KUSTOMIZE) build manifests/overlays/standalone | kubectl delete -f -