Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2b6ca76
CARRY: Add RHOAI manifests (#3)
z103cb Apr 2, 2024
465d997
CARRY: ODH Image build actions (#4)
z103cb Apr 8, 2024
db5d1a3
CARRY: add separate file for RHOAI build and update multarch base image
KPostOffice May 23, 2024
799730c
CARRY: Update manifests to use ODH KFTO image
oksanabaza Sep 16, 2024
298135b
CARRY: training-operator manifests kustomize v5 upgrade
oksanabaza Sep 19, 2024
bd5556c
CARRY: Update OWNERS (#15)
Fiona-Waters Oct 8, 2024
f8683e2
Add ODH release workflow (#17)
oksanabaza Oct 25, 2024
88c5079
CARRY: Add upstream metadata to Training Operator manifests
oksanabaza Dec 6, 2024
0cc9462
CARRY: Add nslookup to the operator container image
astefanutti Feb 3, 2025
24321ac
CARRY: Set pytorch-init-container-image option
astefanutti Feb 3, 2025
8e0c7ff
CARRY: Enable validation webhooks
astefanutti Feb 4, 2025
49d8bcc
CARRY: Use Go v1.23.2 patch version for Cachito
astefanutti Feb 6, 2025
1f9a925
CARRY: set gomod toolset to patch version
ckhordiasma Feb 6, 2025
94c257e
CARRY: Update deployment image to v1.9.0-odh-1
astefanutti Feb 10, 2025
fbe9e65
CARRY: adjust go.mod formatting
sutaakar Feb 10, 2025
1cb3a54
PATCH: Added network policy in pytorch-job controller to make pytorch…
sutaakar Mar 24, 2025
7e4f9a2
CARRY: Added workflow to sync the Kubeflow-training SDK released vers…
abhijeet-dhumal Feb 20, 2025
19514b5
Update README.md
Wolfgang-Romanowski Mar 4, 2025
b6939ba
PATCH: Update name for repoUrl
ChughShilpa Mar 11, 2025
1535c1e
CARRY: Adding timeout to allow for commit to be ready before create P…
Fiona-Waters Apr 8, 2025
9ccff63
CARRY: Use official Red Hat go-toolset:1.23
sutaakar Apr 15, 2025
b27b899
CARRY: Update deployment image to v1.9.0-odh-3
sutaakar Apr 24, 2025
d07134e
Add s390x architecture support and configure test builds for Quay.io
Nash-123 Jun 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions .github/workflows/odh-build-and-publish-operator-image.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# This is a copy of the publish-core-images.yaml and has been customized to
# use the quay login credentials.
# The unused parts of the original have been commented out on purpose.
name: ODH

# Runs on pushes to dev, on every tag, and on pull requests targeting dev.
# The image is always built; it is only pushed when the "Login to Quay.io"
# step runs (see its `if:` condition).
on:
  push:
    branches:
      - dev
    tags:
      - '**'
  pull_request:
    branches:
      - dev

jobs:
  build-and-publish-operator:
    name: Build and (or) Publish Image
    runs-on: ubuntu-latest
    env:
      GOPATH: ${{ github.workspace }}/go
      # Quay.io namespace; defaults to 'opendatahub' unless the QUAY_REPO_NAME
      # repository variable is set.
      REPO_NAME: ${{ vars.QUAY_REPO_NAME || 'opendatahub' }}
    steps:
      - name: Environment dump
        shell: bash
        run: |
          echo "GOPATH = ${GOPATH}"
          echo "REPO_NAME = ${REPO_NAME}"

      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      - name: Run go mod
        shell: bash
        run: |
          go mod download

      # Build the operator binaries directly on the GitHub runner (one per
      # target architecture, named manager-$GOARCH); the image itself is
      # assembled later from build/images/training-operator/Dockerfile.multiarch.
      - name: Build linux/amd64 operator binary
        env:
          CGO_ENABLED: "1"
          GOOS: linux
          GOARCH: amd64
        shell: bash
        run: |
          go build -tags strictfipsruntime -a -o "manager-${GOARCH}" cmd/training-operator.v1/main.go

      # CGO is enabled, so cross-compiling needs the matching C cross toolchain.
      - name: Build linux/arm64 operator binary
        env:
          CC: aarch64-linux-gnu-gcc
          CGO_ENABLED: "1"
          GOOS: linux
          GOARCH: arm64
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y gcc-aarch64-linux-gnu libc6-dev-arm64-cross
          go build -tags strictfipsruntime -a -o "manager-${GOARCH}" cmd/training-operator.v1/main.go

      - name: Build linux/s390x operator binary
        env:
          CC: s390x-linux-gnu-gcc
          CGO_ENABLED: "1"
          GOOS: linux
          GOARCH: s390x
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y gcc-s390x-linux-gnu libc6-dev-s390x-cross
          go build -tags strictfipsruntime -a -o "manager-${GOARCH}" cmd/training-operator.v1/main.go

      - name: Add docker tags
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: quay.io/${{ env.REPO_NAME }}/training-operator
          tags: |
            type=raw,latest
            type=ref,event=pr
            type=sha,prefix=v1-odh-
            type=ref,enable=true,priority=600,prefix=,suffix=,event=tag

      - name: Build image
        id: build-image
        uses: redhat-actions/buildah-build@v2
        with:
          image: quay.io/${{ env.REPO_NAME }}/training-operator
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64,linux/s390x
          containerfiles: |
            build/images/training-operator/Dockerfile.multiarch
          extra-args: |
            --pull

      # Check that the image has been built
      - name: Check images created
        shell: bash
        run: buildah images | grep -F "quay.io/${REPO_NAME}/training-operator"

      - name: Check image manifest
        shell: bash
        env:
          IMAGE: ${{ steps.build-image.outputs.image }}
        run: |
          buildah manifest inspect "${IMAGE}:latest"

      # Print the title/description labels from both the OCI and the Docker
      # image configuration.
      - name: Check image metadata
        shell: bash
        env:
          IMAGE_WITH_TAG: ${{ steps.build-image.outputs.image-with-tag }}
        run: |
          buildah inspect "${IMAGE_WITH_TAG}" | jq '.OCIv1.config.Labels."org.opencontainers.image.title"'
          buildah inspect "${IMAGE_WITH_TAG}" | jq '.OCIv1.config.Labels."org.opencontainers.image.description"'
          buildah inspect "${IMAGE_WITH_TAG}" | jq '.Docker.config.Labels."org.opencontainers.image.title"'
          buildah inspect "${IMAGE_WITH_TAG}" | jq '.Docker.config.Labels."org.opencontainers.image.description"'

      - name: Login to Quay.io
        id: podman-login-quay
        # Trigger step only for the dev branch, release branches (v*-branch) or version tags (v*).
        if: (github.ref == 'refs/heads/dev' || (startsWith(github.ref, 'refs/heads/v') && endsWith(github.ref, '-branch')) || startsWith(github.ref, 'refs/tags/v'))
        shell: bash
        # Credentials are handed over through the environment and stdin so the
        # token never appears in the rendered script or in process arguments.
        env:
          QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
          QUAY_TOKEN: ${{ secrets.QUAY_TOKEN }}
        run: |
          printf '%s' "${QUAY_TOKEN}" | podman login --username "${QUAY_USERNAME}" --password-stdin quay.io

      # Only push when the login step actually ran and succeeded.
      - name: Push to Quay.io
        if: always() && steps.podman-login-quay.outcome == 'success'
        id: push-to-quay
        uses: redhat-actions/push-to-registry@v2
        with:
          image: ${{ steps.build-image.outputs.image }}
          tags: ${{ steps.build-image.outputs.tags }}

      - name: Print image url
        if: steps.push-to-quay.outcome == 'success'
        shell: bash
        env:
          REGISTRY_PATHS: ${{ steps.push-to-quay.outputs.registry-paths }}
        run: echo "Image pushed to ${REGISTRY_PATHS}"

      # always(): log out even if the push failed, as long as we logged in.
      - name: Logout from Quay.io
        if: always() && steps.podman-login-quay.outcome == 'success'
        run: |
          podman logout quay.io
165 changes: 165 additions & 0 deletions .github/workflows/odh-kfto-sdk-notebooks-sync.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# The aim of this GitHub workflow is to update the pipfile to sync with Kubeflow Training release.
name: Sync ODH-notebooks with Kubeflow-Training SDK release
on:
  workflow_dispatch:
    inputs:
      upstream-notebooks-repository-organization:
        required: true
        description: "Owner of target upstream notebooks repository used to open a PR against"
        default: "opendatahub-io"
      notebooks-target-branch:
        required: true
        description: "Target branch of upstream repository"
        default: "main"
      python-version:
        required: true
        description: "Provide the python version to be used for the notebooks"
        default: "3.11"
      notebooks-repository-organization:
        required: true
        description: "Owner of origin notebooks repository used to open a PR"
        default: "opendatahub-io"
      notebooks-repository-name:
        required: true
        description: "Name of origin notebooks repository used to open a PR"
        default: "training-notebooks"
      training-sdk-release-version:
        required: true
        description: "Provide version of the kubeflow-training-sdk release"

# All workflow inputs are exposed to the scripts below as environment
# variables rather than being interpolated into the shell text.
env:
  BRANCH_NAME: ${{ github.event.inputs.notebooks-target-branch }}
  PYTHON_VERSION: ${{ github.event.inputs.python-version }}
  TRAINING_SDK_RELEASE_VERSION: ${{ github.event.inputs.training-sdk-release-version }}
  UPDATER_BRANCH: odh-sync-updater-${{ github.run_id }}
  UPSTREAM_OWNER: ${{ github.event.inputs.upstream-notebooks-repository-organization }}
  UPSTREAM_REPO_NAME: notebooks
  REPO_OWNER: ${{ github.event.inputs.notebooks-repository-organization }}
  # NOTE(review): the address below looks redacted in the reviewed copy of this
  # file; keep the real value from the repository when applying this change.
  REPO_OWNER_USER_EMAIL: [email protected]
  REPO_OWNER_USER_NAME: kubeflow-training
  REPO_NAME: ${{ github.event.inputs.notebooks-repository-name }}
  GITHUB_TOKEN: ${{ secrets.KUBEFLOW_TRAINING_ACCOUNT_TOKEN }} # add KUBEFLOW_TRAINING_ACCOUNT_TOKEN named secret in your notebooks repo to be used here (Rights/Scopes required : repo & workflow)
  # Quoted so that a value such as 3.10 is not read as the float 3.1.
  MINIMUM_SUPPORTED_PYTHON_VERSION: "3.9"

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Clone the origin (fork) repository into ./$REPO_NAME, rebase its target
      # branch onto upstream and force-push the result back to origin.
      - name: Clone repository and Sync
        run: |
          git clone "https://x-access-token:${GITHUB_TOKEN}@github.com/${REPO_OWNER}/${REPO_NAME}.git" "$REPO_NAME"
          cd "$REPO_NAME"
          git remote add upstream "https://github.com/${UPSTREAM_OWNER}/${UPSTREAM_REPO_NAME}.git"
          git config --global user.email "$REPO_OWNER_USER_EMAIL"
          git config --global user.name "$REPO_OWNER_USER_NAME"
          git remote -v
          git checkout "$BRANCH_NAME"
          git config pull.rebase true
          git pull upstream "$BRANCH_NAME" && git push -f origin "$BRANCH_NAME"

      - name: Setup Python environment
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pipenv'

      - name: Install pipenv and pip-versions
        run: pip install pipenv==2024.4.0 pip-versions

      - name: Update Pipfiles in accordance with Kubeflow Training latest release
        run: |
          package_name=kubeflow-training
          available_python_versions=("$PYTHON_VERSION") # add space separated python versions according to 'python-versions' specified in 'Setup Python Environment' step

          # Rewrites the pinned "$package_name = \"==X\"" entry in a Pipfile.
          # With two arguments (custom Pipfile / Pipfile.lock names) the files
          # are temporarily renamed to the default names and restored afterwards.
          install_package_using_pipenv(){
            # args allow custom names for Pipfile and Pipfile.lock
            if [ $# -eq 2 ]; then
              mv "${1}" Pipfile
              mv "${2}" Pipfile.lock
            fi
            # replace existing version of the package with new version in Pipfile
            sed -i "s/$package_name = \"==[^\"]*\"/$package_name = \"==${TRAINING_SDK_RELEASE_VERSION}\"/" Pipfile
            # restore names as they were before
            if [ $# -eq 2 ]; then
              mv Pipfile "${1}"
              mv Pipfile.lock "${2}"
            fi
          }

          # Get the list of available versions for the package
          if ! versions=$(pipenv run pip-versions list "$package_name"); then
            echo "Failed to retrieve versions for $package_name"
            exit 1
          fi

          # Check if the desired version exists in the list.
          # The list is split into one version per line and matched as a whole,
          # fixed string, so "1.9" no longer matches "1.9.0" and "." is literal.
          if echo "$versions" | tr -s ', ' '\n' | grep -qxF -- "${TRAINING_SDK_RELEASE_VERSION}"; then
            echo "Version ${TRAINING_SDK_RELEASE_VERSION} is available for $package_name"
            directories+=($(grep --exclude-dir=.git --exclude-dir=.github --exclude-dir=intel --exclude-dir=tensorflow --exclude-dir=rocm-tensorflow --include="Pipfile*" -rl "$package_name = \"==[0-9.]*\"" | xargs dirname | sort | uniq))

            # The minimum supported version does not change per directory,
            # so split it into major/minor once.
            minimum_supported_python_version_major=$(echo "${MINIMUM_SUPPORTED_PYTHON_VERSION}" | awk -F '.' '{print $1}')
            minimum_supported_python_version_minor=$(echo "${MINIMUM_SUPPORTED_PYTHON_VERSION}" | awk -F '.' '{print $2}')
            minimum_supported_python_version_major=${minimum_supported_python_version_major:-0}
            minimum_supported_python_version_minor=${minimum_supported_python_version_minor:-0}

            counter=0
            total=${#directories[@]}
            echo -----------
            for dir in "${directories[@]}"; do
              counter=$((counter+1))
              echo "--Processing directory $counter '$dir' of total $total"
              cd "$dir"

              # Reset for every directory so a value from the previous
              # iteration is never reused when no Pipfile/Pipfile.cpu exists.
              pipfile_python_version=""
              if [ -f "Pipfile" ]; then
                pipfile_python_version=$(grep -E '^python_version' ./Pipfile | cut -d '"' -f 2) # extracted from Pipfile
              elif [ -f "Pipfile.cpu" ]; then
                pipfile_python_version=$(grep -E '^python_version' ./Pipfile.cpu | cut -d '"' -f 2) # extracted from Pipfile.cpu
              fi
              pipfile_python_version_major=$(echo "$pipfile_python_version" | awk -F '.' '{print $1}')
              pipfile_python_version_minor=$(echo "$pipfile_python_version" | awk -F '.' '{print $2}')
              pipfile_python_version_major=${pipfile_python_version_major:-0}
              pipfile_python_version_minor=${pipfile_python_version_minor:-0}

              # Update only when the Pipfile's python version is one of the
              # versions set up in this run AND is >= the minimum supported
              # version. Major and minor are compared as a pair (e.g. 4.0 is
              # newer than 3.9 even though 0 < 9).
              if [[ " ${available_python_versions[*]} " == *" ${pipfile_python_version} "* ]] \
                && (( pipfile_python_version_major > minimum_supported_python_version_major \
                      || ( pipfile_python_version_major == minimum_supported_python_version_major \
                           && pipfile_python_version_minor >= minimum_supported_python_version_minor ) )); then
                if ! [ -f "Pipfile" ]; then
                  if [ -f "Pipfile.cpu" ]; then
                    install_package_using_pipenv Pipfile.cpu Pipfile.lock.cpu
                  fi
                  if [ -f "Pipfile.gpu" ]; then
                    install_package_using_pipenv Pipfile.gpu Pipfile.lock.gpu
                  fi
                else
                  #install specified package
                  install_package_using_pipenv
                fi
              else
                echo "Skipped installation of ${package_name} with version ${TRAINING_SDK_RELEASE_VERSION} in $dir"
              fi
              cd -
              echo "$((total-counter)) directories remaining.."
            done
            # Refresh Pipfile.Lock files
            cd "$REPO_NAME" && make refresh-pipfilelock-files && cd -
          else
            versions_list=$(echo "$versions" | tr '\n' ' ' | sed 's/, $//')
            versions="${versions_list%,}"
            echo "Version '${TRAINING_SDK_RELEASE_VERSION}' is not available for $package_name"
            echo "Available versions for $package_name: $versions"
            exit 1
          fi

      # One command per line: with the runner's default `bash -e` any failing
      # command (for example a commit with nothing to commit) now stops the
      # step instead of being masked inside an `&&` chain and followed by a push.
      - name: Push changes
        run: |
          cd "$REPO_NAME"
          git add .
          git status
          git checkout -b "$UPDATER_BRANCH"
          git commit -am "Updated notebooks via ${UPDATER_BRANCH} GitHub action" --signoff
          git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${REPO_OWNER}/${REPO_NAME}.git"
          git push origin "$UPDATER_BRANCH"

      # Give GitHub a moment to register the pushed branch before opening the PR.
      - name: Wait for commit to propagate
        run: sleep 15

      - name: Create Pull Request
        run: |
          gh pr create --repo "${UPSTREAM_OWNER}/${UPSTREAM_REPO_NAME}" \
            --title "$pr_title" \
            --body "$pr_body" \
            --head "${REPO_OWNER}:${UPDATER_BRANCH}" \
            --base "$BRANCH_NAME"
        env:
          pr_title: "[Kubeflow-Training Action] Update notebook's pipfile to sync with Kubeflow-Training SDK release ${{ env.TRAINING_SDK_RELEASE_VERSION }}"
          pr_body: |
            :rocket: This is an automated Pull Request generated by [odh-kfto-sdk-notebooks-sync.yaml](https://github.com/opendatahub-io/training-operator/tree/dev/.github/workflows/odh-kfto-sdk-notebooks-sync.yaml) workflow.

            This PR updates the `Pipfile` to sync with latest Kubeflow-Training SDK release.
46 changes: 46 additions & 0 deletions .github/workflows/odh-release.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This workflow will handle the release process

name: ODH Release
on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Tag to be used for release, i.e.: v0.0.1-odh-1'
        required: true
  push:
    tags:
      - '*'
jobs:
  release-odh:
    runs-on: ubuntu-latest

    # Permission required to create a release
    permissions:
      contents: write

    env:
      # A manual run supplies the tag through the `version` input. A tag push
      # has no inputs, so fall back to the name of the pushed tag; otherwise
      # the steps below would run `gh release` with an empty tag argument.
      VERSION: ${{ github.event.inputs.version || github.ref_name }}

    steps:
      - uses: actions/checkout@v4

      - name: Set Go
        uses: actions/setup-go@v5
        with:
          go-version-file: './go.mod'

      # `gh release view` exits 0 only when the release can be viewed, which is
      # treated as "already exists". Used as an `if` condition, its non-zero
      # exit does not abort the step under `bash -e`.
      - name: Verify that release doesn't exist yet
        shell: bash
        run: |
          if gh release view "${VERSION}"; then
            echo "Release ${VERSION} already exists."
            exit 1
          fi
        env:
          GITHUB_TOKEN: ${{ github.token }}

      # The tag and target ref are passed through the environment and quoted,
      # so user-supplied input is never spliced into the script text.
      - name: Creates a release in GitHub
        run: |
          gh release create "${VERSION}" --target "${TARGET_REF}"
        env:
          GITHUB_TOKEN: ${{ secrets.CODEFLARE_MACHINE_ACCOUNT_TOKEN }}
          TARGET_REF: ${{ github.ref }}
        shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/publish-conformance-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
uses: ./.github/workflows/build-and-publish-images.yaml
with:
component-name: ${{ matrix.component-name }}
platforms: linux/amd64,linux/arm64,linux/ppc64le
platforms: linux/amd64,linux/arm64,linux/ppc64le,linux/s390x
dockerfile: ${{ matrix.dockerfile }}
secrets:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/template-publish-image/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ runs:
- name: Setup QEMU
uses: docker/setup-qemu-action@v3
with:
platforms: amd64,ppc64le,arm64
platforms: amd64,ppc64le,arm64,s390x

- name: Set Up Docker Buildx
uses: docker/setup-buildx-action@v3
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified

deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
cd manifests/overlays/standalone && $(KUSTOMIZE) edit set image kubeflow/training-operator=${IMG}
$(KUSTOMIZE) build manifests/overlays/standalone | kubectl apply -f -
$(KUSTOMIZE) build manifests/overlays/standalone | kubectl apply --server-side -f -

undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config.
$(KUSTOMIZE) build manifests/overlays/standalone | kubectl delete -f -
Expand Down
Loading