Skip to content

Commit

Permalink
Merge pull request #876 from ArangoGutierrez/reg_test02
Browse files Browse the repository at this point in the history
Add remote-test option for E2E
  • Loading branch information
ArangoGutierrez authored Feb 4, 2025
2 parents df4c87b + 6164059 commit 78d6cdc
Show file tree
Hide file tree
Showing 111 changed files with 17,890 additions and 43 deletions.
21 changes: 20 additions & 1 deletion tests/e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,28 @@ include $(CURDIR)/versions.mk

E2E_RUNTIME ?= docker

E2E_INSTALL_CTK ?= false

ifeq ($($(DIST)),)
DIST ?= ubuntu20.04
endif
IMAGE_TAG ?= $(VERSION)-$(DIST)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)

E2E_SSH_KEY ?=
E2E_SSH_USER ?=
E2E_SSH_HOST ?=
E2E_SSH_PORT ?= 22

.PHONY: test
test:
cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
-ginkgo.focus="$(E2E_RUNTIME)" \
-test.timeout=1h \
-ginkgo.v
-ginkgo.v \
-install-ctk=$(E2E_INSTALL_CTK) \
-toolkit-image=$(IMAGE) \
-ssh-key=$(E2E_SSH_KEY) \
-ssh-user=$(E2E_SSH_USER) \
-remote-host=$(E2E_SSH_HOST) \
-remote-port=$(E2E_SSH_PORT)
44 changes: 19 additions & 25 deletions tests/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
package e2e

import (
"bytes"
"context"
"fmt"
"os/exec"
"flag"
"testing"

. "github.com/onsi/ginkgo/v2"
Expand All @@ -30,8 +28,26 @@ import (
// Test context and command-line configuration shared by the E2E suite.
var (
	// ctx is the suite-wide context; it is assigned in BeforeSuite.
	ctx context.Context

	// installCTK is set by -install-ctk and controls whether the NVIDIA
	// Container Toolkit is installed before the tests run.
	installCTK bool

	// image is set by -toolkit-image: the toolkit image under test.
	image string

	// Remote-execution settings. sshPort can be set through either -ssh-port
	// or -remote-port (see init).
	sshKey  string
	sshUser string
	host    string
	sshPort string
)

// init registers the suite's command-line flags on the default flag set so
// that they can be supplied after `go test ... -args`.
func init() {
	flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
	flag.StringVar(&image, "toolkit-image", "", "Repository of the image to test")
	flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
	flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
	flag.StringVar(&host, "remote-host", "", "Hostname of the remote machine")

	// The port is registered under two names bound to the same variable.
	// "ssh-port" is the original spelling; "remote-port" is the one the e2e
	// Makefile actually passes (-remote-port=$(E2E_SSH_PORT)). With only
	// "ssh-port" registered, the Makefile invocation aborts with
	// "flag provided but not defined: -remote-port".
	for _, name := range []string{"ssh-port", "remote-port"} {
		flag.StringVar(&sshPort, name, "22", "SSH port to use for remote login")
	}
}

func TestMain(t *testing.T) {
suiteName := "NVIDIA Container Toolkit E2E"

Expand All @@ -45,25 +61,3 @@ func TestMain(t *testing.T) {
// Register a Ginkgo BeforeSuite hook that initialises the package-level ctx
// with a fresh background context.
var _ = BeforeSuite(func() {
ctx = context.Background()
})

// runScript executes script with `bash -c` on the local machine and returns
// everything the script wrote to standard output.
//
// If the command fails, the returned string is empty and the error carries the
// captured stdout and stderr in its message. The underlying error (for example
// an *exec.ExitError) is wrapped with %w so callers can inspect it with
// errors.Is / errors.As; the message text is the same as with %v.
func runScript(script string) (string, error) {
	cmd := exec.Command("bash", "-c", script)

	// Capture both streams separately so a failure can report each of them.
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("script execution failed: %w\nSTDOUT: %s\nSTDERR: %s", err, stdout.String(), stderr.String())
	}

	return stdout.String(), nil
}
118 changes: 118 additions & 0 deletions tests/e2e/installer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package e2e

import (
"bytes"
"fmt"
"text/template"
)

// dockerInstallTemplate is a text/template source for a bash script that
// installs the NVIDIA Container Toolkit on a host using Docker.
//
// ToolkitInstaller.Install renders it with the installer itself as template
// data, so {{.Image}} expands to ToolkitInstaller.Image. The rendered script:
//   - creates a uniquely named temporary directory under /tmp,
//   - replaces /usr/bin/nvidia-container-runtime-hook with a symlink into that
//     directory, and
//   - runs the toolkit image as a privileged container with the host root,
//     the Docker socket, the temporary directory and /etc/docker mounted.
//
// The text between the backticks is executed at runtime; any edit to it,
// including to its shell comments, changes the script that is run.
var dockerInstallTemplate = `
#! /usr/bin/env bash
set -xe
: ${IMAGE:={{.Image}}}
# Create a temporary directory
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
mkdir -p "$TEMP_DIR"
# Given that docker has an init function that checks for the existence of the
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
# in the /usr/bin directory.
# See https://github.com/moby/moby/blob/20a05dabf44934447d1a66cdd616cc803b81d4e2/daemon/nvidia_linux.go#L32-L46
sudo rm -f /usr/bin/nvidia-container-runtime-hook
sudo ln -s "$TEMP_DIR/toolkit/nvidia-container-runtime-hook" /usr/bin/nvidia-container-runtime-hook
docker run --pid=host --rm -i --privileged \
-v /:/host \
-v /var/run/docker.sock:/var/run/docker.sock \
-v "$TEMP_DIR:$TEMP_DIR" \
-v /etc/docker:/config-root \
${IMAGE} \
--root "$TEMP_DIR" \
--runtime=docker \
--config=/config-root/daemon.json \
--driver-root=/ \
--no-daemon \
--restart-mode=systemd
`

// ToolkitInstaller renders an install script from a template and executes it
// through a Runner. Create one with NewToolkitInstaller.
type ToolkitInstaller struct {
// runner executes the rendered script (see Install).
runner Runner
// template is the text/template source of the install script.
template string

// Image is exposed to the template as {{.Image}}; NewToolkitInstaller
// rejects an empty value.
Image string
}

// installerOption is a functional option that mutates a ToolkitInstaller; the
// options passed to NewToolkitInstaller are applied in order.
type installerOption func(*ToolkitInstaller)

// WithRunner returns an option that makes the installer execute its script
// through r instead of the default runner.
func WithRunner(r Runner) installerOption {
	apply := func(installer *ToolkitInstaller) {
		installer.runner = r
	}
	return apply
}

// WithImage returns an option that sets the image substituted into the
// install script.
func WithImage(image string) installerOption {
	apply := func(installer *ToolkitInstaller) {
		installer.Image = image
	}
	return apply
}

// WithTemplate returns an option that replaces the install-script template
// source used by the installer.
func WithTemplate(template string) installerOption {
	apply := func(installer *ToolkitInstaller) {
		installer.template = template
	}
	return apply
}

// NewToolkitInstaller builds a ToolkitInstaller that defaults to a local
// runner and the Docker install template, then applies opts in order.
// It returns an error if no image has been set once all options have run.
func NewToolkitInstaller(opts ...installerOption) (*ToolkitInstaller, error) {
	installer := new(ToolkitInstaller)
	installer.runner = localRunner{}
	installer.template = dockerInstallTemplate

	for idx := range opts {
		opts[idx](installer)
	}

	if installer.Image != "" {
		return installer, nil
	}
	return nil, fmt.Errorf("image is required")
}

// Install renders the installer's script template, using the installer itself
// as the template data, and executes the result through the configured runner.
// Parse and render failures are wrapped with context; the runner's error is
// returned unchanged and its output is discarded.
func (i *ToolkitInstaller) Install() error {
	parsed, parseErr := template.New("installScript").Parse(i.template)
	if parseErr != nil {
		return fmt.Errorf("error parsing template: %w", parseErr)
	}

	script := new(bytes.Buffer)
	if execErr := parsed.Execute(script, i); execErr != nil {
		return fmt.Errorf("error executing template: %w", execErr)
	}

	_, _, runErr := i.runner.Run(script.String())
	return runErr
}
56 changes: 39 additions & 17 deletions tests/e2e/nvidia-container-toolkit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,29 @@ import (
)

// Integration tests for Docker runtime
var _ = Describe("docker", func() {
var _ = Describe("docker", Ordered, func() {
var r Runner

// Install the NVIDIA Container Toolkit
BeforeAll(func(ctx context.Context) {
r = NewRunner(
WithHost(host),
WithPort(sshPort),
WithSshKey(sshKey),
WithSshUser(sshUser),
)
if installCTK {
installer, err := NewToolkitInstaller(
WithRunner(r),
WithImage(image),
WithTemplate(dockerInstallTemplate),
)
Expect(err).ToNot(HaveOccurred())
err = installer.Install()
Expect(err).ToNot(HaveOccurred())
}
})

// GPUs are accessible in a container: Running nvidia-smi -L inside the
// container shows the same output inside the container as outside the
// container. This means that the following commands must all produce
Expand All @@ -33,33 +55,33 @@ var _ = Describe("docker", func() {
var hostOutput string

BeforeAll(func(ctx context.Context) {
_, err := runScript("docker pull ubuntu")
_, _, err := r.Run("docker pull ubuntu")
Expect(err).ToNot(HaveOccurred())

hostOutput, err = runScript("nvidia-smi -L")
hostOutput, _, err = r.Run("nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
})

It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support automatic CDI spec generation", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})
Expand All @@ -69,34 +91,34 @@ var _ = Describe("docker", func() {
// The following should all produce the same result.
When("Running the cuda-vectorAdd sample", Ordered, func() {
BeforeAll(func(ctx context.Context) {
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
})

var referenceOutput string

It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
var err error
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())

Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
})

It("should support automatic CDI spec generation", func(ctx context.Context) {
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out2))
})

It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out3))
})

It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out4))
})
Expand All @@ -106,34 +128,34 @@ var _ = Describe("docker", func() {
// The following should all produce the same result.
When("Running the cuda-deviceQuery sample", Ordered, func() {
BeforeAll(func(ctx context.Context) {
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
})

var referenceOutput string

It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
var err error
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())

Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
})

It("should support automatic CDI spec generation", func(ctx context.Context) {
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out2))
})

It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out3))
})

It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out4))
})
Expand Down
Loading

0 comments on commit 78d6cdc

Please sign in to comment.