diff --git a/Makefile b/Makefile
index f3160a59..aaee8460 100644
--- a/Makefile
+++ b/Makefile
@@ -83,7 +83,7 @@ test: manifests generate fmt vet envtest ## Run tests.
 # Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors.
 .PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up.
 test-e2e:
-	go test ./test/e2e/ -v -ginkgo.v
+	./test/run-e2e-tests.sh
 
 .PHONY: lint
 lint: golangci-lint ## Run golangci-lint linter & yamllint
diff --git a/hack/kind_config.yaml b/hack/kind_config.yaml
new file mode 100644
index 00000000..f4bf78cb
--- /dev/null
+++ b/hack/kind_config.yaml
@@ -0,0 +1,7 @@
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+nodes:
+  - role: control-plane
+  - role: worker
+  - role: worker
+  - role: worker
diff --git a/test/README.md b/test/README.md
new file mode 100644
index 00000000..0de87562
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,6 @@
+To run the e2e tests, use the options below:
+
+- Use KIND_E2E=true if kind cluster setup is required.
+- Use INSTALL_AIBRIX=true if installing aibrix components is required.
+
+KIND_E2E=true INSTALL_AIBRIX=true make test-e2e
\ No newline at end of file
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
deleted file mode 100644
index 4d151845..00000000
--- a/test/e2e/e2e_suite_test.go
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-Copyright 2024 The Aibrix Team.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package e2e
-
-import (
-	"fmt"
-	"testing"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-// Run e2e tests using the Ginkgo runner.
-func TestE2E(t *testing.T) {
-	RegisterFailHandler(Fail)
-	fmt.Fprintf(GinkgoWriter, "Starting aibrix suite\n")
-	RunSpecs(t, "e2e suite")
-}
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index a91f0573..aa772712 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -17,106 +17,158 @@ limitations under the License.
 package e2e
 
 import (
-	"fmt"
-	"os/exec"
-	"time"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-
-	"github.com/aibrix/aibrix/test/utils"
+	"context"
+	"net/http"
+	"os"
+	"testing"
+
+	v1alpha1 "github.com/aibrix/aibrix/pkg/client/clientset/versioned"
+	crdinformers "github.com/aibrix/aibrix/pkg/client/informers/externalversions"
+	"github.com/openai/openai-go"
+	"github.com/openai/openai-go/option"
+	"github.com/stretchr/testify/assert"
+	"k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/client-go/informers"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/klog/v2"
 )
 
-const namespace = "aibrix-system"
-
-var _ = Describe("controller", Ordered, func() {
-	BeforeAll(func() {
-		By("installing prometheus operator")
-		Expect(utils.InstallPrometheusOperator()).To(Succeed())
+const (
+	baseURL   = "http://localhost:8888"
+	apiKey    = "test-key-1234567890"
+	modelName = "llama2-7b"
+	namespace = "aibrix-system"
+)
 
-		By("installing the cert-manager")
-		Expect(utils.InstallCertManager()).To(Succeed())
+// TestBaseModelInference sends one chat completion for modelName to the
+// endpoint at baseURL and checks that the response reports the same model.
+func TestBaseModelInference(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel() // closes ctx.Done(), the stop channel handed to the informers
+	initializeClient(ctx, t)
 
-		By("creating manager namespace")
-		cmd := exec.Command("kubectl", "create", "ns", namespace)
-		_, _ = utils.Run(cmd)
+	client := createOpenAIClient(baseURL, apiKey)
+	chatCompletion, err := client.Chat.Completions.New(ctx, openai.ChatCompletionNewParams{
+		Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
+			openai.UserMessage("Say this is a test"),
+		}),
+		Model: openai.F(modelName),
 	})
-
-	AfterAll(func() {
-		By("uninstalling the Prometheus manager bundle")
-		utils.UninstallPrometheusOperator()
-
-		By("uninstalling the cert-manager bundle")
-		utils.UninstallCertManager()
-
-		By("removing manager namespace")
-		cmd := exec.Command("kubectl", "delete", "ns", namespace)
-		_, _ = utils.Run(cmd)
+	if err != nil {
+		// Fatal, not Error: chatCompletion may be nil here and is dereferenced below.
+		t.Fatalf("chat completions failed: %v", err)
+	}
+	assert.Equal(t, modelName, chatCompletion.Model)
+}
+
+// TestBaseModelInferenceFailures checks that requests with an invalid API key,
+// an unknown model name, or an unknown routing strategy are rejected.
+func TestBaseModelInferenceFailures(t *testing.T) {
+	// error on invalid api key
+	client := createOpenAIClient(baseURL, "fake-api-key")
+	_, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
+		Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
+			openai.UserMessage("Say this is a test"),
+		}),
+		Model: openai.F(modelName),
 	})
-
-	Context("Operator", func() {
-		It("should run successfully", func() {
-			var controllerPodName string
-			var err error
-
-			// projectimage stores the name of the image used in the example
-			var projectimage = "example.com/aibrix:v0.0.1"
-
-			By("building the manager(Operator) image")
-			cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage))
-			_, err = utils.Run(cmd)
-			ExpectWithOffset(1, err).NotTo(HaveOccurred())
-
-			By("loading the the manager(Operator) image on Kind")
-			err = utils.LoadImageToKindClusterWithName(projectimage)
-			ExpectWithOffset(1, err).NotTo(HaveOccurred())
-
-			By("installing CRDs")
-			cmd = exec.Command("make", "install")
-			_, err = utils.Run(cmd)
-			ExpectWithOffset(1, err).NotTo(HaveOccurred())
-
-			By("deploying the controller-manager")
-			cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectimage))
-			_, err = utils.Run(cmd)
-			ExpectWithOffset(1, err).NotTo(HaveOccurred())
-
-			By("validating that the controller-manager pod is running as expected")
-			verifyControllerUp := func() error {
-				// Get pod name
-
-				cmd = exec.Command("kubectl", "get",
-					"pods", "-l", "control-plane=controller-manager",
-					"-o", "go-template={{ range .items }}"+
-						"{{ if not .metadata.deletionTimestamp }}"+
-						"{{ .metadata.name }}"+
-						"{{ \"\\n\" }}{{ end }}{{ end }}",
-					"-n", namespace,
-				)
-
-				podOutput, err := utils.Run(cmd)
-				ExpectWithOffset(2, err).NotTo(HaveOccurred())
-				podNames := utils.GetNonEmptyLines(string(podOutput))
-				if len(podNames) != 1 {
-					return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames))
-				}
-				controllerPodName = podNames[0]
-				ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager"))
-
-				// Validate pod status
-				cmd = exec.Command("kubectl", "get",
-					"pods", controllerPodName, "-o", "jsonpath={.status.phase}",
-					"-n", namespace,
-				)
-				status, err := utils.Run(cmd)
-				ExpectWithOffset(2, err).NotTo(HaveOccurred())
-				if string(status) != "Running" {
-					return fmt.Errorf("controller pod in %s status", status)
-				}
-				return nil
-			}
-			EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed())
-
-		})
+	// Check err for nil before calling err.Error(), which would panic on nil.
+	if err == nil {
+		t.Fatal("500 Internal Server Error expected for invalid api-key")
+	}
+	assert.Contains(t, err.Error(), "500 Internal Server Error")
+
+	// error on invalid model name
+	client = createOpenAIClient(baseURL, apiKey)
+	_, err = client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
+		Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
+			openai.UserMessage("Say this is a test"),
+		}),
+		Model: openai.F("fake-model-name"),
+	})
+	if err == nil {
+		t.Fatal("400 Bad Request expected for invalid model name")
+	}
+	assert.Contains(t, err.Error(), "400 Bad Request")
+
+	// invalid routing strategy
+	// NOTE(review): unlike createOpenAIClient, this client installs no "/v1"
+	// path middleware - confirm that is intended.
+	client = openai.NewClient(
+		option.WithBaseURL(baseURL),
+		option.WithAPIKey(apiKey),
+		option.WithHeader("routing-strategy", "invalid-routing-strategy"),
+	)
+	_, err = client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
+		Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
+			openai.UserMessage("Say this is a test"),
+		}),
+		Model: openai.F(modelName),
 	})
-})
+	if err == nil {
+		t.Fatal("400 Bad Request expected for invalid routing-strategy")
+	}
+	assert.Contains(t, err.Error(), "400 Bad Request")
+}
+
+// initializeClient builds the Kubernetes and aibrix CRD clientsets from the
+// kubeconfig file named by $KUBECONFIG, starts pod and ModelAdapter informers
+// that use ctx.Done() as their stop channel, and waits for both caches to
+// sync. Any setup failure stops the calling test immediately.
+func initializeClient(ctx context.Context, t *testing.T) (*kubernetes.Clientset, *v1alpha1.Clientset) {
+	t.Helper()
+
+	var err error
+	var config *rest.Config
+
+	kubeConfig := os.Getenv("KUBECONFIG")
+	if kubeConfig == "" {
+		t.Fatal("kubeConfig not set")
+	}
+	klog.Infof("using configuration from '%s'", kubeConfig)
+
+	config, err = clientcmd.BuildConfigFromFlags("", kubeConfig)
+	if err != nil {
+		t.Fatalf("Error during client creation with %v", err)
+	}
+	k8sClientSet, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		t.Fatalf("Error during client creation with %v", err)
+	}
+	crdClientSet, err := v1alpha1.NewForConfig(config)
+	if err != nil {
+		t.Fatalf("Error during client creation with %v", err)
+	}
+
+	factory := informers.NewSharedInformerFactoryWithOptions(k8sClientSet, 0)
+	crdFactory := crdinformers.NewSharedInformerFactoryWithOptions(crdClientSet, 0)
+
+	podInformer := factory.Core().V1().Pods().Informer()
+	modelInformer := crdFactory.Model().V1alpha1().ModelAdapters().Informer()
+
+	defer runtime.HandleCrash()
+	factory.Start(ctx.Done())
+	crdFactory.Start(ctx.Done())
+
+	if !cache.WaitForCacheSync(ctx.Done(), podInformer.HasSynced, modelInformer.HasSynced) {
+		t.Fatal("timed out waiting for caches to sync")
+	}
+
+	return k8sClientSet, crdClientSet
+}
+
+// createOpenAIClient returns an OpenAI client for baseURL that authenticates
+// with apiKey and prefixes every request path with "/v1" via a middleware.
+func createOpenAIClient(baseURL, apiKey string) *openai.Client {
+	return openai.NewClient(
+		option.WithBaseURL(baseURL),
+		option.WithAPIKey(apiKey),
+		option.WithMiddleware(func(r *http.Request, mn option.MiddlewareNext) (*http.Response, error) {
+			r.URL.Path = "/v1" + r.URL.Path
+			return mn(r)
+		}),
+	)
+}
diff --git 
a/test/run-e2e-tests.sh b/test/run-e2e-tests.sh
new file mode 100755
index 00000000..0b0ac0b3
--- /dev/null
+++ b/test/run-e2e-tests.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+
+# Copyright 2024 The Aibrix Team.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+set -x
+set -o errexit
+set -o nounset
+
+# Set to empty if unbound/empty.
+# The SKIP_*_INSTALL and SET_KUBECONFIG defaults use ${VAR-true} (no colon) so
+# that exporting the variable as an empty string switches the default off; with
+# ${VAR:-true} an empty value was replaced by "true" and could never disable it.
+KIND_E2E=${KIND_E2E:-}
+SKIP_KUBECTL_INSTALL=${SKIP_KUBECTL_INSTALL-true}
+SKIP_KIND_INSTALL=${SKIP_KIND_INSTALL-true}
+SKIP_INSTALL=${SKIP_INSTALL:-}
+SET_KUBECONFIG=${SET_KUBECONFIG-true}
+INSTALL_AIBRIX=${INSTALL_AIBRIX:-}
+KIND_SUDO=${KIND_SUDO:-}
+
+# setup kind cluster
+if [ -n "$KIND_E2E" ]; then
+  K8S_VERSION=${KUBERNETES_VERSION:-v1.32.0}
+  if [ -z "${SKIP_KUBECTL_INSTALL}" ]; then
+    curl -Lo kubectl "https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/amd64/kubectl" && chmod +x kubectl && mv kubectl /usr/local/bin/
+  fi
+  if [ -z "${SKIP_KIND_INSTALL}" ]; then
+    wget https://github.com/kubernetes-sigs/kind/releases/download/v0.26.0/kind-linux-amd64
+    chmod +x kind-linux-amd64
+    mv kind-linux-amd64 kind
+    export PATH=$PATH:$PWD
+  fi
+
+  # If we did not set SKIP_INSTALL
+  if [ -z "$SKIP_INSTALL" ]; then
+    ${KIND_SUDO} kind create cluster --image "kindest/node:${K8S_VERSION}" --config=./hack/kind_config.yaml
+  fi
+fi
+
+if [ -n "$SET_KUBECONFIG" ]; then
+  kind get kubeconfig > /tmp/admin.conf
+  export KUBECONFIG=/tmp/admin.conf
+fi
+
+# build images
+if [ -n "$INSTALL_AIBRIX" ]; then
+  PORT_FORWARD_PIDS=""
+
+  # Registered before anything is deployed so that a failure part-way through
+  # the install still tears down whatever was already created.
+  function cleanup {
+    echo "Cleaning up..."
+    # stop the background port-forwards started below
+    for pid in ${PORT_FORWARD_PIDS}; do
+      kill "${pid}" 2>/dev/null || true
+    done
+    # clean up env at end; best-effort so one failed delete does not skip the rest
+    kubectl delete --ignore-not-found=true -k config/default || true
+    kubectl delete --ignore-not-found=true -k config/dependency || true
+    kubectl delete --ignore-not-found=true -k development/app/config/mock || true
+  }
+
+  trap cleanup EXIT
+
+  make docker-build-all
+  kind load docker-image aibrix/controller-manager:nightly
+  kind load docker-image aibrix/gateway-plugins:nightly
+  kind load docker-image aibrix/metadata-service:nightly
+  kind load docker-image aibrix/runtime:nightly
+
+  # build and deploy mock-app; paths are given from the repo root so that a
+  # failing command cannot leave the script in the wrong working directory
+  docker build -t aibrix/vllm-mock:nightly -f development/app/Dockerfile development/app
+  kind load docker-image aibrix/vllm-mock:nightly
+  kubectl create -k development/app/config/mock
+
+  # install crds and deploy aibrix components
+  kubectl create -k config/dependency
+  kubectl create -k config/default
+
+  kubectl port-forward svc/llama2-7b 8000:8000 &
+  PORT_FORWARD_PIDS="${PORT_FORWARD_PIDS} $!"
+  kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 &
+  PORT_FORWARD_PIDS="${PORT_FORWARD_PIDS} $!"
+fi
+
+# collect_logs prints the aibrix-system pods and their logs. It runs from the
+# ERR trap while errexit is active, so each kubectl call is best-effort.
+collect_logs() {
+  echo "Collecting pods and logs"
+  kubectl get pods -n aibrix-system || true
+
+  for pod in $(kubectl get pods -n aibrix-system -o name); do
+    echo "Logs for ${pod}"
+    kubectl logs -n aibrix-system "${pod}" || true
+  done
+}
+
+trap "collect_logs" ERR
+
+go test ./test/e2e/ -v -timeout 0