diff --git a/development/app/Makefile b/development/app/Makefile index e54a3d9e..533a1b02 100644 --- a/development/app/Makefile +++ b/development/app/Makefile @@ -94,5 +94,15 @@ test-gateway: "temperature": 0.7 \ }' +test-gateway2: + curl -v http://localhost:8888/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ \ + "model": "llama2-7b", \ + "prompt": "Say this is a test!", \ + "temperature": 0.0, \ + "max_tokens": 512 \ + }' + metrics: curl http://localhost:8000/metrics \ No newline at end of file diff --git a/development/app/config/heterogeneous/simulator_a40/patch_podautoscaler_a40.yaml b/development/app/config/heterogeneous/simulator_a40/patch_podautoscaler_a40.yaml index 339c87a2..a85c14ba 100644 --- a/development/app/config/heterogeneous/simulator_a40/patch_podautoscaler_a40.yaml +++ b/development/app/config/heterogeneous/simulator_a40/patch_podautoscaler_a40.yaml @@ -3,13 +3,17 @@ apiVersion: autoscaling.aibrix.ai/v1alpha1 kind: PodAutoscaler metadata: name: podautoscaler-simulator-llama2-7b-a40 + labels: + kpa.autoscaling.aibrix.ai/scale-down-delay: 0s spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment name: simulator-llama2-7b-a40 metricsSources: - - endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 + - metricSourceType: domain + protocolType: http + endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 path: /metrics/default/simulator-llama2-7b-a40 - metric: "vllm:deployment_replicas" - targetValue: "1" \ No newline at end of file + targetMetric: "vllm:deployment_replicas" + targetValue: "1" \ No newline at end of file diff --git a/development/app/config/simulator/patch_podautoscaler_a100.yaml b/development/app/config/simulator/patch_podautoscaler_a100.yaml index 09836449..b6a9f0dd 100644 --- a/development/app/config/simulator/patch_podautoscaler_a100.yaml +++ b/development/app/config/simulator/patch_podautoscaler_a100.yaml @@ -3,13 +3,17 @@ apiVersion: autoscaling.aibrix.ai/v1alpha1 kind: PodAutoscaler metadata: name: podautoscaler-simulator-llama2-7b-a100 + labels: + kpa.autoscaling.aibrix.ai/scale-down-delay: 30s spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment name: simulator-llama2-7b-a100 metricsSources: - - endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 + - metricSourceType: domain + protocolType: http + endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 path: /metrics/default/simulator-llama2-7b-a100 - metric: "vllm:deployment_replicas" - targetValue: "1" \ No newline at end of file + targetMetric: "vllm:deployment_replicas" + targetValue: "1" \ No newline at end of file diff --git a/development/app/config/templates/podautoscaler/kustomization.yaml b/development/app/config/templates/podautoscaler/kustomization.yaml index 77628acc..54f0b3d4 100644 --- a/development/app/config/templates/podautoscaler/kustomization.yaml +++ b/development/app/config/templates/podautoscaler/kustomization.yaml @@ -1,2 +1,2 @@ resources: - - podautoscaler.yaml + - podautoscaler_kpa.yaml diff --git a/go.mod b/go.mod index 664e54b7..bed459e1 100644 --- a/go.mod +++ b/go.mod @@ -67,6 +67,8 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect + github.com/nxadm/tail v1.4.8 // indirect + github.com/onsi/ginkgo v1.16.5 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.19.1 // indirect @@ -94,6 +96,7 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.29.6 // indirect diff --git a/go.sum b/go.sum index 3c8da0f1..e28945bd 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,8 @@ github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lSh github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= @@ -49,16 +51,26 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.22.1 h1:40JcKH+bBNGFczGuoBYgX4I6m/i27HYW8P9FDk5PbgA= github.com/go-playground/validator/v10 v10.22.1/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= @@ -73,6 +85,7 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= @@ -101,8 +114,17 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g= github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/openai/openai-go v0.1.0-alpha.34 h1:mKz2UYTlGOQvsN3piK1wdYzpJP769aLyrWuEJ5Qi7xc= @@ -133,6 +155,7 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -164,22 +187,31 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= @@ -194,6 +226,7 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= @@ -207,15 +240,27 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/pkg/controller/podautoscaler/metrics/client.go b/pkg/controller/podautoscaler/metrics/client.go index 054ad1a1..ade70595 100644 --- a/pkg/controller/podautoscaler/metrics/client.go +++ b/pkg/controller/podautoscaler/metrics/client.go @@ -136,8 +136,7 @@ func (c *KPAMetricsClient) GetMetricsFromPods(ctx context.Context, pods []corev1 } func (c *KPAMetricsClient) GetMetricFromSource(ctx context.Context, source autoscalingv1alpha1.MetricSource) (float64, error) { - // Retrieve metrics from a list of pods - return c.fetcher.FetchMetric(ctx, "", source.Endpoint, source.Path, source.TargetMetric) + return GetMetricFromSource(ctx, c.fetcher, source) } type APAMetricsClient struct { @@ -222,5 +221,5 @@ func (c *APAMetricsClient) GetMetricsFromPods(ctx context.Context, pods []corev1 } func (c *APAMetricsClient) GetMetricFromSource(ctx context.Context, source autoscalingv1alpha1.MetricSource) (float64, error) { - return c.fetcher.FetchMetric(ctx, "", source.Endpoint, source.Path, source.TargetMetric) + return GetMetricFromSource(ctx, c.fetcher, source) } diff --git a/pkg/controller/podautoscaler/metrics/client_test.go b/pkg/controller/podautoscaler/metrics/client_test.go new file mode 100644 index 00000000..222d4226 --- /dev/null +++ b/pkg/controller/podautoscaler/metrics/client_test.go @@ -0,0 +1,108 @@ +/* +Copyright 2024 The Aibrix Team. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "context" + "testing" + + autoscalingv1alpha1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1" + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +func TestProxy(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Metrics Suite") +} + +// MetricFetcherRecorder records url used for fetching metrics. +type MetricFetcherRecorder struct { + RestMetricsFetcher + + url string +} + +func NewMetricFetcherRecorder() *MetricFetcherRecorder { + recorder := &MetricFetcherRecorder{} + recorder.RestMetricsFetcher.test_url_setter = func(url string) { + recorder.url = url + } + return recorder +} + +func GetDomainMetricSource0() autoscalingv1alpha1.MetricSource { + return autoscalingv1alpha1.MetricSource{ + MetricSourceType: "domain", + ProtocolType: "http", + Endpoint: "example.com", + Path: "/metrics", + } +} + +func GetDomainMetricSource1() autoscalingv1alpha1.MetricSource { + return autoscalingv1alpha1.MetricSource{ + MetricSourceType: "domain", + ProtocolType: "http", + Endpoint: "example.com:8080", + Path: "/metrics", + } +} + +func GetDomainMetricSource2() autoscalingv1alpha1.MetricSource { + return autoscalingv1alpha1.MetricSource{ + MetricSourceType: "domain", + ProtocolType: "https", + Endpoint: "example.com", + Port: "8000", + Path: "metrics", + } +} + +func GetDomainMetricSource3() autoscalingv1alpha1.MetricSource { + return autoscalingv1alpha1.MetricSource{ + MetricSourceType: "domain", + ProtocolType: "https", + Endpoint: "example.com:8080", + Port: "8000", + Path: "metrics", + } +} + +var _ = Describe("KPAMetricsClient", func() { + It("should client fetch metrics from correct url", func() { + recorder := NewMetricFetcherRecorder() + client := NewKPAMetricsClient(recorder, 0, 0) + + _, err := client.GetMetricFromSource(context.Background(), GetDomainMetricSource0()) + Expect(err).To(BeNil()) + Expect(recorder.url).To(Equal("http://example.com/metrics")) + + _, err = client.GetMetricFromSource(context.Background(), GetDomainMetricSource1()) + Expect(err).To(BeNil()) + Expect(recorder.url).To(Equal("http://example.com:8080/metrics")) + + _, err = client.GetMetricFromSource(context.Background(), GetDomainMetricSource2()) + Expect(err).To(BeNil()) + Expect(recorder.url).To(Equal("https://example.com:8000/metrics")) + + _, err = client.GetMetricFromSource(context.Background(), GetDomainMetricSource3()) + Expect(err).To(BeNil()) + Expect(recorder.url).To(Equal("https://example.com:8080/metrics")) + }) + +}) diff --git a/pkg/controller/podautoscaler/metrics/fetcher.go b/pkg/controller/podautoscaler/metrics/fetcher.go index 957699d1..58d8a82b 100644 --- a/pkg/controller/podautoscaler/metrics/fetcher.go +++ b/pkg/controller/podautoscaler/metrics/fetcher.go @@ -59,7 +59,10 @@ func (f *abstractMetricsFetcher) FetchMetric(ctx context.Context, pod v1.Pod, me } // RestMetricsFetcher implements MetricFetcher to fetch metrics from Pod's /metrics endpoint. -type RestMetricsFetcher struct{} +type RestMetricsFetcher struct { + // For unit test purpose only + test_url_setter func(string) +} var _ MetricFetcher = (*RestMetricsFetcher)(nil) @@ -71,6 +74,10 @@ func (f *RestMetricsFetcher) FetchPodMetrics(ctx context.Context, pod v1.Pod, so func (f *RestMetricsFetcher) FetchMetric(ctx context.Context, protocol autoscalingv1alpha1.ProtocolType, endpoint, path, metricName string) (float64, error) { // Use http to fetch endpoint url := fmt.Sprintf("%s://%s/%s", protocol, endpoint, strings.TrimLeft(path, "/")) + if f.test_url_setter != nil { + f.test_url_setter(url) + return 0.0, nil + } // Create request with context, so that the request will be canceled if the context is canceled req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) @@ -104,6 +111,10 @@ func (f *RestMetricsFetcher) FetchMetric(ctx context.Context, protocol autoscali return metricValue, nil } +func (f *RestMetricsFetcher) _get_url(protocol autoscalingv1alpha1.ProtocolType, endpoint, path string) string { + return fmt.Sprintf("%s://%s/%s", protocol, endpoint, strings.TrimLeft(path, "/")) +} + // ResourceMetricsFetcher fetches resource metrics from Kubernetes metrics API (metrics.k8s.io). type ResourceMetricsFetcher struct { abstractMetricsFetcher diff --git a/pkg/controller/podautoscaler/metrics/utils.go b/pkg/controller/podautoscaler/metrics/utils.go index 812f4810..5554ccd4 100644 --- a/pkg/controller/podautoscaler/metrics/utils.go +++ b/pkg/controller/podautoscaler/metrics/utils.go @@ -19,6 +19,7 @@ package metrics import ( "context" "fmt" + "net/url" "os" "strconv" "strings" @@ -121,6 +122,19 @@ func GetMetricsFromPods(ctx context.Context, fetcher MetricFetcher, pods []corev return metrics, nil } +func GetMetricFromSource(ctx context.Context, fetcher MetricFetcher, source autoscalingv1alpha1.MetricSource) (float64, error) { + endpoint := source.Endpoint + + // If port specified, try override. + if source.Port != "" { + u := url.URL{Host: source.Endpoint} + if u.Port() == "" { + endpoint = fmt.Sprintf("%s:%s", u.Hostname(), source.Port) + } + } + return fetcher.FetchMetric(ctx, source.ProtocolType, endpoint, source.Path, source.TargetMetric) +} + // getEnvKey retrieves the value of the environment variable named by the key. // If the variable is present, the function returns the value and a boolean true. // If the variable is not present, the function returns an empty string and a boolean false. diff --git a/python/aibrix/aibrix/gpu_optimizer/Makefile b/python/aibrix/aibrix/gpu_optimizer/Makefile index bec0f214..dbbc43fb 100644 --- a/python/aibrix/aibrix/gpu_optimizer/Makefile +++ b/python/aibrix/aibrix/gpu_optimizer/Makefile @@ -2,6 +2,7 @@ all: build DP ?= profiling DATASET ?= [set your DATASET path] +COST ?= 1.0 .PHONY: benchmark benchmark: @@ -9,10 +10,12 @@ benchmark: .PHONY: gen-profile gen-profile: - python optimizer/profiling/gen_profile.py $(DP) -o "redis://localhost:6379/?model=llama2-7b" + python optimizer/profiling/gen_profile.py $(DP) --cost $(COST) -o "redis://localhost:6379/?model=llama2-7b" .PHONY: debug-init debug-init: + ps aux | grep "8080:8080" | grep -v grep | awk '{print $$2}' | xargs kill -9 + ps aux | grep "6379:6379" | grep -v grep | awk '{print $$2}' | xargs kill -9 kubectl -n aibrix-system port-forward svc/aibrix-gpu-optimizer 8080:8080 1>/dev/null 2>&1 & kubectl -n aibrix-system port-forward svc/aibrix-redis-master 6379:6379 1>/dev/null 2>&1 & diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py index 5f3e4414..c2d09c63 100644 --- a/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py @@ -306,13 +306,19 @@ def _update_profile(self, profile: GPUProfile) -> bool: break # deployment existed if deployment_key is not None: - self._lock.acquire(blocking=True) - if profile.gpu in self.deployments: # double check - self._optimizer.set_profile(profile) - self._cost += cost_diff * self.deployments[key].replicas - else: - log_event = False - self._lock.release() + try: + self._lock.acquire(blocking=True) + if profile.gpu in self.deployments: # double check + self.deployments[key].profile = profile + self._optimizer.set_profile(profile) + self._cost += cost_diff * self.deployments[key].replicas + else: + log_event = False + except Exception as e: + raise e + finally: + self._lock.release() + if log_event: logger.info( f"Profile added to {profile.gpu}. Optimizer will consider corresponding GPU." @@ -446,11 +452,6 @@ def _match_profile(self, key, deployment_name) -> Optional[GPUProfile]: profile: GPUProfile = self._profiles[deployment_name] profile.gpu = key return profile - elif self.debug: - # Copy the debug profile and override the gpu name with given key - copy = GPUProfile(**debug_gpu_profile.__dict__) - copy.gpu = key - return copy return None diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py index 39b6f555..85ec4a29 100644 --- a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py @@ -86,7 +86,6 @@ async def send_request( ) -> None: headers = { "User-Agent": "Benchmark Client", - "user": "your-user-name", "model": model, } streaming = True @@ -101,7 +100,7 @@ async def send_request( # "top_p": 1.0, "max_tokens": output_len, # "ignore_eos": True, - # "stream": stream, + # "stream": streaming } if next_in > 0.0: pload["next_in"] = next_in