squash last few commits

vllm-project · Feb 20, 2025 · cb6c9ce · cb6c9ce
1 parent e863ac0
commit cb6c9ce
Show file tree

Hide file tree

Showing 153 changed files with 400 additions and 355 deletions.
diff --git a/.github/workflows/docker-push-images.yml b/.github/workflows/docker-push-images.yml
@@ -21,12 +21,12 @@ jobs:
         with:
           username: ${{ secrets.DOCKER_HUB_USERNAME }}
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
-      - name: Login to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
+#      - name: Login to the Container registry
+#        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+#        with:
+#          registry: ghcr.io
+#          username: ${{ github.actor }}
+#          password: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Build Container Images
         run: |
@@ -36,10 +36,11 @@ jobs:
         run: |
           make docker-push-all
 
-      - name: Build Container Images with Github Container Registry prefix
-        run: |
-          GIT_COMMIT_HASH=${{ github.sha }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-build-all
-
-      - name: Push Container Images to Github Container Registry
-        run: |
-          GIT_COMMIT_HASH=${{ github.sha }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-push-all
+# TODO: GHCR build/push is temporarily disabled after the org transfer; re-enable it later.
+#      - name: Build Container Images with Github Container Registry prefix
+#        run: |
+#          GIT_COMMIT_HASH=${{ github.sha }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-build-all
+#
+#      - name: Push Container Images to Github Container Registry
+#        run: |
+#          GIT_COMMIT_HASH=${{ github.sha }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-push-all
diff --git a/.github/workflows/release-build.yaml b/.github/workflows/release-build.yaml
@@ -30,12 +30,12 @@ jobs:
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
 
       # Log in to Github Registry
-      - name: Login to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
+#      - name: Login to the Container registry
+#        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+#        with:
+#          registry: ghcr.io
+#          username: ${{ github.actor }}
+#          password: ${{ secrets.GITHUB_TOKEN }}
 
       # Build container images with docker registry namespace
       - name: Build Container Images
@@ -48,14 +48,14 @@ jobs:
           IS_MAIN_BRANCH=false GIT_COMMIT_HASH=${{ github.ref_name }} make docker-push-all
 
       # Build container images with Github registry namespace
-      - name: Build Container Images with Github Container Registry prefix
-        run: |
-          IS_MAIN_BRANCH=false GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-build-all
+#      - name: Build Container Images with Github Container Registry prefix
+#        run: |
+#          IS_MAIN_BRANCH=false GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-build-all
 
       # Push container image to Github container registry
-      - name: Push Container Images to Github Container Registry
-        run: |
-          IS_MAIN_BRANCH=false GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-push-all
+#      - name: Push Container Images to Github Container Registry
+#        run: |
+#          IS_MAIN_BRANCH=false GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-push-all
 
   python-wheel-release:
     runs-on: ubuntu-latest

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -12,25 +12,25 @@ please see the [Development Guide](development/README.md) in the `docs/developme
 
 ### 1. Understanding the Repository Structure
 
-You can refer to the [Design Doc](https://github.com/aibrix/aibrix/tree/main/docs/tutorial) to quickly understand the layout of the AIBrix directory structure.
+You can refer to the [Design Doc](https://github.com/vllm-project/aibrix/tree/main/docs/tutorial) to quickly understand the layout of the AIBrix directory structure.
 
 ### 2. Picking an Issue
 
-Start by picking an [issue](https://github.com/aibrix/aibrix/issues) tagged with "good first issue" to get familiar with the project. Claim the issue by commenting to avoid duplicate efforts.
+Start by picking an [issue](https://github.com/vllm-project/aibrix/issues) tagged with "good first issue" to get familiar with the project. Claim the issue by commenting to avoid duplicate efforts.
 
 ### 3. Submitting Codes
 
 - **Creating Branches**: Use the following naming convention: `/<YOUR_NAME>/[feat|patch|bug_fix] description`
 - **Making Commits**: Write detailed commit messages that explain your changes.
 - **Pull Requests**:
-  - Create a pull request on [PR Page](https://github.com/aibrix/aibrix/pulls) against the master branch.
+  - Create a pull request on [PR Page](https://github.com/vllm-project/aibrix/pulls) against the main branch.
   - Use the provided PR template to describe your changes.
 - **Code Review Process**: Your PR will be reviewed by related contributors. Address feedback to improve and finalize your contribution.
 
 ## Advanced Contributions
 
 - **Feature Proposals and Bug Reporting**: Report bugs or propose new features by creating an issue on our GitHub page. Provide as much detail as possible to facilitate discussions.
-- **Contributing to Documentation**: Help improve our documentation by proposing changes to issue list. Our documentation is at [AIBrix Doc](https://github.com/aibrix/aibrix/tree/main/docs/tutorial)
+- **Contributing to Documentation**: Help improve our documentation by proposing changes through the issue list. Our documentation is at [AIBrix Doc](https://github.com/vllm-project/aibrix/tree/main/docs/tutorial).
 - **Organizing Community Events**: If you are interested in organizing meetups or webinars, contact our community manager (TODO: contact information).
 
 ## Community and Communication

diff --git a/Makefile b/Makefile
@@ -59,6 +59,7 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
 .PHONY: generate
 generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
 	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
+	./hack/update-codegen.sh go $(PROJECT_DIR)/bin
 
 .PHONY: update-codegen
 update-codegen:

diff --git a/PROJECT b/PROJECT
@@ -7,7 +7,7 @@ layout:
 - go.kubebuilder.io/v4
 multigroup: true
 projectName: aibrix
-repo: github.com/aibrix/aibrix
+repo: github.com/vllm-project/aibrix
 resources:
 - api:
     crdVersion: v1
@@ -16,7 +16,7 @@ resources:
   domain: aibrix.ai
   group: autoscaling
   kind: PodAutoscaler
-  path: github.com/aibrix/aibrix/api/autoscaling/v1alpha1
+  path: github.com/vllm-project/aibrix/api/autoscaling/v1alpha1
   version: v1alpha1
 - api:
     crdVersion: v1
@@ -25,7 +25,7 @@ resources:
   domain: aibrix.ai
   group: model
   kind: ModelAdapter
-  path: github.com/aibrix/aibrix/api/model/v1alpha1
+  path: github.com/vllm-project/aibrix/api/model/v1alpha1
   version: v1alpha1
 - api:
     crdVersion: v1
@@ -34,7 +34,7 @@ resources:
   domain: aibrix.ai
   group: orchestration
   kind: RayClusterReplicaSet
-  path: github.com/aibrix/aibrix/api/orchestration/v1alpha1
+  path: github.com/vllm-project/aibrix/api/orchestration/v1alpha1
   version: v1alpha1
 - api:
     crdVersion: v1
@@ -43,7 +43,7 @@ resources:
   domain: aibrix.ai
   group: orchestration
   kind: RayClusterFleet
-  path: github.com/aibrix/aibrix/api/orchestration/v1alpha1
+  path: github.com/vllm-project/aibrix/api/orchestration/v1alpha1
   version: v1alpha1
 - api:
     crdVersion: v1
@@ -52,6 +52,6 @@ resources:
   domain: aibrix.ai
   group: orchestration
   kind: KVCache
-  path: github.com/aibrix/aibrix/api/orchestration/v1alpha1
+  path: github.com/vllm-project/aibrix/api/orchestration/v1alpha1
   version: v1alpha1
 version: "3"
diff --git a/README.md b/README.md
@@ -11,8 +11,13 @@ The initial release includes the following key features:
 - **Distributed Inference**: Scalable architecture to handle large workloads across multiple nodes.
 - **LLM App-Tailored Autoscaler**: Dynamically scale inference resources based on real-time demand.
 - **Unified AI Runtime**: A versatile sidecar enabling metric standardization, model downloading, and management.
+- **Distributed KV Cache**: Enables high-capacity, cross-engine KV reuse.
+- **Cost-efficient Heterogeneous Serving**: Enables mixed GPU inference to reduce costs with SLO guarantees.
 - **GPU Hardware Failure Detection (TBD)**: Proactive detection of GPU hardware issues.
-- **Benchmark Tool (TBD)**: A tool for measuring inference performance and resource efficiency.
+
+## Architecture
+
+![aibrix-architecture-v1](docs/source/assets/images/aibrix-architecture-v1.jpeg)
 
 
 ## Quick Start
@@ -21,7 +26,7 @@ To get started with AIBrix, clone this repository and follow the setup instructi
 
 ```shell
 # Local Testing
-git clone https://github.com/aibrix/aibrix.git
+git clone https://github.com/vllm-project/aibrix.git
 cd aibrix
 
 # Install nightly aibrix dependencies
@@ -34,26 +39,28 @@ kubectl create -k config/default
 Install stable distribution
 ```shell
 # Install component dependencies
-kubectl create -k "github.com/aibrix/aibrix/config/dependency?ref=v0.2.0"
+kubectl create -k "github.com/vllm-project/aibrix/config/dependency?ref=v0.2.0"
 
 # Install aibrix components
-kubectl create -k "github.com/aibrix/aibrix/config/overlays/release?ref=v0.2.0"
+kubectl create -k "github.com/vllm-project/aibrix/config/overlays/release?ref=v0.2.0"
 ```
 
 ## Documentation
 
-For detailed documentation on installation, configuration, and usage, please visit our [documentation page](https://github.com/aibrix/aibrix).
+For detailed documentation on installation, configuration, and usage, please visit our [documentation page](https://aibrix.readthedocs.io/latest/).
 
 ## Contributing
 
-We welcome contributions from the community! Check out our [contributing guidelines](https://github.com/aibrix/aibrix/CONTRIBUTING.md) to see how you can make a difference.
+We welcome contributions from the community! Check out our [contributing guidelines](https://github.com/vllm-project/aibrix/blob/main/CONTRIBUTING.md) to see how you can make a difference.
+
+Slack Channel: https://vllm-dev.slack.com/archives/C07QP347J4D
 
 ## License
 
-AIBrix is licensed under the [APACHE License](https://github.com/aibrix/aibrix/LICENSE.md).
+AIBrix is licensed under the [Apache License](https://github.com/vllm-project/aibrix/blob/main/LICENSE.md).
 
 ## Support
 
-If you have any questions or encounter any issues, please submit an issue on our [GitHub issues page](https://github.com/aibrix/aibrix/issues).
+If you have any questions or encounter any issues, please submit an issue on our [GitHub issues page](https://github.com/vllm-project/aibrix/issues).
 
 Thank you for choosing AIBrix for your GenAI infrastructure needs!
diff --git a/api/model/v1alpha1/zz_generated.deepcopy.go b/api/model/v1alpha1/zz_generated.deepcopy.go
diff --git a/benchmarks/generator/README.md b/benchmarks/generator/README.md
@@ -19,7 +19,7 @@ python workload_generator.py --prompt-file $SHAREGPT_FILE_PATH --interval-ms 100
 
 ### Generate a workload file based on workload patterns (synthetic patterns)
 
-The can generate workload file based on synthetic traffic (qps), input lengths (prompt lengths) and output lengths (completion lengths) patterns. Currently we support 4 patterns (`'quick_rising`, `'slow_rising'`, `'slight_fluctuation'`, `'severe_fluctuation'`), described [here](https://github.com/aibrix/aibrix/blob/main/benchmarks/autoscaling/bench_workload_generator.py).:
+The workload generator can produce a workload file based on synthetic traffic (qps), input length (prompt length), and output length (completion length) patterns. Currently we support 4 patterns (`'quick_rising'`, `'slow_rising'`, `'slight_fluctuation'`, `'severe_fluctuation'`), described [here](https://github.com/vllm-project/aibrix/blob/main/benchmarks/autoscaling/bench_workload_generator.py):
 ```shell
 python workload_generator.py --prompt-file $SHAREGPT_FILE_PATH --interval-ms 1000 --duration-ms 300000 --trace-type synthetic --traffic-pattern "slight_fluctuation" --prompt-len-pattern "slight_fluctuation" --completion-len-pattern "slight_fluctuation" --model "Qwen/Qwen2.5-Coder-7B-Instruct" --output-dir "./output" --output-format jsonl 
 ```

diff --git a/cmd/controllers/main.go b/cmd/controllers/main.go
@@ -25,11 +25,11 @@ import (
 
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 
-	autoscalingv1alpha1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
-	modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
-	orchestrationv1alpha1 "github.com/aibrix/aibrix/api/orchestration/v1alpha1"
-	"github.com/aibrix/aibrix/pkg/features"
 	rayclusterv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
+	autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
+	modelv1alpha1 "github.com/vllm-project/aibrix/api/model/v1alpha1"
+	orchestrationv1alpha1 "github.com/vllm-project/aibrix/api/orchestration/v1alpha1"
+	"github.com/vllm-project/aibrix/pkg/features"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"k8s.io/apimachinery/pkg/runtime"
@@ -45,9 +45,9 @@ import (
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 	"sigs.k8s.io/controller-runtime/pkg/webhook"
 
-	"github.com/aibrix/aibrix/pkg/cache"
-	"github.com/aibrix/aibrix/pkg/config"
-	"github.com/aibrix/aibrix/pkg/controller"
+	"github.com/vllm-project/aibrix/pkg/cache"
+	"github.com/vllm-project/aibrix/pkg/config"
+	"github.com/vllm-project/aibrix/pkg/controller"
 	//+kubebuilder:scaffold:imports
 )
 

diff --git a/cmd/metadata/main.go b/cmd/metadata/main.go
@@ -17,8 +17,8 @@ limitations under the License.
 package main
 
 import (
-	"github.com/aibrix/aibrix/pkg/metadata"
-	"github.com/aibrix/aibrix/pkg/utils"
+	"github.com/vllm-project/aibrix/pkg/metadata"
+	"github.com/vllm-project/aibrix/pkg/utils"
 	"k8s.io/klog/v2"
 )
 

diff --git a/cmd/plugins/main.go b/cmd/plugins/main.go
@@ -31,10 +31,10 @@ import (
 	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/klog/v2"
 
-	"github.com/aibrix/aibrix/pkg/cache"
-	"github.com/aibrix/aibrix/pkg/plugins/gateway"
-	"github.com/aibrix/aibrix/pkg/utils"
 	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	"github.com/vllm-project/aibrix/pkg/cache"
+	"github.com/vllm-project/aibrix/pkg/plugins/gateway"
+	"github.com/vllm-project/aibrix/pkg/utils"
 	healthPb "google.golang.org/grpc/health/grpc_health_v1"
 )
 

diff --git a/config/gateway/gateway-plugin/gateway-plugin.yaml b/config/gateway/gateway-plugin/gateway-plugin.yaml
@@ -36,6 +36,13 @@ spec:
           imagePullPolicy: IfNotPresent
           ports:
             - containerPort: 50052
+          resources:
+            limits:
+              cpu: 1
+              memory: 1Gi
+            requests:
+              cpu: 1
+              memory: 1Gi
           env:
             - name: REDIS_HOST
               value: aibrix-redis-master

diff --git a/config/gpu-optimizer/deployment.yaml b/config/gpu-optimizer/deployment.yaml
@@ -21,6 +21,13 @@ spec:
         command: ["python", "-m", "aibrix.gpu_optimizer.app"]
         ports:
         - containerPort: 8080
+        resources:
+          limits:
+            cpu: 500m
+            memory: 256Mi
+          requests:
+            cpu: 10m
+            memory: 64Mi
         env:
           - name: REDIS_HOST
             value: aibrix-redis-master.aibrix-system.svc.cluster.local
diff --git a/config/metadata/metadata.yaml b/config/metadata/metadata.yaml
@@ -36,6 +36,13 @@ spec:
           imagePullPolicy: IfNotPresent
           ports:
             - containerPort: 8090
+          resources:
+            limits:
+              cpu: 500m
+              memory: 256Mi
+            requests:
+              cpu: 10m
+              memory: 64Mi
           env:
             - name: REDIS_HOST
               value: aibrix-redis-master

diff --git a/development/README.md b/development/README.md
@@ -4,8 +4,8 @@
 
 Here are some essential resources for anyone interested in AIBrix:
 
-- **Documentation and Tutorials**: [View Tutorials](https://github.com/aibrix/aibrix/tree/main/development/tutorial)
-- **Issue Tracker**: [View Issues](https://github.com/aibrix/aibrix/issues)
+- **Documentation and Tutorials**: [View Tutorials](https://github.com/vllm-project/aibrix/tree/main/development/tutorial)
+- **Issue Tracker**: [View Issues](https://github.com/vllm-project/aibrix/issues)
 - **Project Roadmap**: TODO
 
 Additional resources for contributors:
@@ -35,7 +35,7 @@ Alternatively, you can use [Kind](https://kind.sigs.k8s.io/) or [Minikube](https
 - Clone your fork locally:
 
   ```sh
-  git clone https://github.com/aibrix/aibrix.git
+  git clone https://github.com/vllm-project/aibrix.git
   cd aibrix
   ```
 

diff --git a/development/tutorials/distributed/README.md b/development/tutorials/distributed/README.md
@@ -12,7 +12,7 @@ COPY utils.py /usr/local/lib/python3.12/dist-packages/vllm/executor/ray_utils.py
 ENTRYPOINT [""]
 ```
 
-> Note: copy uitls.py from upstream version and remove the placement group validation logic. See [#228](https://github.com/aibrix/aibrix/issues/228) for more details.
+> Note: copy utils.py from the upstream version and remove the placement group validation logic. See [#228](https://github.com/vllm-project/aibrix/issues/228) for more details.
 > Note: No need to downgrade ray to v2.10.0. Seem only ray-project/ray image has issues.
 
 Container Image Combination which supports the distributed multi-host inference.

diff --git a/docs/README.md b/docs/README.md
@@ -13,4 +13,4 @@ pip install -r requirements-docs.txt
 make html
 ```
 
-Now the html paged should be generated at "website/docs/build/html/index.html". You can open this html page with your web browser as our project front page. 
+The HTML pages should now be generated at "docs/build/html/index.html". You can open this page in your web browser to view the project front page.
diff --git a/docs/source/assets/images/aibrix-architecture-v1.jpeg b/docs/source/assets/images/aibrix-architecture-v1.jpeg
diff --git a/docs/source/assets/images/aibrix-architecture-v1.png b/docs/source/assets/images/aibrix-architecture-v1.png