Skip to content

Commit

Permalink
Merge pull request #375 from aibrix/main
Browse files Browse the repository at this point in the history
Sync main changes to release-0.1 branch
  • Loading branch information
Jeffwan authored Nov 12, 2024
2 parents 5165d11 + 2e0179c commit c08ef11
Show file tree
Hide file tree
Showing 111 changed files with 8,046 additions and 1,350 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/docker-build-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ on:

jobs:
build:
# This prevents the job from running as other steps cover its functionality.
# We use 'if: false' to keep the file for future reference without deleting it.
if: false
runs-on: ubuntu-latest
steps:
- name: Check out code
Expand Down
22 changes: 20 additions & 2 deletions .github/workflows/release-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,34 @@ jobs:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

# Build container images
# Authenticate against the GitHub Container Registry (ghcr.io) using the
# workflow's built-in token, so images can be pushed under the ghcr.io namespace.
# NOTE(review): pushing to ghcr.io with GITHUB_TOKEN normally requires the job
# to grant `packages: write` — confirm this is set outside the visible hunk.
- name: Login to the Container registry
  uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
  with:
    username: ${{ github.actor }}
    password: ${{ secrets.GITHUB_TOKEN }}
    registry: ghcr.io

# Build and push every image twice: first under the default registry namespace
# (Docker Hub), then under the GitHub Container Registry namespace.
#
# The ref name is handed to `make` through `env:` instead of being interpolated
# into the script text. With inline `${{ ... }}` the value is substituted before
# the shell parses the script, so a ref name containing shell metacharacters
# would be executed as code; as an environment variable it is only ever data.

# Build container images with the default registry namespace
- name: Build Container Images
  env:
    GIT_COMMIT_HASH: ${{ github.ref_name }}
  run: |
    make docker-build-all

# Push container images to Docker Hub
- name: Push container image to container registry
  env:
    GIT_COMMIT_HASH: ${{ github.ref_name }}
  run: |
    make docker-push-all

# Build container images with the GitHub Container Registry namespace
- name: Build Container Images with Github Container Registry prefix
  env:
    GIT_COMMIT_HASH: ${{ github.ref_name }}
    AIBRIX_CONTAINER_REGISTRY_NAMESPACE: ghcr.io/aibrix
  run: |
    make docker-build-all

# Push container images to the GitHub Container Registry
- name: Push Container Images to Github Container Registry
  env:
    GIT_COMMIT_HASH: ${{ github.ref_name }}
    AIBRIX_CONTAINER_REGISTRY_NAMESPACE: ghcr.io/aibrix
  run: |
    make docker-push-all
python-wheel-release:
runs-on: ubuntu-latest
strategy:
Expand Down
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@ __pycache__
docs/build/
!**/*.template.rst


# Benchmark logs, results, and figures
benchmarks/autoscaling/logs
benchmarks/autoscaling/output_stats
benchmarks/autoscaling/workload_plot
13 changes: 13 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@ build:
os: ubuntu-22.04
tools:
python: "3.10"
jobs:
post_checkout:
# Cancel building pull requests when there are no changes in the docs directory or YAML file.
# You can add any other files or directories that you'd like here as well,
# like your docs requirements file, or other files that will change your docs build.
#
# If there are no changes (git diff exits with 0) we force the command to return with 183.
# This is a special exit code on Read the Docs that will cancel the build immediately.
- |
if [ "$READTHEDOCS_VERSION_TYPE" = "external" ] && git diff --quiet origin/main -- docs/ .readthedocs.yaml;
then
exit 183;
fi
# Build documentation in the "docs/" directory with Sphinx
sphinx:
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ kubectl create -k config/default
Install stable distribution
```shell
# Install component dependencies
kubectl create -k "github.com/aibrix/aibrix/config/dependency?ref=v0.1.0-rc.1"
kubectl create -k "github.com/aibrix/aibrix/config/dependency?ref=v0.1.0-rc.4"

# Install aibrix components
kubectl create -k "github.com/aibrix/aibrix/config/default?ref=v0.1.0-rc.1"
kubectl create -k "github.com/aibrix/aibrix/config/default?ref=v0.1.0-rc.4"
```

## Documentation
Expand Down
174 changes: 174 additions & 0 deletions benchmarks/autoscaling/7b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# Benchmark Deployment: one replica running a vLLM OpenAI-compatible server
# (port 8000) for deepseek-coder-6.7b-instruct, plus an aibrix runtime sidecar
# (port 8080). An init container downloads the model weights into a hostPath
# volume shared by all containers.
#
# Prerequisites (Secrets in the same namespace, not defined in this file):
#   - tos-credential: keys TOS_ACCESS_KEY / TOS_SECRET_KEY for the model download
#   - vllm-api-key:   key API_KEY, the bearer token clients must present, e.g.
#       kubectl create secret generic vllm-api-key --from-literal=API_KEY=<key>
apiVersion: apps/v1
kind: Deployment
metadata:
  name: aibrix-model-deepseek-coder-7b-instruct
  namespace: default
  labels:
    model.aibrix.ai/name: deepseek-coder-7b-instruct
    model.aibrix.ai/port: "8000"
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: deepseek-coder-7b-instruct
  strategy:
    type: Recreate
  template:
    metadata:
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
      labels:
        model.aibrix.ai/name: deepseek-coder-7b-instruct
    spec:
      containers:
        - name: vllm-openai
          image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.6.2-distributed
          imagePullPolicy: Always
          command:
            - python3
            - -m
            - vllm.entrypoints.openai.api_server
            - --host
            - "0.0.0.0"
            - --port
            - "8000"
            - --model
            - /models/deepseek-coder-6.7b-instruct
            - --served-model-name
            - deepseek-coder-7b-instruct
            - --trust-remote-code
            - --max-model-len
            - "10240"
            - --api-key
            # Kubernetes expands $(VAR_NAME) in `command` from the container's
            # env, so the key is never stored in this manifest.
            - $(VLLM_API_KEY)
          env:
            # The API key was previously hard-coded in the command line; any key
            # that was committed should be treated as leaked and rotated.
            - name: VLLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: vllm-api-key
                  key: API_KEY
          ports:
            - containerPort: 8000
              protocol: TCP
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 90
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 1
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 90
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            limits:
              nvidia.com/gpu: "1"
            requests:
              nvidia.com/gpu: "1"
          # We need to use dataset cache
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
            - name: dshm
              mountPath: /dev/shm
        - name: aibrix-runtime
          image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.0-rc.4
          command:
            - gunicorn
            - -b
            # Quoted so the leading ':' can never be misread as YAML syntax.
            - ":8080"
            - app:app
            - -k
            - uvicorn.workers.UvicornWorker
          ports:
            - containerPort: 8080
              protocol: TCP
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
      initContainers:
        # Downloads the model from object storage into /models before the
        # serving containers start.
        - name: init-model
          image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.0-rc.4
          command:
            - python
            - -m
            - aibrix.downloader
            - --model-uri
            - tos://aibrix-artifact-testing/models/deepseek-ai/deepseek-coder-6.7b-instruct/
            - --local-dir
            - /models/
          env:
            - name: DOWNLOADER_MODEL_NAME
              value: deepseek-coder-6.7b-instruct
            - name: DOWNLOADER_NUM_THREADS
              value: "16"
            - name: DOWNLOADER_ALLOW_FILE_SUFFIX
              value: json, safetensors
            - name: TOS_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: tos-credential
                  key: TOS_ACCESS_KEY
            - name: TOS_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: tos-credential
                  key: TOS_SECRET_KEY
            - name: TOS_ENDPOINT
              value: tos-cn-beijing.ivolces.com
            - name: TOS_REGION
              value: cn-beijing
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
      volumes:
        # Node-local model directory, created on first use.
        - name: model-hostpath
          hostPath:
            path: /root/models
            type: DirectoryOrCreate
        # Memory-backed /dev/shm for the vLLM container, capped at 4Gi.
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: "4Gi"
      affinity:
        nodeAffinity:
          # Hard requirement: schedule only onto nodes labelled as NVIDIA-A10.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: machine.cluster.vke.volcengine.com/gpu-name
                    operator: In
                    values:
                      - NVIDIA-A10

---

# Service in front of the deepseek-coder-7b-instruct pods. It forwards port
# 8000 ("serve") and port 8080 ("http") to the same-numbered container ports.
apiVersion: v1
kind: Service
metadata:
  name: deepseek-coder-7b-instruct
  namespace: default
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8000"
  labels:
    model.aibrix.ai/name: deepseek-coder-7b-instruct
    prometheus-discovery: "true"
spec:
  type: LoadBalancer
  # Selects pods by the model-name label.
  selector:
    model.aibrix.ai/name: deepseek-coder-7b-instruct
  ports:
    - name: serve
      protocol: TCP
      port: 8000
      targetPort: 8000
    - name: http
      protocol: TCP
      port: 8080
      targetPort: 8080
18 changes: 18 additions & 0 deletions benchmarks/autoscaling/apa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# PodAutoscaler for the aibrix-model-deepseek-coder-7b-instruct Deployment:
# keeps between 1 and 10 replicas, driven by the vLLM GPU cache usage metric
# with the "APA" scaling strategy.
apiVersion: autoscaling.aibrix.ai/v1alpha1
kind: PodAutoscaler
metadata:
  name: deepseek-coder-7b-instruct-apa
  namespace: default
  labels:
    app.kubernetes.io/name: aibrix
    app.kubernetes.io/managed-by: kustomize
spec:
  scalingStrategy: "APA"
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: aibrix-model-deepseek-coder-7b-instruct
  minReplicas: 1
  maxReplicas: 10
  targetMetric: "vllm:gpu_cache_usage_perc"
  # NOTE(review): vLLM appears to report this gauge as a 0-1 fraction; confirm
  # the autoscaler interprets "50" on the intended scale (percent vs. fraction).
  targetValue: "50"
Loading

0 comments on commit c08ef11

Please sign in to comment.