huggingface
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 13 additions & 0 deletions b/‎README.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎extra/k8s/benchmark.yaml‎
Lines changed: 0 additions & 195 deletions b/‎extra/k8s/benchmark.yaml‎
Lines changed: 0 additions & 195 deletions
diff --git a/‎extra/k8s/text-generation-inference-benchmark/.helmignore‎
Lines changed: 23 additions & 0 deletions b/‎extra/k8s/text-generation-inference-benchmark/.helmignore‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎extra/k8s/text-generation-inference-benchmark/Chart.yaml‎
Lines changed: 6 additions & 0 deletions b/‎extra/k8s/text-generation-inference-benchmark/Chart.yaml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎extra/k8s/text-generation-inference-benchmark/templates/_helpers.tpl‎
Lines changed: 62 additions & 0 deletions b/‎extra/k8s/text-generation-inference-benchmark/templates/_helpers.tpl‎
Lines changed: 62 additions & 0 deletions
@@ -170,4 +170,5 @@ Cargo.lock
 *.json
 *.txt
 results
-*.parquet
+*.parquet
+*.dev.*
@@ -193,6 +193,19 @@ values, sampling token counts from a normal distribution with the specified vari
 --decode-options "num_tokens=50,max_tokens=60,min_tokens=40,variance=10"
 ```
 
+## Deploy on Kubernetes
+
+You can deploy the benchmarking tool on Kubernetes using the provided Helm chart.
+
+Review the chart's values (especially the model, HF token, and resources), then install the chart:
+```shell
+$ helm install tgi-benchmark ./extra/k8s/text-generation-inference-benchmark
+```
+
+## Deploy on Slurm
+
+A Slurm example is provided in `extra/slurm`.
+
 ## Development
 
 You need [Rust](https://rustup.rs/) installed to build the benchmarking tool.
 
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
@@ -0,0 +1,6 @@
+apiVersion: v2
+name: text-generation-inference-benchmark
+description: A Helm chart to run a text generation inference benchmark
+type: application
+version: 0.1.0
+appVersion: "2.3.0"
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart: .Values.nameOverride when non-empty, otherwise .Chart.Name; truncated to 63 chars with any trailing "-" removed.
+*/}}
+{{- define "text-generation-inference-benchmark.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name, truncated to 63 chars (some Kubernetes name fields are limited to this by the DNS naming spec) with any trailing "-" removed.
+Precedence: .Values.fullnameOverride when non-empty; otherwise the release name alone if it already contains the chart name (or .Values.nameOverride when non-empty);
+otherwise "<release name>-<chart name or nameOverride>".
+*/}}
+{{- define "text-generation-inference-benchmark.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create the "<chart name>-<chart version>" string used by the helm.sh/chart label: "+" is replaced with "_", then the result is truncated to 63 chars and any trailing "-" removed.
+*/}}
+{{- define "text-generation-inference-benchmark.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: helm.sh/chart, the selector labels, app.kubernetes.io/version (quoted; emitted only when .Chart.AppVersion is set) and app.kubernetes.io/managed-by (.Release.Service).
+*/}}
+{{- define "text-generation-inference-benchmark.labels" -}}
+helm.sh/chart: {{ include "text-generation-inference-benchmark.chart" . }}
+{{ include "text-generation-inference-benchmark.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: app.kubernetes.io/name (rendered by the "text-generation-inference-benchmark.name" template) and app.kubernetes.io/instance (.Release.Name).
+*/}}
+{{- define "text-generation-inference-benchmark.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "text-generation-inference-benchmark.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use: .Values.serviceAccount.name when non-empty; otherwise the fullname template if .Values.serviceAccount.create is truthy, else "default".
+*/}}
+{{- define "text-generation-inference-benchmark.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "text-generation-inference-benchmark.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}