
Commit

add benchmark
timodonnell committed Sep 6, 2016
1 parent ad880cf commit b64a3f0
Showing 7 changed files with 334 additions and 12 deletions.
4 changes: 4 additions & 0 deletions Dockerfile
@@ -11,6 +11,10 @@ EXPOSE 9786
# dask distributed scheduler worker interface
EXPOSE 8786
USER root
RUN apt-get install --yes libxml2-dev libxslt1-dev
USER user
COPY . ./mhcflurry-cloud
RUN venv-py3/bin/pip install ./mhcflurry-cloud
30 changes: 26 additions & 4 deletions kubernetes/README.md
@@ -5,10 +5,10 @@
I called mine "tim-ml1":

```
-$ gcloud container clusters create tim-ml1 \
+gcloud container clusters create tim-ml1 \
   --zone us-east1-b --num-nodes=1 \
-  --enable-autoscaling --min-nodes=1 --max-nodes=5000 \
-  --machine-type=n1-standard-1
+  --enable-autoscaling --min-nodes=1 --max-nodes=100 \
+  --machine-type=n1-standard-32
```

It should show up here:
@@ -22,6 +22,14 @@ gcloud container clusters get-credentials tim-ml1

## Deploy dask distributed

If you want to use a development checkout of MHCflurry, first build a new MHCflurry docker image. From the MHCflurry checkout, run:

```
docker build .
```

When that completes, tag it, push it to docker hub, and edit `spec.yaml` to point to your image.
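
For example (a sketch; the image ID comes from the `docker build` output, and the repository name is a placeholder for your own Docker Hub account):

```
docker tag <image-id> your-dockerhub-user/mhcflurry:dev
docker push your-dockerhub-user/mhcflurry:dev
```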

This will launch the dask scheduler and one worker:

```
@@ -34,17 +42,31 @@ Can check it like this:
kubectl get pods
```

Get the IP of the scheduler (you want the external IP of `daskd-scheduler`):

```
$ kubectl get service
NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE
daskd-scheduler 10.3.249.60 104.196.185.187 8786/TCP 4m
kubernetes 10.3.240.1 <none> 443/TCP 17h
```

Then scale it up:

```
kubectl scale deployment daskd-worker --replicas=100
-kubectl scale deployment daskd-worker --replicas=100
+kubectl scale deployment daskd-worker --replicas=400

## Run analysis

Run mhcflurry-class1-allele-specific-cv-and-train, passing in the IP and port of the scheduler above, e.g. `--dask-scheduler 104.196.185.187:8786`.
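
For example (only the scheduler option is shown here; add whatever other options your training run needs):

```
mhcflurry-class1-allele-specific-cv-and-train --dask-scheduler 104.196.185.187:8786
```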

## When finished (important)

Run:
```
kubectl delete -f spec.yaml
gcloud container clusters delete tim-ml1
```


192 changes: 192 additions & 0 deletions kubernetes/benchmark.py
@@ -0,0 +1,192 @@
#!/usr/bin/env python
"""
Dask distributed joblib backend on kubernetes benchmark script
"""

import argparse
import sys
import logging
import time
import subprocess
import socket

import joblib
import numpy

import distributed.joblib # for side effects

parser = argparse.ArgumentParser(description=__doc__)

parser.add_argument(
"--tasks",
type=int,
default=1,
help="")

parser.add_argument(
"--task-time-sec",
type=float,
default=1.0,
help="")

parser.add_argument(
"--task-allocate-mb",
type=float,
default=0.0,
help="")

parser.add_argument(
"--task-input-mb",
type=float,
default=0.0,
help="")

parser.add_argument(
"--task-output-mb",
type=float,
default=0.0,
help="")

parser.add_argument(
"--dask-scheduler",
metavar="HOST:PORT",
help="Host and port of dask distributed scheduler")

parser.add_argument(
"--jobs-range",
type=int,
nargs=3,
default=None,
help="")

parser.add_argument(
"--replicas",
type=int,
default=1,
help="")

parser.add_argument(
"--scale-command",
default="kubectl scale deployment daskd-worker --replicas=%d",
help="")

parser.add_argument(
"--joblib-num-jobs",
type=int,
default=1,
help="Set to -1 to use as many jobs as cores")

parser.add_argument(
"--joblib-pre-dispatch",
default='2*n_jobs',
help="")

parser.add_argument(
"--quiet",
action="store_true",
default=False,
help="")

parser.add_argument(
"--verbose",
action="store_true",
default=False,
help="")

parser.add_argument(
"--out",
help="")



def make_data(size_mb):
if not size_mb:
return None
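    # numpy.random.rand returns float64 values (8 bytes each), so this
    # allocates roughly size_mb megabytes.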
return numpy.random.rand(int(size_mb * 2**20 / 8))


def task(task_data, task_time, task_allocate_mb, task_output_mb):
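    # task_data is unused beyond forcing the input payload to be shipped to
    # the worker; the allocation and sleep stand in for a real task's memory
    # use and compute time.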
allocated = make_data(task_allocate_mb)
time.sleep(task_time)
return (socket.gethostname(), make_data(task_output_mb))


def go(args, cores, out_fds):
for replica in range(args.replicas):
tasks = [
joblib.delayed(task)(
make_data(args.task_input_mb),
args.task_time_sec,
args.task_allocate_mb,
args.task_output_mb)
for _ in range(args.tasks)
]
start = time.time()
results = joblib.Parallel(
n_jobs=args.joblib_num_jobs,
verbose=1 if not args.quiet else 0,
pre_dispatch=args.joblib_pre_dispatch)(tasks)
length = time.time() - start

assert len(results) == args.tasks

        # logging.info("Hosts: %s" % " ".join(set([x[0] for x in results])))
        logging.info("Distinct hosts: %d" % len(set([x[0] for x in results])))

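        # One row per replica. Columns: RESULT_ROW marker, cores, replica
        # index, tasks, task_input_mb, task_time_sec, task_allocate_mb,
        # task_output_mb, wall clock time in seconds.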
for fd in out_fds:
fd.write(", ".join([str(x) for x in [
"RESULT_ROW",
cores,
replica,
args.tasks,
args.task_input_mb,
args.task_time_sec,
args.task_allocate_mb,
args.task_output_mb,
length
]]))
fd.write("\n")
fd.flush()

if __name__ == "__main__":
args = parser.parse_args(sys.argv[1:])
    # basicConfig is a no-op once a handler is installed, so honor --verbose
    # before falling back to INFO.
    if args.verbose:
        logging.basicConfig(level="DEBUG")
    elif not args.quiet:
        logging.basicConfig(level="INFO")
out_fds = [sys.stdout]
if args.out:
out_fds.append(open(args.out, 'w'))
if args.dask_scheduler:
backend = joblib.parallel_backend(
'distributed',
scheduler_host=args.dask_scheduler)
with backend:
active_backend = joblib.parallel.get_active_backend()[0]
logging.info(
"Running with dask scheduler: %s [%d cores]" % (
args.dask_scheduler,
active_backend.effective_n_jobs()))

if args.jobs_range is not None:
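                # Sweep over worker counts: run the scale command, wait until
                # the backend reports that many cores, then benchmark at that
                # size.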
for i in range(*args.jobs_range):
command = args.scale_command % i
logging.info("Running: %s" % command)
subprocess.check_call(command, shell=True)
                    while True:
                        cores = active_backend.effective_n_jobs(n_jobs=args.joblib_num_jobs)
                        logging.info("Cores: %d. Waiting for %d cores." % (cores, i))
                        if cores == i:
                            break
                        time.sleep(1)
                    go(args, cores, out_fds)
else:
cores = active_backend.effective_n_jobs(n_jobs=args.joblib_num_jobs)
go(args, cores, out_fds)

else:
active_backend = joblib.parallel.get_active_backend()[0]
cores = active_backend.effective_n_jobs(n_jobs=args.joblib_num_jobs)
logging.info(
"Running with joblib scheduler [%d cores]" % cores)
go(args, cores, out_fds)
46 changes: 46 additions & 0 deletions kubernetes/results_benchmark.raw
@@ -0,0 +1,46 @@
RESULT_ROW, 1, 0, 1000, 0.0, 0.01, 0.0, 0.0, 42.810691118240356
RESULT_ROW, 1, 1, 1000, 0.0, 0.01, 0.0, 0.0, 39.79971694946289
RESULT_ROW, 1, 2, 1000, 0.0, 0.01, 0.0, 0.0, 39.859691858291626
RESULT_ROW, 11, 0, 1000, 0.0, 0.01, 0.0, 0.0, 37.82715106010437
RESULT_ROW, 11, 1, 1000, 0.0, 0.01, 0.0, 0.0, 37.97161889076233
RESULT_ROW, 11, 2, 1000, 0.0, 0.01, 0.0, 0.0, 40.698598861694336
RESULT_ROW, 21, 0, 1000, 0.0, 0.01, 0.0, 0.0, 38.651761054992676
RESULT_ROW, 21, 1, 1000, 0.0, 0.01, 0.0, 0.0, 38.31314492225647
RESULT_ROW, 21, 2, 1000, 0.0, 0.01, 0.0, 0.0, 38.718400955200195
RESULT_ROW, 31, 0, 1000, 0.0, 0.01, 0.0, 0.0, 38.127875089645386
RESULT_ROW, 31, 1, 1000, 0.0, 0.01, 0.0, 0.0, 40.412209033966064
RESULT_ROW, 31, 2, 1000, 0.0, 0.01, 0.0, 0.0, 39.897480964660645
RESULT_ROW, 41, 0, 1000, 0.0, 0.01, 0.0, 0.0, 41.633188009262085
RESULT_ROW, 41, 1, 1000, 0.0, 0.01, 0.0, 0.0, 38.20383810997009
RESULT_ROW, 41, 2, 1000, 0.0, 0.01, 0.0, 0.0, 38.48811316490173
RESULT_ROW, 51, 0, 1000, 0.0, 0.01, 0.0, 0.0, 39.60266900062561
RESULT_ROW, 51, 1, 1000, 0.0, 0.01, 0.0, 0.0, 39.59385704994202
RESULT_ROW, 51, 2, 1000, 0.0, 0.01, 0.0, 0.0, 40.012531995773315
RESULT_ROW, 61, 0, 1000, 0.0, 0.01, 0.0, 0.0, 40.476792097091675
RESULT_ROW, 61, 1, 1000, 0.0, 0.01, 0.0, 0.0, 40.196911096572876
RESULT_ROW, 61, 2, 1000, 0.0, 0.01, 0.0, 0.0, 40.05432915687561
RESULT_ROW, 71, 0, 1000, 0.0, 0.01, 0.0, 0.0, 39.73402285575867
RESULT_ROW, 71, 1, 1000, 0.0, 0.01, 0.0, 0.0, 39.60586404800415
RESULT_ROW, 71, 2, 1000, 0.0, 0.01, 0.0, 0.0, 41.099231004714966
RESULT_ROW, 81, 0, 1000, 0.0, 0.01, 0.0, 0.0, 39.502846002578735
RESULT_ROW, 81, 1, 1000, 0.0, 0.01, 0.0, 0.0, 38.96229314804077
RESULT_ROW, 81, 2, 1000, 0.0, 0.01, 0.0, 0.0, 39.69266104698181
RESULT_ROW, 91, 0, 1000, 0.0, 0.01, 0.0, 0.0, 39.57509088516235
RESULT_ROW, 91, 1, 1000, 0.0, 0.01, 0.0, 0.0, 38.91041588783264
RESULT_ROW, 91, 2, 1000, 0.0, 0.01, 0.0, 0.0, 40.886382818222046
RESULT_ROW, 101, 0, 1000, 0.0, 0.01, 0.0, 0.0, 38.735661029815674
RESULT_ROW, 101, 1, 1000, 0.0, 0.01, 0.0, 0.0, 40.10374188423157
RESULT_ROW, 101, 2, 1000, 0.0, 0.01, 0.0, 0.0, 38.86319088935852
RESULT_ROW, 111, 0, 1000, 0.0, 0.01, 0.0, 0.0, 40.18087887763977
RESULT_ROW, 111, 1, 1000, 0.0, 0.01, 0.0, 0.0, 42.48532295227051
RESULT_ROW, 111, 2, 1000, 0.0, 0.01, 0.0, 0.0, 40.80208992958069
RESULT_ROW, 121, 0, 1000, 0.0, 0.01, 0.0, 0.0, 40.6304349899292
RESULT_ROW, 121, 1, 1000, 0.0, 0.01, 0.0, 0.0, 41.413609981536865
RESULT_ROW, 121, 2, 1000, 0.0, 0.01, 0.0, 0.0, 41.193679094314575
RESULT_ROW, 131, 0, 1000, 0.0, 0.01, 0.0, 0.0, 40.80850100517273
RESULT_ROW, 131, 1, 1000, 0.0, 0.01, 0.0, 0.0, 42.69233298301697
RESULT_ROW, 131, 2, 1000, 0.0, 0.01, 0.0, 0.0, 41.4269540309906
RESULT_ROW, 141, 0, 1000, 0.0, 0.01, 0.0, 0.0, 41.53449892997742
RESULT_ROW, 141, 1, 1000, 0.0, 0.01, 0.0, 0.0, 41.34440898895264
RESULT_ROW, 141, 2, 1000, 0.0, 0.01, 0.0, 0.0, 40.884902000427246
RESULT_ROW, 151, 0, 1000, 0.0, 0.01, 0.0, 0.0, 42.00139498710632
27 changes: 27 additions & 0 deletions kubernetes/results_benchmark2.raw
@@ -0,0 +1,27 @@
RESULT_ROW, 50, 0, 1000, 0.0, 1.0, 0.0, 0.0, 39.22998094558716
RESULT_ROW, 50, 1, 1000, 0.0, 1.0, 0.0, 0.0, 39.52116799354553
RESULT_ROW, 50, 2, 1000, 0.0, 1.0, 0.0, 0.0, 38.92290997505188
RESULT_ROW, 100, 0, 1000, 0.0, 1.0, 0.0, 0.0, 52.22019600868225
RESULT_ROW, 100, 1, 1000, 0.0, 1.0, 0.0, 0.0, 38.640321016311646
RESULT_ROW, 100, 2, 1000, 0.0, 1.0, 0.0, 0.0, 38.31685495376587
RESULT_ROW, 150, 0, 1000, 0.0, 1.0, 0.0, 0.0, 42.019338846206665
RESULT_ROW, 150, 1, 1000, 0.0, 1.0, 0.0, 0.0, 39.23623609542847
RESULT_ROW, 150, 2, 1000, 0.0, 1.0, 0.0, 0.0, 39.346214056015015
RESULT_ROW, 200, 0, 1000, 0.0, 1.0, 0.0, 0.0, 42.92032504081726
RESULT_ROW, 200, 1, 1000, 0.0, 1.0, 0.0, 0.0, 40.08589220046997
RESULT_ROW, 200, 2, 1000, 0.0, 1.0, 0.0, 0.0, 40.70915699005127
RESULT_ROW, 250, 0, 1000, 0.0, 1.0, 0.0, 0.0, 42.21598196029663
RESULT_ROW, 250, 1, 1000, 0.0, 1.0, 0.0, 0.0, 43.63485503196716
RESULT_ROW, 250, 2, 1000, 0.0, 1.0, 0.0, 0.0, 41.62060904502869
RESULT_ROW, 300, 0, 1000, 0.0, 1.0, 0.0, 0.0, 43.74205994606018
RESULT_ROW, 300, 1, 1000, 0.0, 1.0, 0.0, 0.0, 42.37982702255249
RESULT_ROW, 300, 2, 1000, 0.0, 1.0, 0.0, 0.0, 44.53074288368225
RESULT_ROW, 350, 0, 1000, 0.0, 1.0, 0.0, 0.0, 44.342978954315186
RESULT_ROW, 350, 1, 1000, 0.0, 1.0, 0.0, 0.0, 46.62700414657593
RESULT_ROW, 350, 2, 1000, 0.0, 1.0, 0.0, 0.0, 44.34846496582031
RESULT_ROW, 400, 0, 1000, 0.0, 1.0, 0.0, 0.0, 46.634907960891724
RESULT_ROW, 400, 1, 1000, 0.0, 1.0, 0.0, 0.0, 46.63501811027527
RESULT_ROW, 400, 2, 1000, 0.0, 1.0, 0.0, 0.0, 47.11781406402588
RESULT_ROW, 450, 0, 1000, 0.0, 1.0, 0.0, 0.0, 81.69227719306946
RESULT_ROW, 450, 1, 1000, 0.0, 1.0, 0.0, 0.0, 46.34372115135193
RESULT_ROW, 450, 2, 1000, 0.0, 1.0, 0.0, 0.0, 48.869673013687134
21 changes: 21 additions & 0 deletions kubernetes/results_benchmark3.raw
@@ -0,0 +1,21 @@
RESULT_ROW, 100, 0, 1000, 0.0, 10.0, 0.0, 0.0, 112.01388001441956
RESULT_ROW, 100, 1, 1000, 0.0, 10.0, 0.0, 0.0, 104.10000491142273
RESULT_ROW, 100, 2, 1000, 0.0, 10.0, 0.0, 0.0, 103.82982802391052
RESULT_ROW, 200, 0, 1000, 0.0, 10.0, 0.0, 0.0, 60.276768922805786
RESULT_ROW, 200, 1, 1000, 0.0, 10.0, 0.0, 0.0, 57.974695920944214
RESULT_ROW, 200, 2, 1000, 0.0, 10.0, 0.0, 0.0, 60.19661211967468
RESULT_ROW, 300, 0, 1000, 0.0, 10.0, 0.0, 0.0, 52.92485213279724
RESULT_ROW, 300, 1, 1000, 0.0, 10.0, 0.0, 0.0, 52.32376194000244
RESULT_ROW, 300, 2, 1000, 0.0, 10.0, 0.0, 0.0, 55.85372304916382
RESULT_ROW, 400, 0, 1000, 0.0, 10.0, 0.0, 0.0, 55.10609292984009
RESULT_ROW, 400, 1, 1000, 0.0, 10.0, 0.0, 0.0, 53.92383694648743
RESULT_ROW, 400, 2, 1000, 0.0, 10.0, 0.0, 0.0, 52.83234214782715
RESULT_ROW, 500, 0, 1000, 0.0, 10.0, 0.0, 0.0, 98.3832459449768
RESULT_ROW, 500, 1, 1000, 0.0, 10.0, 0.0, 0.0, 71.08167910575867
RESULT_ROW, 500, 2, 1000, 0.0, 10.0, 0.0, 0.0, 59.25785684585571
RESULT_ROW, 600, 0, 1000, 0.0, 10.0, 0.0, 0.0, 110.31718397140503
RESULT_ROW, 600, 1, 1000, 0.0, 10.0, 0.0, 0.0, 89.4331259727478
RESULT_ROW, 600, 2, 1000, 0.0, 10.0, 0.0, 0.0, 108.7091691493988
RESULT_ROW, 700, 0, 1000, 0.0, 10.0, 0.0, 0.0, 163.17476797103882
RESULT_ROW, 700, 1, 1000, 0.0, 10.0, 0.0, 0.0, 164.9972541332245
RESULT_ROW, 700, 2, 1000, 0.0, 10.0, 0.0, 0.0, 130.63042402267456