diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
new file mode 100644
index 0000000..7492737
--- /dev/null
+++ b/.github/workflows/docker-image.yml
@@ -0,0 +1,52 @@
+name: Docker Image CI
+
+on:
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+
+jobs:
+
+ build:
+
+ runs-on: ubuntu-latest
+
+ steps:
+
+ - name: Check Out Repo
+ uses: actions/checkout@v2
+
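+    # NOTE: the login step below assumes DOCKERHUB_USERNAME and DOCKERHUB_PASSWORD
+    # are configured as repository secrets.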
+ - name: Login to Docker Hub
+ uses: docker/login-action@v1
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
+
+ - name: Set up Docker Buildx
+ id: buildx
+ uses: docker/setup-buildx-action@v1
+
+ - name: Build and push cpu
+ id: docker_build_cpu
+ uses: docker/build-push-action@v2
+ with:
+ context: ./
+ file: ./NLP/Dockerfile.cpu
+ push: true
+ tags: torlof/nlp-cpu-docker-keras:latest
+
+ - name: Build and push gpu
+ id: docker_build_gpu
+ uses: docker/build-push-action@v2
+ with:
+ context: ./
+ file: ./NLP/Dockerfile.gpu
+ push: true
+ tags: torlof/nlp-nvidia-docker-keras:latest
+
+    - name: Image digest
+      run: |
+        echo ${{ steps.docker_build_cpu.outputs.digest }}
+        echo ${{ steps.docker_build_gpu.outputs.digest }}
diff --git a/Dockerfile.cpu b/Image/Dockerfile.cpu
similarity index 100%
rename from Dockerfile.cpu
rename to Image/Dockerfile.cpu
diff --git a/Dockerfile.gpu b/Image/Dockerfile.gpu
similarity index 100%
rename from Dockerfile.gpu
rename to Image/Dockerfile.gpu
diff --git a/src/requirements.txt b/Image/src/requirements.txt
similarity index 100%
rename from src/requirements.txt
rename to Image/src/requirements.txt
diff --git a/src/train.py b/Image/src/train.py
similarity index 100%
rename from src/train.py
rename to Image/src/train.py
diff --git a/NLP/Dockerfile.cpu b/NLP/Dockerfile.cpu
new file mode 100644
index 0000000..4a00ccf
--- /dev/null
+++ b/NLP/Dockerfile.cpu
@@ -0,0 +1,11 @@
+FROM tensorflow/tensorflow:2.3.1
+
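+# The CI workflow builds this image from the repository root (context ./),
+# hence the NLP/ prefix on the COPY source path below.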
+COPY ./NLP/src /src
+
+WORKDIR /src
+
+RUN pip install -r requirements.txt
+
+ENV PYTHONPATH='/src/:$PYTHONPATH'
+
+CMD ["python", "train.py"]
diff --git a/NLP/Dockerfile.gpu b/NLP/Dockerfile.gpu
new file mode 100644
index 0000000..b363080
--- /dev/null
+++ b/NLP/Dockerfile.gpu
@@ -0,0 +1,11 @@
+FROM tensorflow/tensorflow:2.3.1-gpu
+
+COPY ./NLP/src /src
+
+WORKDIR /src
+
+RUN pip install -r requirements.txt
+
+ENV PYTHONPATH='/src/:$PYTHONPATH'
+
+CMD ["python", "train.py"]
diff --git a/NLP/Dockerfile.tensorflow b/NLP/Dockerfile.tensorflow
new file mode 100644
index 0000000..50a76f5
--- /dev/null
+++ b/NLP/Dockerfile.tensorflow
@@ -0,0 +1,62 @@
+FROM ubuntu:20.04
+
+# avoid interactive tzdata prompts during package installation
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt update && \
+    apt install -y python3-dev python3-pip python3-venv wget curl gnupg git sudo
+
+RUN ln -s /usr/bin/python3 /usr/bin/python && \
+ ln -s /usr/bin/pip3 /usr/bin/pip
+
+ENV PYTHONPATH=/tensorflow/lib \
+ PYTHON_ARG=/tensorflow/lib \
+ USE_BAZEL_VERSION=3.3.0 \
+ TF_NEED_CUDA=0 \
+ TF_NEED_GCP=0 \
+ TF_CUDA_COMPUTE_CAPABILITIES=5.2,3.5 \
+ TF_NEED_HDFS=0 \
+ TF_NEED_OPENCL=0 \
+ TF_NEED_JEMALLOC=0 \
+ TF_ENABLE_XLA=0 \
+ TF_NEED_VERBS=0 \
+ TF_CUDA_CLANG=0 \
+ TF_DOWNLOAD_CLANG=0 \
+ TF_NEED_MKL=0 \
+ TF_DOWNLOAD_MKL=0 \
+ TF_NEED_MPI=0 \
+ TF_NEED_S3=1 \
+ TF_NEED_KAFKA=0 \
+ TF_NEED_GDR=0 \
+ TF_NEED_OPENCL_SYCL=0 \
+ TF_SET_ANDROID_WORKSPACE=0 \
+ TF_NEED_AWS=0 \
+ TF_NEED_IGNITE=0 \
+ TF_NEED_ROCM=0 \
+ GCC_HOST_COMPILER_PATH="/usr/bin/gcc" \
+ CC_OPT_FLAGS="-march=native"
+
+RUN pip3 install pip six 'numpy<1.19.0' wheel setuptools mock 'future>=0.17.1' && \
+ pip3 install keras_applications --no-deps && \
+ pip3 install keras_preprocessing --no-deps
+
+RUN git clone https://github.com/tensorflow/tensorflow.git
+WORKDIR /tensorflow
+RUN git checkout r2.3
+
+RUN curl -fsSL https://bazel.build/bazel-release.pub.gpg | gpg --dearmor > bazel.gpg
+RUN mv bazel.gpg /etc/apt/trusted.gpg.d/
+RUN echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list
+RUN apt update && apt install -y bazel-3.3.0
+RUN ln -s /usr/bin/bazel-3.3.0 /usr/bin/bazel
+
+RUN ./configure
+
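+# Build the pip package with AVX/AVX2/FMA optimizations enabled;
+# tune --jobs to the number of cores available on the build machine.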
+RUN bazel build -c opt \
+ --copt=-mavx --copt=-mavx2 --copt=-mfma --copt=-mfpmath=both \
+ --copt=-w \
+ --jobs=26 \
+ //tensorflow/tools/pip_package:build_pip_package
+
+
+RUN ./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
+
+# install the freshly built wheel (the exact version depends on the current r2.3 head)
+RUN pip install /tmp/tensorflow_pkg/tensorflow-*.whl
+
diff --git a/NLP/src/requirements.txt b/NLP/src/requirements.txt
new file mode 100644
index 0000000..e69dc5c
--- /dev/null
+++ b/NLP/src/requirements.txt
@@ -0,0 +1,4 @@
+numpy
+tensorflow-datasets
+tensorflow-text==2.3.0
+tf-models-official==2.3.0
\ No newline at end of file
diff --git a/NLP/src/train.py b/NLP/src/train.py
new file mode 100644
index 0000000..c930ee7
--- /dev/null
+++ b/NLP/src/train.py
@@ -0,0 +1,288 @@
+import time
+import numpy as np
+import string
+import re
+import os
+import sys
+import shutil
+from typing import Dict, Tuple
+import tensorflow as tf
+from official.nlp import optimization
+import tensorflow_hub as hub
+import tensorflow_text as text
+
+
+def print_devices() -> None:
+ """
+ Print number of gpu devices to be used
+ """
+ print("\n------------------------")
+ print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
+ print("------------------------\n")
+
+
+def mlp(vocab_size: int, embedding_dim: int, max_length: int, no_classes: int) -> tf.keras.Model:
+ """
+ Build multi-layer perceptron model
+
+ :param int vocab_size: vocabulary size
+ :param int embedding_dim: embedding size
+ :param int max_length: maximal length of the padded sequence
+ :param int no_classes: number of classes / output layer size
+ :return: model object
+ :rtype: tf.keras.Model
+ """
+ sequence_input = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name="input0")
+ embedding_layer = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True)
+ x = embedding_layer(sequence_input)
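+    # the mask from mask_zero=True lets GlobalAveragePooling1D ignore padded positions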
+ x = tf.keras.layers.GlobalAveragePooling1D()(x)
+ x = tf.keras.layers.Dense(512, activation='relu')(x)
+ x = tf.keras.layers.Dense(512, activation='relu')(x)
+ output = tf.keras.layers.Dense(no_classes, activation='sigmoid', name="output0")(x)
+ model = tf.keras.Model(sequence_input, output)
+ model.compile(optimizer='adam',
+ loss=tf.keras.losses.BinaryCrossentropy(),
+ metrics=['accuracy'])
+ model.summary()
+ return model
+
+
+def bert(train_ds: tf.data.Dataset, epochs: int, no_classes: int) -> tf.keras.Model:
+ """
+ Build bert model
+
+ :param tf.data.Dataset train_ds: training dataset
+ :param int epochs: no epochs
+ :param int no_classes: number of classes / output layer size
+ :return: model object
+ :rtype: tf.keras.Model
+ """
+ tfhub_handle_encoder = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1"
+ tfhub_handle_preprocess = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
+
+ text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
+ preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
+ encoder_inputs = preprocessing_layer(text_input)
+ encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
+ outputs = encoder(encoder_inputs)
+ net = outputs['pooled_output']
+ x = tf.keras.layers.Dense(512, activation='relu')(net)
+ x = tf.keras.layers.Dense(512, activation='relu')(x)
+ output = tf.keras.layers.Dense(no_classes, activation='sigmoid', name="output0")(x)
+ model = tf.keras.Model(text_input, output)
+ loss = tf.keras.losses.BinaryCrossentropy()
+ metrics = tf.metrics.BinaryAccuracy()
+ steps_per_epoch = tf.data.experimental.cardinality(train_ds).numpy()
+ num_train_steps = steps_per_epoch * epochs
+ num_warmup_steps = int(0.1 * num_train_steps)
+
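+    # AdamW with warmup over the first 10% of the training steps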
+ init_lr = 3e-5
+ optimizer = optimization.create_optimizer(init_lr=init_lr,
+ num_train_steps=num_train_steps,
+ num_warmup_steps=num_warmup_steps,
+ optimizer_type='adamw')
+ model.compile(optimizer=optimizer,
+ loss=loss,
+ metrics=metrics)
+ model.summary()
+ return model
+
+
+def custom_standardization(input_data):
+ """
+ Function of standardizing text
+
+ """
+ lowercase = tf.strings.lower(input_data)
+ stripped_html = tf.strings.regex_replace(lowercase, '
', ' ')
+ return tf.strings.regex_replace(stripped_html,
+ '[%s]' % re.escape(string.punctuation), '')
+
+
+def get_data_from_aclImdb() -> tf.data.Dataset:
+ """
+ Load aclImdb_v1 dataset from internet
+
+ :return: dataset object
+ :rtype: tf.data.Dataset
+ """
+ url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
+
+ dataset = tf.keras.utils.get_file("aclImdb_v1.tar.gz", url,
+ untar=True, cache_dir='.',
+ cache_subdir='')
+
+ dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')
+ train_dir = os.path.join(dataset_dir, 'train')
+ remove_dir = os.path.join(train_dir, 'unsup')
+ shutil.rmtree(remove_dir)
+ seed = 123
+ raw_ds = tf.keras.preprocessing.text_dataset_from_directory('aclImdb/train', batch_size=50, seed=seed)
+ return raw_ds
+
+
+def preprocess_mlp_text(dataset: tf.data.Dataset, parameter: Dict) -> np.ndarray:
+ """
+ Perform tokenization for MLP model
+
+ :param tf.data.Dataset dataset: dataset containing text and label data
+    :param Dict parameter: parameter object containing the vocab_size and sequence_length parameters
+ :return: tokenized and padded text
+ :rtype: np.ndarray
+ """
+ vectorize_layer = tf.keras.layers.experimental.preprocessing.TextVectorization(
+ standardize=custom_standardization,
+ max_tokens=parameter["vocab_size"],
+ output_mode='int',
+ output_sequence_length=parameter["sequence_length"])
+
+ def vectorize_text_func(text, label):
+ text = tf.expand_dims(text, -1)
+ return vectorize_layer(text), label
+
+ text_ds = dataset.map(lambda x, y: x)
+ vectorize_layer.adapt(text_ds)
+ text_ds = dataset.map(vectorize_text_func)
+
+ tokenized_texts = []
+ for text, _ in text_ds:
+ tokenized_texts.append(text.numpy())
+
+ return np.vstack(tokenized_texts)
+
+
+def preprocess_bert_text(dataset: tf.data.Dataset) -> np.ndarray:
+ """
+ Perform tokenization for BERT model
+
+ :param tf.data.Dataset dataset: dataset containing text and label data
+ :return: tokenized and padded text
+ :rtype: np.ndarray
+ """
+
+ text_ds = dataset.map(lambda x, y: x)
+ tokenized_texts = []
+ for text in text_ds:
+ tokenized_texts += text.numpy().tolist()
+
+ return np.vstack(tokenized_texts)
+
+
+def prepare_dataset(text_data: np.ndarray, parameter: Dict, no_samples: int) -> tf.data.Dataset:
+ """
+ To simulate multi-class problem, we randomly generate labels here
+
+ :param np.ndarray text_data: text data
+ :param Dict parameter: parameter object
+ :param int no_samples: dataset size
+ :return: dataset object
+ :rtype: tf.data.Dataset
+ """
+ print(f"\nDataset contains {no_samples} samples\n")
+ labels = np.random.randint(low=0, high=parameter["no_classes"], size=(no_samples))
+ category_labels_mat = tf.keras.utils.to_categorical(labels, num_classes=parameter["no_classes"])
+
+ train_ds = tf.data.Dataset.from_tensor_slices(
+ (tf.convert_to_tensor(text_data),
+ tf.convert_to_tensor(category_labels_mat)))
+ train_ds = train_ds.shuffle(buffer_size=10000)
+ train_ds = train_ds.batch(parameter["batch_size"]).prefetch(tf.data.experimental.AUTOTUNE)
+ return train_ds
+
+
+def run_mlp_test_track(train_ds: tf.data.Dataset, parameter: Dict) -> Tuple[float, float]:
+ """
+ Perform training time and inference time test for multi-layer-perceptron model
+
+ :param tf.data.Dataset train_ds:
+ :param Dict parameter:
+ :return: training and inference time
+ :rtype: Tuple of float
+ """
+ # build model
+ print("create mlp model")
+ model = mlp(vocab_size=parameter["vocab_size"], embedding_dim=parameter["embedding_dim"],
+ max_length=parameter["sequence_length"], no_classes=parameter["no_classes"])
+ print("complete")
+ # start training
+ print("start training")
+ start_time = time.time()
+ model.fit(train_ds, epochs=parameter["epochs"], verbose=2)
+ train_time = time.time() - start_time
+ print(f"complete in {train_time} [sec]")
+    # start batch inference
+ print("start inference test")
+ start_time = time.time()
+ model.predict(train_ds)
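+    # len(train_ds) is the number of batches, so this is the average inference time per batch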
+ inference_time = (time.time() - start_time) / len(train_ds)
+ print(f"complete in {inference_time} [sec]")
+ return train_time, inference_time
+
+
+def run_bert_test_track(train_ds: tf.data.Dataset, parameter: Dict) -> Tuple[float, float]:
+ """
+ Perform training time and inference time test for bert model
+
+ :param tf.data.Dataset train_ds:
+ :param Dict parameter:
+ :return: training and inference time
+ :rtype: Tuple of float
+ """
+ # build model
+ print("create bert model")
+ model = bert(train_ds=train_ds, epochs=parameter["epochs"], no_classes=parameter["no_classes"])
+ print("complete")
+ # start training
+ print("train model")
+ start_time = time.time()
+ model.fit(train_ds, epochs=parameter["epochs"], verbose=2)
+ train_time = time.time() - start_time
+ print(f"complete in {train_time} [sec]")
+    # start batch inference
+    print("start inference test")
+    start_time = time.time()
+    _ = model.predict(train_ds)
+ inference_time = (time.time() - start_time) / len(train_ds)
+ print(f"complete in {inference_time} [sec]")
+ return train_time, inference_time
+
+
+def main():
+ parameter = {"vocab_size": 80000,
+ "sequence_length": 150,
+ "embedding_dim": 100,
+ "batch_size": 128,
+ "epochs" : 2}
+
+ no_classes = 10000
+ print_devices()
+ train_dataset = get_data_from_aclImdb()
+ mlp_text_data = preprocess_mlp_text(dataset=train_dataset, parameter=parameter)
+ bert_text_data = preprocess_bert_text(dataset=train_dataset)
+ print("Load and prepare dataset")
+
+    results = {"mlp": {"batch_size": [], "training time": [], "inference time": []},
+               "bert": {"batch_size": [], "training time": [], "inference time": []}}
+
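+    # sweep over batch sizes with the number of output classes fixed at no_classes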
+ for batch_size in [128, 500, 1000, 5000, 10000]:
+ parameter["no_classes"] = no_classes
+ parameter["batch_size"] = batch_size
+ train_ds = prepare_dataset(text_data=mlp_text_data, parameter=parameter, no_samples=mlp_text_data.shape[0])
+ runtimes = run_mlp_test_track(train_ds=train_ds, parameter=parameter)
+ results["mlp"]["batch_size"].append(batch_size)
+ results["mlp"]["training time"].append(runtimes[0])
+ results["mlp"]["inference time"].append(runtimes[1])
+ print(results)
+
+ #train_ds = prepare_dataset(text_data=bert_text_data, parameter=parameter, no_samples=bert_text_data.shape[0])
+ #runtimes = run_bert_test_track(train_ds=train_ds, parameter=parameter)
+ #results["bert"]["batch_size"].append(batch_size)
+ #results["bert"]["training time"].append(runtimes[0])
+ #results["bert"]["inference time"].append(runtimes[1])
+
+
+
+
+if __name__ == '__main__':
+ main()
diff --git a/README.md b/README.md
index 3ea70bc..67bf631 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,15 @@
# Train Neural Networks on Amazon EC2 with GPU support
-Workflow that shows how to train neural networks on EC2 instances with GPU support. The goal is to present a simple and stable setup to train on GPU instances by using **Docker** and the NVIDIA Container Runtime **nvidia-docker**. A minimal example is given to train a small CNN built in Keras on MNIST. We achieve a 30-fold speedup in training time when training on GPU versus CPU.
+Workflow that shows how to train neural networks on EC2 instances with GPU support.
+The goal is to present a simple and stable setup to train on GPU instances by using **Docker** and the NVIDIA Container
+Runtime **nvidia-docker**.
+
+Two minimal examples are given to train:
+ - a small CNN built in Keras on the MNIST image dataset
+ - a multi-layer perceptron (MLP) and a BERT model on the [aclImdb](https://ai.stanford.edu/~amaas/data/sentiment) sentiment dataset for NLP
+
+It is shown that a 30-fold speedup in training time can be achieved when training on GPU versus CPU, **but** only
+depending on the applied model; e.g. for the MLP, using CPU instances can be preferable.
## Getting started
@@ -14,14 +23,16 @@ Workflow that shows how to train neural networks on EC2 instances with GPU suppo
## Train locally on CPU
-1. Build Docker image for CPU
+1. Build Docker images for CPU
```
-docker build -t docker-keras . -f Dockerfile.cpu
+docker build -t docker-keras-image . -f Image/Dockerfile.cpu
+docker build -t docker-keras-nlp . -f NLP/Dockerfile.cpu
```
2. Run training container (**NB:** you might have to increase the container resources [[link](https://docs.docker.com/config/containers/resource_constraints/)])
```
-docker run docker-keras
+docker run docker-keras-image
+docker run docker-keras-nlp
```
@@ -39,46 +50,47 @@ For example, to launch a **p2.xlarge** EC2 instance named **ec2-p2** with a Tesl
```
docker-machine create --driver amazonec2 \
- --amazonec2-region eu-west-1 \
- --amazonec2-ami ami-58d7e821 \
+ --amazonec2-region eu-central-1 \
+ --amazonec2-ami ami-0ae9bf04fb7c502ea \
--amazonec2-instance-type p2.xlarge \
- --amazonec2-vpc-id vpc-abc \
- ec2-p2
+ --amazonec2-vpc-id vpc-b0ec4fda \
+ --amazonec2-root-size 100 \
+ ec2-gpu
+```
+```
+docker-machine create --driver amazonec2 \
+ --amazonec2-region eu-central-1 \
+ --amazonec2-ami ami-0ae9bf04fb7c502ea \
+ --amazonec2-instance-type c5n.xlarge \
+ --amazonec2-root-size 100 \
+ --amazonec2-vpc-id vpc-b0ec4fda ec2-cpu
```
-
3. ssh into instance
```
-docker-machine ssh ec2-p2
+docker-machine ssh ec2-gpu
```
-4. Update NVIDIA drivers and install **nvidia-docker** (see this [blog post](https://towardsdatascience.com/using-docker-to-set-up-a-deep-learning-environment-on-aws-6af37a78c551) for more details)
+4. Run training container on CPU instance
```
-# update NVIDIA drivers
-sudo add-apt-repository ppa:graphics-drivers/ppa -y
-sudo apt-get update
-sudo apt-get install -y nvidia-375 nvidia-settings nvidia-modprobe
-
-# install nvidia-docker
-wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker_1.0.1-1_amd64.deb
-sudo dpkg -i /tmp/nvidia-docker_1.0.1-1_amd64.deb && rm /tmp/nvidia-docker_1.0.1-1_amd64.deb
+sudo docker run torlof/nlp-cpu-docker-keras
```
-
5. Run training container on GPU instance
```
-sudo nvidia-docker run idealo/nvidia-docker-keras
+sudo nvidia-docker run torlof/nlp-nvidia-docker-keras
```
-This will pull the Docker image `idealo/nvidia-docker-keras` from [DockerHub](https://hub.docker.com/r/idealo/nvidia-docker-keras) and start the training.
-The corresponding Dockerfile can be found under `Dockerfile.gpu` for reference.
-
+This will pull the Docker image `torlof/nlp-nvidia-docker-keras` from [Docker Hub](https://hub.docker.com/r/torlof/nlp-nvidia-docker-keras) and start the training.
+The corresponding image for the MNIST example is `idealo/nvidia-docker-keras` and can be found
+on [idealo's Docker Hub](https://hub.docker.com/r/idealo/nvidia-docker-keras).
-## Training time comparison
+## Image classification example
+### Training time comparison
We trained MNIST for 3 epochs (~98% accuracy on validation set):
@@ -88,6 +100,44 @@ We trained MNIST for 3 epochs (~98% accuracy on validation set):
• p3.2xlarge (Tesla V100): **20 seconds**
+## Text classification example
+For the NLP use case we train on the aclImdb dataset.
+
+We want to test two text classification architectures:
+ - large BERT model
+ - lightweight MLP with a rather simple embedding table
+
+While BERT is known for good performance on various tasks, the lightweight MLP has its justification as it combines
+computational efficiency with convincing performance.
+
+In addition we want to investigate the runtime for the low-dimensional output case (2 to 10 classes) and
+the large-scale output case (up to 20,000 classes).
+
+Since aclImdb is a binary classification problem, we extend the dataset to a multi-class setting by randomly
+relabeling the existing samples, as sketched below. (Note that this makes the accuracy metric meaningless; our focus is on the runtime comparison.)
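+
+The relabeling is essentially a two-liner; here is a minimal sketch of what `prepare_dataset` in `NLP/src/train.py` does (the values shown are illustrative):
+```
+import numpy as np
+import tensorflow as tf
+
+no_classes, no_samples = 10000, 25000  # illustrative: 10k output classes, 25k training reviews
+labels = np.random.randint(low=0, high=no_classes, size=no_samples)             # random class per sample
+one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=no_classes)  # one-hot targets
+```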
+
+We captured the training and inference runtimes. Finally, we are interested in which instance type provides the
+best cost efficiency, so we also captured the cost of training and inference.
+
+### Training Runtime and Pricing
+
+*(Training runtime and pricing plots)*
+
+You can find these plots as interactive HTML plots in the **./plots** directory.
+
+### Inference Runtime and Pricing
+
+*(Inference runtime and pricing plots)*
+
+You can find these plots as interactive HTML plots in the **./plots** directory.
## Copyright
diff --git a/plots/Inference_BERT_Model__Price_.html b/plots/Inference_BERT_Model__Price_.html
new file mode 100644
index 0000000..49c3047
--- /dev/null
+++ b/plots/Inference_BERT_Model__Price_.html
@@ -0,0 +1,85 @@
+
+