diff --git a/README.md b/README.md
index cc8b819911..b23af5dbf9 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,7 @@ More examples for models such as BERT and YOLOv5 can be found in [distributed_tr
 - [Train GPT-2 with Sharded Data Parallel](https://github.com/aws/amazon-sagemaker-examples/tree/main/training/distributed_training/pytorch/model_parallel/gpt2/smp-train-gpt-simple-sharded-data-parallel.ipynb) shows how to train GPT-2 with near-linear scaling using Sharded Data Parallelism technique in SageMaker Model Parallelism Library.
 - [Train EleutherAI GPT-J with Model Parallel](https://github.com/aws/amazon-sagemaker-examples/blob/main/training/distributed_training/pytorch/model_parallel/gpt-j/11_train_gptj_smp_tensor_parallel_notebook.ipynb) shows how to train EleutherAI GPT-J with PyTorch and Tensor Parallelism technique in the SageMaker Model Parallelism Library.
 - [Train MaskRCNN with Data Parallel](https://github.com/aws/amazon-sagemaker-examples/blob/main/training/distributed_training/pytorch/data_parallel/maskrcnn/pytorch_smdataparallel_maskrcnn_demo.ipynb) shows how to train MaskRCNN with PyTorch and SageMaker Data Parallelism Library.
+- [Distributed training with TensorFlow Multi-Worker Mirrored Strategy API on Amazon SageMaker](https://github.com/aws/amazon-sagemaker-examples/blob/main/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/tensorflow_multi_worker_mirrored_strategy.ipynb) shows how to train an MNIST classifier with TensorFlow using TensorFlow's Multi-Worker Mirrored Strategy for distributed training.
 
 ### Amazon SageMaker Smart Sifting
diff --git a/training/distributed_training/index.rst b/training/distributed_training/index.rst
index 17c42631ad..ee22ce8707 100644
--- a/training/distributed_training/index.rst
+++ b/training/distributed_training/index.rst
@@ -159,6 +159,15 @@ Horovod
 
    /sagemaker-python-sdk/keras_script_mode_pipe_mode_horovod/tensorflow_keras_CIFAR10
 
+TensorFlow Multi-Worker Mirrored Strategy (MWMS)
+------------------------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   tensorflow/multi_worker_mirrored_strategy/tensorflow_multi_worker_mirrored_strategy
+
+
 .. _mxnet-distributed:
 
 Apache MXNet
diff --git a/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/mnist-distributed.py b/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/mnist-distributed.py
new file mode 100644
index 0000000000..bc75fd749a
--- /dev/null
+++ b/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/mnist-distributed.py
@@ -0,0 +1,104 @@
+# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+import argparse
+import json
+import os
+
+import numpy as np
+import tensorflow as tf
+
+
+def model(x_train, y_train, x_test, y_test, strategy):
+    """Generate a simple model"""
+    # The model must be built and compiled within the strategy scope so that
+    # its variables are mirrored across all participating devices.
+    with strategy.scope():
+
+        model = tf.keras.models.Sequential(
+            [
+                tf.keras.layers.Flatten(),
+                tf.keras.layers.Dense(1024, activation=tf.nn.relu),
+                tf.keras.layers.Dropout(0.4),
+                tf.keras.layers.Dense(10, activation=tf.nn.softmax),
+            ]
+        )
+
+        model.compile(
+            optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
+        )
+
+    model.fit(x_train, y_train)
+    model.evaluate(x_test, y_test)
+
+    return model
+
+
+def _load_training_data(base_dir):
+    """Load MNIST training data"""
+    x_train = np.load(os.path.join(base_dir, "input_train.npy"))
+    y_train = np.load(os.path.join(base_dir, "input_train_labels.npy"))
+    return x_train, y_train
+
+
+def _load_testing_data(base_dir):
+    """Load MNIST testing data"""
+    x_test = np.load(os.path.join(base_dir, "input_test.npy"))
+    y_test = np.load(os.path.join(base_dir, "input_test_labels.npy"))
+    return x_test, y_test
+
+
+def _parse_args():
+    parser = argparse.ArgumentParser()
+
+    # Data, model, and output directories.
+    # model_dir is always passed in from SageMaker. By default this is an S3 path under the default bucket.
+    parser.add_argument("--model_dir", type=str)
+    parser.add_argument("--sm-model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
+    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAINING"))
+    parser.add_argument("--hosts", type=list, default=json.loads(os.environ.get("SM_HOSTS")))
+    parser.add_argument("--current-host", type=str, default=os.environ.get("SM_CURRENT_HOST"))
+
+    return parser.parse_known_args()
+
+
+if __name__ == "__main__":
+    args, unknown = _parse_args()
+
+    train_data, train_labels = _load_training_data(args.train)
+    eval_data, eval_labels = _load_testing_data(args.train)
+
+    print("TensorFlow version: ", tf.__version__)
+    # SageMaker sets TF_CONFIG on each host when the MWMS distribution option is enabled.
+    print("TF_CONFIG", os.environ.get("TF_CONFIG"))
+
+    # Use the NCCL implementation for cross-worker communication, since we train on GPUs.
+    communication_options = tf.distribute.experimental.CommunicationOptions(
+        implementation=tf.distribute.experimental.CommunicationImplementation.NCCL
+    )
+    strategy = tf.distribute.MultiWorkerMirroredStrategy(
+        communication_options=communication_options
+    )
+
+    print("Number of devices: {}".format(strategy.num_replicas_in_sync))
+
+    mnist_classifier = model(train_data, train_labels, eval_data, eval_labels, strategy)
+
+    task_type, task_id = (strategy.cluster_resolver.task_type, strategy.cluster_resolver.task_id)
+
+    print("Task type: ", task_type)
+    print("Task id: ", task_id)
+
+    # Save the model on the chief worker; other workers save to /tmp.
+    if strategy.cluster_resolver.task_id == 0:
+        print("Saving model on chief")
+        mnist_classifier.save(os.path.join(args.sm_model_dir, "000000001"))
+    else:
+        print("Saving model in /tmp on worker")
+        mnist_classifier.save(f"/tmp/{strategy.cluster_resolver.task_id}")
diff --git a/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/mnist.py b/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/mnist.py
new file mode 100644
index 0000000000..5a96972c82
--- /dev/null
+++ b/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/mnist.py
@@ -0,0 +1,79 @@
+# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+import argparse
+import json
+import os
+
+import numpy as np
+import tensorflow as tf
+
+
+def model(x_train, y_train, x_test, y_test):
+    """Generate a simple model"""
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dense(1024, activation=tf.nn.relu),
+            tf.keras.layers.Dropout(0.4),
+            tf.keras.layers.Dense(10, activation=tf.nn.softmax),
+        ]
+    )
+
+    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
+    model.fit(x_train, y_train)
+    model.evaluate(x_test, y_test)
+
+    return model
+
+
+def _load_training_data(base_dir):
+    """Load MNIST training data"""
+    x_train = np.load(os.path.join(base_dir, "input_train.npy"))
+    y_train = np.load(os.path.join(base_dir, "input_train_labels.npy"))
+    return x_train, y_train
+
+
+def _load_testing_data(base_dir):
+    """Load MNIST testing data"""
+    x_test = np.load(os.path.join(base_dir, "input_test.npy"))
+    y_test = np.load(os.path.join(base_dir, "input_test_labels.npy"))
+    return x_test, y_test
+
+
+def _parse_args():
+    parser = argparse.ArgumentParser()
+
+    # Data, model, and output directories.
+    # model_dir is always passed in from SageMaker. By default this is an S3 path under the default bucket.
+    parser.add_argument("--model_dir", type=str)
+    parser.add_argument("--sm-model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
+    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAINING"))
+    parser.add_argument("--hosts", type=list, default=json.loads(os.environ.get("SM_HOSTS")))
+    parser.add_argument("--current-host", type=str, default=os.environ.get("SM_CURRENT_HOST"))
+
+    return parser.parse_known_args()
+
+
+if __name__ == "__main__":
+    args, unknown = _parse_args()
+
+    train_data, train_labels = _load_training_data(args.train)
+    eval_data, eval_labels = _load_testing_data(args.train)
+
+    mnist_classifier = model(train_data, train_labels, eval_data, eval_labels)
+
+    if args.current_host == args.hosts[0]:
+        # Save the model to an S3 directory with version number '000000001' in TensorFlow SavedModel format.
+        # To export the model in h5 format, use model.save('my_model.h5') instead.
+        mnist_classifier.save(os.path.join(args.sm_model_dir, "000000001"))
diff --git a/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/tensorflow_multi_worker_mirrored_strategy.ipynb b/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/tensorflow_multi_worker_mirrored_strategy.ipynb
new file mode 100644
index 0000000000..c181e3b0c1
--- /dev/null
+++ b/training/distributed_training/tensorflow/multi_worker_mirrored_strategy/tensorflow_multi_worker_mirrored_strategy.ipynb
@@ -0,0 +1,406 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Distributed training with TensorFlow Multi-Worker Mirrored Strategy API on Amazon SageMaker"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "This notebook's CI test result for us-west-2 is as follows. CI test results in other regions can be found at the end of the notebook.\n",
+    "\n",
+    "![This us-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-2/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[TensorFlow's Distributed Training API](https://www.tensorflow.org/guide/distributed_training) natively supports multiple strategies for distributed training. In this example, we will use the [SageMaker Python SDK](https://github.com/aws/sagemaker-python-sdk) to run a distributed training job on the training instances using a TensorFlow training script and a SageMaker Deep Learning Container (DLC) for TensorFlow training. We will use the popular MNIST dataset to train a classifier based on a simple neural network architecture.\n",
+    "\n",
+    "We will start with a non-distributed neural network MNIST training script and then adapt it to use distributed training."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set up the environment\n",
+    "\n",
+    "Let's start by setting up the environment:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "! pip install -U sagemaker"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sagemaker\n",
+    "from sagemaker import get_execution_role\n",
+    "\n",
+    "sagemaker_session = sagemaker.Session()\n",
+    "\n",
+    "role = get_execution_role()\n",
+    "region = sagemaker_session.boto_session.region_name"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Training Data\n",
+    "\n",
+    "We will use the MNIST dataset, which has already been loaded into the public S3 buckets ``sagemaker-example-files-prod-<region>`` under the prefix ``datasets/image/MNIST/numpy``. There are four ``.npy`` files under this prefix:\n",
+    "* ``input_train.npy``\n",
+    "* ``input_test.npy``\n",
+    "* ``input_train_labels.npy``\n",
+    "* ``input_test_labels.npy``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "training_data_uri = \"s3://sagemaker-example-files-prod-{}/datasets/image/MNIST/numpy\".format(region)"
+   ]
+  },
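+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional sanity check, we can download one of the training files and inspect it locally before launching any training job. This cell is a small sketch added for illustration; it assumes the public example-files bucket is readable from your account role and that the arrays use the raw MNIST layout."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import boto3\n",
+    "import numpy as np\n",
+    "\n",
+    "# Download one of the four .npy files from the public bucket and check its shape.\n",
+    "s3 = boto3.client(\"s3\", region_name=region)\n",
+    "s3.download_file(\n",
+    "    \"sagemaker-example-files-prod-{}\".format(region),\n",
+    "    \"datasets/image/MNIST/numpy/input_train.npy\",\n",
+    "    \"input_train.npy\",\n",
+    ")\n",
+    "print(np.load(\"input_train.npy\").shape)  # e.g. (60000, 28, 28) for raw MNIST images"
+   ]
+  },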
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Construct the training script\n",
+    "\n",
+    "This tutorial's training script is based on a [SageMaker MNIST example](https://github.com/aws/amazon-sagemaker-examples/blob/main/sagemaker-python-sdk/tensorflow_script_mode_training_and_serving/mnist-2.py). Here is the entire script:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# TensorFlow script\n",
+    "!pygmentize 'mnist.py'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a training job using the `TensorFlow` estimator\n",
+    "\n",
+    "The `sagemaker.tensorflow.TensorFlow` estimator handles locating the training container based on the framework version and the job type (inference or training), uploading your script to an S3 location, and creating a SageMaker training job. Let's call out a couple of important parameters here:\n",
+    "\n",
+    "* `framework_version` is set to `'2.13.0'` to indicate the TensorFlow version we want to use for executing the model training code. This tells SageMaker which DLC should be used. Here's the list of the [available Deep Learning Container Images](https://github.com/aws/deep-learning-containers/blob/master/available_images.md).\n",
+    "\n",
+    "* `entry_point` is the absolute or relative path to the local Python source file that should be executed as the entry point to training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from sagemaker.tensorflow import TensorFlow\n",
+    "\n",
+    "local_mode = True\n",
+    "\n",
+    "if local_mode:\n",
+    "    instance_type = \"local_gpu\"\n",
+    "    instance_count = 1\n",
+    "else:\n",
+    "    instance_type = \"ml.g5.xlarge\"\n",
+    "    instance_count = 1\n",
+    "\n",
+    "mnist_estimator = TensorFlow(\n",
+    "    entry_point=\"mnist.py\",\n",
+    "    role=role,\n",
+    "    instance_count=instance_count,\n",
+    "    instance_type=instance_type,\n",
+    "    framework_version=\"2.13.0\",\n",
+    "    py_version=\"py310\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Calling ``fit``\n",
+    "\n",
+    "To start a training job, we call `estimator.fit(training_data_uri)`.\n",
+    "\n",
+    "An S3 location is used here as the input. `fit` creates a default channel named `'training'`, which points to this S3 location. In the training script we can then access the training data from the location stored in `SM_CHANNEL_TRAINING`. `fit` accepts a couple of other types of input as well. See the API doc [here](https://sagemaker.readthedocs.io/en/stable/estimators.html#sagemaker.estimator.EstimatorBase.fit) for details.\n",
+    "\n",
+    "When training starts, the TensorFlow container executes mnist.py, passing `hyperparameters` and `model_dir` from the estimator as script arguments. Because we didn't define either in this example, no hyperparameters are passed, and `model_dir` defaults to `s3://<default_bucket>/<training_job_name>`, so the script execution is as follows:\n",
+    "```bash\n",
+    "python mnist.py --model_dir s3://<default_bucket>/<training_job_name>\n",
+    "```\n",
+    "When training is complete, the training job will upload the saved model to Amazon S3."
+   ]
+  },
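+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, instead of a bare S3 URI, we could pass `fit` a dictionary that maps channel names to S3 locations, which makes the channel name explicit and allows multiple channels. The sketch below is not executed in this notebook, and the `\"testing\"` channel is hypothetical; it would surface in the script as `SM_CHANNEL_TESTING`:\n",
+    "\n",
+    "```python\n",
+    "mnist_estimator.fit({\"training\": training_data_uri})\n",
+    "# or, with an additional hypothetical channel:\n",
+    "# mnist_estimator.fit({\"training\": training_data_uri, \"testing\": testing_data_uri})\n",
+    "```"
+   ]
+  },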
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Calling `fit` to train a model with the TensorFlow script:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "mnist_estimator.fit(training_data_uri)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Adapt the training job and training script to use distributed training\n",
+    "\n",
+    "In this section, we use an adapted training script that leverages TensorFlow distributed training. We will use [`MultiWorkerMirroredStrategy`](https://www.tensorflow.org/guide/distributed_training#multiworkermirroredstrategy), which performs distributed data parallelism.\n",
+    "\n",
+    "MultiWorkerMirroredStrategy has two implementations for cross-device communications:\n",
+    "\n",
+    "1. RING is RPC-based and supports both CPUs and GPUs.\n",
+    "\n",
+    "2. NCCL uses the [NVIDIA Collective Communications Library (NCCL)](https://developer.nvidia.com/nccl), which provides state-of-the-art performance on GPUs but doesn't support CPUs.\n",
+    "\n",
+    "In this example we explicitly select the NCCL implementation, since we will be training on GPU devices."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here are the changes we implement in the script:\n",
+    "\n",
+    "1. Instantiate the Multi-Worker Mirrored Strategy with the communication options\n",
+    "\n",
+    "```python\n",
+    "communication_options = tf.distribute.experimental.CommunicationOptions(\n",
+    "    implementation=tf.distribute.experimental.CommunicationImplementation.NCCL)\n",
+    "strategy = tf.distribute.MultiWorkerMirroredStrategy(\n",
+    "    communication_options=communication_options)\n",
+    "```\n",
+    "\n",
+    "2. Print the number of devices (replicas) involved in the distributed strategy\n",
+    "\n",
+    "```python\n",
+    "print('Number of devices: {}'.format(strategy.num_replicas_in_sync))\n",
+    "```\n",
+    "\n",
+    "3. In the `model` function, move the model definition and compilation inside the strategy scope context to ensure their variables are distributed across the participating devices\n",
+    "\n",
+    "```python\n",
+    "with strategy.scope():\n",
+    "    model = tf.keras.models.Sequential(\n",
+    "        [\n",
+    "            tf.keras.layers.Flatten(),\n",
+    "            tf.keras.layers.Dense(1024, activation=tf.nn.relu),\n",
+    "            tf.keras.layers.Dropout(0.4),\n",
+    "            tf.keras.layers.Dense(10, activation=tf.nn.softmax),\n",
+    "        ]\n",
+    "    )\n",
+    "\n",
+    "    model.compile(optimizer=\"adam\", loss=\"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n",
+    "```\n",
+    "\n",
+    "4. Save the model only on the chief worker\n",
+    "\n",
+    "```python\n",
+    "if strategy.cluster_resolver.task_id == 0:\n",
+    "    print(\"Saving model on chief\")\n",
+    "    mnist_classifier.save(os.path.join(args.sm_model_dir, \"000000001\"))\n",
+    "else:\n",
+    "    print(\"Saving model in /tmp on worker\")\n",
+    "    mnist_classifier.save(f\"/tmp/{strategy.cluster_resolver.task_id}\")\n",
+    "```\n",
+    "\n",
+    "---\n",
+    "\n",
+    "Here is the entire script:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# TensorFlow script\n",
+    "!pygmentize 'mnist-distributed.py'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, we modify the `sagemaker.tensorflow.TensorFlow` estimator by changing the `entry_point` to the new script and adding a distribution strategy.\n",
+    "\n",
+    "To enable [`MultiWorkerMirroredStrategy`](https://www.tensorflow.org/guide/distributed_training#multiworkermirroredstrategy) we use the following configuration:\n",
+    "\n",
+    "```python\n",
+    "{\n",
+    "    \"multi_worker_mirrored_strategy\": {\n",
+    "        \"enabled\": True\n",
+    "    }\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "This distribution strategy option is available for TensorFlow 2.9 and later in the SageMaker Python SDK v2.xx.yy and later."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "local_mode = False\n",
+    "\n",
+    "if local_mode:\n",
+    "    instance_type = \"local_gpu\"\n",
+    "    instance_count = 1\n",
+    "else:\n",
+    "    instance_type = \"ml.g5.24xlarge\"\n",
+    "    instance_count = 2\n",
+    "\n",
+    "mnist_estimator_distributed = TensorFlow(\n",
+    "    entry_point=\"mnist-distributed.py\",\n",
+    "    role=role,\n",
+    "    instance_count=instance_count,\n",
+    "    instance_type=instance_type,\n",
+    "    framework_version=\"2.13.0\",\n",
+    "    py_version=\"py310\",\n",
+    "    distribution={\"multi_worker_mirrored_strategy\": {\"enabled\": True}},\n",
+    ")"
+   ]
+  },
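+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "With this option enabled, SageMaker sets the `TF_CONFIG` environment variable on each host before the script starts, which is how `MultiWorkerMirroredStrategy` discovers the cluster; the training script prints it for inspection. As a rough sketch (the hostnames and port below are illustrative, not guaranteed), it looks like this on the first worker:\n",
+    "\n",
+    "```json\n",
+    "{\n",
+    "    \"cluster\": {\"worker\": [\"algo-1:2222\", \"algo-2:2222\"]},\n",
+    "    \"task\": {\"type\": \"worker\", \"index\": 0}\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "The worker with task index 0 acts as the chief, which is why the script saves the model to `SM_MODEL_DIR` only when `task_id == 0`."
+   ]
+  },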
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Calling `fit` to train the model with the distributed TensorFlow script:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "mnist_estimator_distributed.fit(training_data_uri)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Notebook CI Test Results\n",
+    "\n",
+    "This notebook was tested in multiple regions. The test results are as follows, except for us-west-2, which is shown at the top of the notebook.\n",
+    "\n",
+    "\n",
+    "![This us-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This us-east-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-2/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This us-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This ca-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ca-central-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This sa-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/sa-east-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This eu-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This eu-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-2/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This eu-west-3 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-3/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This eu-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-central-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This eu-north-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-north-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This ap-southeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This ap-southeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-2/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This ap-northeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This ap-northeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-2/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n",
+    "\n",
+    "![This ap-south-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-south-1/training|distributed_training|tensorflow|multi_worker_mirrored_strategy|tensorflow_multi_worker_mirrored_strategy.ipynb)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "conda_tensorflow2_p310",
+   "language": "python",
+   "name": "conda_tensorflow2_p310"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  },
+  "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}