From 8849285f06673bb31d1f7470e481c3f5be029176 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Fri, 27 Mar 2020 14:31:29 -0700
Subject: [PATCH] Use mixed precision for gelu intermediate activation in BERT SQuAD model

PiperOrigin-RevId: 303407939
---
 official/nlp/modeling/layers/transformer.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/official/nlp/modeling/layers/transformer.py b/official/nlp/modeling/layers/transformer.py
index 54d0f9cbfdd..0aa456597e5 100644
--- a/official/nlp/modeling/layers/transformer.py
+++ b/official/nlp/modeling/layers/transformer.py
@@ -142,10 +142,8 @@ def build(self, input_shape):
         kernel_constraint=self._kernel_constraint,
         bias_constraint=self._bias_constraint,
         name="intermediate")
-    # Use float32 in intermediate gelu activation for numeric stability.
-    # TODO(b/149117297): investigate gelu numeric stability.
     self._intermediate_activation_layer = tf.keras.layers.Activation(
-        self._intermediate_activation, dtype=tf.float32)
+        self._intermediate_activation)
     self._output_dense = dense_einsum.DenseEinsum(
         output_shape=hidden_size,
         kernel_initializer=self._kernel_initializer,
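
Note (not part of the patch): a minimal sketch of the behavior change, assuming a recent TF 2.x release where tf.keras.mixed_precision.set_global_policy and the "gelu" activation string are available. Without an explicit dtype, a Keras Activation layer runs in the compute dtype of the global mixed precision policy; pinning dtype=tf.float32, as the removed lines did, forces the gelu to be computed in float32.

    import tensorflow as tf

    # Assumed setup: the model is built under a global mixed precision policy.
    tf.keras.mixed_precision.set_global_policy("mixed_float16")

    x = tf.random.normal([2, 4])  # float32 input tensor

    # Before this patch: the activation layer was pinned to float32, so the
    # input stays in float32 and the gelu runs in full precision.
    act_fp32 = tf.keras.layers.Activation("gelu", dtype=tf.float32)
    print(act_fp32(x).dtype)  # float32

    # After this patch: the activation layer follows the policy's compute
    # dtype, so the input is cast to float16 and the gelu runs in half precision.
    act_mixed = tf.keras.layers.Activation("gelu")
    print(act_mixed(x).dtype)  # float16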