Create Softmax layer in activation decomposition instead of Activation layer (#894)

In MP (mixed precision) we need to know the axis used in Softmax for the KL-divergence computation.
In activation decomposition we previously created a plain 'Activation' layer, so the axis information
was not available when computing the MP distance. This commit changes the decomposition to create a
Softmax layer instead, enabling KL divergence in the MP distance computation.

---------

Co-authored-by: reuvenp <[email protected]>
reuvenperetz and reuvenp authored Dec 26, 2023
1 parent 9f87f4c commit 4d4d3ac
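
As an aside (a minimal illustration, not part of this commit), the config difference that motivates the change can be seen directly in Keras: an Activation layer configured with 'softmax' records no axis, while keras.layers.Softmax stores the axis it normalizes over:

    import tensorflow as tf
    from tensorflow.keras import layers

    # Activation only records the activation's name -- the softmax axis is lost.
    print(layers.Activation('softmax').get_config())  # {'name': ..., 'activation': 'softmax', ...}

    # Softmax records the axis it normalizes over (default -1, the last dimension).
    print(layers.Softmax().get_config())              # {'name': ..., 'axis': -1, ...}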
Showing 3 changed files with 40 additions and 19 deletions.
5 changes: 4 additions & 1 deletion model_compression_toolkit/core/keras/constants.py
@@ -113,4 +113,7 @@
 PAD = 'pad'
 
 # Special/Custom layers strings
-COMBINED_NMS = 'combined_non_max_suppression'
+COMBINED_NMS = 'combined_non_max_suppression'
+
+# Keras activation layers defaults:
+SOFTMAX_AXIS_DEFAULT = -1
41 changes: 26 additions & 15 deletions
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-
+import keras.layers
 from tensorflow.keras.layers import Dense, DepthwiseConv2D, Conv2D, Conv2DTranspose, Activation, SeparableConv2D
 
 from model_compression_toolkit.logger import Logger
@@ -23,7 +22,8 @@
 from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher, \
     NodeFrameworkAttrMatcher
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
-from model_compression_toolkit.core.keras.constants import LINEAR, ACTIVATION, TRAINABLE, LAYER_NAME
+from model_compression_toolkit.core.keras.constants import LINEAR, ACTIVATION, TRAINABLE, LAYER_NAME, SOFTMAX, AXIS, \
+    SOFTMAX_AXIS_DEFAULT


class ActivationDecomposition(common.BaseSubstitution):
@@ -70,18 +70,29 @@ def substitute(self,
 
         activation_node_name = op2d_node.name + '_post_activation'
 
-        activation_fw_attr = {
-            LAYER_NAME: activation_node_name,
-            TRAINABLE: False,
-            DATA_TYPE: FLOAT_32,
-            ACTIVATION: op2d_node.framework_attr.get(ACTIVATION)}
-
-        activation_node = common.graph.BaseNode(activation_node_name,
-                                                activation_fw_attr,
-                                                op2d_node.output_shape,
-                                                op2d_node.output_shape,
-                                                {},
-                                                Activation)
+        # Softmax is a special case where we need to know the default axis parameter used,
+        # and for this reason we create a Softmax layer and not an Activation layer.
+        if op2d_node.framework_attr.get(ACTIVATION) == SOFTMAX:
+            activation_fw_attr = {AXIS: SOFTMAX_AXIS_DEFAULT}
+            activation_node = common.graph.BaseNode(activation_node_name,
+                                                    activation_fw_attr,
+                                                    op2d_node.output_shape,
+                                                    op2d_node.output_shape,
+                                                    {},
+                                                    keras.layers.Softmax)
+        else:
+            activation_fw_attr = {
+                LAYER_NAME: activation_node_name,
+                TRAINABLE: False,
+                DATA_TYPE: FLOAT_32,
+                ACTIVATION: op2d_node.framework_attr.get(ACTIVATION)}
+
+            activation_node = common.graph.BaseNode(activation_node_name,
+                                                    activation_fw_attr,
+                                                    op2d_node.output_shape,
+                                                    op2d_node.output_shape,
+                                                    {},
+                                                    Activation)
 
         graph.add_node(activation_node)
         graph.reconnect_out_edges(current_node=op2d_node,
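
Why the axis matters: when the mixed-precision distance metric is KL divergence, the float and quantized softmax outputs must be treated as probability distributions along the same axis. A rough sketch (illustrative only; kl_divergence here is a hypothetical helper, not MCT's API):

    import numpy as np

    def kl_divergence(p, q, axis=-1, eps=1e-10):
        # KL(p || q), summed over the softmax axis and averaged over the rest.
        # p and q are assumed to sum to 1 along `axis` (softmax outputs).
        p = np.clip(p, eps, 1.0)
        q = np.clip(q, eps, 1.0)
        return np.mean(np.sum(p * np.log(p / q), axis=axis))

    # Rows are valid probability distributions (shape: 4 samples x 10 classes).
    float_out = np.random.dirichlet(np.ones(10), size=4)
    quant_out = np.random.dirichlet(np.ones(10), size=4)
    print(kl_divergence(float_out, quant_out, axis=-1))

Summing over the wrong axis would mix entries from unrelated distributions, which is exactly the information the new Softmax node's AXIS attribute preserves.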
13 changes: 10 additions & 3 deletions
@@ -16,7 +16,7 @@
 
 import tensorflow as tf
 
-from model_compression_toolkit.core.keras.constants import ACTIVATION, LINEAR
+from model_compression_toolkit.core.keras.constants import ACTIVATION, LINEAR, AXIS, SOFTMAX, SOFTMAX_AXIS_DEFAULT
 from tests.keras_tests.tpc_keras import get_quantization_disabled_keras_tpc
 from tests.keras_tests.feature_networks_tests.base_keras_feature_test import BaseKerasFeatureNetworkTest
 from tests.keras_tests.utils import get_layers_from_model_by_type
@@ -40,7 +40,14 @@ def create_networks(self):
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         conv_layer = get_layers_from_model_by_type(quantized_model, layers.Conv2D)[0]
-        activation_layer = get_layers_from_model_by_type(quantized_model, layers.Activation)[0]
         self.unit_test.assertTrue(conv_layer.get_config().get(ACTIVATION) == LINEAR)
-        self.unit_test.assertTrue(activation_layer.get_config().get(ACTIVATION) == self.activation_function)
+
+        if self.activation_function == SOFTMAX:
+            activation_layer = get_layers_from_model_by_type(quantized_model, keras.layers.Softmax)[0]
+            self.unit_test.assertTrue(activation_layer.get_config().get(AXIS) == SOFTMAX_AXIS_DEFAULT)
+
+        else:
+            activation_layer = get_layers_from_model_by_type(quantized_model, layers.Activation)[0]
+            self.unit_test.assertTrue(activation_layer.get_config().get(ACTIVATION) == self.activation_function)
+
