sony · elad-c · Jun 23, 2024 · Jun 19, 2024 · Jun 20, 2024 · Jun 20, 2024
diff --git a/model_compression_toolkit/core/common/graph/functional_node.py b/model_compression_toolkit/core/common/graph/functional_node.py
@@ -1,4 +1,4 @@
-from typing import Dict, Any, Tuple, Type
+from typing import Dict, Any, Tuple, Type, List, Union
 
 from model_compression_toolkit.constants import FOUND_TF
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
@@ -25,7 +25,7 @@ def __init__(self,
                  functional_op: Any = None,
                  inputs_as_list: bool = False,
                  has_activation: bool = True,
-                 tensor_input_allocs = None):
+                 tensor_input_allocs: List[Union[int, str]] = None):
         """
         Init a FunctionalNode object.
 
@@ -44,8 +44,7 @@ def __init__(self,
             functional_op: The op the node implements.
             inputs_as_list: Whether to pass the node its input tensors as a list or not when calling the layer.
             has_activation: Whether the node has activations that we might want to quantize.
-            tensor_input_allocs: A list of indices for activation tensors in the node's input tensor list
-
+            tensor_input_allocs: A list of indices (int) and names (str) that allocate the input tensors to the node's args and kwargs.
         """
 
         super().__init__(name,

diff --git a/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py b/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py
@@ -106,7 +106,7 @@ def _run_operation(n: BaseNode,
                    input_tensors: List,
                    op_func: Any,
                    quantize_node_activation_fn,
-                   use_activation_quantization: bool) -> Tuple[Union[List, torch.Tensor], Union[List, torch.Tensor]]:
+                   use_activation_quantization: bool) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
     """
     Applying the layer (op_func) to the input tensors (input_tensors).
     If quantized is set to True, and the layer's corresponding node (n) has quantization
@@ -126,17 +126,17 @@ def _run_operation(n: BaseNode,
     op_call_args = n.op_call_args if isinstance(n, FunctionalNode) else []
     functional_kwargs = n.op_call_kwargs if isinstance(n, FunctionalNode) else {}
 
-    if not (isinstance(n, FunctionalNode) and isinstance(op_func, PytorchQuantizationWrapper)):
-        # Insert positional weights only when not a quantized functional node, because quantized functional nodes
-        # insert the quantized weights in the wrapper.
+    # Insert positional weights only when not a quantized functional node, because quantized functional nodes
+    # insert the quantized weights in the wrapper.
+    if isinstance(n, FunctionalNode) and isinstance(op_func, PytorchQuantizationWrapper):
+        _tensor_input_allocs = [i for i in n.tensor_input_allocs if i not in n.weights]
+    else:
         input_tensors = n.insert_positional_weights_to_input_list(input_tensors)
         # convert inputs from positional weights (numpy arrays) to tensors. Must handle each element in the
         # list separately, because in FX the tensors are FX objects and fail to_torch_tensor
         input_tensors = [to_torch_tensor(t, numpy_type=t.dtype) if isinstance(t, np.ndarray) else t
                          for t in input_tensors]
         _tensor_input_allocs = None
-    else:
-        _tensor_input_allocs = [i for i in n.tensor_input_allocs if i not in n.weights]
 
     if isinstance(n, FunctionalNode) and n.inputs_as_list:
         out_tensors_of_n_float = op_func(input_tensors, *op_call_args, **functional_kwargs)
@@ -152,6 +152,8 @@ def _run_operation(n: BaseNode,
             out_tensors_of_n_float = torch.cat(out_tensors_of_n_float, dim=0)
         out_tensors_of_n = quantize_node_activation_fn(out_tensors_of_n_float)
 
+    if not isinstance(out_tensors_of_n, list):
+        out_tensors_of_n, out_tensors_of_n_float = [out_tensors_of_n], [out_tensors_of_n_float]
     return out_tensors_of_n, out_tensors_of_n_float
 
 
@@ -318,12 +320,8 @@ def forward(self,
                                                                       quantize_node_activation_fn=activation_quantization_fn,
                                                                       use_activation_quantization=use_activation_quantization)
 
-            if isinstance(out_tensors_of_n, list):
-                node_to_output_tensors_dict.update({node: out_tensors_of_n})
-                node_to_output_tensors_dict_float.update({node: out_tensors_of_n_float})
-            else:
-                node_to_output_tensors_dict.update({node: [out_tensors_of_n]})
-                node_to_output_tensors_dict_float.update({node: [out_tensors_of_n_float]})
+            node_to_output_tensors_dict.update({node: out_tensors_of_n})
+            node_to_output_tensors_dict_float.update({node: out_tensors_of_n_float})
 
         if self.append2output:
             outputs = _generate_outputs(self.append2output,

diff --git a/...l_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py b/...l_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py
@@ -19,6 +19,7 @@
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.pytorch.constants import IN_CHANNELS, OUT_CHANNELS, KERNEL_SIZE, KERNEL, BIAS
 from model_compression_toolkit.core.common import FrameworkInfo
 
@@ -37,7 +38,7 @@ def __init__(self, fw_info: FrameworkInfo):
 
     def substitute(self,
                    graph: Graph,
-                   func_node: BaseNode) -> Graph:
+                   func_node: FunctionalNode) -> Graph:
         """
         Substitute functional and conv/linear layer with torch layer
         Args:
@@ -60,9 +61,15 @@ def substitute(self,
         # Create new node of layer convolution
         if 1 not in func_node.weights:
             Logger.critical(f'Weight input missing for node {func_node.name}.')  # pragma: no cover
-        weight = func_node.weights[1]
-        bias = func_node.weights.get(2)
-        framework_attr = func_node.framework_attr
+        # Extract index of kernel and bias according to tensor_input_allocs if they were input as kwargs. If
+        # they were input as args, use their fixed positions.
+        weight_index = func_node.tensor_input_allocs.index(KERNEL) if KERNEL in func_node.tensor_input_allocs else 1
+        bias_index = func_node.tensor_input_allocs.index(BIAS) if BIAS in func_node.tensor_input_allocs else 2
+        if weight_index not in func_node.weights:
+            Logger.critical(f'Mismatch between tensor_input_allocs and weight index in node {func_node.name}.')  # pragma: no cover
+        weight = func_node.weights[weight_index]
+        bias = func_node.weights.get(bias_index)
+        framework_attr = func_node.op_call_kwargs
         framework_attr.update({OUT_CHANNELS: weight.shape[out_channel_index]})
         framework_attr.update({IN_CHANNELS: weight.shape[in_channel_index]})
         framework_attr.update({KERNEL_SIZE: weight.shape[2:]})

diff --git a/...mpression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py b/...mpression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py
@@ -20,6 +20,7 @@
 from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import BaseNode, Graph
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.pytorch.constants import *
 from model_compression_toolkit.logger import Logger
 
@@ -37,9 +38,12 @@ def __init__(self):
         super().__init__(matcher_instance=bn_node)
 
     @staticmethod
-    def get_attributes_from_weights(node: BaseNode) -> Dict:
+    def get_attributes_from_weights(node: FunctionalNode) -> Dict:
         """
-        convert functional batch_norm positional weights to BatchNorm2d weights
+        Convert functional batch_norm positional weights to BatchNorm2d weights. Extract indices of gamma
+        and beta according to tensor_input_allocs if they were input as kwargs. If they were input as args,
+        use their fixed positions.
+
         Args:
             node: functional batch_norm node.
 
@@ -53,23 +57,22 @@ def get_attributes_from_weights(node: BaseNode) -> Dict:
                         GAMMA: np.ones(node.weights[1].shape),
                         BETA: np.zeros(node.weights[1].shape)}
 
-        has_weight = WEIGHT not in node.framework_attr
-        has_bias = BIAS not in node.framework_attr
+        # Replace the default gamma/beta with the node's weight and/or bias, if they were given.
+        if KERNEL in node.tensor_input_allocs:
+            weights_dict[GAMMA] = node.weights[node.tensor_input_allocs.index(KERNEL)]
+        elif KERNEL not in node.op_call_kwargs:
+            weights_dict[GAMMA] = node.weights[3]
 
-        if 3 in node.weights:
-            if has_weight:
-                weights_dict[GAMMA] = node.weights[3]
-            else:
-                weights_dict[BETA] = node.weights[3]
-        if 4 in node.weights:
-            assert has_bias
+        if BIAS in node.tensor_input_allocs:
+            weights_dict[BETA] = node.weights[node.tensor_input_allocs.index(BIAS)]
+        elif BIAS not in node.op_call_kwargs:
             weights_dict[BETA] = node.weights[4]
 
         return weights_dict
 
     def substitute(self,
                    graph: Graph,
-                   node: BaseNode) -> Graph:
+                   node: FunctionalNode) -> Graph:
         """
         Substitute functional.batch_norm and its inputs with BatchNorm2d.
         Args:
@@ -87,10 +90,13 @@ def substitute(self,
         bn_node_weights = self.get_attributes_from_weights(node)
         if not bn_node_weights:
             return graph
+        framework_attr = {NUM_FEATURES: out_channels}
+        if EPSILON in node.op_call_kwargs:
+            framework_attr.update({EPSILON: node.op_call_kwargs[EPSILON]})
+        if MOMENTUM in node.op_call_kwargs:
+            framework_attr.update({MOMENTUM: node.op_call_kwargs[MOMENTUM]})
         new_batchnorm2d = BaseNode(name=node.name + '_into_BatchNorm2d',
-                                   framework_attr={NUM_FEATURES: out_channels,
-                                                   EPSILON: EPSILON_VAL,
-                                                   MOMENTUM: MOMENTUM_VAL},
+                                   framework_attr=framework_attr,
                                    input_shape=node.output_shape,
                                    output_shape=node.output_shape,
                                    weights=bn_node_weights,

diff --git a/...mpression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_layer_norm.py b/...mpression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_layer_norm.py
@@ -21,6 +21,7 @@
 from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import BaseNode, Graph
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.pytorch.constants import *
 from model_compression_toolkit.logger import Logger
 
@@ -38,9 +39,11 @@ def __init__(self):
         super().__init__(matcher_instance=ln_node)
 
     @staticmethod
-    def get_attributes_from_weights(node: BaseNode, normalized_shape: [Tuple, List, int]) -> Dict:
+    def get_attributes_from_weights(node: FunctionalNode, normalized_shape: [Tuple, List, int]) -> Dict:
         """
-        Parse layer_norm(input, normalized_shape, weight=None, bias=None)
+        Convert functional layer_norm positional weights to LayerNorm weights. Extract indices of gamma
+        and beta according to tensor_input_allocs if they were input as kwargs. If they were input as args,
+        use their fixed positions.
         Args:
             node: Node that match the pattern in the substitution init.
             normalized_shape: nn.LayerNorm "normalized_shape" argument
@@ -50,28 +53,26 @@ def get_attributes_from_weights(node: BaseNode, normalized_shape: [Tuple, List,
         """
 
         # Define default weight and bias
-        weights_dict = {GAMMA: np.ones(normalized_shape), # Default value in case weight is not given
-                        BETA: np.zeros(normalized_shape) # Default value in case bias is not given
+        weights_dict = {GAMMA: np.ones(normalized_shape),  # Default value in case weight is not given
+                        BETA: np.zeros(normalized_shape)  # Default value in case bias is not given
                         }
 
         # Check if weight and/or bias were not given.
-        has_weight = WEIGHT not in node.framework_attr
-        has_bias = BIAS not in node.framework_attr
+        if KERNEL in node.tensor_input_allocs:
+            weights_dict[GAMMA] = node.weights[node.tensor_input_allocs.index(KERNEL)]
+        elif KERNEL not in node.op_call_kwargs:
+            weights_dict[GAMMA] = node.weights[1]
 
-        if 1 in node.weights:
-            if has_weight:
-                weights_dict[GAMMA] = node.weights[1]
-            else:
-                weights_dict[BETA] = node.weights[1]
-        if 2 in node.weights:
-            assert has_bias
+        if BIAS in node.tensor_input_allocs:
+            weights_dict[BETA] = node.weights[node.tensor_input_allocs.index(BIAS)]
+        elif BIAS not in node.op_call_kwargs:
             weights_dict[BETA] = node.weights[2]
 
         return weights_dict
 
     def substitute(self,
                    graph: Graph,
-                   node: BaseNode) -> Graph:
+                   node: FunctionalNode) -> Graph:
         """
         Substitute functional.layer_norm and its inputs with LayerNorm.
         Args:
@@ -85,10 +86,11 @@ def substitute(self,
 
         ln_node_weights = self.get_attributes_from_weights(node, normalized_shape)
 
+        framework_attr = {NORMALIZED_SHAPE: normalized_shape}
+        if EPSILON in node.op_call_kwargs:
+            framework_attr.update({EPSILON: node.op_call_kwargs[EPSILON]})
         new_layernorm = BaseNode(name=node.name + '_into_LayerNorm',
-                                 framework_attr={NORMALIZED_SHAPE: normalized_shape,
-                                                 EPSILON: node.framework_attr.get('eps'),
-                                                 },
+                                 framework_attr=framework_attr,
                                  input_shape=node.output_shape,
                                  output_shape=node.output_shape,
                                  weights=ln_node_weights,