add resource utilization calculator
irenaby committed Jan 6, 2025
1 parent ce318c0 commit c2dcbe1
Showing 22 changed files with 1,030 additions and 1,058 deletions.
6 changes: 2 additions & 4 deletions model_compression_toolkit/core/common/graph/base_graph.py
@@ -545,9 +545,7 @@ def get_weights_configurable_nodes(self,

def is_configurable(n):
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
return (n.is_weights_quantization_enabled(kernel_attr) and
not n.is_all_weights_candidates_equal(kernel_attr) and
(not n.reuse or include_reused_nodes))
return n.is_configurable_weight(kernel_attr) and (not n.reuse or include_reused_nodes)

return [n for n in potential_conf_nodes if is_configurable(n)]

@@ -576,7 +574,7 @@ def get_activation_configurable_nodes(self) -> List[BaseNode]:
Returns:
A list of nodes whose activation can be configured (namely, have more than one activation quantization config candidate).
"""
return [n for n in list(self) if n.is_activation_quantization_enabled() and not n.is_all_activation_candidates_equal()]
return [n for n in list(self) if n.has_configurable_activation()]
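The two graph-level helpers above now delegate the configurability test to the node itself. A minimal standalone sketch of that predicate (a stub class for illustration, not MCT's BaseNode):

class _StubNode:
    """Stub exposing the two primitives the predicate is composed from."""
    def __init__(self, quant_enabled: bool, all_candidates_equal: bool):
        self._enabled = quant_enabled
        self._all_equal = all_candidates_equal

    def is_weights_quantization_enabled(self, attr_name: str) -> bool:
        return self._enabled

    def is_all_weights_candidates_equal(self, attr_name: str) -> bool:
        return self._all_equal

    def is_configurable_weight(self, attr_name: str) -> bool:
        # Configurable = quantized AND more than one distinct candidate config.
        return (self.is_weights_quantization_enabled(attr_name)
                and not self.is_all_weights_candidates_equal(attr_name))

assert _StubNode(True, False).is_configurable_weight('kernel')
assert not _StubNode(True, True).is_configurable_weight('kernel')    # single effective config
assert not _StubNode(False, False).is_configurable_weight('kernel')  # not quantized at all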

def get_sorted_activation_configurable_nodes(self) -> List[BaseNode]:
"""
16 changes: 14 additions & 2 deletions model_compression_toolkit/core/common/graph/base_node.py
@@ -150,6 +150,14 @@ def is_weights_quantization_enabled(self, attr_name: str) -> bool:

return False

def is_configurable_weight(self, attr_name: str) -> bool:
""" Checks whether the specific weight has a configurable quantization. """
return self.is_weights_quantization_enabled(attr_name) and not self.is_all_weights_candidates_equal(attr_name)

def has_configurable_activation(self) -> bool:
""" Checks whether the activation has a configurable quantization. """
return self.is_activation_quantization_enabled() and not self.is_all_activation_candidates_equal()

def __repr__(self):
"""
@@ -420,11 +428,15 @@ def get_total_output_params(self) -> float:
Returns: Output size.
"""
output_shapes = self.output_shape if isinstance(self.output_shape, List) else [self.output_shape]
# multiple output shapes are not necessarily lists, e.g. TF NMS uses a custom named tuple.
if self.output_shape and isinstance(self.output_shape[0], (tuple, list)):
output_shapes = list(self.output_shape)
else:
output_shapes = self.output_shape if isinstance(self.output_shape, list) else [self.output_shape]

# remove batch size (first element) from output shape
output_shapes = [s[1:] for s in output_shapes]

# for scalar shape (None,) prod returns 1
return sum([np.prod([x for x in output_shape if x is not None]) for output_shape in output_shapes])
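A quick standalone check of the shape arithmetic above (re-implemented here outside BaseNode for illustration): the batch dimension is dropped, None entries are ignored, and a scalar shape contributes 1:

import numpy as np

def total_output_params(output_shape):
    # Mirror of the logic above: normalize to a list of shapes, drop the batch dim.
    if output_shape and isinstance(output_shape[0], (tuple, list)):
        output_shapes = list(output_shape)
    else:
        output_shapes = output_shape if isinstance(output_shape, list) else [output_shape]
    output_shapes = [s[1:] for s in output_shapes]
    return sum(np.prod([x for x in s if x is not None]) for s in output_shapes)

assert total_output_params((8, 5, 6)) == 30             # single shape: 5 * 6
assert total_output_params([(8, 10, 4), (8, 5)]) == 45  # multi-output: 40 + 5
assert total_output_params((None,)) == 1                # scalar shape: np.prod([]) == 1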

def find_min_candidates_indices(self) -> List[int]:
@@ -22,7 +22,6 @@
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.hessian import HessianInfoService
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import ru_functions_mapping
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager
from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
@@ -105,16 +104,11 @@ def search_bit_width(graph_to_search_cfg: Graph,
disable_activation_for_metric=disable_activation_for_metric,
hessian_info_service=hessian_info_service)

# Each pair of (resource utilization method, resource utilization aggregation) should match to a specific
# provided target resource utilization
ru_functions = ru_functions_mapping

# Instantiate a manager object
search_manager = MixedPrecisionSearchManager(graph,
fw_info,
fw_impl,
se,
ru_functions,
target_resource_utilization,
original_graph=graph_to_search_cfg)

@@ -13,23 +13,24 @@
# limitations under the License.
# ==============================================================================

from typing import Callable, Tuple
from typing import Dict, List
from typing import Callable, Dict, List

import numpy as np

from model_compression_toolkit.core.common import BaseNode
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
from model_compression_toolkit.core.common.graph.base_graph import Graph
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
VirtualSplitWeightsNode, VirtualSplitActivationNode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import RuFunctions
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric, calc_graph_cuts
from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import Cut
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
RUTarget, ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
MixPrecisionRUHelper
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
from model_compression_toolkit.logger import Logger


class MixedPrecisionSearchManager:
@@ -42,7 +43,6 @@ def __init__(self,
fw_info: FrameworkInfo,
fw_impl: FrameworkImplementation,
sensitivity_evaluator: SensitivityEvaluation,
ru_functions: Dict[RUTarget, RuFunctions],
target_resource_utilization: ResourceUtilization,
original_graph: Graph = None):
"""
@@ -53,8 +53,6 @@ def __init__(self,
fw_impl: FrameworkImplementation object with specific framework methods implementation.
sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
a bit-width configuration for the MP model.
ru_functions: A dictionary with pairs of (MpRuMethod, MpRuAggregationMethod) mapping a RUTarget to
a couple of resource utilization metric function and resource utilization aggregation function.
target_resource_utilization: Target Resource Utilization to bound our feasible solution space such that the configuration does not violate it.
original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument
will contain the original graph (for config reconstruction purposes).
@@ -69,29 +67,17 @@ def __init__(self,
self.compute_metric_fn = self.get_sensitivity_metric()
self._cuts = None

ru_types = [ru_target for ru_target, ru_value in
target_resource_utilization.get_resource_utilization_dict().items() if ru_value < np.inf]
self.compute_ru_functions = {ru_target: ru_fn for ru_target, ru_fn in ru_functions.items() if ru_target in ru_types}
self.ru_metrics = target_resource_utilization.get_restricted_metrics()
self.ru_helper = MixPrecisionRUHelper(graph, fw_info, fw_impl)
self.target_resource_utilization = target_resource_utilization
self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
self.min_ru = self.compute_min_ru()
self.min_ru = self.ru_helper.compute_utilization(self.ru_metrics, self.min_ru_config)
self.non_conf_ru_dict = self._non_configurable_nodes_ru()

self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
original_graph=self.original_graph)

@property
def cuts(self) -> List[Cut]:
"""
Calculates graph cuts. Written as property, so it will only be calculated once and
only if cuts are needed.
"""
if self._cuts is None:
self._cuts = calc_graph_cuts(self.original_graph)
return self._cuts

def get_search_space(self) -> Dict[int, List[int]]:
"""
The search space is a mapping from a node's index to a list of integers (possible bit-width candidate indices
@@ -122,40 +108,6 @@ def get_sensitivity_metric(self) -> Callable:

return self.sensitivity_evaluator.compute_metric

def _calc_ru_fn(self, ru_target, ru_fn, mp_cfg) -> np.ndarray:
"""
Computes a resource utilization for a certain mixed precision configuration.
The method computes a resource utilization vector for specific target resource utilization.
Returns: resource utilization value.
"""
# ru_fn is a pair of resource utilization computation method and
# resource utilization aggregation method (in this method we only need the first one)
if ru_target is RUTarget.ACTIVATION:
return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl, self.cuts)
else:
return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl)

def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
"""
Computes a resource utilization vector with the values matching to the minimal mp configuration
(i.e., each node is configured with the quantization candidate that would give the minimal size of the
node's resource utilization).
The method computes the minimal resource utilization vector for each target resource utilization.
Returns: A dictionary mapping each target resource utilization to its respective minimal
resource utilization values.
"""
min_ru = {}
for ru_target, ru_fn in self.compute_ru_functions.items():
# ru_fns is a pair of resource utilization computation method and
# resource utilization aggregation method (in this method we only need the first one)
min_ru[ru_target] = self._calc_ru_fn(ru_target, ru_fn, self.min_ru_config)

return min_ru

def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
"""
Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
@@ -184,7 +136,8 @@ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
# always be 0 for all entries in the results vector.
candidate_rus = np.zeros(shape=self.min_ru[target].shape)
else:
candidate_rus = self.compute_candidate_relative_ru(c, candidate_idx, target)
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) - self.min_ru[target]

ru_matrix.append(np.asarray(candidate_rus))

# We need to transpose the calculated ru matrix to allow later multiplication with
@@ -195,40 +148,6 @@ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
np_ru_matrix = np.array(ru_matrix)
return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
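The moveaxis at the end of compute_resource_utilization_matrix turns the stacked per-candidate vectors into a constraint-major layout. A tiny numpy check of that transposition:

import numpy as np

# Rows are stacked per candidate; moveaxis pushes the candidate axis last so
# each leading row then corresponds to one constraint element (e.g., one cut).
ru_matrix = np.array([[1., 2., 3.],   # candidate 0, three cuts
                      [4., 5., 6.]])  # candidate 1, three cuts
out = np.moveaxis(ru_matrix, source=0, destination=ru_matrix.ndim - 1)
assert out.shape == (3, 2)  # rows: cuts, columns: candidates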

def compute_candidate_relative_ru(self,
conf_node_idx: int,
candidate_idx: int,
target: RUTarget) -> np.ndarray:
"""
Computes a resource utilization vector for a given candidates of a given configurable node,
i.e., the matching resource utilization vector which is obtained by computing the given target's
resource utilization function on a minimal configuration in which the given
layer's candidate is changed to the new given one.
The result is normalized by subtracting the target's minimal resource utilization vector.
Args:
conf_node_idx: The index of a node in a sorted configurable nodes list.
candidate_idx: The index of a node's quantization configuration candidate.
target: The target for which the resource utilization is calculated (a RUTarget value).
Returns: Normalized node's resource utilization vector
"""
return self.compute_node_ru_for_candidate(conf_node_idx, candidate_idx, target) - \
self.get_min_target_resource_utilization(target)

def get_min_target_resource_utilization(self, target: RUTarget) -> np.ndarray:
"""
Returns the minimal resource utilization vector (pre-calculated on initialization) of a specific target.
Args:
target: The target for which the resource utilization is calculated (a RUTarget value).
Returns: Minimal resource utilization vector.
"""
return self.min_ru[target]

def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
"""
Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal
@@ -243,7 +162,8 @@ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int,
"""
cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
return self._calc_ru_fn(target, self.compute_ru_functions[target], cfg)
# TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
return self.ru_helper.compute_utilization({target}, cfg)[target]

@staticmethod
def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -270,21 +190,10 @@ def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
"""

non_conf_ru_dict = {}
for target, ru_fns in self.compute_ru_functions.items():
# Call for the ru method of the given target - empty quantization configuration list is passed since we
# compute for non-configurable nodes
if target == RUTarget.BOPS:
ru_vector = None
elif target == RUTarget.ACTIVATION:
ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl, self.cuts)
else:
ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl)

non_conf_ru_dict[target] = ru_vector

return non_conf_ru_dict
ru_metrics = self.ru_metrics - {RUTarget.BOPS}
ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
ru[RUTarget.BOPS] = None
return ru

def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
"""
@@ -297,29 +206,14 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource
with the given config.
"""

ru_dict = {}
for ru_target, ru_fns in self.compute_ru_functions.items():
# Passing False to ru methods and aggregations to indicate that the computations
# are not for constraints setting
if ru_target == RUTarget.BOPS:
configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl, False)
elif ru_target == RUTarget.ACTIVATION:
configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.graph, self.fw_info, self.fw_impl, self.cuts)
else:
configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl)
non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
else:
ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(
np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False)

ru_dict[ru_target] = ru_ru[0]

config_ru = ResourceUtilization()
config_ru.set_resource_utilization_by_target(ru_dict)
return config_ru
act_qcs, w_qcs = self.ru_helper.get_configurable_qcs(config)
# TODO on graph or on orig graph???
ru_calc = ResourceUtilizationCalculator(self.graph, self.fw_impl, self.fw_info)
ru = ru_calc.compute_resource_utilization(target_criterion=TargetInclusionCriterion.AnyQuantized,
bitwidth_mode=BitwidthMode.MpCustom,
act_qcs=act_qcs,
w_qcs=w_qcs)
return ru
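The ResourceUtilizationCalculator introduced by this commit sits in a diff marked below as too large to render, so only its call sites are visible here: it is constructed with (graph, fw_impl, fw_info) and exposes compute_resource_utilization(target_criterion, bitwidth_mode, act_qcs=..., w_qcs=...) plus compute_bops(...). The following self-contained toy only illustrates the per-target aggregation pattern; every name in it is hypothetical and none of it is MCT's API:

from dataclasses import dataclass
from typing import Dict, List

@dataclass
class _NodeRU:
    params: int     # weight parameter count
    w_bits: int     # weight bit-width
    act_size: int   # activation tensor elements
    a_bits: int     # activation bit-width

def _compute_ru(nodes: List[_NodeRU]) -> Dict[str, float]:
    # weights: summed over nodes; activation: max single tensor, a crude
    # stand-in for MCT's max-cut computation; total: weights + activation.
    weights = sum(n.params * n.w_bits / 8 for n in nodes)
    activation = max((n.act_size * n.a_bits / 8 for n in nodes), default=0.0)
    return {'weights': weights, 'activation': activation, 'total': weights + activation}

print(_compute_ru([_NodeRU(1000, 8, 500, 8), _NodeRU(2000, 4, 800, 16)]))
# {'weights': 2000.0, 'activation': 1600.0, 'total': 3600.0}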

def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):
"""
@@ -12,29 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from dataclasses import dataclass
from enum import Enum
from typing import Dict, Any
from typing import Dict, Any, Set

import numpy as np


class RUTarget(Enum):
"""
Targets for which we define Resource Utilization metrics for mixed-precision search.
For each target that we care to consider in a mixed-precision search, there should be defined a set of
resource utilization computation function, resource utilization aggregation function,
and resource utilization target (within a ResourceUtilization object).
Whenever adding a resource utilization metric to ResourceUtilization class we should add a matching target to this enum.
WEIGHTS - Weights memory ResourceUtilization metric.
ACTIVATION - Activation memory ResourceUtilization metric.
TOTAL - Total memory ResourceUtilization metric.
BOPS - Total Bit-Operations ResourceUtilization Metric.
Resource Utilization targets for mixed-precision search.
WEIGHTS - Weights memory.
ACTIVATION - Activation memory.
TOTAL - Total memory.
BOPS - Total Bit-Operations.
"""

WEIGHTS = 'weights'
@@ -43,34 +35,21 @@ class RUTarget(Enum):
BOPS = 'bops'


class ResourceUtilization:
    """
    Class to represent measurements of performance.
    """

    def __init__(self,
                 weights_memory: float = np.inf,
                 activation_memory: float = np.inf,
                 total_memory: float = np.inf,
                 bops: float = np.inf):
        """
        Args:
            weights_memory: Memory of a model's weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not).
            activation_memory: Memory of a model's activation in bytes, according to the given activation resource utilization metric.
            total_memory: The sum of model's activation and weights memory in bytes, according to the given total resource utilization metric.
            bops: The total bit-operations in the model.
        """
        self.weights_memory = weights_memory
        self.activation_memory = activation_memory
        self.total_memory = total_memory
        self.bops = bops

    def __repr__(self):
        return f"Weights_memory: {self.weights_memory}, " \
               f"Activation_memory: {self.activation_memory}, " \
               f"Total_memory: {self.total_memory}, " \
               f"BOPS: {self.bops}"

@dataclass
class ResourceUtilization:
    """
    Class to represent measurements of performance.

    weights_memory: Memory of a model's weights in bytes.
    activation_memory: Memory of a model's activation in bytes.
    total_memory: The sum of model's activation and weights memory in bytes.
    bops: The total bit-operations in the model.
    """
    # TODO the user facade actually computes size, not memory. Do we want to change fields names?
    weights_memory: float = np.inf
    activation_memory: float = np.inf
    total_memory: float = np.inf
    bops: float = np.inf

def weight_restricted(self):
return self.weights_memory < np.inf
@@ -93,34 +72,24 @@ def get_resource_utilization_dict(self) -> Dict[RUTarget, float]:
RUTarget.TOTAL: self.total_memory,
RUTarget.BOPS: self.bops}

def set_resource_utilization_by_target(self, ru_mapping: Dict[RUTarget, float]):
    """
    Setting a ResourceUtilization object values for each ResourceUtilization target in the given dictionary.

    Args:
        ru_mapping: A mapping from a RUTarget to a matching resource utilization value.
    """
    self.weights_memory = ru_mapping.get(RUTarget.WEIGHTS, np.inf)
    self.activation_memory = ru_mapping.get(RUTarget.ACTIVATION, np.inf)
    self.total_memory = ru_mapping.get(RUTarget.TOTAL, np.inf)
    self.bops = ru_mapping.get(RUTarget.BOPS, np.inf)

def holds_constraints(self, ru: Any) -> bool:
    """
    Checks whether the given ResourceUtilization object holds a set of ResourceUtilization constraints defined by
    the current ResourceUtilization object.

    Args:
        ru: A ResourceUtilization object to check if it holds the constraints.

    Returns: True if all the given resource utilization values are not greater than the referenced resource utilization values.
    """
    if not isinstance(ru, ResourceUtilization):
        return False

    return ru.weights_memory <= self.weights_memory and \
           ru.activation_memory <= self.activation_memory and \
           ru.total_memory <= self.total_memory and \
           ru.bops <= self.bops

def is_satisfied_by(self, ru: 'ResourceUtilization') -> bool:
    """
    Checks whether another ResourceUtilization object satisfies the constraints defined by the current object.

    Args:
        ru: A ResourceUtilization object to check against the current object.

    Returns:
        Whether all constraints are satisfied.
    """
    return bool(ru.weights_memory <= self.weights_memory and \
                ru.activation_memory <= self.activation_memory and \
                ru.total_memory <= self.total_memory and \
                ru.bops <= self.bops)

def get_restricted_metrics(self) -> Set[RUTarget]:
    d = self.get_resource_utilization_dict()
    return {k for k, v in d.items() if v < np.inf}
def is_any_restricted(self) -> bool:
return bool(self.get_restricted_metrics())
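A usage sketch for the reworked dataclass, using only the fields and methods defined in this diff (assumes MCT at this commit is importable):

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
    ResourceUtilization, RUTarget

# Constraint: bound only weights memory; other targets stay unrestricted (np.inf).
constraint = ResourceUtilization(weights_memory=1024)
assert constraint.get_restricted_metrics() == {RUTarget.WEIGHTS}
assert constraint.is_any_restricted()

# A measured utilization satisfies the constraint iff every field is within its bound.
measured = ResourceUtilization(weights_memory=900, activation_memory=5000,
                               total_memory=5900, bops=1e9)
assert constraint.is_satisfied_by(measured)
assert not constraint.is_satisfied_by(ResourceUtilization(weights_memory=2048))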

Large diffs are not rendered by default.

@@ -13,21 +13,17 @@
# limitations under the License.
# ==============================================================================
import copy
from collections import defaultdict
from typing import Callable, Any

import numpy as np
from typing import Callable, Any, Dict, Tuple

from model_compression_toolkit.logger import Logger
from model_compression_toolkit.constants import FLOAT_BITWIDTH, BITS_TO_BYTES
from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig, QuantizationErrorMethod
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
ResourceUtilizationCalculator, BitwidthMode, TargetInclusionCriterion
from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import calc_graph_cuts


def compute_resource_utilization_data(in_model: Any,
@@ -37,7 +33,7 @@ def compute_resource_utilization_data(in_model: Any,
fw_info: FrameworkInfo,
fw_impl: FrameworkImplementation,
transformed_graph: Graph = None,
mixed_precision_enable: bool = True) -> ResourceUtilization:
mixed_precision_enabled: bool = True) -> ResourceUtilization:
"""
Compute Resource Utilization information that can be relevant for defining target ResourceUtilization for mixed precision search.
Calculates maximal activation tensor size, the sum of the model's weight parameters and the total memory combining both weights
@@ -53,7 +49,7 @@ def compute_resource_utilization_data(in_model: Any,
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
transformed_graph: An internal graph representation of the input model. Defaults to None.
If no graph is provided, a graph will be constructed using the specified model.
mixed_precision_enable: Indicates if mixed precision is enabled, defaults to True.
mixed_precision_enabled: Indicates if mixed precision is enabled, defaults to True.
If disabled, computes resource utilization using base quantization
configurations across all layers.
@@ -72,174 +68,15 @@ def compute_resource_utilization_data(in_model: Any,
fw_impl,
tpc,
bit_width_config=core_config.bit_width_config,
mixed_precision_enable=mixed_precision_enable)
mixed_precision_enable=mixed_precision_enabled,
running_gptq=False)

# Compute parameters sum
weights_memory_bytes, weights_params = compute_nodes_weights_params(graph=transformed_graph, fw_info=fw_info)
total_weights_params = 0 if len(weights_params) == 0 else sum(weights_params)

# Compute max activation tensor
activation_output_sizes_bytes, activation_output_sizes = compute_activation_output_maxcut_sizes(graph=transformed_graph)
max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)

# Compute total memory utilization - parameters sum + max activation tensor
total_size = total_weights_params + max_activation_tensor_size

# Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)

return ResourceUtilization(weights_memory=total_weights_params,
activation_memory=max_activation_tensor_size,
total_memory=total_size,
bops=bops_count)


def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> Tuple[np.ndarray, np.ndarray]:
"""
Calculates the memory usage in bytes and the number of weight parameters for each node within a graph.
Memory calculations are based on the maximum bit-width used for quantization per node.
Args:
graph: A finalized Graph object, representing the model structure.
fw_info: FrameworkInfo object containing details about the specific framework's
quantization attributes for different layers' weights.
Returns:
A tuple containing two arrays:
- The first array represents the memory in bytes for each node's weights when quantized at the maximal bit-width.
- The second array represents the total number of weight parameters for each node.
"""
weights_params = []
weights_memory_bytes = []
for n in graph.nodes:
# TODO: when enabling multiple attribute quantization by default (currently,
# only kernel quantization is enabled) we should include other attributes memory in the sum of all
# weights memory.
# When implementing this, we should just go over all attributes in the node instead of counting only kernels.
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
if kernel_attr is not None and not n.reuse:
kernel_candidates = n.get_all_weights_attr_candidates(kernel_attr)

if len(kernel_candidates) > 0 and any([c.enable_weights_quantization for c in kernel_candidates]):
max_weight_bits = max([kc.weights_n_bits for kc in kernel_candidates])
node_num_weights_params = 0
for attr in fw_info.get_kernel_op_attributes(n.type):
if attr is not None:
node_num_weights_params += n.get_weights_by_keys(attr).flatten().shape[0]

weights_params.append(node_num_weights_params)

# multiply num params by num bits and divide by BITS_TO_BYTES to convert from bits to bytes
weights_memory_bytes.append(node_num_weights_params * max_weight_bits / BITS_TO_BYTES)

return np.array(weights_memory_bytes), np.array(weights_params)


def compute_activation_output_maxcut_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:
"""
Computes, for each max-cut, the total activation size in number of parameters and in bytes.
Args:
graph: A finalized Graph object, representing the model structure.
Returns:
A tuple containing two arrays:
- The first is an array of each activation max-cut size in bytes, calculated
using the maximal bit-width for quantization.
- The second is an array of each activation max-cut size in number of parameters.
"""
cuts = calc_graph_cuts(graph)

# map nodes to cuts.
node_to_cat_mapping = defaultdict(list)
for i, cut in enumerate(cuts):
mem_element_names = [m.node_name for m in cut.mem_elements.elements]
for m_name in mem_element_names:
if len(graph.find_node_by_name(m_name)) > 0:
node_to_cat_mapping[m_name].append(i)
else:
Logger.critical(f"Missing node: {m_name}") # pragma: no cover

activation_outputs = np.zeros(len(cuts))
activation_outputs_bytes = np.zeros(len(cuts))
for n in graph.nodes:
# Go over all nodes that have activation quantization enabled.
if n.has_activation_quantization_enabled_candidate():
# Fetch maximum bits required for activations quantization.
max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
node_output_size = n.get_total_output_params()
for cut_index in node_to_cat_mapping[n.name]:
activation_outputs[cut_index] += node_output_size
# Calculate activation size in bytes and append to list
activation_outputs_bytes[cut_index] += node_output_size * max_activation_bits / BITS_TO_BYTES

return activation_outputs_bytes, activation_outputs


# TODO maxcut: add test for this function and remove no cover
def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]: # pragma: no cover
"""
Computes an array of the respective output tensor size and an array of the output tensor size in bytes for
each node.
Args:
graph: A finalized Graph object, representing the model structure.
Returns:
A tuple containing two arrays:
- The first array represents each node's activation output tensor size in bytes,
calculated using the maximal bit-width for quantization.
- The second array represents each node's activation output tensor size in number of parameters.
"""
activation_outputs = []
activation_outputs_bytes = []
for n in graph.nodes:
# Go over all nodes that have configurable activation.
if n.has_activation_quantization_enabled_candidate():
# Fetch maximum bits required for quantizing activations
max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
node_output_size = n.get_total_output_params()
activation_outputs.append(node_output_size)
# Calculate activation size in bytes and append to list
activation_outputs_bytes.append(node_output_size * max_activation_bits / BITS_TO_BYTES)

return np.array(activation_outputs_bytes), np.array(activation_outputs)


def compute_total_bops(graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation) -> np.ndarray:
"""
Computes a vector with the respective Bit-operations count for each configurable node that includes MAC operations.
The computation assumes that the graph is a representation of a float model, thus, BOPs computation uses 32-bit.
Args:
graph: Finalized Graph object.
fw_info: FrameworkInfo object about the specific framework
(e.g., attributes of different layers' weights to quantize).
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
Returns: A vector of nodes' Bit-operations count.
"""

bops = []

# Go over all configurable nodes that have kernels.
for n in graph.get_topo_sorted_nodes():
if n.has_kernel_weight_to_quantize(fw_info):
# If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
assert len(incoming_edges) == 1, f"Can't compute BOPS metric for node {n.name} with multiple inputs."

node_mac = fw_impl.get_node_mac_operations(n, fw_info)

node_bops = (FLOAT_BITWIDTH ** 2) * node_mac
bops.append(node_bops)

return np.array(bops)
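For reference, a worked instance of the removed helper's float BOPS formula, which the new code reproduces below via compute_bops(..., BitwidthMode.Float): with FLOAT_BITWIDTH = 32, a node's bit-operations are MACs * 32 * 32. The Conv2D MAC count here is the standard formula and is only an illustrative assumption:

FLOAT_BITWIDTH = 32  # float-model assumption, as in the removed compute_total_bops

# Hypothetical Conv2D: 3x3 kernel, 64 -> 128 channels, 56x56 output feature map.
node_mac = 3 * 3 * 64 * 128 * 56 * 56       # ~2.31e8 MAC operations
node_bops = (FLOAT_BITWIDTH ** 2) * node_mac
print(f'{node_bops:.3e} bit-operations')    # ~2.37e+11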
ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)
ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized,
BitwidthMode.Size,
metrics=set(RUTarget) - {RUTarget.BOPS})
ru.bops, _ = ru_calculator.compute_bops(TargetInclusionCriterion.AnyQuantized, BitwidthMode.Float)
return ru


def requires_mixed_precision(in_model: Any,
@@ -268,7 +105,6 @@ def requires_mixed_precision(in_model: Any,
Returns: A boolean indicating if mixed precision is needed.
"""
is_mixed_precision = False
core_config = _create_core_config_for_ru(core_config)

transformed_graph = graph_preparation_runner(in_model,
@@ -278,25 +114,14 @@ def requires_mixed_precision(in_model: Any,
fw_impl,
tpc,
bit_width_config=core_config.bit_width_config,
mixed_precision_enable=False)
# Compute max weights memory in bytes
weights_memory_by_layer_bytes, _ = compute_nodes_weights_params(transformed_graph, fw_info)
total_weights_memory_bytes = 0 if len(weights_memory_by_layer_bytes) == 0 else sum(weights_memory_by_layer_bytes)

# Compute max activation tensor in bytes
activation_memory_estimation_bytes, _ = compute_activation_output_maxcut_sizes(transformed_graph)
max_activation_memory_estimation_bytes = 0 if len(activation_memory_estimation_bytes) == 0 \
else max(activation_memory_estimation_bytes)

# Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)

is_mixed_precision |= target_resource_utilization.weights_memory < total_weights_memory_bytes
is_mixed_precision |= target_resource_utilization.activation_memory < max_activation_memory_estimation_bytes
is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes + max_activation_memory_estimation_bytes
is_mixed_precision |= target_resource_utilization.bops < bops_count
return is_mixed_precision
mixed_precision_enable=False,
running_gptq=False)

ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)
max_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized,
BitwidthMode.MpMax,
metrics=target_resource_utilization.get_restricted_metrics())
return not target_resource_utilization.is_satisfied_by(max_ru)


def _create_core_config_for_ru(core_config: CoreConfig) -> CoreConfig:

This file was deleted.

This file was deleted.

Large diffs are not rendered by default.

@@ -14,10 +14,13 @@
# ==============================================================================

import numpy as np
import pulp
from pulp import *
from tqdm import tqdm
from typing import Dict, List, Tuple, Callable

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
ru_target_aggregation_fn, AggregationMethod
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager
@@ -218,13 +221,11 @@ def _add_set_of_ru_constraints(search_manager: MixedPrecisionSearchManager,
np.sum(indicated_ru_matrix[i], axis=0) + # sum of metric values over all configurations in a row
search_manager.min_ru[target][i] for i in range(indicated_ru_matrix.shape[0])])

# search_manager.compute_ru_functions contains a pair of ru_metric and ru_aggregation for each ru target
# get aggregated ru, considering both configurable and non-configurable nodes
if non_conf_ru_vector is None or len(non_conf_ru_vector) == 0:
aggr_ru = search_manager.compute_ru_functions[target].aggregate_fn(ru_sum_vector)
else:
aggr_ru = search_manager.compute_ru_functions[target].aggregate_fn(np.concatenate([ru_sum_vector, non_conf_ru_vector]))
ru_vec = ru_sum_vector
if non_conf_ru_vector is not None and non_conf_ru_vector.size:
ru_vec = np.concatenate([ru_vec, non_conf_ru_vector])

aggr_ru = _aggregate_for_lp(ru_vec, target)
for v in aggr_ru:
if isinstance(v, float):
if v > target_resource_utilization_value:
@@ -235,6 +236,21 @@ def _add_set_of_ru_constraints(search_manager: MixedPrecisionSearchManager,
lp_problem += v <= target_resource_utilization_value


def _aggregate_for_lp(ru_vec, target) -> list:
if target == RUTarget.TOTAL:
w = pulp.lpSum(v[0] for v in ru_vec)
return [w + v[1] for v in ru_vec]

if ru_target_aggregation_fn[target] == AggregationMethod.SUM:
return [pulp.lpSum(ru_vec)]

if ru_target_aggregation_fn[target] == AggregationMethod.MAX:
return list(ru_vec)

raise NotImplementedError(f'Cannot define lp constraints with unsupported aggregation function '
f'{ru_target_aggregation_fn[target]}') # pragma: no cover
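The mapping to LP constraints, concretely: SUM-aggregated targets contribute a single lpSum expression bounded once, MAX-aggregated targets contribute one bounded expression per element, and TOTAL adds the summed weights term to every activation element. A minimal runnable pulp sketch of the SUM and MAX cases (toy variables, not the search manager's matrices):

import pulp

prob = pulp.LpProblem('ru_sketch', pulp.LpMinimize)
x = [pulp.LpVariable(f'x{i}', lowBound=0, upBound=8) for i in range(3)]

# MAX aggregation (e.g. activation cuts): one constraint per element.
for expr in (4 * xi + 2 for xi in x):
    prob += expr <= 10

# SUM aggregation (e.g. weights): a single aggregated constraint.
prob += pulp.lpSum(x) <= 5

prob += pulp.lpSum(x)  # dummy objective
prob.solve(pulp.PULP_CBC_CMD(msg=False))
assert pulp.LpStatus[prob.status] == 'Optimal'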


def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
target_resource_utilization: ResourceUtilization,
eps: float = EPS) -> Dict[int, Dict[int, float]]:
@@ -113,11 +113,9 @@ def __init__(self,
# in the new built MP model.
self.baseline_model, self.model_mp, self.conf_node2layers = self._build_models()

# Build images batches for inference comparison
self.images_batches = self._get_images_batches(quant_config.num_of_images)

# Casting images tensors to the framework tensor type.
self.images_batches = [self.fw_impl.to_tensor(img) for img in self.images_batches]
# Build image batches for inference comparison and cast to the framework tensor type
images_batches = self._get_images_batches(quant_config.num_of_images)
self.images_batches = [self.fw_impl.to_tensor(img) for img in images_batches]

# Initiating baseline_tensors_list since it is not initiated in SensitivityEvaluationManager init.
self.baseline_tensors_list = self._init_baseline_tensors_list()
@@ -80,8 +80,8 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
updated_ru.append(node_updated_ru)

# filter out new configs that don't hold the resource utilization restrictions
node_filtered_ru = [(node_idx, ru) for node_idx, ru in zip(valid_candidates, updated_ru) if
target_resource_utilization.holds_constraints(ru)]
node_filtered_ru = [(node_idx, ru) for node_idx, ru in zip(valid_candidates, updated_ru)
if target_resource_utilization.is_satisfied_by(ru)]

if len(node_filtered_ru) > 0:
sorted_by_ru = sorted(node_filtered_ru, key=lambda node_ru: (node_ru[1].total_memory,
@@ -12,13 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from dataclasses import dataclass, field
from typing import List, Union, Dict

from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.matchers.node_matcher import BaseNodeMatcher
from model_compression_toolkit.logger import Logger


@dataclass
class ManualBitWidthSelection:
"""
Class to encapsulate the manual bit width selection configuration for a specific filter.
@@ -27,27 +29,19 @@ class ManualBitWidthSelection:
filter (BaseNodeMatcher): The filter used to select nodes for bit width manipulation.
bit_width (int): The bit width to be applied to the selected nodes.
"""
def __init__(self,
filter: BaseNodeMatcher,
bit_width: int):
self.filter = filter
self.bit_width = bit_width
filter: BaseNodeMatcher
bit_width: int


@dataclass
class BitWidthConfig:
"""
Class to manage manual bit-width configurations.
Attributes:
manual_activation_bit_width_selection_list (List[ManualBitWidthSelection]): A list of ManualBitWidthSelection objects defining manual bit-width configurations.
"""
def __init__(self,
manual_activation_bit_width_selection_list: List[ManualBitWidthSelection] = None):
self.manual_activation_bit_width_selection_list = [] if manual_activation_bit_width_selection_list is None else manual_activation_bit_width_selection_list

def __repr__(self):
# Used for debugging, thus no cover.
return str(self.__dict__) # pragma: no cover
manual_activation_bit_width_selection_list: List[ManualBitWidthSelection] = field(default_factory=list)
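A note on the @dataclass conversion above: a mutable default such as a list has to be declared with field(default_factory=...), which is what replaces the old None-check in __init__. A minimal illustration of why:

from dataclasses import dataclass, field
from typing import List

@dataclass
class _Cfg:
    # Each instance gets a fresh list; a bare `= []` default would be shared
    # across instances and is rejected outright for dataclass fields.
    selections: List[int] = field(default_factory=list)

a, b = _Cfg(), _Cfg()
a.selections.append(1)
assert b.selections == []  # b is unaffected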

def set_manual_activation_bit_width(self,
filters: Union[List[BaseNodeMatcher], BaseNodeMatcher],
74 changes: 21 additions & 53 deletions model_compression_toolkit/core/runner.py
@@ -12,44 +12,38 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from collections import namedtuple

import copy

from typing import Callable, Tuple, Any, List, Dict

import numpy as np
from typing import Callable, Any, List

from model_compression_toolkit.core.common import FrameworkInfo
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.fusion.graph_fuser import GraphFuser

from model_compression_toolkit.core.common.graph.base_graph import Graph
from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, \
SchedulerInfo
from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
from model_compression_toolkit.core.common.hessian.hessian_info_service import HessianInfoService
from model_compression_toolkit.core.common.mixed_precision.bit_width_setter import set_bit_widths
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_candidates_filter import \
filter_candidates_for_mixed_precision
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import \
requires_mixed_precision
from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.graph.base_graph import Graph
from model_compression_toolkit.core.common.mixed_precision.bit_width_setter import set_bit_widths
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import ru_functions_mapping
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width
from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
from model_compression_toolkit.core.common.visualization.final_config_visualizer import \
WeightsFinalBitwidthConfigVisualizer, \
ActivationFinalBitwidthConfigVisualizer
from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \
finalize_bitwidth_in_tb
from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import \
TargetPlatformCapabilities


def core_runner(in_model: Any,
@@ -88,15 +82,15 @@ def core_runner(in_model: Any,
"""

# Warn if the representative dataset has batch-size == 1
batch_data = iter(representative_data_gen()).__next__()
batch_data = next(iter(representative_data_gen()))
if isinstance(batch_data, list):
batch_data = batch_data[0]
if batch_data.shape[0] == 1:
Logger.warning('representative_data_gen generates a batch size of 1 which can be slow for optimization:'
' consider increasing the batch size')

# Checking whether to run mixed precision quantization
if target_resource_utilization is not None:
if target_resource_utilization is not None and target_resource_utilization.is_any_restricted():
if core_config.mixed_precision_config is None:
Logger.critical("Provided an initialized target_resource_utilization, that means that mixed precision quantization is "
"enabled, but the provided MixedPrecisionQuantizationConfig is None.")
@@ -177,7 +171,6 @@ def core_runner(in_model: Any,

_set_final_resource_utilization(graph=tg,
final_bit_widths_config=bit_widths_config,
ru_functions_dict=ru_functions_mapping,
fw_info=fw_info,
fw_impl=fw_impl)

@@ -215,7 +208,6 @@ def core_runner(in_model: Any,

def _set_final_resource_utilization(graph: Graph,
final_bit_widths_config: List[int],
ru_functions_dict: Dict[RUTarget, Tuple[MpRuMetric, MpRuAggregation]],
fw_info: FrameworkInfo,
fw_impl: FrameworkImplementation):
"""
@@ -225,39 +217,15 @@ def _set_final_resource_utilization(graph: Graph,
Args:
graph: Graph to compute the resource utilization for.
final_bit_widths_config: The final bit-width configuration to quantize the model accordingly.
ru_functions_dict: A mapping between a RUTarget and a pair of resource utilization method and resource utilization aggregation functions.
fw_info: A FrameworkInfo object.
fw_impl: FrameworkImplementation object with specific framework methods implementation.
"""

final_ru_dict = {}
for ru_target, ru_funcs in ru_functions_dict.items():
ru_method, ru_aggr = ru_funcs
if ru_target == RUTarget.BOPS:
final_ru_dict[ru_target] = \
ru_aggr(ru_method(final_bit_widths_config, graph, fw_info, fw_impl, False), False)[0]
else:
non_conf_ru = ru_method([], graph, fw_info, fw_impl)
conf_ru = ru_method(final_bit_widths_config, graph, fw_info, fw_impl)
if len(final_bit_widths_config) > 0 and len(non_conf_ru) > 0:
final_ru_dict[ru_target] = ru_aggr(np.concatenate([conf_ru, non_conf_ru]), False)[0]
elif len(final_bit_widths_config) > 0 and len(non_conf_ru) == 0:
final_ru_dict[ru_target] = ru_aggr(conf_ru, False)[0]
elif len(final_bit_widths_config) == 0 and len(non_conf_ru) > 0:
# final_bit_widths_config == 0 ==> no configurable nodes,
# thus, ru can be computed from non_conf_ru alone
final_ru_dict[ru_target] = ru_aggr(non_conf_ru, False)[0]
else:
# No relevant nodes have been quantized with affect on the given target - since we only consider
# in the model's final size the quantized layers size, this means that the final size for this target
# is zero.
Logger.warning(f"No relevant quantized layers for the ru target {ru_target} were found, the recorded "
f"final ru for this target would be 0.")
final_ru_dict[ru_target] = 0

final_ru = ResourceUtilization()
final_ru.set_resource_utilization_by_target(final_ru_dict)
w_qcs = {n: n.final_weights_quantization_cfg for n in graph.nodes}
a_qcs = {n: n.final_activation_quantization_cfg for n in graph.nodes}
ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
final_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.MpCustom,
act_qcs=a_qcs, w_qcs=w_qcs)
print(final_ru)
graph.user_info.final_resource_utilization = final_ru
graph.user_info.mixed_precision_cfg = final_bit_widths_config
@@ -78,7 +78,7 @@ def get_max_resources_for_model(self, model):
fw_info=DEFAULT_KERAS_INFO,
fw_impl=KerasImplementation(),
transformed_graph=None,
mixed_precision_enable=False)
mixed_precision_enabled=False)

def get_quantization_config(self):
return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
@@ -24,8 +24,6 @@
MixedPrecisionQuantizationConfig
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width, \
BitWidthSearchMethod
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import \
RuFunctions
from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
mp_integer_programming_search
from model_compression_toolkit.core.common.model_collector import ModelCollector
@@ -68,10 +66,6 @@ def __init__(self, layer_to_ru_mapping):
RUTarget.TOTAL: [[2], [2], [2]],
RUTarget.BOPS: [[1], [1], [1]]} # minimal resource utilization in the tests layer_to_ru_mapping

self.compute_ru_functions = {RUTarget.WEIGHTS: RuFunctions(None, lambda v: [lpSum(v)]),
RUTarget.ACTIVATION: RuFunctions(None, lambda v: [i for i in v]),
RUTarget.TOTAL: RuFunctions(None, lambda v: [lpSum(v[0]) + i for i in v[1]]),
RUTarget.BOPS: RuFunctions(None, lambda v: [lpSum(v)])}
self.max_ru_config = [0]
self.config_reconstruction_helper = MockReconstructionHelper()
self.non_conf_ru_dict = None
14 changes: 14 additions & 0 deletions tests_pytest/core/__init__.py
@@ -0,0 +1,14 @@
# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
14 changes: 14 additions & 0 deletions tests_pytest/core/common/__init__.py
@@ -0,0 +1,14 @@
# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
14 changes: 14 additions & 0 deletions tests_pytest/core/common/mixed_precision/__init__.py
@@ -0,0 +1,14 @@
# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
@@ -0,0 +1,14 @@
# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
@@ -0,0 +1,51 @@
# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from typing import List
from unittest.mock import Mock

import numpy as np

from model_compression_toolkit.core.common import Graph, BaseNode


class TestUtilization:
    pass


def generate_node() -> BaseNode:
    # Build a standalone node with a few random weight tensors for utilization tests.
    rng = np.random.default_rng(seed=42)
    return BaseNode(name='node',
                    framework_attr={},
                    input_shape=(2, 3),
                    output_shape=(4, 5, 6),
                    weights={'weight1': rng.random((2, 5, 7)),
                             'weight2': rng.random((11,)),
                             'weight3': rng.random((10, 3))},
                    layer_class=Mock(),
                    reuse=False,
                    reuse_group=None,
                    inputs_as_list=False,
                    quantization_attr=None,
                    has_activation=True,
                    is_custom=False)


def generate_graph(nodes: List[BaseNode],
                   input_nodes: List[BaseNode],
                   output_nodes: List['OutTensor'],
                   edge_list: List['Edge']) -> Graph:
    # OutTensor/Edge annotations kept as strings; their import paths are not shown in this diff.
    return Graph('g', nodes, input_nodes, output_nodes, edge_list, fw_info=None)


class TestRUCalculator:
    def test_compute_node_weights(self):
        ...  # body truncated in this diff
