diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py index 670dc11cc..c1bad8313 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py @@ -69,7 +69,7 @@ def __init__(self, # To define RU Total constraints we need to compute weights and activations even if they have no constraints # TODO currently this logic is duplicated in linear_programming.py - targets = target_resource_utilization.get_restricted_metrics() + targets = target_resource_utilization.get_restricted_targets() if RUTarget.TOTAL in targets: targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL} self.ru_targets_to_compute = targets diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py index 3da53184a..d2746da1b 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py @@ -86,15 +86,31 @@ def is_satisfied_by(self, ru: 'ResourceUtilization') -> bool: ru.total_memory <= self.total_memory and \ ru.bops <= self.bops) - def get_restricted_metrics(self) -> Set[RUTarget]: + def get_restricted_targets(self) -> Set[RUTarget]: d = self.get_resource_utilization_dict() return {k for k, v in d.items() if v < np.inf} def is_any_restricted(self) -> bool: - return bool(self.get_restricted_metrics()) + return bool(self.get_restricted_targets()) - def __repr__(self): - return f"Weights_memory: {self.weights_memory}, " \ - f"Activation_memory: {self.activation_memory}, " \ - f"Total_memory: {self.total_memory}, " \ - f"BOPS: {self.bops}" + def get_summary_str(self, restricted: bool): + """ + Generate summary string. + + Args: + restricted: whether to include non-restricted targets. + + Returns: + Summary string. + """ + targets = self.get_restricted_targets() if restricted else list(RUTarget) + summary = [] + if RUTarget.WEIGHTS in targets: + summary.append(f"Weights memory: {self.weights_memory}") + if RUTarget.ACTIVATION in targets: + summary.append(f"Activation memory: {self.activation_memory}") + if RUTarget.TOTAL in targets: + summary.append(f"Total memory: {self.total_memory}") + if RUTarget.BOPS in targets: + summary.append(f"BOPS: {self.bops}") + return ', '.join(summary) diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py index aff19117f..066209e64 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py @@ -15,7 +15,6 @@ from collections import defaultdict from copy import deepcopy from enum import Enum, auto -from functools import lru_cache from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence from model_compression_toolkit.constants import FLOAT_BITWIDTH @@ -163,12 +162,12 @@ def compute_resource_utilization(self, w_total, a_total = None, None if {RUTarget.WEIGHTS, RUTarget.TOTAL}.intersection(ru_targets): w_total, *_ = self.compute_weights_utilization(target_criterion, bitwidth_mode, w_qcs) - elif w_qcs is not None: # pragma: no cover + elif w_qcs is not None and RUTarget.BOPS not in ru_targets: # pragma: no cover raise ValueError('Weight configuration passed but no relevant metric requested.') if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets): a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs) - elif act_qcs is not None: # pragma: no cover + elif act_qcs is not None and RUTarget.BOPS not in ru_targets: # pragma: no cover raise ValueError('Activation configuration passed but no relevant metric requested.') ru = ResourceUtilization() @@ -182,7 +181,7 @@ def compute_resource_utilization(self, ru.bops, _ = self.compute_bops(target_criterion=target_criterion, bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs) - assert ru.get_restricted_metrics() == set(ru_targets), 'Mismatch between the number of requested and computed metrics' + assert ru.get_restricted_targets() == set(ru_targets), 'Mismatch between the number of requested and computed metrics' return ru def compute_weights_utilization(self, diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py index 0564b5ddf..c61dbf6a1 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py @@ -118,7 +118,7 @@ def requires_mixed_precision(in_model: Any, ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info) max_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.QMaxBit, - ru_targets=target_resource_utilization.get_restricted_metrics()) + ru_targets=target_resource_utilization.get_restricted_targets()) return not target_resource_utilization.is_satisfied_by(max_ru) diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py index bf89f1ff8..34e6fcbaa 100644 --- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py @@ -196,7 +196,7 @@ def _add_ru_constraints(search_manager: MixedPrecisionSearchManager, """ ru_indicated_vectors = {} # targets to add constraints for - constraints_targets = target_resource_utilization.get_restricted_metrics() + constraints_targets = target_resource_utilization.get_restricted_targets() # to add constraints for Total target we need to compute weight and activation targets_to_compute = constraints_targets if RUTarget.TOTAL in constraints_targets: diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index 0e678070c..c31d256f0 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -14,7 +14,7 @@ # ============================================================================== import copy -from typing import Callable, Any, List +from typing import Callable, Any, List, Optional from model_compression_toolkit.core.common import FrameworkInfo from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation @@ -170,6 +170,7 @@ def core_runner(in_model: Any, _set_final_resource_utilization(graph=tg, final_bit_widths_config=bit_widths_config, + target_resource_utilization=target_resource_utilization, fw_info=fw_info, fw_impl=fw_impl) @@ -207,6 +208,7 @@ def core_runner(in_model: Any, def _set_final_resource_utilization(graph: Graph, final_bit_widths_config: List[int], + target_resource_utilization: Optional[ResourceUtilization], fw_info: FrameworkInfo, fw_impl: FrameworkImplementation): """ @@ -216,21 +218,20 @@ def _set_final_resource_utilization(graph: Graph, Args: graph: Graph to compute the resource utilization for. final_bit_widths_config: The final bit-width configuration to quantize the model accordingly. + target_resource_utilization: Requested target resource utilization if relevant. fw_info: A FrameworkInfo object. fw_impl: FrameworkImplementation object with specific framework methods implementation. """ - w_qcs = {n: n.final_weights_quantization_cfg for n in graph.nodes} - a_qcs = {n: n.final_activation_quantization_cfg for n in graph.nodes} - ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info) - final_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.QCustom, - act_qcs=a_qcs, w_qcs=w_qcs) - - for ru_target, ru in final_ru.get_resource_utilization_dict().items(): - if ru == 0: - Logger.warning(f"No relevant quantized layers for the resource utilization target {ru_target} were found, " - f"the recorded final ru for this target would be 0.") - - Logger.info(f'Resource utilization (of quantized targets):\n {str(final_ru)}.') + ru_targets = target_resource_utilization.get_restricted_targets() + final_ru = None + if ru_targets: + w_qcs = {n: n.final_weights_quantization_cfg for n in graph.nodes} + a_qcs = {n: n.final_activation_quantization_cfg for n in graph.nodes} + ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info) + final_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.QCustom, + act_qcs=a_qcs, w_qcs=w_qcs, ru_targets=ru_targets) + summary = final_ru.get_summary_str(restricted=True) + Logger.info(f'Resource utilization for quantized mixed-precision targets:\n {summary}.') graph.user_info.final_resource_utilization = final_ru graph.user_info.mixed_precision_cfg = final_bit_widths_config diff --git a/tests/common_tests/function_tests/test_resource_utilization_object.py b/tests/common_tests/function_tests/test_resource_utilization_object.py index f7e3f9374..d9b783240 100644 --- a/tests/common_tests/function_tests/test_resource_utilization_object.py +++ b/tests/common_tests/function_tests/test_resource_utilization_object.py @@ -22,6 +22,7 @@ default_ru = ResourceUtilization() custom_ru = ResourceUtilization(1, 2, 3, 4) +mixed_ru = ResourceUtilization(activation_memory=5, bops=10) class TestResourceUtilizationObject(unittest.TestCase): @@ -38,15 +39,17 @@ def test_default(self): self.assertTrue(custom_ru.bops, 4) def test_representation(self): - self.assertEqual(repr(default_ru), f"Weights_memory: {np.inf}, " - f"Activation_memory: {np.inf}, " - f"Total_memory: {np.inf}, " - f"BOPS: {np.inf}") - - self.assertEqual(repr(custom_ru), f"Weights_memory: {1}, " - f"Activation_memory: {2}, " - f"Total_memory: {3}, " - f"BOPS: {4}") + self.assertEqual(default_ru.get_summary_str(restricted=False), f"Weights memory: {np.inf}, " + f"Activation memory: {np.inf}, " + f"Total memory: {np.inf}, " + f"BOPS: {np.inf}") + self.assertEqual(default_ru.get_summary_str(restricted=True), "") + + self.assertEqual(mixed_ru.get_summary_str(restricted=False), f"Weights memory: {np.inf}, " + "Activation memory: 5, " + f"Total memory: {np.inf}, " + "BOPS: 10") + self.assertEqual(mixed_ru.get_summary_str(restricted=True), "Activation memory: 5, BOPS: 10") def test_ru_hold_constraints(self): self.assertTrue(default_ru.is_satisfied_by(custom_ru))