From 0fd48cece3cd1d2bcc3c7964e65eda4524ddb819 Mon Sep 17 00:00:00 2001 From: elad-c Date: Wed, 15 Jan 2025 16:02:09 +0200 Subject: [PATCH] 1. Maxcut runtime optimization. 2. Add "warning experimental" to maxcut feature. 3. add timeout to maxcut solver after 1st iteration. --- .../memory_graph/compute_graph_max_cut.py | 21 +++++++++++++++-- .../core/common/graph/memory_graph/cut.py | 4 ++-- .../graph/memory_graph/max_cut_astar.py | 23 +++++++++---------- .../common/graph/memory_graph/memory_graph.py | 6 ++++- .../resource_utilization_calculator.py | 2 ++ .../search_methods/linear_programming.py | 2 +- requirements.txt | 3 ++- 7 files changed, 42 insertions(+), 19 deletions(-) diff --git a/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py b/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py index 0729699ab..a51af50f4 100644 --- a/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +++ b/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py @@ -13,9 +13,10 @@ # limitations under the License. 
# ============================================================================== from collections import namedtuple - from typing import Tuple, List +import timeout_decorator +from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import OPERATORS_SCHEDULING, MAX_CUT, CUTS, FUSED_NODES_MAPPING from model_compression_toolkit.core.common import BaseNode from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut @@ -47,9 +48,25 @@ def compute_graph_max_cut(memory_graph: MemoryGraph, l_bound = memory_graph.memory_lbound_single_op u_bound = 2 * sum([t.total_size for t in memory_graph.b_nodes]) - l_bound it = 0 + + @timeout_decorator.timeout(300) + def solver_wrapper(_estimate, _iter_limit): + return max_cut_astar.solve(estimate=_estimate, iter_limit=_iter_limit) + while it < n_iter: estimate = (u_bound + l_bound) / 2 - schedule, max_cut_size, cuts = max_cut_astar.solve(estimate=estimate, iter_limit=astar_n_iter) + if it == 0: + schedule, max_cut_size, cuts = max_cut_astar.solve(estimate=estimate, iter_limit=astar_n_iter) + else: + try: + schedule, max_cut_size, cuts = solver_wrapper(estimate=estimate, iter_limit=astar_n_iter) + except timeout_decorator.TimeoutError: + if last_result[0] is None: + Logger.critical(f"Max-cut solver stopped on timeout in iteration {it} before finding a solution.") # pragma: no cover + else: + Logger.warning(f"Max-cut solver stopped on timeout in iteration {it}.") + return last_result + if schedule is None: l_bound = estimate else: diff --git a/model_compression_toolkit/core/common/graph/memory_graph/cut.py b/model_compression_toolkit/core/common/graph/memory_graph/cut.py index 184f5e7c5..bd21f502b 100644 --- a/model_compression_toolkit/core/common/graph/memory_graph/cut.py +++ b/model_compression_toolkit/core/common/graph/memory_graph/cut.py @@ -67,7 +67,7 @@ def __eq__(self, other) -> bool: return False # pragma: no cover def __hash__(self): - return hash((frozenset(self.op_order), 
frozenset(self.op_record), self.mem_elements)) + return id(self) def __repr__(self): - return f"" # pragma: no cover \ No newline at end of file + return f"" # pragma: no cover diff --git a/model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py b/model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py index b7005fd4e..6e651d4db 100644 --- a/model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py +++ b/model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== import copy -from typing import List, Tuple, Dict +from typing import List, Tuple, Dict, Set from model_compression_toolkit.core.common import BaseNode from model_compression_toolkit.constants import DUMMY_TENSOR, DUMMY_NODE @@ -139,8 +139,8 @@ def solve(self, estimate: float, iter_limit: int = 500) -> Tuple[List[BaseNode], """ - open_list = [self.src_cut] - closed_list = [] + open_list = {self.src_cut} + closed_list = set() costs = {self.src_cut: self.src_cut.memory_size()} routes = {self.src_cut: [self.src_cut]} @@ -159,22 +159,21 @@ def solve(self, estimate: float, iter_limit: int = 500) -> Tuple[List[BaseNode], if self.is_pivot(next_cut): # Can clear all search history - open_list = [] - closed_list = [] + open_list.clear() + closed_list.clear() routes = {} else: # Can remove only next_cut and put it in closed_list open_list.remove(next_cut) del routes[next_cut] - closed_list.append(next_cut) + closed_list.add(next_cut) # Expand the chosen cut expanded_cuts = self.expand(next_cut) expansion_count += 1 # Only consider nodes that where not already visited - expanded_cuts = [_c for _c in expanded_cuts if _c not in closed_list] - for c in expanded_cuts: + for c in filter(lambda _c: _c not in closed_list, expanded_cuts): cost = self.accumulate(cut_cost, c.memory_size()) if c not in open_list: 
self._update_expanded_node(c, cost, cut_route, open_list, costs, routes) @@ -192,7 +191,7 @@ def solve(self, estimate: float, iter_limit: int = 500) -> Tuple[List[BaseNode], return None, 0, None # pragma: no cover @staticmethod - def _update_expanded_node(cut: Cut, cost: float, route: List[Cut], open_list: List[Cut], + def _update_expanded_node(cut: Cut, cost: float, route: List[Cut], open_list: Set[Cut], costs: Dict[Cut, float], routes: Dict[Cut, List[Cut]]): """ An auxiliary method for updating search data structures according to an expanded node. @@ -201,16 +200,16 @@ def _update_expanded_node(cut: Cut, cost: float, route: List[Cut], open_list: Li cut: A cut to expand the search to. cost: The cost of the cut. route: The rout to the cut. - open_list: The search open list. + open_list: The search open set. costs: The search utility mapping between cuts and their cost. routes: The search utility mapping between cuts and their routes. """ - open_list.append(cut) + open_list.add(cut) costs.update({cut: cost}) routes.update({cut: [cut] + route}) - def _get_cut_to_expand(self, open_list: List[Cut], costs: Dict[Cut, float], routes: Dict[Cut, List[Cut]], + def _get_cut_to_expand(self, open_list: Set[Cut], costs: Dict[Cut, float], routes: Dict[Cut, List[Cut]], estimate: float) -> Cut: """ An auxiliary method for finding a cut for expanding the search out of a set of potential cuts for expansion. 
diff --git a/model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py b/model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py index fe131214a..e4a3063d6 100644 --- a/model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py +++ b/model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py @@ -14,6 +14,7 @@ # ============================================================================== from typing import List from operator import getitem +from functools import cache from model_compression_toolkit.core.common import Graph, BaseNode from model_compression_toolkit.core.common.graph.edge import EDGE_SOURCE_INDEX @@ -82,7 +83,6 @@ def __init__(self, model_graph: Graph): inputs_tensors_memory = [sum([t.total_size for t in self.operation_node_children(n)]) for n in nodes if n in model_graph.get_inputs()] - # TODO maxcut: why both inputs and outputs of each nodes, while the A* solves for node outputs only??? nodes_total_memory = [sum([t.total_size for t in self.operation_node_children(n)] + [t.total_size for t in self.operation_node_parents(n)]) for n in nodes if n not in model_graph.get_inputs()] @@ -117,6 +117,7 @@ def update_sinks_b(self): """ self.sinks_b = [n for n in self.b_nodes if len(list(self.successors(n))) == 0] + @cache def activation_tensor_children(self, activation_tensor: ActivationMemoryTensor) -> List[BaseNode]: """ Returns the children nodes of a side B node (activation tensor) in the bipartite graph. @@ -129,6 +130,7 @@ def activation_tensor_children(self, activation_tensor: ActivationMemoryTensor) """ return [oe[1] for oe in self.out_edges(activation_tensor)] + @cache def activation_tensor_parents(self, activation_tensor: ActivationMemoryTensor) -> List[BaseNode]: """ Returns the parents nodes of a side B node (activation tensor) in the bipartite graph. 
@@ -141,6 +143,7 @@ def activation_tensor_parents(self, activation_tensor: ActivationMemoryTensor) - """ return [ie[0] for ie in self.in_edges(activation_tensor)] + @cache def operation_node_children(self, op_node: BaseNode) -> List[ActivationMemoryTensor]: """ Returns the children nodes of a side A node (operation) in the bipartite graph. @@ -153,6 +156,7 @@ def operation_node_children(self, op_node: BaseNode) -> List[ActivationMemoryTen """ return [oe[1] for oe in self.out_edges(op_node)] + @cache def operation_node_parents(self, op_node: BaseNode) -> List[ActivationMemoryTensor]: """ Returns the parents nodes of a side A node (operation) in the bipartite graph. diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py index 2b118ee1b..429b49740 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py @@ -17,6 +17,7 @@ from enum import Enum, auto from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence, Set +from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.core import FrameworkInfo from model_compression_toolkit.core.common import Graph, BaseNode @@ -169,6 +170,7 @@ def compute_resource_utilization(self, w_total, *_ = self.compute_weights_utilization(target_criterion, bitwidth_mode, w_qcs) if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets): + Logger.warning("Using the experimental max-cut feature for activation memory utilization estimation.") a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs) ru = 
ResourceUtilization() diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py index 34e6fcbaa..a4c5945e8 100644 --- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py @@ -182,7 +182,7 @@ def _add_ru_constraints(search_manager: MixedPrecisionSearchManager, target_resource_utilization: ResourceUtilization, indicators_matrix: np.ndarray, lp_problem: LpProblem, - non_conf_ru_dict: Optional[Dict[RUTarget, np.ndarray]]): + non_conf_ru_dict: Dict[RUTarget, np.ndarray]): """ Adding targets constraints for the Lp problem for the given target resource utilization. The update to the Lp problem object is done inplace. diff --git a/requirements.txt b/requirements.txt index 4c68dd252..fabd795df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,5 @@ matplotlib<3.10.0 scipy protobuf mct-quantizers==1.5.2 -pydantic<2.0 \ No newline at end of file +pydantic<2.0 +timeout-decorator \ No newline at end of file