[WIP] Statement scanning performance improvements (#156)

verkaufer · web-flow · commit 7d9a15770782 · 2021-02-24T10:54:00.000-05:00
* `PolicyDocument.service_wildcard` now uses a `set()` internally, and duplicate calls to `get_all_service_prefixes()` are removed.

* Add the cached-property package and cache `StatementDetail.expanded_actions` calls.

Computing `expanded_actions` requires an expensive call into policy_sentry, and nearly every other method on the StatementDetail class accesses the `expanded_actions` attribute. Caching this property therefore results in a dramatic improvement when processing wildcard statements.

* Simplify loops and conditions in StatementDetail to remove redundancies.
diff --git a/cloudsplaining/scan/policy_document.py b/cloudsplaining/scan/policy_document.py
@@ -183,37 +183,32 @@ def credentials_exposure(self):
     @property
     def service_wildcard(self):
         """Determine if the policy gives access to all actions within a service - simple grepping"""
-        services = []
+        services = set()
+        all_service_prefixes = get_all_service_prefixes()
+        
         for statement in self.statements:
             logger.debug("Evaluating statement: %s", statement.json)
-            if statement.effect == "Allow":
+            if statement.effect_allow:
                 if isinstance(statement.actions, list):
                     for action in statement.actions:
                         # If the action is a straight up *
                         if action == "*":
                             logger.debug("All actions are allowed by this policy")
-                            services.extend(get_all_service_prefixes())
+                            services.update(all_service_prefixes)
                         # Otherwise, it will take the format of service:*
                         else:
                             service, this_action = action.split(":")
                             # service:*
                             if this_action == "*":
-                                services.append(service)
+                                services.add(service)
                 elif isinstance(statement.actions, str):
                     # If the action is a straight up *
                     if statement.actions == "*":
                         logger.debug("All actions are allowed by this policy")
-                        services.append(get_all_service_prefixes())
+                        services.update(all_service_prefixes)
                     else:
                         service, this_action = statement.actions.split(":")
                         # service:*
                         if this_action == "*":
-                            services.append(service)
-        if services:
-            # Remove duplicates and sort
-            services = list(dict.fromkeys(services))
-            these_services = services.copy()
-            these_services.sort()
-            return these_services
-        else:
-            return []
+                            services.add(service)
+        return sorted(services)
diff --git a/cloudsplaining/scan/statement_detail.py b/cloudsplaining/scan/statement_detail.py
@@ -1,5 +1,8 @@
 """Abstracts evaluation of IAM Policy statements."""
 import logging
+
+from cached_property import cached_property
+
 from policy_sentry.analysis.analyze import determine_actions_to_expand
 from policy_sentry.querying.actions import (
     remove_actions_not_matching_access_level,
@@ -20,7 +23,7 @@
 logger = logging.getLogger(__name__)
 logging.getLogger("policy_sentry").setLevel(logging.WARNING)
 
-all_actions = get_all_actions()
+ALL_ACTIONS = get_all_actions()
 
 
 # pylint: disable=too-many-instance-attributes
@@ -36,6 +39,9 @@ def __init__(self, statement):
         self.resources = self._resources()
         self.actions = self._actions()
         self.not_action = self._not_action()
+
+        self.has_resource_constraints = _has_resource_constraints(self.resources)
+
         self.not_action_effective_actions = self._not_action_effective_actions()
         self.not_resource = self._not_resource()
 
@@ -84,65 +90,66 @@ def _not_action_effective_actions(self):
         effective_actions = []
         if not self.not_action:
             return None
-        not_actions_expanded = determine_actions_to_expand(self.not_action)
-        not_actions_expanded_lowercase = [x.lower() for x in not_actions_expanded]
+
+        not_actions_expanded_lowercase = [
+            a.lower() 
+            for a in determine_actions_to_expand(self.not_action)
+        ]
 
         # Effect: Allow && Resource != "*"
         if self.has_resource_constraints and self.effect_allow:
             opposite_actions = []
             for arn in self.resources:
                 actions_specific_to_arn = get_actions_matching_arn(arn)
                 if actions_specific_to_arn:
-                    opposite_actions.extend(get_actions_matching_arn(arn))
+                    opposite_actions.extend(actions_specific_to_arn)
 
             for opposite_action in opposite_actions:
                 # If it's in NotActions, then it is not an action we want
-                if opposite_action.lower() in not_actions_expanded_lowercase:
-                    pass
-                # Otherwise add it
-                else:
+                if opposite_action.lower() not in not_actions_expanded_lowercase:
                     effective_actions.append(opposite_action)
             effective_actions.sort()
             return effective_actions
+
         # Effect: Allow, Resource != "*", and Action == prefix:*
-        elif not self.has_resource_constraints and self.effect_allow:
+        if not self.has_resource_constraints and self.effect_allow:
             # Then we calculate the reverse using all_actions
-            for action in all_actions:
-                # If it's in NotActions, then it is not an action we want
-                if action.lower() in not_actions_expanded_lowercase:
-                    pass
-                    # Otherwise add it
-                else:
-                    effective_actions.append(action)
+
+            # If it's in NotActions, then it is not an action we want
+            effective_actions = [
+                action for action in ALL_ACTIONS 
+                if action.lower() not in not_actions_expanded_lowercase
+            ]
+            
             effective_actions.sort()
             return effective_actions
-        elif self.has_resource_constraints and self.effect_deny:
+
+        if self.has_resource_constraints and self.effect_deny:
             logger.debug("NOTE: Haven't decided if we support Effect Deny here?")
             return None
-        elif not self.has_resource_constraints and self.effect_deny:
+        
+        if not self.has_resource_constraints and self.effect_deny:
             logger.debug("NOTE: Haven't decided if we support Effect Deny here?")
             return None
         # only including this so Pylint doesn't yell at us
-        else:
-            return None  # pragma: no cover
+        return None  # pragma: no cover
 
     @property
     def has_not_resource_with_allow(self):
         """Per the AWS documentation, the NotResource should NEVER be used with the Allow Effect.
         See documentation here. https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_notresource.html#notresource-element-combinations"""
-        result = False
-        if self.not_resource:
-            if self.effect_allow:
-                result = True
-                logger.warning(
-                    "Per the AWS documentation, the NotResource should never be used with the "
-                    "Allow Effect. We suggest changing this ASAP"
-                )
-        return result
+        if self.not_resource and self.effect_allow:
+            logger.warning(
+                "Per the AWS documentation, the NotResource should never be used with the "
+                "Allow Effect. We suggest changing this ASAP"
+            )            
+            return True
+        return False
 
-    @property
+    @cached_property
     def expanded_actions(self):
         """Expands the full list of allowed actions from the Policy/"""
+        
         if self.actions:
             expanded = determine_actions_to_expand(self.actions)
             expanded.sort()
@@ -167,30 +174,11 @@ def effect_allow(self):
     @property
     def services_in_use(self):
         """Get a list of the services in use by the statement."""
-        service_prefixes = []
+        service_prefixes = set()
         for action in self.expanded_actions:
             service, action_name = action.split(":")  # pylint: disable=unused-variable
-            if service not in service_prefixes:
-                service_prefixes.append(service)
-        service_prefixes.sort()
-        return service_prefixes
-
-    @property
-    def has_resource_constraints(self):
-        """Determine whether or not the statement allows resource constraints."""
-        answer = True
-        if len(self.resources) == 0:
-            # This is probably a NotResources situation which we do not support.
-            pass
-        if len(self.resources) == 1:
-            if self.resources[0] == "*":
-                answer = False
-        elif len(self.resources) > 1:  # pragma: no cover
-            # It's possible that someone writes a bad policy that includes both a resource ARN as well as a wildcard.
-            for resource in self.resources:
-                if resource == "*":
-                    answer = False
-        return answer
+            service_prefixes.add(service)
+        return sorted(service_prefixes)
 
     @property
     def permissions_management_actions_without_constraints(self):
@@ -235,13 +223,11 @@ def missing_resource_constraints(self, exclusions=DEFAULT_EXCLUSIONS):
                 "Please use the Exclusions object."
             )
         actions_missing_resource_constraints = []
-        if len(self.resources) == 1:
-            if self.resources[0] == "*":
-                actions_missing_resource_constraints = remove_wildcard_only_actions(
-                    self.expanded_actions
-                )
-        results = exclusions.get_allowed_actions(actions_missing_resource_constraints)
-        return results
+        if len(self.resources) == 1 and self.resources[0] == "*":
+            actions_missing_resource_constraints = remove_wildcard_only_actions(
+                self.expanded_actions
+            )
+        return exclusions.get_allowed_actions(actions_missing_resource_constraints)
 
     def missing_resource_constraints_for_modify_actions(
         self, exclusions=DEFAULT_EXCLUSIONS
@@ -258,18 +244,20 @@ def missing_resource_constraints_for_modify_actions(
                 "Please use the Exclusions object."
             )
         # This initially includes read-only and modify level actions
-        if exclusions.include_actions is None:
-            always_look_for_actions = []  # pragma: no cover
+        if exclusions.include_actions:
+            always_look_for_actions = [x.lower() for x in exclusions.include_actions]
         else:
-            always_look_for_actions = exclusions.include_actions
+            always_look_for_actions = []
+
         actions_missing_resource_constraints = self.missing_resource_constraints(
             exclusions
         )
 
         always_actions_found = []
         for action in actions_missing_resource_constraints:
-            if action.lower() in [x.lower() for x in always_look_for_actions]:
+            if action.lower() in always_look_for_actions:
                 always_actions_found.append(action)
+
         modify_actions_missing_constraints = remove_read_level_actions(
             actions_missing_resource_constraints
         )
@@ -282,3 +270,16 @@ def missing_resource_constraints_for_modify_actions(
         )
         modify_actions_missing_constraints.sort()
         return modify_actions_missing_constraints
+
+
+def _has_resource_constraints(resources):
+    """Determine whether or not the statement allows resource constraints."""
+    if len(resources) == 0:
+        # This is probably a NotResources situation which we do not support.
+        pass
+    if len(resources) == 1 and resources[0] == "*":
+            return False
+    elif len(resources) > 1:  # pragma: no cover
+        # It's possible that someone writes a bad policy that includes both a resource ARN as well as a wildcard.
+        return not any(resource == "*" for resource in resources)
+    return True
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,7 @@
 beautifulsoup4==4.9.3
 boto3==1.16.43
 botocore==1.19.43
+cached-property==1.5.2
 certifi==2020.12.5
 chardet==4.0.0
 click==7.1.2