diff --git a/docs/content/en/integrations/notification_webhooks/_index.md b/docs/content/en/integrations/notification_webhooks/_index.md index 795c37d354f..ac66bc6564c 100644 --- a/docs/content/en/integrations/notification_webhooks/_index.md +++ b/docs/content/en/integrations/notification_webhooks/_index.md @@ -5,29 +5,41 @@ weight: 7 chapter: true --- - ## Transition graph: ```mermaid flowchart TD - START{START} + START{{Endpoint created}} + ALL{All states} STATUS_ACTIVE([STATUS_ACTIVE]) STATUS_INACTIVE_TMP - STATUS_INACTIVE_400 - STATUS_ACTIVE_500([STATUS_ACTIVE_500]) - STATUS_INACTIVE_500 - STATUS_INACTIVE_OTHERS - STATUS_INACTIVE_MANUAL + STATUS_INACTIVE_PERMANENT + STATUS_ACTIVE_TMP([STATUS_ACTIVE_TMP]) + END{{Endpoint removed}} - START --> STATUS_ACTIVE + START ==> STATUS_ACTIVE STATUS_ACTIVE --HTTP 200 or 201 --> STATUS_ACTIVE - STATUS_ACTIVE --HTTP 5xx, 429, timeout or other non-HTTP error
within one day from the first error--> STATUS_INACTIVE_TMP - STATUS_INACTIVE_TMP --After 60s--> STATUS_ACTIVE_500 - STATUS_ACTIVE_500 --HTTP 5xx, 429, timeout or other non-HTTP error
within one day from the first error-->STATUS_INACTIVE_TMP - STATUS_ACTIVE_500 --HTTP 5xx, 429, timeout or other non-HTTP error
within one day from the first error-->STATUS_INACTIVE_500 - STATUS_ACTIVE_500 --After 24h--> STATUS_ACTIVE - STATUS_ACTIVE --Any HTTP 4xx response--> STATUS_INACTIVE_400 - STATUS_ACTIVE --Any other HTTP response--> STATUS_INACTIVE_OTHERS - STATUS_ACTIVE --Manual deactivation by user--> STATUS_INACTIVE_MANUAL -``` \ No newline at end of file + STATUS_ACTIVE --HTTP 5xx
or HTTP 429
or Timeout--> STATUS_INACTIVE_TMP + STATUS_ACTIVE --Any HTTP 4xx response
or any other HTTP responsee
or non-HTTP error--> STATUS_INACTIVE_PERMANENT + STATUS_INACTIVE_TMP -.After 60s.-> STATUS_ACTIVE_TMP + STATUS_ACTIVE_TMP --HTTP 5xx
or HTTP 429
or Timeout
within 24h
from the first error-->STATUS_INACTIVE_TMP + STATUS_ACTIVE_TMP -.After 24h.-> STATUS_ACTIVE + STATUS_ACTIVE_TMP --HTTP 200 or 201 --> STATUS_ACTIVE_TMP + STATUS_ACTIVE_TMP --HTTP 5xx
or HTTP 429
or Timeout
within 24h from the first error
or any other HTTP respons or error--> STATUS_INACTIVE_PERMANENT + ALL ==Activation by user==> STATUS_ACTIVE + ALL ==Deactivation by user==> STATUS_INACTIVE_PERMANENT + ALL ==Removal of endpoint by user==> END +``` + +Notes: + +1. Transitions: + - bold: manual changes by user + - dotted: automated by celery + - others: based on responses on webhooks +1. Nodes: + - Stadium-shaped: Active - following webhook can be send + - Rectangles: Inactive - performing of webhook will fail (and not retried) + - Hexagonal: Initial and final states + - Rhombus: All states (meta node to make graph more readable) diff --git a/dojo/db_migrations/0213_system_settings_enable_webhooks_notifications_and_more.py b/dojo/db_migrations/0213_system_settings_enable_webhooks_notifications_and_more.py index 395211339d7..8a610859982 100644 --- a/dojo/db_migrations/0213_system_settings_enable_webhooks_notifications_and_more.py +++ b/dojo/db_migrations/0213_system_settings_enable_webhooks_notifications_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.1.13 on 2024-05-27 16:23 +# Generated by Django 4.2.13 on 2024-07-05 18:54 from django.db import migrations, models import django.db.models.deletion @@ -17,6 +17,11 @@ class Migration(migrations.Migration): name='enable_webhooks_notifications', field=models.BooleanField(default=False, verbose_name='Enable Webhook notifications'), ), + migrations.AddField( + model_name='system_settings', + name='webhooks_notifications_timeout', + field=models.IntegerField(default=60, help_text='How many seconds will DefectDojo waits for response from webhook endpoint'), + ), migrations.AlterField( model_name='notifications', name='auto_close_engagement', @@ -115,9 +120,10 @@ class Migration(migrations.Migration): ('url', models.URLField(default='', help_text='The full URL of the incoming webhook')), ('header_name', models.CharField(blank=True, default='', help_text='Name of the header required for interacting with Webhook endpoint', max_length=100, null=True)), ('header_value', models.CharField(blank=True, default='', help_text='Content of the header required for interacting with Webhook endpoint', max_length=100, null=True)), - ('status', models.CharField(choices=[('active', 'Active'), ('inactive_400', 'Inactive because of 4xx error'), ('active_500', 'Active but 5xx error detected'), ('inactive_500', 'Inactive because of 5xx error'), ('inactive_others', 'Inactive because of status code unsupported'), ('inactive_manual', 'Inactive because of manual deactivation')], default='active', help_text='Status of the incoming webhook', max_length=20)), + ('status', models.CharField(choices=[('active', 'Active'), ('active_tmp', 'Active but 5xx (or similar) error detected'), ('inactive_tmp', 'Temporary inactive because of 5xx (or similar) error'), ('inactive_permanent', 'Permanently inactive')], default='active', help_text='Status of the incoming webhook', max_length=20)), ('first_error', models.DateTimeField(blank=True, help_text='If endpoint is active, when error happened first time', null=True)), ('last_error', models.DateTimeField(blank=True, help_text='If endpoint is active, when error happened last time', null=True)), + ('note', models.CharField(blank=True, default='', help_text='Description of the latest error', null=True)), ('owner', models.ForeignKey(blank=True, help_text='Owner/receiver of notification, if empty processed as system notification', null=True, on_delete=django.db.models.deletion.CASCADE, to='dojo.dojo_user')), ], ), diff --git a/dojo/fixtures/dojo_testdata.json b/dojo/fixtures/dojo_testdata.json index 71fe0a9c73e..ae550f8bf81 100644 --- a/dojo/fixtures/dojo_testdata.json +++ b/dojo/fixtures/dojo_testdata.json @@ -3074,6 +3074,7 @@ "status": "active", "first_error": null, "last_error": null, + "note": null, "owner": null } }, @@ -3088,6 +3089,7 @@ "status": "active", "first_error": null, "last_error": null, + "note": null, "owner": 2 } } diff --git a/dojo/models.py b/dojo/models.py index 4e1f4c856b5..1f65ec9b731 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -357,6 +357,8 @@ class System_Settings(models.Model): models.BooleanField(default=False, verbose_name=_('Enable Webhook notifications'), blank=False) + webhooks_notifications_timeout = models.IntegerField(default=60, + help_text=_("How many seconds will DefectDojo waits for response from webhook endpoint")) false_positive_history = models.BooleanField( default=False, help_text=_( @@ -4105,18 +4107,14 @@ class Notification_Webhooks(models.Model): _STATUS_ACTIVE = "active" _STATUS_INACTIVE = "inactive" STATUS_ACTIVE = f"{_STATUS_ACTIVE}" - STATUS_INACTIVE_400 = f"{_STATUS_INACTIVE}_400" - STATUS_ACTIVE_500 = f"{_STATUS_ACTIVE}_500" - STATUS_INACTIVE_500 = f"{_STATUS_INACTIVE}_500" - STATUS_INACTIVE_OTHERS = f"{_STATUS_INACTIVE}_others" - STATUS_INACTIVE_MANUAL = f"{_STATUS_INACTIVE}_manual" + STATUS_ACTIVE_TMP = f"{_STATUS_ACTIVE}_tmp" + STATUS_INACTIVE_TMP = f"{_STATUS_INACTIVE}_tmp" + STATUS_INACTIVE_PERMANENT = f"{_STATUS_INACTIVE}_permanent" STATUS_CHOICES = ( (STATUS_ACTIVE, "Active"), - (STATUS_INACTIVE_400, "Inactive because of 4xx error"), - (STATUS_ACTIVE_500, "Active but 5xx error detected"), - (STATUS_INACTIVE_500, "Inactive because of 5xx error"), - (STATUS_INACTIVE_OTHERS, "Inactive because of status code unsupported"), - (STATUS_INACTIVE_MANUAL, "Inactive because of manual deactivation"), + (STATUS_ACTIVE_TMP, "Active but 5xx (or similar) error detected"), + (STATUS_INACTIVE_TMP, "Temporary inactive because of 5xx (or similar) error"), + (STATUS_INACTIVE_PERMANENT, "Permanently inactive"), ) name = models.CharField(max_length=100, default='', blank=False, unique=True, help_text=_('Name of the incoming webhook')) @@ -4130,6 +4128,7 @@ class Notification_Webhooks(models.Model): help_text=_('Status of the incoming webhook')) first_error = models.DateTimeField(help_text=_('If endpoint is active, when error happened first time'), blank=True, null=True) last_error = models.DateTimeField(help_text=_('If endpoint is active, when error happened last time'), blank=True, null=True) + note = models.CharField(default='', blank=True, null=True, help_text=_('Description of the latest error')) owner = models.ForeignKey(Dojo_User, editable=True, null=True, blank=True, on_delete=models.CASCADE, help_text=_('Owner/receiver of notification, if empty processed as system notification')) @@ -4618,6 +4617,7 @@ def __str__(self): auditlog.register(Risk_Acceptance) auditlog.register(Finding_Template) auditlog.register(Cred_User, exclude_fields=['password']) + auditlog.register(Notification_Webhooks, exclude_fields=['header_name', 'header_value']) from dojo.utils import calculate_grade, to_str_typed # noqa: E402 # there is issue due to a circular import diff --git a/dojo/notifications/helper.py b/dojo/notifications/helper.py index c6dd7522eb5..1c9a7467682 100644 --- a/dojo/notifications/helper.py +++ b/dojo/notifications/helper.py @@ -326,6 +326,8 @@ def send_mail_notification(event, user=None, *args, **kwargs): def webhooks_notification_request(endpoint, event, *args, **kwargs): + from dojo.utils import get_system_setting + headers = { "User-Agent": f"DefectDojo-{dd_version}", "X-DefectDojo-Event": event, @@ -336,12 +338,15 @@ def webhooks_notification_request(endpoint, event, *args, **kwargs): headers[endpoint.header_name] = endpoint.header_value yaml_data = create_notification_message(event, endpoint.owner, 'webhooks', *args, **kwargs) data = yaml.safe_load(yaml_data) + + timeout = get_system_setting('webhooks_notifications_timeout') + res = requests.request( method='POST', url=endpoint.url, headers=headers, json=data, - timeout=60, + timeout=timeout, ) return res @@ -353,9 +358,39 @@ def test_webhooks_notification(endpoint): # for now, "raise_for_status" should be enough +@app.task(ignore_result=True) +def webhook_reactivation(*args, **kwargs): + endpoint = Notification_Webhooks.objects.get(pk=kwargs.get('endpoint_id')) + + # User already changed status of endpoint + if endpoint.status != Notification_Webhooks.STATUS_INACTIVE_TMP: + return + + endpoint.status = Notification_Webhooks.STATUS_ACTIVE_TMP + endpoint.save() + + +@app.task(ignore_result=True) +def webhook_status_cleanup(*args, **kwargs): + + Notification_Webhooks.objects.filter( + status=Notification_Webhooks.STATUS_ACTIVE_TMP, + last_error__le=get_current_datetime() - timedelta(hours=24), + ).update( + status=Notification_Webhooks.STATUS_ACTIVE, + first_error=None, + last_error=None, + note=f'Reactivation from {Notification_Webhooks.STATUS_ACTIVE_TMP}', + ) + + @dojo_async_task @app.task def send_webhooks_notification(event, user=None, *args, **kwargs): + + ERROR_PERMANENT = 'permanent' + ERROR_TEMPORARY = 'temporary' + endpoints = Notification_Webhooks.objects.filter(owner=user) if not endpoints: @@ -366,7 +401,9 @@ def send_webhooks_notification(event, user=None, *args, **kwargs): return for endpoint in endpoints: - if not endpoint.status.startswith(Notification_Webhooks._STATUS_ACTIVE): + + error = None + if endpoint.status not in [Notification_Webhooks.STATUS_ACTIVE, Notification_Webhooks.STATUS_ACTIVE_TMP]: logger.info(f"URL for Webhook '{endpoint.name}' is not active: {endpoint.get_status_display()} ({endpoint.status})") continue @@ -378,48 +415,51 @@ def send_webhooks_notification(event, user=None, *args, **kwargs): logger.debug(f"Message sent to endpoint '{endpoint.name}' sucessfully.") continue - now = get_current_datetime() + # HTTP request passed successfully but we still need to check status code + if 500 <= res.status_code < 600 or res.status_code == 429: + error = ERROR_TEMPORARY + else: + error = ERROR_PERMANENT - # There is no reason to keep endpoint active if it is returning 4xx errors - if 400 <= res.status_code < 500: - endpoint.status = Notification_Webhooks.STATUS_INACTIVE_400 - endpoint.first_error = now + endpoint.note = f"Response status code: {res.status_code}" + logger.error(f"Error when sending message to Webhooks '{endpoint.name}' (status: {res.status_code}): {res.text}") - # 5xx is also not OK - elif 500 <= res.status_code < 600: - # If there is only temporary outage (we detected 5xx first time or it was before more then one hour), we can keep endpoint active (but marked) + except requests.exceptions.Timeout as e: + error = ERROR_TEMPORARY + endpoint.note = f"Requests exception: {e}" + logger.error(f"Timeout when sending message to Webhook '{endpoint.name}'") - # First detected - # or first detected more then one hour ago - if endpoint.last_error is None or (now - endpoint.last_error).seconds > 60 * 60: - endpoint.status = Notification_Webhooks.STATUS_ACTIVE_500 - endpoint.first_error = now # Yes, if last fail happen before more then hour, we are considering it as a new error + except Exception as e: + error = ERROR_PERMANENT + endpoint.note = f"Exception: {e}" + logger.exception(e) + log_alert(e, "Webhooks Notification") - # Repeated detection - else: - # Error is repeating over more then hour - if (now - endpoint.first_error).seconds > 60 * 60: - endpoint.status = Notification_Webhooks.STATUS_INACTIVE_500 + now = get_current_datetime() - # This situation shouldn't happen - only if somebody was cleaning status and didn't clean first/last_error - # But we should handle it - else: - endpoint.status = Notification_Webhooks.STATUS_ACTIVE_500 - endpoint.first_error = now + if error == ERROR_TEMPORARY: + + # If endpoint is unstable for more then one day, it needs to be deactivated + if endpoint.first_error is not None and (now - endpoint.first_error).total_seconds() > 60 * 60 * 24: + endpoint.status = Notification_Webhooks.STATUS_INACTIVE_PERMANENT - # Well, we really accepts only 200 and 201 else: - endpoint.status = Notification_Webhooks.STATUS_INACTIVE_OTHERS - endpoint.first_error = now + # We need to monitor when outage started + if endpoint.status == Notification_Webhooks.STATUS_ACTIVE: + endpoint.first_error = now - endpoint.last_error = now - endpoint.save() + endpoint.status = Notification_Webhooks.STATUS_INACTIVE_TMP - logger.error(f"Error when sending message to Webhooks (status: {res.status_code}): {res.text}") + # In case of failure within one day, endpoint can be deactivated temporally only for one minute + # webhook_reactivation.apply_async(kwargs={'endpoint_id': endpoint.pk}, countdown=60) - except Exception as e: - logger.exception(e) - log_alert(e, "Webhooks Notification") + # There is no reason to keep endpoint active if it is returning 4xx errors + else: + endpoint.status = Notification_Webhooks.STATUS_INACTIVE_PERMANENT + endpoint.first_error = now + + endpoint.last_error = now + endpoint.save() def send_alert_notification(event, user=None, *args, **kwargs): diff --git a/dojo/settings/.settings.dist.py.sha256sum b/dojo/settings/.settings.dist.py.sha256sum index 890d05663e9..33fbf65328c 100644 --- a/dojo/settings/.settings.dist.py.sha256sum +++ b/dojo/settings/.settings.dist.py.sha256sum @@ -1 +1 @@ -cce215fa477d611d45cae69a29185e943eb209526fec2b38659666e5e9513fe3 +1d797b12b15e9130fbe6de3555b9de59ea3937080908059dd77c1116a775b886 diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 0c62f004bc6..02b9c669139 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -1140,6 +1140,10 @@ def saml2_attrib_map_format(dict): 'task': 'dojo.risk_acceptance.helper.expiration_handler', 'schedule': crontab(minute=0, hour='*/3'), # every 3 hours }, + 'notification_webhook_status_cleanup': { + 'task': 'dojo.notifications.helper.webhook_status_cleanup', + 'schedule': timedelta(minutes=1), + }, # 'jira_status_reconciliation': { # 'task': 'dojo.tasks.jira_status_reconciliation_task', # 'schedule': timedelta(hours=12), @@ -1150,7 +1154,6 @@ def saml2_attrib_map_format(dict): # 'schedule': timedelta(hours=12) # }, - } # ------------------------------------ diff --git a/unittests/test_notifications.py b/unittests/test_notifications.py index 7e85dc1d1c6..18f826a2dea 100644 --- a/unittests/test_notifications.py +++ b/unittests/test_notifications.py @@ -24,6 +24,7 @@ Notifications, Product, Product_Type, + System_Settings, Test, Test_Type, User, @@ -438,11 +439,11 @@ def test_missing_personal_webhook(self): self.assertIn("URLs for Webhooks not configured for user '(admin)': skipping user notification", cm.output[0]) def test_system_webhook_inactive(self): - self.sys_wh.status = Notification_Webhooks.STATUS_INACTIVE_400 + self.sys_wh.status = Notification_Webhooks.STATUS_INACTIVE_PERMANENT self.sys_wh.save() with self.assertLogs('dojo.notifications.helper', level='INFO') as cm: send_webhooks_notification(event='dummy') - self.assertIn("URL for Webhook 'My webhook endpoint' is not active: Inactive because of 4xx error (inactive_400)", cm.output[0]) + self.assertIn("URL for Webhook 'My webhook endpoint' is not active: Permanently inactive (inactive_permanent)", cm.output[0]) def test_system_webhook_sucessful(self): with self.assertLogs('dojo.notifications.helper', level='DEBUG') as cm: @@ -460,10 +461,10 @@ def test_system_webhook_4xx(self): with self.assertLogs('dojo.notifications.helper', level='ERROR') as cm: send_webhooks_notification(event='dummy', title='Dummy event') - self.assertIn("Error when sending message to Webhooks (status: 400)", cm.output[-1]) + self.assertIn("Error when sending message to Webhooks 'My webhook endpoint' (status: 400)", cm.output[-1]) updated_wh = Notification_Webhooks.objects.all().filter(owner=None).first() - self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_INACTIVE_400) + self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_INACTIVE_PERMANENT) self.assertIsNotNone(updated_wh.first_error) self.assertEqual(updated_wh.first_error, updated_wh.last_error) @@ -474,14 +475,20 @@ def test_system_webhook_first_5xx(self): send_webhooks_notification(event='dummy', title='Dummy event') updated_wh = Notification_Webhooks.objects.filter(owner=None).first() - self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_ACTIVE_500) + self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_INACTIVE_TMP) self.assertIsNotNone(updated_wh.first_error) self.assertEqual(updated_wh.first_error, updated_wh.last_error) - def test_system_webhook_second_5xx_within_hour(self): + def test_system_webhook_second_5xx_within_one_day(self): + + # For proper testing, block_execution needs to be disabled for this task + testuser = User.objects.get(username='admin') + testuser.usercontactinfo.block_execution = False + testuser.save() + ten_mins_ago = get_current_datetime() - datetime.timedelta(minutes=10) self.sys_wh.url = f"{self.url_base}/status/500" - self.sys_wh.status = Notification_Webhooks.STATUS_ACTIVE_500 + self.sys_wh.status = Notification_Webhooks.STATUS_ACTIVE_TMP self.sys_wh.first_error = ten_mins_ago self.sys_wh.last_error = ten_mins_ago self.sys_wh.save() @@ -489,11 +496,62 @@ def test_system_webhook_second_5xx_within_hour(self): send_webhooks_notification(event='dummy', title='Dummy event') updated_wh = Notification_Webhooks.objects.filter(owner=None).first() - self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_ACTIVE_500) + self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_INACTIVE_TMP) self.assertEqual(updated_wh.first_error, ten_mins_ago) self.assertGreater(updated_wh.last_error, ten_mins_ago) - @patch('requests.request') + def test_system_webhook_third_5xx_after_more_then_day(self): + + # For proper testing, block_execution needs to be disabled for this task + testuser = User.objects.get(username='admin') + testuser.usercontactinfo.block_execution = False + testuser.save() + + now = get_current_datetime() + day_ago = now - datetime.timedelta(hours=24, minutes=10) + ten_minutes_ago = now - datetime.timedelta(minutes=10) + self.sys_wh.url = f"{self.url_base}/status/500" + self.sys_wh.status = Notification_Webhooks.STATUS_ACTIVE_TMP + self.sys_wh.first_error = day_ago + self.sys_wh.last_error = ten_minutes_ago + self.sys_wh.save() + + send_webhooks_notification(event='dummy', title='Dummy event') + + updated_wh = Notification_Webhooks.objects.filter(owner=None).first() + self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_INACTIVE_PERMANENT) + self.assertEqual(updated_wh.first_error, day_ago) + self.assertGreater(updated_wh.last_error, ten_minutes_ago) + + def test_system_webhook_timeout(self): + + self.sys_wh.url = f"{self.url_base}/delay/10" + self.sys_wh.save() + + system_settings = System_Settings.objects.get() + system_settings.webhooks_notifications_timeout = 3 + system_settings.save() + + send_webhooks_notification(event='dummy', title='Dummy event') + + updated_wh = Notification_Webhooks.objects.filter(owner=None).first() + self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_INACTIVE_TMP) + self.assertIsNotNone(updated_wh.first_error) + self.assertEqual(updated_wh.first_error, updated_wh.last_error) + + def test_system_webhook_wrong_fqdn(self): + + self.sys_wh.url = "http://non.existing.place" + self.sys_wh.save() + + send_webhooks_notification(event='dummy', title='Dummy event') + + updated_wh = Notification_Webhooks.objects.filter(owner=None).first() + self.assertEqual(updated_wh.status, Notification_Webhooks.STATUS_INACTIVE_PERMANENT) + self.assertIsNotNone(updated_wh.first_error) + self.assertEqual(updated_wh.first_error, updated_wh.last_error) + + @patch('requests.request', **{'return_value.status_code': 200}) def test_headers(self, mock): Product_Type.objects.create(name='notif prod type') self.assertEqual(mock.call_args.kwargs['headers'], { @@ -504,7 +562,7 @@ def test_headers(self, mock): 'Auth': 'Token xxx', }) - @patch('requests.request') + @patch('requests.request', **{'return_value.status_code': 200}) def test_events_messages(self, mock): with self.subTest('product_type_added'): prod_type = Product_Type.objects.create(name='notif prod type')