4.2b2 - more anonymization, less chatty telemetry

explorerhq · Apr 25, 2024 · 55d4436 · 55d4436
1 parent 1e2a8b0
commit 55d4436
Show file tree

Hide file tree

Showing 4 changed files with 98 additions and 45 deletions.
diff --git a/explorer/__init__.py b/explorer/__init__.py
@@ -1,9 +1,9 @@
 __version_info__ = {
     "major": 4,
-    "minor": 1,
-    "patch": 1,
-    "releaselevel": "final",
-    "serial": 0
+    "minor": 2,
+    "patch": 0,
+    "releaselevel": "beta",
+    "serial": 1
 }
 
 

diff --git a/explorer/models.py b/explorer/models.py
@@ -9,7 +9,7 @@
 from django.utils.translation import gettext_lazy as _
 
 from explorer import app_settings
-from explorer.telemetry import Stat, StatNames, send_summary_stats_if_necessary
+from explorer.telemetry import Stat, StatNames
 from explorer.utils import (
     extract_params, get_params_for_url, get_s3_bucket, get_valid_connection, passes_blacklist, s3_url,
     shared_dict_update, swap_params,
@@ -112,7 +112,6 @@ def execute_with_logging(self, executing_user):
         ql.save()
         Stat(StatNames.QUERY_RUN,
              {"sql_len": len(ql.sql), "duration": ql.duration}).track()
-        send_summary_stats_if_necessary()
         return ret, ql
 
     def execute(self):

diff --git a/explorer/telemetry.py b/explorer/telemetry.py
@@ -44,23 +44,52 @@ class StatNames(SelfNamedEnum):
 class Stat:
 
     STAT_COLLECTION_INTERVAL = 60 * 10  # Ten minutes
+    STARTUP_STAT_COLLECTION_INTERVAL = 60 * 60 * 24 * 7  # A week
 
     def __init__(self, name: StatNames, value):
         self.instanceId = instance_identifier()
         self.time = time.time()
         self.value = value
         self.name = name.value
 
+    @property
+    def is_summary(self):
+        return self.name == StatNames.STARTUP_STATS.value
+
+    def should_send_summary_stats(self):
+        from explorer.models import ExplorerValue
+        last_send = ExplorerValue.objects.get_startup_last_send()
+        if not last_send:
+            return True
+        else:
+            return self.time - last_send >= self.STARTUP_STAT_COLLECTION_INTERVAL
+
+    def send_summary_stats(self):
+        from explorer.models import ExplorerValue
+        payload = _gather_summary_stats()
+        Stat(StatNames.STARTUP_STATS, payload).track()
+        ExplorerValue.objects.set_startup_last_send(self.time)
+
     def track(self):
         from explorer import app_settings
-        if app_settings.EXPLORER_ENABLE_ANONYMOUS_STATS:
-            cache_key = "last_stat_sent_time"
-            last_sent_time = cache.get(cache_key, 0)
-            if self.time - last_sent_time >= self.STAT_COLLECTION_INTERVAL:
-                data = json.dumps(self.__dict__)
-                thread = threading.Thread(target=_send, args=(data,))
-                thread.start()
-                cache.set(cache_key, self.time)
+        if not app_settings.EXPLORER_ENABLE_ANONYMOUS_STATS:
+            return
+
+        cache_key = "last_stat_sent_time"
+        last_sent_time = cache.get(cache_key, 0)
+        # Summary stats are tracked with a different time interval
+        if self.is_summary or self.time - last_sent_time >= self.STAT_COLLECTION_INTERVAL:
+            data = json.dumps(self.__dict__)
+            thread = threading.Thread(target=_send, args=(data,))
+            thread.start()
+            cache.set(cache_key, self.time)
+
+        # Every time we send any tracking, see if we have recently sent overall summary stats
+        # Of course, sending the summary stats calls .track(), so we need to NOT call track()
+        # again if we are in fact already in the process of sending summary stats. Otherwise,
+        # we will end up in infinite recursion of track() calls.
+        if not self.is_summary and self.should_send_summary_stats():
+            self.send_summary_stats()
 
 
 def _send(data):
@@ -73,17 +102,17 @@ def _send(data):
         logger.warning(f"Failed to send stats: {e}")
 
 
-STARTUP_STAT_COLLECTION_INTERVAL = 60 * 60 * 24 * 7  # A week
-
+def _get_install_quarter():
+    first_migration = MigrationRecorder.Migration.objects. \
+        filter(app="explorer").order_by("applied").first()
 
-def send_summary_stats_if_necessary():
-    from explorer.models import ExplorerValue
-
-    last_send = ExplorerValue.objects.get_startup_last_send()
-    should_send = not last_send or time.time() - last_send >= STARTUP_STAT_COLLECTION_INTERVAL
-    if should_send:
-        payload = _gather_summary_stats()
-        Stat(StatNames.STARTUP_STATS, payload).track()
+    if first_migration is not None:
+        quarter = (first_migration.applied.month - 1) // 3 + 1  # Calculate the quarter
+        year = first_migration.applied.year
+        quarter_str = f"Q{quarter}-{year}"
+    else:
+        quarter_str = None
+    return quarter_str
 
 
 def _gather_summary_stats():
@@ -103,23 +132,14 @@ def _gather_summary_stats():
             unique_connection_count=Count("connection", distinct=True)
         )
 
-        install_date = MigrationRecorder.Migration.objects. \
-            filter(app="explorer").order_by("applied").first().applied
-
-        if install_date is not None:
-            quarter = (install_date.month - 1) // 3 + 1  # Calculate the quarter
-            year = install_date.year
-            quarter_str = f"Q{quarter}-{year}"
-        else:
-            quarter_str = None
-
+        # Round the counts to provide additional anonymity
         return {
-            "total_log_count": ql_stats["total_count"],
-            "unique_run_by_user_count": ql_stats["unique_run_by_user_count"],
-            "total_query_count": q_stats["total_count"],
-            "unique_connection_count": q_stats["unique_connection_count"],
+            "total_log_count": round(ql_stats["total_count"] * 0.1) * 10,
+            "unique_run_by_user_count": round(ql_stats["unique_run_by_user_count"] * 0.2) * 5,
+            "total_query_count": round(q_stats["total_count"] * 0.1) * 10,
+            "unique_connection_count": round(q_stats["unique_connection_count"] * 0.2) * 5,
             "default_database": connection.vendor,
-            "explorer_install_quarter": quarter_str,
+            "explorer_install_quarter": _get_install_quarter(),
             "debug": settings.DEBUG,
             "tasks_enabled": app_settings.ENABLE_TASKS,
             "unsafe_rendering": app_settings.UNSAFE_RENDERING,

diff --git a/explorer/tests/test_telemetry.py b/explorer/tests/test_telemetry.py
@@ -1,7 +1,8 @@
 from django.test import TestCase
-from explorer.telemetry import instance_identifier, _gather_summary_stats, Stat, StatNames
-from unittest.mock import patch
-from explorer import app_settings
+from explorer.telemetry import instance_identifier, _gather_summary_stats, Stat, StatNames, _get_install_quarter
+from unittest.mock import patch, MagicMock
+from django.core.cache import cache
+from datetime import datetime
 
 
 class TestTelemetry(TestCase):
@@ -22,11 +23,44 @@ def test_gather_summary_stats(self):
     @patch("explorer.telemetry.threading.Thread")
     @patch("explorer.app_settings")
     def test_stats_not_sent_too_frequently(self, mocked_app_settings, mocked_thread):
-        mocked_app_settings = app_settings
         mocked_app_settings.EXPLORER_ENABLE_ANONYMOUS_STATS = True
+        mocked_app_settings.UNSAFE_RENDERING = True
+        mocked_app_settings.EXPLORER_CHARTS_ENABLED = True
+        mocked_app_settings.ENABLE_TASKS = True
         s1 = Stat(StatNames.QUERY_RUN, {"foo": "bar"})
         s2 = Stat(StatNames.QUERY_RUN, {"mux": "qux"})
+        s3 = Stat(StatNames.QUERY_RUN, {"bar": "baz"})
+
+        # once for s1 and once for summary stats
         s1.track()
-        self.assertEqual(mocked_thread.call_count, 1)
+        self.assertEqual(mocked_thread.call_count, 2)
+
+        # both the s2 track call is suppressed, and the summary stat call
         s2.track()
-        self.assertEqual(mocked_thread.call_count, 1)
+        self.assertEqual(mocked_thread.call_count, 2)
+
+        # clear the cache, which should cause track() for the stat to work, but not send summary stats
+        cache.clear()
+        s3.track()
+        self.assertEqual(mocked_thread.call_count, 3)
+
+    @patch("explorer.telemetry.MigrationRecorder.Migration.objects.filter")
+    def test_get_install_quarter_with_no_migrations(self, mock_filter):
+        mock_filter.return_value.order_by.return_value.first.return_value = None
+        result = _get_install_quarter()
+        self.assertIsNone(result)
+
+    @patch("explorer.telemetry.MigrationRecorder.Migration.objects.filter")
+    def test_get_install_quarter_edge_cases(self, mock_filter):
+        # Test edge cases like end of year and start of year
+        dates = [datetime(2022, 12, 31), datetime(2023, 1, 1), datetime(2023, 3, 31), datetime(2023, 4, 1)]
+        results = ["Q4-2022", "Q1-2023", "Q1-2023", "Q2-2023"]
+
+        for date, expected in zip(dates, results):
+            with self.subTest(date=date):
+                mock_migration = MagicMock()
+                mock_migration.applied = date
+                mock_filter.return_value.order_by.return_value.first.return_value = mock_migration
+
+                result = _get_install_quarter()
+                self.assertEqual(result, expected)