Skip to content

Commit

Permalink
4.2b2 - more anonymization, less chatty telemetry
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisclark committed Apr 25, 2024
1 parent 1e2a8b0 commit 55d4436
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 45 deletions.
8 changes: 4 additions & 4 deletions explorer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
__version_info__ = {
"major": 4,
"minor": 1,
"patch": 1,
"releaselevel": "final",
"serial": 0
"minor": 2,
"patch": 0,
"releaselevel": "beta",
"serial": 1
}


Expand Down
3 changes: 1 addition & 2 deletions explorer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from django.utils.translation import gettext_lazy as _

from explorer import app_settings
from explorer.telemetry import Stat, StatNames, send_summary_stats_if_necessary
from explorer.telemetry import Stat, StatNames
from explorer.utils import (
extract_params, get_params_for_url, get_s3_bucket, get_valid_connection, passes_blacklist, s3_url,
shared_dict_update, swap_params,
Expand Down Expand Up @@ -112,7 +112,6 @@ def execute_with_logging(self, executing_user):
ql.save()
Stat(StatNames.QUERY_RUN,
{"sql_len": len(ql.sql), "duration": ql.duration}).track()
send_summary_stats_if_necessary()
return ret, ql

def execute(self):
Expand Down
86 changes: 53 additions & 33 deletions explorer/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,52 @@ class StatNames(SelfNamedEnum):
class Stat:

STAT_COLLECTION_INTERVAL = 60 * 10 # Ten minutes
STARTUP_STAT_COLLECTION_INTERVAL = 60 * 60 * 24 * 7 # A week

# Build a stat record. Every attribute assigned here ends up in the payload,
# because track() serializes the instance with json.dumps(self.__dict__).
def __init__(self, name: StatNames, value):
# Identifier returned by instance_identifier() (its definition is not shown in this diff).
self.instanceId = instance_identifier()
# Creation timestamp from time.time(); track() compares it with the last send time.
self.time = time.time()
self.value = value
# Keep the enum member's underlying value rather than the enum member itself.
self.name = name.value

@property
def is_summary(self):
    """Whether this stat is the STARTUP_STATS summary stat."""
    summary_name = StatNames.STARTUP_STATS.value
    return self.name == summary_name

def should_send_summary_stats(self):
    """Report whether the summary stats are due to be sent.

    Returns True when no previous send time is recorded, or when at least
    STARTUP_STAT_COLLECTION_INTERVAL seconds lie between the recorded send
    time and this stat's own timestamp.
    """
    # Imported inside the method: explorer.models itself imports from
    # explorer.telemetry, so a module-level import would be circular.
    from explorer.models import ExplorerValue

    previous_send = ExplorerValue.objects.get_startup_last_send()
    return not previous_send or self.time - previous_send >= self.STARTUP_STAT_COLLECTION_INTERVAL

def send_summary_stats(self):
    """Track a STARTUP_STATS stat, then record this stat's timestamp as the last send time."""
    # Imported inside the method: explorer.models itself imports from
    # explorer.telemetry, so a module-level import would be circular.
    from explorer.models import ExplorerValue

    Stat(StatNames.STARTUP_STATS, _gather_summary_stats()).track()
    ExplorerValue.objects.set_startup_last_send(self.time)

def track(self):
from explorer import app_settings
if app_settings.EXPLORER_ENABLE_ANONYMOUS_STATS:
cache_key = "last_stat_sent_time"
last_sent_time = cache.get(cache_key, 0)
if self.time - last_sent_time >= self.STAT_COLLECTION_INTERVAL:
data = json.dumps(self.__dict__)
thread = threading.Thread(target=_send, args=(data,))
thread.start()
cache.set(cache_key, self.time)
if not app_settings.EXPLORER_ENABLE_ANONYMOUS_STATS:
return

cache_key = "last_stat_sent_time"
last_sent_time = cache.get(cache_key, 0)
# Summary stats are tracked with a different time interval
if self.is_summary or self.time - last_sent_time >= self.STAT_COLLECTION_INTERVAL:
data = json.dumps(self.__dict__)
thread = threading.Thread(target=_send, args=(data,))
thread.start()
cache.set(cache_key, self.time)

# Every time we send any tracking, see if we have recently sent overall summary stats
# Of course, sending the summary stats calls .track(), so we need to NOT call track()
# again if we are in fact already in the process of sending summary stats. Otherwise,
# we will end up in infinite recursion of track() calls.
if not self.is_summary and self.should_send_summary_stats():
self.send_summary_stats()


def _send(data):
Expand All @@ -73,17 +102,17 @@ def _send(data):
logger.warning(f"Failed to send stats: {e}")


STARTUP_STAT_COLLECTION_INTERVAL = 60 * 60 * 24 * 7 # A week

def _get_install_quarter():
first_migration = MigrationRecorder.Migration.objects. \
filter(app="explorer").order_by("applied").first()

def send_summary_stats_if_necessary():
from explorer.models import ExplorerValue

last_send = ExplorerValue.objects.get_startup_last_send()
should_send = not last_send or time.time() - last_send >= STARTUP_STAT_COLLECTION_INTERVAL
if should_send:
payload = _gather_summary_stats()
Stat(StatNames.STARTUP_STATS, payload).track()
if first_migration is not None:
quarter = (first_migration.applied.month - 1) // 3 + 1 # Calculate the quarter
year = first_migration.applied.year
quarter_str = f"Q{quarter}-{year}"
else:
quarter_str = None
return quarter_str


def _gather_summary_stats():
Expand All @@ -103,23 +132,14 @@ def _gather_summary_stats():
unique_connection_count=Count("connection", distinct=True)
)

install_date = MigrationRecorder.Migration.objects. \
filter(app="explorer").order_by("applied").first().applied

if install_date is not None:
quarter = (install_date.month - 1) // 3 + 1 # Calculate the quarter
year = install_date.year
quarter_str = f"Q{quarter}-{year}"
else:
quarter_str = None

# Round the counts to provide additional anonymity
return {
"total_log_count": ql_stats["total_count"],
"unique_run_by_user_count": ql_stats["unique_run_by_user_count"],
"total_query_count": q_stats["total_count"],
"unique_connection_count": q_stats["unique_connection_count"],
"total_log_count": round(ql_stats["total_count"] * 0.1) * 10,
"unique_run_by_user_count": round(ql_stats["unique_run_by_user_count"] * 0.2) * 5,
"total_query_count": round(q_stats["total_count"] * 0.1) * 10,
"unique_connection_count": round(q_stats["unique_connection_count"] * 0.2) * 5,
"default_database": connection.vendor,
"explorer_install_quarter": quarter_str,
"explorer_install_quarter": _get_install_quarter(),
"debug": settings.DEBUG,
"tasks_enabled": app_settings.ENABLE_TASKS,
"unsafe_rendering": app_settings.UNSAFE_RENDERING,
Expand Down
46 changes: 40 additions & 6 deletions explorer/tests/test_telemetry.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from django.test import TestCase
from explorer.telemetry import instance_identifier, _gather_summary_stats, Stat, StatNames
from unittest.mock import patch
from explorer import app_settings
from explorer.telemetry import instance_identifier, _gather_summary_stats, Stat, StatNames, _get_install_quarter
from unittest.mock import patch, MagicMock
from django.core.cache import cache
from datetime import datetime


class TestTelemetry(TestCase):
Expand All @@ -22,11 +23,44 @@ def test_gather_summary_stats(self):
@patch("explorer.telemetry.threading.Thread")
@patch("explorer.app_settings")
def test_stats_not_sent_too_frequently(self, mocked_app_settings, mocked_thread):
mocked_app_settings = app_settings
mocked_app_settings.EXPLORER_ENABLE_ANONYMOUS_STATS = True
mocked_app_settings.UNSAFE_RENDERING = True
mocked_app_settings.EXPLORER_CHARTS_ENABLED = True
mocked_app_settings.ENABLE_TASKS = True
s1 = Stat(StatNames.QUERY_RUN, {"foo": "bar"})
s2 = Stat(StatNames.QUERY_RUN, {"mux": "qux"})
s3 = Stat(StatNames.QUERY_RUN, {"bar": "baz"})

# once for s1 and once for summary stats
s1.track()
self.assertEqual(mocked_thread.call_count, 1)
self.assertEqual(mocked_thread.call_count, 2)

# both the s2 track call and the summary stat call are suppressed
s2.track()
self.assertEqual(mocked_thread.call_count, 1)
self.assertEqual(mocked_thread.call_count, 2)

# clear the cache, which should cause track() for the stat to work, but not send summary stats
cache.clear()
s3.track()
self.assertEqual(mocked_thread.call_count, 3)

@patch("explorer.telemetry.MigrationRecorder.Migration.objects.filter")
def test_get_install_quarter_with_no_migrations(self, mock_filter):
    """When the migration lookup yields no row, no install quarter is reported."""
    ordered_migrations = mock_filter.return_value.order_by.return_value
    ordered_migrations.first.return_value = None

    self.assertIsNone(_get_install_quarter())

@patch("explorer.telemetry.MigrationRecorder.Migration.objects.filter")
def test_get_install_quarter_edge_cases(self, mock_filter):
    """Check quarter boundaries: end of year, start of year, and the Q1/Q2 changeover."""
    cases = (
        (datetime(2022, 12, 31), "Q4-2022"),
        (datetime(2023, 1, 1), "Q1-2023"),
        (datetime(2023, 3, 31), "Q1-2023"),
        (datetime(2023, 4, 1), "Q2-2023"),
    )
    # Mock attributes are cached, so this is the same `first` mock on every iteration.
    first_migration = mock_filter.return_value.order_by.return_value.first

    for applied, expected in cases:
        with self.subTest(date=applied):
            first_migration.return_value = MagicMock(applied=applied)
            self.assertEqual(_get_install_quarter(), expected)

0 comments on commit 55d4436

Please sign in to comment.