diff --git a/.gitignore b/.gitignore index b1d8cacc..53598004 100644 --- a/.gitignore +++ b/.gitignore @@ -5,9 +5,10 @@ /dist *.egg-info .DS_Store +tmp /build *# *~ .coverage /htmlcov/ -*.orig \ No newline at end of file +*.orig diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..89d8ff78 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +# Use an official Python runtime as a parent image +FROM python:3.5-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +# Set the working directory in the container +WORKDIR /app + +# Copy the requirements file into the container at /app +COPY requirements.txt /app/ +COPY optional-requirements.txt /app/ + +# Install any needed packages specified in requirements.txt +RUN pip install Django==1.11.17 +RUN pip install -r requirements.txt +RUN pip install -r optional-requirements.txt + +# Copy the entire Django project directory into the container at /app +COPY . /app/ diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..53f98a9c --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,27 @@ +version: '3' +services: + db: + image: postgres:13 + environment: + POSTGRES_DB: mydb + POSTGRES_USER: myuser + POSTGRES_PASSWORD: mypassword + + web: + build: . + command: python manage.py runserver 0.0.0.0:8000 + volumes: + - .:/app + ports: + - "8000:8000" + depends_on: + - db + environment: + DEBUG: "True" # Set to "False" in production + DJANGO_DB_HOST: db + DJANGO_DB_PORT: 5432 + DJANGO_DB_NAME: mydb + DJANGO_DB_USER: myuser + DJANGO_DB_PASSWORD: mypassword + links: + - db diff --git a/explorer/__init__.py b/explorer/__init__.py index fac665c0..ce120d90 100644 --- a/explorer/__init__.py +++ b/explorer/__init__.py @@ -1,7 +1,7 @@ __version_info__ = { 'major': 0, 'minor': 9, - 'micro': 2, + 'micro': 23, 'releaselevel': 'final', 'serial': 0 } diff --git a/explorer/admin.py b/explorer/admin.py index 3f4f66f5..bc661001 100644 --- a/explorer/admin.py +++ b/explorer/admin.py @@ -1,3 +1,4 @@ + from django.contrib import admin from explorer.models import Query from explorer.actions import generate_report_action @@ -7,7 +8,8 @@ class QueryAdmin(admin.ModelAdmin): list_display = ('title', 'description', 'created_by_user',) list_filter = ('title',) raw_id_fields = ('created_by_user',) - + actions = [generate_report_action()] + admin.site.register(Query, QueryAdmin) diff --git a/explorer/app_settings.py b/explorer/app_settings.py index 004bb058..ef64be67 100644 --- a/explorer/app_settings.py +++ b/explorer/app_settings.py @@ -2,28 +2,49 @@ # Required EXPLORER_CONNECTION_NAME = getattr(settings, 'EXPLORER_CONNECTION_NAME', None) +EXPLORER_CONNECTION_PII_NAME = getattr( + settings, 'EXPLORER_CONNECTION_PII_NAME', None) +EXPLORER_CONNECTION_ASYNC_API_DB_NAME = getattr( + settings, 'EXPLORER_CONNECTION_ASYNC_API_DB_NAME', None) +EXPLORER_MASTER_DB_CONNECTION_NAME = getattr(settings, 'EXPLORER_MASTER_DB_CONNECTION', None) # Change the behavior of explorer -EXPLORER_SQL_BLACKLIST = getattr(settings, 'EXPLORER_SQL_BLACKLIST', ('ALTER', 'RENAME ', 'DROP', 'TRUNCATE', 'INSERT INTO', 'UPDATE', 'REPLACE', 'DELETE', 'CREATE TABLE', 'SCHEMA', 'GRANT', 'OWNER TO')) -EXPLORER_SQL_WHITELIST = getattr(settings, 'EXPLORER_SQL_WHITELIST', ('CREATED', 'DELETED', 'REGEXP_REPLACE')) +EXPLORER_SQL_BLACKLIST = getattr(settings, 'EXPLORER_SQL_BLACKLIST', ('ALTER', 'RENAME ', 'DROP', 'TRUNCATE', + 'INSERT INTO', 'UPDATE', 'REPLACE', 'DELETE', 'CREATE TABLE', 'SCHEMA', 'GRANT', 'OWNER TO')) +EXPLORER_SQL_WHITELIST = getattr( + settings, 'EXPLORER_SQL_WHITELIST', ('CREATED', 'DELETED', 'REGEXP_REPLACE')) +TABLE_NAMES_FOR_PII_MASKING = getattr( + settings, 'TABLE_NAMES_FOR_PII_MASKING', None) EXPLORER_DEFAULT_ROWS = getattr(settings, 'EXPLORER_DEFAULT_ROWS', 1000) -EXPLORER_SCHEMA_EXCLUDE_APPS = getattr(settings, 'EXPLORER_SCHEMA_EXCLUDE_APPS', ('django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.admin')) +EXPLORER_SCHEMA_EXCLUDE_APPS = getattr(settings, 'EXPLORER_SCHEMA_EXCLUDE_APPS', ( + 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.admin')) EXPLORER_TRANSFORMS = getattr(settings, 'EXPLORER_TRANSFORMS', []) -EXPLORER_PERMISSION_VIEW = getattr(settings, 'EXPLORER_PERMISSION_VIEW', lambda u: u.is_staff) -EXPLORER_PERMISSION_CHANGE = getattr(settings, 'EXPLORER_PERMISSION_CHANGE', lambda u: u.is_staff) -EXPLORER_RECENT_QUERY_COUNT = getattr(settings, 'EXPLORER_RECENT_QUERY_COUNT', 10) +EXPLORER_PERMISSION_VIEW = getattr( + settings, 'EXPLORER_PERMISSION_VIEW', lambda u: u.is_staff) +EXPLORER_PERMISSION_CHANGE = getattr( + settings, 'EXPLORER_PERMISSION_CHANGE', lambda u: u.is_staff) +EXPLORER_RECENT_QUERY_COUNT = getattr( + settings, 'EXPLORER_RECENT_QUERY_COUNT', 10) CSV_DELIMETER = getattr(settings, "EXPLORER_CSV_DELIMETER", ",") # API access EXPLORER_TOKEN = getattr(settings, 'EXPLORER_TOKEN', 'CHANGEME') # These are callable to aid testability by dodging the settings cache. # There is surely a better pattern for this, but this'll hold for now. -EXPLORER_GET_USER_QUERY_VIEWS = lambda: getattr(settings, 'EXPLORER_USER_QUERY_VIEWS', {}) -EXPLORER_TOKEN_AUTH_ENABLED = lambda: getattr(settings, 'EXPLORER_TOKEN_AUTH_ENABLED', False) + + +def EXPLORER_GET_USER_QUERY_VIEWS(): return getattr( + settings, 'EXPLORER_USER_QUERY_VIEWS', {}) + + +def EXPLORER_TOKEN_AUTH_ENABLED(): return getattr( + settings, 'EXPLORER_TOKEN_AUTH_ENABLED', False) + # Async task related. Note that the EMAIL_HOST settings must be set up for email to work. ENABLE_TASKS = getattr(settings, "EXPLORER_TASKS_ENABLED", False) S3_ACCESS_KEY = getattr(settings, "EXPLORER_S3_ACCESS_KEY", None) S3_SECRET_KEY = getattr(settings, "EXPLORER_S3_SECRET_KEY", None) S3_BUCKET = getattr(settings, "EXPLORER_S3_BUCKET", None) -FROM_EMAIL = getattr(settings, 'EXPLORER_FROM_EMAIL', 'django-sql-explorer@example.com') \ No newline at end of file +FROM_EMAIL = getattr(settings, 'EXPLORER_FROM_EMAIL', + 'django-sql-explorer@example.com') diff --git a/explorer/constants.py b/explorer/constants.py new file mode 100644 index 00000000..0f246950 --- /dev/null +++ b/explorer/constants.py @@ -0,0 +1,15 @@ +PII_MASKING_PATTERN_REPLACEMENT_DICT = { + r"(?:\+?\d{1,3}|0)?([6-9]\d{9})\b": "XXXXXXXXXXX", # For phone number + r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b": "XXX@XXX.com", # For email +} + +TYPE_CODE_FOR_JSON = 3802 +TYPE_CODE_FOR_TEXT = 25 +TYPE_CODE_FOR_CHAR = 1043 + +PLAYER_PHONE_NUMBER_MASKING_TYPE_CODES = [TYPE_CODE_FOR_CHAR] + +ALLOW_PHONE_NUMBER_MASKING_GROUP_ID = 10439 + +PATTERN_FOR_FINDING_PHONE_NUMBER = r"\+?\d{0,3}?([6-9]\d{9})(?:_\w+)?\b" +PATTERN_FOR_FINDING_EMAIL = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b" diff --git a/explorer/forms.py b/explorer/forms.py index 302fd9d5..5a044a33 100644 --- a/explorer/forms.py +++ b/explorer/forms.py @@ -2,8 +2,12 @@ from django.forms.widgets import CheckboxInput from explorer.models import Query, MSG_FAILED_BLACKLIST from django.db import DatabaseError +import logging +import re +def _(x): return x -_ = lambda x: x + +logger = logging.getLogger(__name__) class SqlField(Field): @@ -14,20 +18,27 @@ def validate(self, value): :param value: The SQL for this Query model. """ - + super().validate(value) query = Query(sql=value) passes_blacklist, failing_words = query.passes_blacklist() - error = MSG_FAILED_BLACKLIST % ', '.join(failing_words) if not passes_blacklist else None + error = MSG_FAILED_BLACKLIST % ', '.join( + failing_words) if not passes_blacklist else None if not error and not query.available_params(): try: query.execute_query_only() except DatabaseError as e: - error = str(e) + + logger.info("error executing query: %s", e) + if (re.search("permission denied for table", str(e))): + error = None + else: + error = e if error: + raise ValidationError( _(error), code="InvalidSql" @@ -54,4 +65,4 @@ def created_by_user_id(self): class Meta: model = Query - fields = ['title', 'sql', 'description', 'created_by_user', 'snapshot'] \ No newline at end of file + fields = ['title', 'sql', 'description', 'created_by_user', 'snapshot'] diff --git a/explorer/migrations/0001_initial.py b/explorer/migrations/0001_initial.py index 38e78089..428a96f1 100644 --- a/explorer/migrations/0001_initial.py +++ b/explorer/migrations/0001_initial.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals from django.db import models, migrations import django.db.models.deletion @@ -14,34 +13,74 @@ class Migration(migrations.Migration): operations = [ migrations.CreateModel( - name='Query', + name="Query", fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('title', models.CharField(max_length=255)), - ('sql', models.TextField()), - ('description', models.TextField(null=True, blank=True)), - ('created_at', models.DateTimeField(auto_now_add=True)), - ('last_run_date', models.DateTimeField(auto_now=True)), - ('created_by_user', models.ForeignKey(blank=True, to=settings.AUTH_USER_MODEL, null=True)), + ( + "id", + models.AutoField( + verbose_name="ID", + serialize=False, + auto_created=True, + primary_key=True, + ), + ), + ("title", models.CharField(max_length=255)), + ("sql", models.TextField()), + ("description", models.TextField(null=True, blank=True)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("last_run_date", models.DateTimeField(auto_now=True)), + ( + "created_by_user", + models.ForeignKey( + blank=True, + to=settings.AUTH_USER_MODEL, + null=True, + on_delete=django.db.models.deletion.CASCADE, + ), + ), ], options={ - 'ordering': ['title'], - 'verbose_name_plural': 'Queries', + "ordering": ["title"], + "verbose_name_plural": "Queries", }, bases=(models.Model,), ), migrations.CreateModel( - name='QueryLog', + name="QueryLog", fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('sql', models.TextField()), - ('is_playground', models.BooleanField(default=False)), - ('run_at', models.DateTimeField(auto_now_add=True)), - ('query', models.ForeignKey(on_delete=django.db.models.deletion.SET_NULL, blank=True, to='explorer.Query', null=True)), - ('run_by_user', models.ForeignKey(blank=True, to=settings.AUTH_USER_MODEL, null=True)), + ( + "id", + models.AutoField( + verbose_name="ID", + serialize=False, + auto_created=True, + primary_key=True, + ), + ), + ("sql", models.TextField()), + ("is_playground", models.BooleanField(default=False)), + ("run_at", models.DateTimeField(auto_now_add=True)), + ( + "query", + models.ForeignKey( + on_delete=django.db.models.deletion.SET_NULL, + blank=True, + to="explorer.Query", + null=True, + ), + ), + ( + "run_by_user", + models.ForeignKey( + blank=True, + to=settings.AUTH_USER_MODEL, + null=True, + on_delete=django.db.models.deletion.CASCADE, + ), + ), ], options={ - 'ordering': ['-run_at'], + "ordering": ["-run_at"], }, bases=(models.Model,), ), diff --git a/explorer/migrations/0002_auto_20150501_1515.py b/explorer/migrations/0002_auto_20150501_1515.py index 1bd48cb9..68eb9b43 100644 --- a/explorer/migrations/0002_auto_20150501_1515.py +++ b/explorer/migrations/0002_auto_20150501_1515.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals from django.db import models, migrations diff --git a/explorer/migrations/0003_query_snapshot.py b/explorer/migrations/0003_query_snapshot.py index 02afa491..9daa0723 100644 --- a/explorer/migrations/0003_query_snapshot.py +++ b/explorer/migrations/0003_query_snapshot.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals from django.db import models, migrations diff --git a/explorer/migrations/0004_querylog_duration.py b/explorer/migrations/0004_querylog_duration.py index 10daa311..214482d3 100644 --- a/explorer/migrations/0004_querylog_duration.py +++ b/explorer/migrations/0004_querylog_duration.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals from django.db import migrations, models diff --git a/explorer/migrations/0005_querychangelog.py b/explorer/migrations/0005_querychangelog.py new file mode 100644 index 00000000..e93fa323 --- /dev/null +++ b/explorer/migrations/0005_querychangelog.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.9 on 2022-03-25 05:46 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('explorer', '0004_querylog_duration'), + ] + + operations = [ + migrations.CreateModel( + name='QueryChangeLog', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('old_sql', models.TextField(blank=True, null=True)), + ('new_sql', models.TextField(blank=True, null=True)), + ('run_at', models.DateTimeField(auto_now_add=True)), + ('query', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='explorer.Query')), + ('run_by_user', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'ordering': ['-run_at'], + }, + ), + ] diff --git a/explorer/migrations/0006_auto_20230207_0103.py b/explorer/migrations/0006_auto_20230207_0103.py new file mode 100644 index 00000000..42149f42 --- /dev/null +++ b/explorer/migrations/0006_auto_20230207_0103.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('explorer', '0005_querychangelog'), + ] + + operations = [ + migrations.AlterField( + model_name='query', + name='snapshot', + field=models.BooleanField(default=False, help_text='Include in snapshot task (if enabled)'), + ), + ] diff --git a/explorer/models.py b/explorer/models.py index 93f38330..7362e595 100644 --- a/explorer/models.py +++ b/explorer/models.py @@ -1,12 +1,37 @@ -from explorer.utils import passes_blacklist, swap_params, extract_params, shared_dict_update, get_connection, get_s3_connection +from explorer.utils import ( + passes_blacklist, + swap_params, + extract_params, + shared_dict_update, + get_connection, + get_s3_connection, + get_connection_pii, + get_explorer_master_db_connection, + get_connection_asyncapi_db, + should_route_to_asyncapi_db, + mask_string, + is_pii_masked_for_user, + mask_player_pii, +) from django.db import models, DatabaseError from time import time -from django.core.urlresolvers import reverse +from django.urls import reverse from django.conf import settings +from django.contrib import messages +from django.contrib.messages import constants as messages_constants from . import app_settings import logging +import re +import json import six +from explorer.constants import ( + TYPE_CODE_FOR_JSON, + TYPE_CODE_FOR_TEXT, + PLAYER_PHONE_NUMBER_MASKING_TYPE_CODES, + TYPE_CODE_FOR_CHAR, +) + MSG_FAILED_BLACKLIST = "Query failed the SQL blacklist: %s" @@ -17,28 +42,32 @@ class Query(models.Model): title = models.CharField(max_length=255) sql = models.TextField() description = models.TextField(null=True, blank=True) - created_by_user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True, blank=True) + created_by_user = models.ForeignKey( + settings.AUTH_USER_MODEL, null=True, blank=True, on_delete=models.CASCADE + ) created_at = models.DateTimeField(auto_now_add=True) last_run_date = models.DateTimeField(auto_now=True) - snapshot = models.BooleanField(default=False, help_text="Include in snapshot task (if enabled)") + snapshot = models.BooleanField( + default=False, help_text="Include in snapshot task (if enabled)" + ) def __init__(self, *args, **kwargs): - self.params = kwargs.get('params') - kwargs.pop('params', None) + self.params = kwargs.get("params") + kwargs.pop("params", None) super(Query, self).__init__(*args, **kwargs) class Meta: - ordering = ['title'] - verbose_name_plural = 'Queries' + ordering = ["title"] + verbose_name_plural = "Queries" - def __unicode__(self): - return six.text_type(self.title) + def __str__(self): + return self.title def get_run_count(self): return self.querylog_set.count() def avg_duration(self): - return self.querylog_set.aggregate(models.Avg('duration'))['duration__avg'] + return self.querylog_set.aggregate(models.Avg("duration"))["duration__avg"] def passes_blacklist(self): return passes_blacklist(self.final_sql()) @@ -46,18 +75,41 @@ def passes_blacklist(self): def final_sql(self): return swap_params(self.sql, self.available_params()) - def execute_query_only(self): - return QueryResult(self.final_sql()) + def execute_query_only( + self, + is_connection_type_pii=None, + executing_user=None, + is_connection_for_explorer_master_db=False, + ): + return QueryResult( + self.final_sql(), + self.title, + is_connection_type_pii, + executing_user if executing_user else self.created_by_user, + is_connection_for_explorer_master_db, + ) def execute_with_logging(self, executing_user): ql = self.log(executing_user) - ret = self.execute() + ret = self.execute(executing_user) ql.duration = ret.duration ql.save() return ret, ql - def execute(self): - ret = self.execute_query_only() + def execute(self, executing_user=None): + ret = self.execute_query_only(False, executing_user) + ret.process() + return ret + + def execute_pii(self, executing_user=None): + ret = self.execute_query_only(True, executing_user) + ret.process() + return ret + + def execute_on_explorer_with_master_db(self, executing_user=None): + ret = self.execute_query_only( + False, executing_user, is_connection_for_explorer_master_db=True + ) ret.process() return ret @@ -75,32 +127,44 @@ def available_params(self): return p def get_absolute_url(self): - return reverse("query_detail", kwargs={'query_id': self.id}) + return reverse("query_detail", kwargs={"query_id": self.id}) def log(self, user=None): - if user and user.is_anonymous(): - user = None - ql = QueryLog(sql=self.final_sql(), query_id=self.id, run_by_user=user) + + if user: + # In Django<1.10, is_anonymous was a method. + if user.is_anonymous: + user = None + ql = QueryLog( + sql=self.final_sql(), + query_id=self.id, + run_by_user=user, + ) ql.save() return ql + @property def shared(self): - return self.id in set(sum(app_settings.EXPLORER_GET_USER_QUERY_VIEWS().values(), [])) + return self.id in set( + sum(app_settings.EXPLORER_GET_USER_QUERY_VIEWS().values(), []) + ) @property def snapshots(self): if app_settings.ENABLE_TASKS: conn = get_s3_connection() - res = conn.list('query-%s.snap-' % self.id) - return sorted(res, key=lambda s: s['last_modified']) + res = conn.list("query-%s.snap-" % self.id) + return sorted(res, key=lambda s: s["last_modified"]) class QueryLog(models.Model): sql = models.TextField(null=True, blank=True) query = models.ForeignKey(Query, null=True, blank=True, on_delete=models.SET_NULL) - run_by_user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True, blank=True) + run_by_user = models.ForeignKey( + settings.AUTH_USER_MODEL, null=True, blank=True, on_delete=models.CASCADE + ) run_at = models.DateTimeField(auto_now_add=True) duration = models.FloatField(blank=True, null=True) # milliseconds @@ -109,21 +173,157 @@ def is_playground(self): return self.query_id is None class Meta: - ordering = ['-run_at'] + ordering = ["-run_at"] -class QueryResult(object): +class QueryChangeLog(models.Model): + + old_sql = models.TextField(null=True, blank=True) + new_sql = models.TextField(null=True, blank=True) + query = models.ForeignKey(Query, null=True, blank=True, on_delete=models.SET_NULL) + run_by_user = models.ForeignKey( + settings.AUTH_USER_MODEL, null=True, blank=True, on_delete=models.CASCADE + ) + run_at = models.DateTimeField(auto_now_add=True) - def __init__(self, sql): + @property + def is_playground(self): + return self.query_id is None - self.sql = sql + class Meta: + ordering = ["-run_at"] + + +class QueryResult(object): + def get_type_code_and_column_indices_to_be_masked_dict(self): + """ + Returns a dictionary of type code and column indices to be masked + Return type: + { + type_code: [column indices that match the type code] + } + Eg. + Say a table has three fields id, data, random_text. id is of type INT, data is of type JSON, and random_text is of type TEXT. + Then the return value will be: + { + TYPE_CODE_FOR_JSON: [1], + TYPE_CODE_FOR_TEXT: [2] + } + as 1 is the column index for JSON and 2 is the column index for TEXT + """ + type_code_and_column_indices_to_be_masked_dict = { + TYPE_CODE_FOR_JSON: [], + TYPE_CODE_FOR_TEXT: [], + } + phone_number_masking_indexes = [] + + # Collect the indices for JSON and text columns + for index, column in enumerate(self._description): + if ( + hasattr(column, "type_code") + and column.type_code in type_code_and_column_indices_to_be_masked_dict + ): + type_code_and_column_indices_to_be_masked_dict[column.type_code].append( + index + ) + + # Masking for player phone numbers + if ( + self.used_by_user + and is_pii_masked_for_user(self.used_by_user) + and hasattr(column, "type_code") + and column.type_code in PLAYER_PHONE_NUMBER_MASKING_TYPE_CODES + ): + phone_number_masking_indexes.append(index) + + # Masking for PII data in char fields if specific tables are used in SQL + if app_settings.TABLE_NAMES_FOR_PII_MASKING and phone_number_masking_indexes: + for table_name in app_settings.TABLE_NAMES_FOR_PII_MASKING: + if table_name in self.sql: + type_code_and_column_indices_to_be_masked_dict[ + TYPE_CODE_FOR_CHAR + ] = phone_number_masking_indexes + break + + return type_code_and_column_indices_to_be_masked_dict + + def get_masked_data(self, data, type_code): + """ + Mask the data based on the type code. + """ + if not data: + return data + if type_code == TYPE_CODE_FOR_JSON: + return json.dumps(mask_string(str(data))) + elif type_code == TYPE_CODE_FOR_TEXT: + return mask_string(data) + elif type_code in PLAYER_PHONE_NUMBER_MASKING_TYPE_CODES: + return mask_player_pii(data) + return data + + def mask_pii_data(self, row, type_code_and_column_indices_to_be_masked_dict): + """ + Mask the JSON and TEXT data types in the row. + """ + modified_row = list(row) + for ( + type_code, + indices, + ) in type_code_and_column_indices_to_be_masked_dict.items(): + for index in indices: + modified_row[index] = self.get_masked_data( + modified_row[index], type_code + ) + + return modified_row + + def get_data_to_be_displayed(self, cursor): + """ + If the connection type allows PII, then return the data as is. + If connection type does not allow PII, then mask JSON and TEXT data types and then return the data. + JSON and TEXT data types can be identified by the type_code attribute of the column. + """ + if self.is_connection_type_pii: + return [list(r) for r in cursor.fetchall()] + + type_code_and_column_indices_to_be_masked_dict = ( + self.get_type_code_and_column_indices_to_be_masked_dict() + ) + data_to_be_displayed = [] + + for row in cursor.fetchall(): + modified_row = self.mask_pii_data( + row, type_code_and_column_indices_to_be_masked_dict + ) + data_to_be_displayed.append(modified_row) + + return data_to_be_displayed + + def __init__( + self, + sql, + title=None, + is_connection_type_pii=None, + used_by_user=None, + is_connection_for_explorer_master_db=False, + ): + self.sql = sql + self.title = title + self.is_connection_for_explorer_master_db = is_connection_for_explorer_master_db + if is_connection_type_pii: + self.is_connection_type_pii = is_connection_type_pii + else: + self.is_connection_type_pii = False + + self.used_by_user = used_by_user cursor, duration = self.execute_query() self._description = cursor.description or [] - self._data = [list(r) for r in cursor.fetchall()] - self.duration = duration + self._data = self.get_data_to_be_displayed(cursor) + + self.duration = duration cursor.close() self._headers = self._get_headers() @@ -138,20 +338,38 @@ def headers(self): return self._headers or [] def _get_headers(self): - return [ColumnHeader(d[0]) for d in self._description] if self._description else [ColumnHeader('--')] + return ( + [ColumnHeader(d[0]) for d in self._description] + if self._description + else [ColumnHeader("--")] + ) def _get_numerics(self): conn = get_connection() if hasattr(conn.Database, "NUMBER"): - return [ix for ix, c in enumerate(self._description) if hasattr(c, 'type_code') and c.type_code in conn.Database.NUMBER.values] + return [ + ix + for ix, c in enumerate(self._description) + if hasattr(c, "type_code") + and c.type_code in conn.Database.NUMBER.values + ] elif self.data: d = self.data[0] - return [ix for ix, _ in enumerate(self._description) if not isinstance(d[ix], six.string_types) and six.text_type(d[ix]).isnumeric()] + return [ + ix + for ix, _ in enumerate(self._description) + if not isinstance(d[ix], six.string_types) + and six.text_type(d[ix]).isnumeric() + ] return [] def _get_transforms(self): transforms = dict(app_settings.EXPLORER_TRANSFORMS) - return [(ix, transforms[str(h)]) for ix, h in enumerate(self.headers) if str(h) in transforms.keys()] + return [ + (ix, transforms[str(h)]) + for ix, h in enumerate(self.headers) + if str(h) in transforms.keys() + ] def column(self, ix): return [r[ix] for r in self.data] @@ -162,7 +380,9 @@ def process(self): self.process_columns() self.process_rows() - logger.info("Explorer Query Processing took %sms." % ((time() - start_time) * 1000)) + logger.info( + "Explorer test Query Processing took %sms." % ((time() - start_time) * 1000) + ) def process_columns(self): for ix in self._get_numerics(): @@ -176,7 +396,19 @@ def process_rows(self): r[ix] = t.format(str(r[ix])) def execute_query(self): - conn = get_connection() + # can change connectiion type here to use different role --> get_connection_pii() + if self.is_connection_type_pii: + logger.info("pii-connection") + conn = get_connection_pii() + elif should_route_to_asyncapi_db(self.sql): + logger.info("Route to Async API DB") + conn = get_connection_asyncapi_db() + elif self.is_connection_for_explorer_master_db: + conn = get_explorer_master_db_connection() + else: + logger.info("non-pii-connection") + conn = get_connection() + cursor = conn.cursor() start_time = time() @@ -184,7 +416,16 @@ def execute_query(self): cursor.execute(self.sql) except DatabaseError as e: cursor.close() - raise e + if ( + re.search("permission denied for table", str(e)) + and self.title != "Playground" + ): + + raise DatabaseError( + "Query saved but unable to execute it because " + str(e) + ) + else: + raise e return cursor, ((time() - start_time) * 1000) @@ -214,7 +455,9 @@ def __init__(self, label, statfn, precision=2, handles_null=False): self.handles_null = handles_null def __call__(self, coldata): - self.value = round(float(self.statfn(coldata)), self.precision) if coldata else 0 + self.value = ( + round(float(self.statfn(coldata)), self.precision) if coldata else 0 + ) def __unicode__(self): return self.label @@ -232,7 +475,12 @@ def __init__(self, header, col): ColumnStat("Avg", lambda x: float(sum(x)) / float(len(x))), ColumnStat("Min", min), ColumnStat("Max", max), - ColumnStat("NUL", lambda x: int(sum(map(lambda y: 1 if y is None else 0, x))), 0, True) + ColumnStat( + "NUL", + lambda x: int(sum(map(lambda y: 1 if y is None else 0, x))), + 0, + True, + ), ] without_nulls = list(map(lambda x: 0 if x is None else x, col)) diff --git a/explorer/static/explorer/explorer.js b/explorer/static/explorer/explorer.js index 2bb654e1..cf347440 100644 --- a/explorer/static/explorer/explorer.js +++ b/explorer/static/explorer/explorer.js @@ -1,253 +1,315 @@ -var csrf_token = $.cookie('csrftoken'); +var csrf_token = $.cookie("csrftoken"); $.ajaxSetup({ - beforeSend: function(xhr) { - xhr.setRequestHeader("X-CSRFToken", csrf_token); - } + beforeSend: function (xhr) { + xhr.setRequestHeader("X-CSRFToken", csrf_token); + }, }); function ExplorerEditor(queryId, dataUrl) { - this.queryId = queryId; - this.dataUrl = dataUrl; - this.$table = $('#preview'); - this.$rows = $('#rows'); - this.$form = $("form"); - this.$snapshotField = $("#id_snapshot"); - this.$paramFields = this.$form.find(".param"); - - this.$submit = $("#refresh_play_button, #save_button"); - if (!this.$submit.length) { this.$submit = $("#refresh_button"); } - - this.editor = CodeMirror.fromTextArea(document.getElementById('id_sql'), { - mode: "text/x-sql", - lineNumbers: 't', - autofocus: true, - height: 500, - extraKeys: { - "Ctrl-Enter": function() { this.doCodeMirrorSubmit(); }.bind(this), - "Cmd-Enter": function() { this.doCodeMirrorSubmit(); }.bind(this), - "Cmd-/": function() { this.editor.toggleComment(); }.bind(this) - } - }); - this.editor.on("change", function(cm, change) { - document.getElementById('id_sql').classList.add('changed-input'); - }); - this.bind(); + this.queryId = queryId; + this.dataUrl = dataUrl; + this.$table = $("#preview"); + this.$rows = $("#rows"); + this.$form = $("form"); + this.$snapshotField = $("#id_snapshot"); + this.$paramFields = this.$form.find(".param"); + + this.$submit = $("#refresh_play_button, #save_button"); + + if (!this.$submit.length) { + this.$submit = $("#refresh_button"); + } + + this.editor = CodeMirror.fromTextArea(document.getElementById("id_sql"), { + mode: "text/x-sql", + lineNumbers: "t", + autofocus: true, + height: 500, + extraKeys: { + "Ctrl-Enter": function () { + this.doCodeMirrorSubmit(); + }.bind(this), + "Cmd-Enter": function () { + this.doCodeMirrorSubmit(); + }.bind(this), + "Cmd-/": function () { + this.editor.toggleComment(); + }.bind(this), + }, + }); + this.editor.on("change", function (cm, change) { + document.getElementById("id_sql").classList.add("changed-input"); + }); + this.bind(); } -ExplorerEditor.prototype.getParams = function() { - var o = false; - if(this.$paramFields.length) { - o = {}; - this.$paramFields.each(function() { - o[$(this).data('param')] = $(this).val(); - }); - } - return o; +ExplorerEditor.prototype.getParams = function () { + var o = false; + if (this.$paramFields.length) { + o = {}; + this.$paramFields.each(function () { + o[$(this).data("param")] = $(this).val(); + }); + } + return o; }; -ExplorerEditor.prototype.serializeParams = function(params) { - var args = []; - for(var key in params) { - args.push(key + '%3A' + params[key]); - } - return args.join('%7C'); +ExplorerEditor.prototype.serializeParams = function (params) { + var args = []; + for (var key in params) { + args.push(key + "%3A" + params[key]); + } + return args.join("%7C"); }; -ExplorerEditor.prototype.doCodeMirrorSubmit = function() { - // Captures the cmd+enter keystroke and figures out which button to trigger. - this.$submit.click(); +ExplorerEditor.prototype.doCodeMirrorSubmit = function () { + // Captures the cmd+enter keystroke and figures out which button to trigger. + this.$submit.click(); }; -ExplorerEditor.prototype.savePivotState = function(state) { - bmark = btoa(JSON.stringify(_(state).pick('aggregatorName', 'rows', 'cols', 'rendererName', 'vals'))); - $el = $('#pivot-bookmark') - $el.attr('href', $el.data('baseurl') + '#' + bmark) +ExplorerEditor.prototype.savePivotState = function (state) { + bmark = btoa( + JSON.stringify( + _(state).pick("aggregatorName", "rows", "cols", "rendererName", "vals") + ) + ); + $el = $("#pivot-bookmark"); + $el.attr("href", $el.data("baseurl") + "#" + bmark); }; -ExplorerEditor.prototype.updateQueryString = function(key, value, url) { - // http://stackoverflow.com/a/11654596/221390 - if (!url) url = window.location.href; - var re = new RegExp("([?&])" + key + "=.*?(&|#|$)(.*)", "gi"); - - if (re.test(url)) { - if (typeof value !== 'undefined' && value !== null) - return url.replace(re, '$1' + key + "=" + value + '$2$3'); - else { - var hash = url.split('#'); - url = hash[0].replace(re, '$1$3').replace(/(&|\?)$/, ''); - if (typeof hash[1] !== 'undefined' && hash[1] !== null) - url += '#' + hash[1]; - return url; - } - } - else { - if (typeof value !== 'undefined' && value !== null) { - var separator = url.indexOf('?') !== -1 ? '&' : '?', - hash = url.split('#'); - url = hash[0] + separator + key + '=' + value; - if (typeof hash[1] !== 'undefined' && hash[1] !== null) - url += '#' + hash[1]; - return url; - } - else - return url; - } +ExplorerEditor.prototype.updateQueryString = function (key, value, url) { + // http://stackoverflow.com/a/11654596/221390 + if (!url) url = window.location.href; + var re = new RegExp("([?&])" + key + "=.*?(&|#|$)(.*)", "gi"); + + if (re.test(url)) { + if (typeof value !== "undefined" && value !== null) + return url.replace(re, "$1" + key + "=" + value + "$2$3"); + else { + var hash = url.split("#"); + url = hash[0].replace(re, "$1$3").replace(/(&|\?)$/, ""); + if (typeof hash[1] !== "undefined" && hash[1] !== null) + url += "#" + hash[1]; + return url; + } + } else { + if (typeof value !== "undefined" && value !== null) { + var separator = url.indexOf("?") !== -1 ? "&" : "?", + hash = url.split("#"); + url = hash[0] + separator + key + "=" + value; + if (typeof hash[1] !== "undefined" && hash[1] !== null) + url += "#" + hash[1]; + return url; + } else return url; + } }; -ExplorerEditor.prototype.formatSql = function() { - $.post('../format/', {sql: this.editor.getValue() }, function(data) { - this.editor.setValue(data.formatted); - }.bind(this)); +ExplorerEditor.prototype.formatSql = function () { + $.post( + "../format/", + { sql: this.editor.getValue() }, + function (data) { + this.editor.setValue(data.formatted); + }.bind(this) + ); }; -ExplorerEditor.prototype.showRows = function() { - var rows = this.$rows.val(), - $form = $("#editor"); - $form.attr('action', this.updateQueryString("rows", rows, window.location.href)); - $form.submit(); +ExplorerEditor.prototype.showRows = function () { + var rows = this.$rows.val(), + $form = $("#editor"); + $form.attr( + "action", + this.updateQueryString("rows", rows, window.location.href) + ); + $form.submit(); }; -ExplorerEditor.prototype.bind = function() { - $("#show_schema_button").click(function() { - $("#schema_frame").attr('src', '../schema/'); - $("#query_area").addClass("col-md-9"); - var schema$ = $("#schema"); - schema$.addClass("col-md-3"); - schema$.show(); - $(this).hide(); - $("#hide_schema_button").show(); - return false; - }); - - $("#hide_schema_button").click(function() { - $("#query_area").removeClass("col-md-9"); - var schema$ = $("#schema"); - schema$.removeClass("col-md-3"); - schema$.hide(); - $(this).hide(); - $("#show_schema_button").show(); - return false; - }); - - $("#format_button").click(function(e) { - e.preventDefault(); - this.formatSql(); - }.bind(this)); - - $("#save_button").click(function() { - var params = this.getParams(this); - if(params) { - this.$form.attr('action', '../' + this.queryId + '/?params=' + this.serializeParams(params)); - } - this.$snapshotField.hide(); - this.$form.append(this.$snapshotField); - }.bind(this)); - - $("#refresh_button").click(function(e) { - e.preventDefault(); - var params = this.getParams(); - if(params) { - window.location.href = '../' + this.queryId + '/?params=' + this.serializeParams(params); - } else { - window.location.href = '../' + this.queryId + '/'; - } - }.bind(this)); - - $("#refresh_play_button").click(function() { - this.$form.attr('action', '../play/'); - }.bind(this)); - - $("#playground_button").click(function() { - this.$form.prepend(""); - this.$form.attr('action', '../play/'); - }.bind(this)); - - $("#download_play_button").click(function() { - this.$form.attr('action', '../csv'); - }.bind(this)); - - $(".download_button").click(function(e) { - e.preventDefault(); - var dl_link = 'download'; - var params = this.getParams(this); - if(params) { dl_link = dl_link + '?params=' + this.serializeParams(params); } - window.open(dl_link, '_blank'); - }.bind(this)); - - $("#create_button").click(function() { - this.$form.attr('action', '../new/'); - }.bind(this)); - - $(".stats-expand").click(function(e) { - e.preventDefault(); - $(".stats-expand").hide(); - $(".stats-wrapper").show(); - this.$table.floatThead('reflow'); - }.bind(this)); - - $(".sort").click(function(e) { - var t = $(e.target).data('sort'); - var dir = $(e.target).data('dir'); - $('.sort').css('background-image', 'url(http://cdn.datatables.net/1.10.0/images/sort_both.png)') - if (dir == 'asc'){ - $(e.target).data('dir', 'desc'); - $(e.target).css('background-image', 'url(http://cdn.datatables.net/1.10.0/images/sort_asc.png)') - } else { - $(e.target).data('dir', 'asc'); - $(e.target).css('background-image', 'url(http://cdn.datatables.net/1.10.0/images/sort_desc.png)') - } - var vals = []; - var ct = 0; - while (ct < this.$table.find('th').length) { - vals.push(ct++); - } - var options = { - valueNames: vals - }; - var tableList = new List('preview', options); - tableList.sort(t, { order: dir }); - }.bind(this)); - - $("#preview-tab-label").click(function() { - this.$table.floatThead('reflow'); - }.bind(this)); - - var pivotState = window.location.hash; - var navToPivot = false; - if (!pivotState) { - pivotState = {onRefresh: this.savePivotState}; - } else { - pivotState = JSON.parse(atob(pivotState.substr(1))); - pivotState['onRefresh'] = this.savePivotState; - navToPivot = true; - } - - $(".pivot-table").pivotUI(this.$table, pivotState); - if (navToPivot) { - $("#pivot-tab-label").tab('show'); - } - - this.$table.floatThead({ - scrollContainer: function() { - return this.$table.closest('.overflow-wrapper'); - }.bind(this) - }); - - this.$rows.change(function() { this.showRows(); }.bind(this)); - this.$rows.keyup(function(event) { - if(event.keyCode == 13){ this.showRows(); } - }.bind(this)); +ExplorerEditor.prototype.bind = function () { + $("#show_schema_button").click(function () { + $("#schema_frame").attr("src", "../schema/"); + $("#query_area").addClass("col-md-9"); + var schema$ = $("#schema"); + schema$.addClass("col-md-3"); + schema$.show(); + $(this).hide(); + $("#hide_schema_button").show(); + return false; + }); + + $("#hide_schema_button").click(function () { + $("#query_area").removeClass("col-md-9"); + var schema$ = $("#schema"); + schema$.removeClass("col-md-3"); + schema$.hide(); + $(this).hide(); + $("#show_schema_button").show(); + return false; + }); + + $("#format_button").click( + function (e) { + e.preventDefault(); + this.formatSql(); + }.bind(this) + ); + + $("#save_button").click( + function () { + var params = this.getParams(this); + if (params) { + this.$form.attr( + "action", + "../" + this.queryId + "/?params=" + this.serializeParams(params) + ); + } + this.$snapshotField.hide(); + this.$form.append(this.$snapshotField); + }.bind(this) + ); + + $("#refresh_button").click( + function (e) { + e.preventDefault(); + var params = this.getParams(); + if (params) { + window.location.href = + "../" + this.queryId + "/?params=" + this.serializeParams(params); + } else { + window.location.href = "../" + this.queryId + "/"; + } + }.bind(this) + ); + + $("#refresh_play_button").click( + function () { + this.$form.attr("action", "../play/"); + }.bind(this) + ); + + $("#playground_button").click( + function () { + this.$form.prepend(""); + this.$form.attr("action", "../play/"); + }.bind(this) + ); + + $("#download_play_button").click( + function () { + this.$form.attr("action", "../csv"); + }.bind(this) + ); + + $(".download_button").click( + function (e) { + e.preventDefault(); + var dl_link = "download"; + var params = this.getParams(this); + if (params) { + dl_link = dl_link + "?params=" + this.serializeParams(params); + } + window.open(dl_link, "_blank"); + }.bind(this) + ); + + $("#create_button").click( + function () { + this.$form.attr("action", "../new/"); + }.bind(this) + ); + + $(".stats-expand").click( + function (e) { + e.preventDefault(); + $(".stats-expand").hide(); + $(".stats-wrapper").show(); + this.$table.floatThead("reflow"); + }.bind(this) + ); + + $(".sort").click( + function (e) { + var t = $(e.target).data("sort"); + var dir = $(e.target).data("dir"); + $(".sort").css( + "background-image", + "url(http://cdn.datatables.net/1.10.0/images/sort_both.png)" + ); + if (dir == "asc") { + $(e.target).data("dir", "desc"); + $(e.target).css( + "background-image", + "url(http://cdn.datatables.net/1.10.0/images/sort_asc.png)" + ); + } else { + $(e.target).data("dir", "asc"); + $(e.target).css( + "background-image", + "url(http://cdn.datatables.net/1.10.0/images/sort_desc.png)" + ); + } + var vals = []; + var ct = 0; + while (ct < this.$table.find("th").length) { + vals.push(ct++); + } + var options = { + valueNames: vals, + }; + var tableList = new List("preview", options); + tableList.sort(t, { order: dir }); + }.bind(this) + ); + + $("#preview-tab-label").click( + function () { + this.$table.floatThead("reflow"); + }.bind(this) + ); + + var pivotState = window.location.hash; + var navToPivot = false; + if (!pivotState) { + pivotState = { onRefresh: this.savePivotState }; + } else { + pivotState = JSON.parse(atob(pivotState.substr(1))); + pivotState["onRefresh"] = this.savePivotState; + navToPivot = true; + } + + $(".pivot-table").pivotUI(this.$table, pivotState); + if (navToPivot) { + $("#pivot-tab-label").tab("show"); + } + + this.$table.floatThead({ + scrollContainer: function () { + return this.$table.closest(".overflow-wrapper"); + }.bind(this), + }); + + this.$rows.change( + function () { + this.showRows(); + }.bind(this) + ); + this.$rows.keyup( + function (event) { + if (event.keyCode == 13) { + this.showRows(); + } + }.bind(this) + ); }; -$(window).on('beforeunload', function () { - // Only do this if changed-input is on the page and we're not on the playground page. - if ($('.changed-input').length && !$('.playground-form').length) { - return 'You have unsaved changes to your query.'; - } +$(window).on("beforeunload", function () { + // Only do this if changed-input is on the page and we're not on the playground page. + if ($(".changed-input").length && !$(".playground-form").length) { + return "You have unsaved changes to your query."; + } }); // Disable unsaved changes warning when submitting the editor form -$(document).on("submit", "#editor", function(event){ - // disable warning - $(window).off('beforeunload'); +$(document).on("submit", "#editor", function (event) { + // disable warning + $(window).off("beforeunload"); }); diff --git a/explorer/templates/explorer/base.html b/explorer/templates/explorer/base.html index 392d0a85..8cd31b1d 100644 --- a/explorer/templates/explorer/base.html +++ b/explorer/templates/explorer/base.html @@ -22,6 +22,16 @@ dataUrl = "{{ dataUrl }}"; queryId = "{% firstof query.id 'new' %}"; +
diff --git a/explorer/templates/explorer/play.html b/explorer/templates/explorer/play.html index 4e0e3e49..bc65a014 100644 --- a/explorer/templates/explorer/play.html +++ b/explorer/templates/explorer/play.html @@ -5,6 +5,7 @@The playground is for experimenting and writing ad-hoc queries. By default, nothing you do here will be saved.
-