From a38beca61c16969ed3162c5d5e082077252ce8e6 Mon Sep 17 00:00:00 2001 From: Chris Clark Date: Mon, 13 May 2024 20:41:51 -0400 Subject: [PATCH] Userland connections --- .gitignore | 2 + LICENSE | 11 +- docs/features.rst | 38 +-- docs/settings.rst | 11 +- explorer/admin.py | 3 +- explorer/app_settings.py | 18 +- explorer/apps.py | 5 +- explorer/assistant/tests.py | 108 ------- explorer/assistant/utils.py | 71 ++++- explorer/assistant/views.py | 66 ++-- explorer/connections.py | 54 +++- explorer/ee/LICENSE | 21 ++ explorer/ee/__init__.py | 1 + explorer/ee/db_connections/__init__.py | 0 explorer/ee/db_connections/admin.py | 9 + explorer/ee/db_connections/forms.py | 17 ++ explorer/ee/db_connections/models.py | 63 ++++ explorer/ee/db_connections/utils.py | 170 +++++++++++ explorer/ee/db_connections/views.py | 127 ++++++++ explorer/ee/urls.py | 21 ++ explorer/forms.py | 5 +- explorer/locale/ru/LC_MESSAGES/django.po | 2 +- .../migrations/0017_databaseconnection.py | 26 ++ ..._alter_databaseconnection_host_and_more.py | 29 ++ explorer/models.py | 9 +- explorer/permissions.py | 4 + explorer/src/js/assistant.js | 32 +- explorer/src/js/codemirror-config.js | 3 +- explorer/src/js/explorer.js | 27 +- explorer/src/js/main.js | 4 +- explorer/src/js/query-list.js | 2 +- explorer/src/js/schemaService.js | 8 +- explorer/src/js/uploads.js | 83 +++++ explorer/tasks.py | 15 + explorer/telemetry.py | 5 +- .../templates/connections/connections.html | 43 +++ .../database_connection_confirm_delete.html | 12 + .../database_connection_detail.html | 39 +++ .../connections/database_connection_form.html | 23 ++ explorer/templates/explorer/assistant.html | 2 + explorer/templates/explorer/base.html | 12 +- explorer/templates/explorer/query.html | 2 +- explorer/templates/explorer/query_list.html | 35 ++- explorer/tests/csvs/all_types.csv | 8 + explorer/tests/csvs/dates.csv | 13 + explorer/tests/csvs/floats.csv | 8 + explorer/tests/csvs/integers.csv | 5 + explorer/tests/csvs/mixed.csv | 5 + 
explorer/tests/csvs/test_case1.csv | 31 ++ explorer/tests/settings.py | 5 +- explorer/tests/test_assistant.py | 283 ++++++++++++++++++ explorer/tests/test_connections.py | 71 +++++ explorer/tests/test_models.py | 18 +- explorer/tests/test_tasks.py | 108 ++++++- explorer/tests/test_telemetry.py | 13 + explorer/tests/test_user_connection_utils.py | 192 ++++++++++++ explorer/tests/test_utils.py | 4 +- explorer/tests/test_views.py | 192 +++++++++++- explorer/urls.py | 10 +- explorer/utils.py | 31 +- explorer/views/__init__.py | 1 - explorer/views/list.py | 4 +- explorer/views/mixins.py | 7 +- explorer/views/schema.py | 4 +- public_key.pem | 9 + requirements/base.txt | 4 +- requirements/extra/assistant.txt | 2 +- requirements/extra/user_connections.txt | 3 + test_project/celery_config.py | 23 +- test_project/settings.py | 17 +- tox.ini | 2 +- 71 files changed, 2028 insertions(+), 283 deletions(-) delete mode 100644 explorer/assistant/tests.py create mode 100644 explorer/ee/LICENSE create mode 100644 explorer/ee/__init__.py create mode 100644 explorer/ee/db_connections/__init__.py create mode 100644 explorer/ee/db_connections/admin.py create mode 100644 explorer/ee/db_connections/forms.py create mode 100644 explorer/ee/db_connections/models.py create mode 100644 explorer/ee/db_connections/utils.py create mode 100644 explorer/ee/db_connections/views.py create mode 100644 explorer/ee/urls.py create mode 100644 explorer/migrations/0017_databaseconnection.py create mode 100644 explorer/migrations/0018_alter_databaseconnection_host_and_more.py create mode 100644 explorer/src/js/uploads.js create mode 100644 explorer/templates/connections/connections.html create mode 100644 explorer/templates/connections/database_connection_confirm_delete.html create mode 100644 explorer/templates/connections/database_connection_detail.html create mode 100644 explorer/templates/connections/database_connection_form.html create mode 100644 explorer/tests/csvs/all_types.csv create mode 100644 
explorer/tests/csvs/dates.csv create mode 100644 explorer/tests/csvs/floats.csv create mode 100644 explorer/tests/csvs/integers.csv create mode 100644 explorer/tests/csvs/mixed.csv create mode 100644 explorer/tests/csvs/test_case1.csv create mode 100644 explorer/tests/test_assistant.py create mode 100644 explorer/tests/test_connections.py create mode 100644 explorer/tests/test_user_connection_utils.py create mode 100644 public_key.pem create mode 100644 requirements/extra/user_connections.txt diff --git a/.gitignore b/.gitignore index 45ec76b9..1ba5acb7 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ docs/_build/ .env tst tst2 +user_dbs/* +tmp2 diff --git a/LICENSE b/LICENSE index 4de004d8..dab1e28f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,11 @@ -The MIT License (MIT) +* All content that resides under the "explorer/ee/" directory of this repository is licensed under the license defined +in "explorer/ee/LICENSE". -Copyright (c) 2013 Chris Clark, ePantry LLC +* Content outside of the above mentioned directory is provided under the "MIT" license as defined below. + +** The MIT License (MIT) ** + +Copyright (c) 2024, SQL Explorer, Inc Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +23,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file +THE SOFTWARE. diff --git a/docs/features.rst b/docs/features.rst index 98ffb7e1..457497b1 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -3,19 +3,16 @@ Features Security -------- -- Let's not kid ourselves - this tool is all about giving people - access to running SQL in production. 
So if that makes you - nervous (**and it should**) - you've been warned. Explorer makes an - effort to not allow terrible things to happen, but be careful! - It's recommended you setup read-only roles for each of your database +- It's recommended you setup read-only roles for each of your database connections and only use these particular connections for your queries through the - ``EXPLORER_CONNECTIONS`` setting. -- Explorer supports two different permission checks for users of + ``EXPLORER_CONNECTIONS`` setting -- or set up userland connections via DatabaseConnections in + the Django admin, or the SQL Explorer front-end. +- SQL Explorer supports two different permission checks for users of the tool. Users passing the ``EXPLORER_PERMISSION_CHANGE`` test can create, edit, delete, and execute queries. Users who do not pass this test but pass the ``EXPLORER_PERMISSION_VIEW`` test can only execute queries. Other users cannot access any part of - Explorer. Both permission groups are set to is_staff by default + SQL Explorer. Both permission groups are set to is_staff by default and can be overridden in your settings file. - Enforces a SQL blacklist so destructive queries don't get executed (delete, drop, alter, update etc). This is not @@ -37,19 +34,25 @@ SQL Assistant to quickly get help with your query, with relevant schema automatically injected into the prompt. +Configurable Connections +------------------------ +- Configure connections via the settings.py file, or via the SQL Explorer UI. +- Supports drag-and-drop uploading of CSV files or SQLite databases for instant SQL access to your data. + Snapshots --------- - Tick the 'snapshot' box on a query, and Explorer will upload a .csv snapshot of the query results to S3. Configure the snapshot - frequency via a celery cron task, e.g. for daily at 1am: + frequency via a celery cron task, e.g. for daily at 1am + (see test_project/celery_config.py for an example of this, along with test_project/__init__.py): .. 
code-block:: python app.conf.beat_schedule = { - 'explorer.tasks.snapshot_queries': { - 'task': 'explorer.tasks.snapshot_queries', - 'schedule': crontab(hour=1, minute=0) - } + "explorer.tasks.snapshot_queries": { + "task": "explorer.tasks.snapshot_queries", + "schedule": crontab(hour="1", minute="0") + }, } - Requires celery, obviously. Also uses boto3. All @@ -64,6 +67,7 @@ Email query results - Click the email icon in the query listing view, enter an email address, and the query results (zipped .csv) will be sent to you asynchronously. Very handy for long-running queries. +- You must also have the setting ``EXPLORER_TASKS_ENABLED`` enabled. Parameterized Queries --------------------- @@ -168,10 +172,10 @@ Query Logs .. code-block:: python app.conf.beat_schedule = { - 'explorer.tasks.truncate_querylogs': { - 'task': 'explorer.tasks.truncate_querylogs', - 'schedule': crontab(hour=1, minute=0), - 'kwargs': {'days': 30} + "explorer.tasks.truncate_querylogs": { + "task": "explorer.tasks.truncate_querylogs", + "schedule": crontab(hour="1", minute="10"), + "kwargs": {"days": 30} } } diff --git a/docs/settings.rst b/docs/settings.rst index fa164e2a..653b002c 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -357,4 +357,13 @@ The following three settings control the SQL Assistant. More information is avai EXPLORER_AI_API_KEY = getattr(settings, "EXPLORER_AI_API_KEY", None) EXPLORER_ASSISTANT_BASE_URL = getattr(settings, "EXPLORER_ASSISTANT_BASE_URL", "https://api.openai.com/v1") - EXPLORER_ASSISTANT_MODEL + EXPLORER_ASSISTANT_MODEL = getattr(settings, "EXPLORER_ASSISTANT_MODEL", + # Return the model name and max_tokens it supports + {"name": "gpt-4o", + "max_tokens": 128000}) + + +User-Configured DB Connections +****************************** +Set `EXPLORER_DB_CONNECTIONS_ENABLED` to `True` to enable DB connections to get configured in the browser (e.g. not +just in settings.py). This also allows uploading of CSV or SQLite files for instant querying. 
diff --git a/explorer/admin.py b/explorer/admin.py index aee8a3c7..0cad25ac 100644 --- a/explorer/admin.py +++ b/explorer/admin.py @@ -2,6 +2,7 @@ from explorer.actions import generate_report_action from explorer.models import Query, ExplorerValue +from explorer.ee.db_connections.admin import DatabaseConnectionAdmin # noqa @admin.register(Query) @@ -16,7 +17,6 @@ class QueryAdmin(admin.ModelAdmin): class ExplorerValueAdmin(admin.ModelAdmin): list_display = ("key", "value", "display_key") list_filter = ("key",) - readonly_fields = ("key",) search_fields = ("key", "value") def display_key(self, obj): @@ -24,4 +24,3 @@ def display_key(self, obj): return dict(ExplorerValue.EXPLORER_SETTINGS_CHOICES).get(obj.key, "") display_key.short_description = "Setting Name" - diff --git a/explorer/app_settings.py b/explorer/app_settings.py index 5f52b97a..72156cf5 100644 --- a/explorer/app_settings.py +++ b/explorer/app_settings.py @@ -69,8 +69,11 @@ EXPLORER_PERMISSION_CHANGE = getattr( settings, "EXPLORER_PERMISSION_CHANGE", lambda r: r.user.is_staff ) +EXPLORER_PERMISSION_CONNECTIONS = getattr( + settings, "EXPLORER_PERMISSION_CONNECTIONS", lambda r: r.user.is_staff +) EXPLORER_RECENT_QUERY_COUNT = getattr( - settings, "EXPLORER_RECENT_QUERY_COUNT", 10 + settings, "EXPLORER_RECENT_QUERY_COUNT", 5 ) EXPLORER_ASYNC_SCHEMA = getattr(settings, "EXPLORER_ASYNC_SCHEMA", False) @@ -125,7 +128,7 @@ S3_REGION = getattr(settings, "EXPLORER_S3_REGION", "us-east-1") S3_ENDPOINT_URL = getattr(settings, "EXPLORER_S3_ENDPOINT_URL", None) S3_DESTINATION = getattr(settings, "EXPLORER_S3_DESTINATION", "") -S3_SIGNATURE_VERSION = getattr(settings, "EXPLORER_S3_SIGNATURE_VERSION", "v2") +S3_SIGNATURE_VERSION = getattr(settings, "EXPLORER_S3_SIGNATURE_VERSION", "v4") UNSAFE_RENDERING = getattr(settings, "EXPLORER_UNSAFE_RENDERING", False) @@ -146,8 +149,17 @@ # AI Assistant settings. 
Setting the first to an OpenAI key is the simplest way to enable the assistant EXPLORER_AI_API_KEY = getattr(settings, "EXPLORER_AI_API_KEY", None) + EXPLORER_ASSISTANT_BASE_URL = getattr(settings, "EXPLORER_ASSISTANT_BASE_URL", "https://api.openai.com/v1") EXPLORER_ASSISTANT_MODEL = getattr(settings, "EXPLORER_ASSISTANT_MODEL", # Return the model name and max_tokens it supports - {"name": "gpt-4-0125-preview", + {"name": "gpt-4o", "max_tokens": 128000}) + +EXPLORER_DB_CONNECTIONS_ENABLED = getattr(settings, "EXPLORER_DB_CONNECTIONS_ENABLED", False) +EXPLORER_PRUNE_LOCAL_UPLOAD_COPY_DAYS_INACTIVITY = getattr(settings, + "EXPLORER_PRUNE_LOCAL_UPLOAD_COPY_DAYS_INACTIVITY", 7) + + +def has_assistant(): return EXPLORER_AI_API_KEY is not None +def db_connections_enabled(): return EXPLORER_DB_CONNECTIONS_ENABLED diff --git a/explorer/apps.py b/explorer/apps.py index 6797abfe..522d9ada 100644 --- a/explorer/apps.py +++ b/explorer/apps.py @@ -28,8 +28,9 @@ def _get_explorer_connections(): def _validate_connections(): - # Validate connections - if _get_default() not in _get_explorer_connections().values(): + # Validate connections, when using settings.EXPLORER_CONNECTIONS + # Skip if none are configured, as the app will use user-configured connections (DatabaseConnection models) + if _get_explorer_connections().values() and _get_default() not in _get_explorer_connections().values(): raise ImproperlyConfigured( f"EXPLORER_DEFAULT_CONNECTION is {_get_default()}, " f"but that alias is not present in the values of " diff --git a/explorer/assistant/tests.py b/explorer/assistant/tests.py deleted file mode 100644 index a92dacf9..00000000 --- a/explorer/assistant/tests.py +++ /dev/null @@ -1,108 +0,0 @@ -from explorer.tests.factories import SimpleQueryFactory -from unittest.mock import patch, Mock -import unittest - -import json -from django.test import TestCase -from django.urls import reverse -from django.contrib.auth.models import User -from explorer.app_settings import 
EXPLORER_DEFAULT_CONNECTION as CONN -from explorer import app_settings - - -class TestAssistantViews(TestCase): - - def setUp(self): - self.user = User.objects.create_superuser( - "admin", "admin@admin.com", "pwd" - ) - self.client.login(username="admin", password="pwd") - self.request_data = { - "sql": "SELECT * FROM explorer_query", - "connection": CONN, - "assistant_request": "Test Request" - } - - @unittest.skipIf(not app_settings.EXPLORER_AI_API_KEY, "assistant not enabled") - @patch("explorer.assistant.utils.openai_client") - def test_do_modify_query(self, mocked_openai_client): - from explorer.assistant.views import run_assistant - - # create.return_value should match: resp.choices[0].message - mocked_openai_client.return_value.chat.completions.create.return_value = Mock( - choices=[Mock(message=Mock(content="smart computer"))]) - resp = run_assistant(self.request_data, None) - self.assertEqual(resp, "smart computer") - - @unittest.skipIf(not app_settings.EXPLORER_AI_API_KEY, "assistant not enabled") - def test_assistant_help(self): - resp = self.client.post(reverse("assistant"), - data=json.dumps(self.request_data), - content_type="application/json") - self.assertIsNone(json.loads(resp.content)["message"]) - - -class TestPromptContext(TestCase): - - def test_retrieves_sample_rows(self): - from explorer.assistant.utils import sample_rows_from_table, ROW_SAMPLE_SIZE - SimpleQueryFactory(title="First Query") - SimpleQueryFactory(title="Second Query") - SimpleQueryFactory(title="Third Query") - SimpleQueryFactory(title="Fourth Query") - ret = sample_rows_from_table(CONN, "explorer_query") - self.assertEqual(len(ret), ROW_SAMPLE_SIZE+1) # includes header row - - def test_format_rows_from_table(self): - from explorer.assistant.utils import format_rows_from_table - d = [ - ["col1", "col2"], - ["val1", "val2"], - ] - ret = format_rows_from_table(d) - self.assertEqual(ret, "col1 | col2\n" + "-" * 50 + "\nval1 | val2\n") - - def test_parsing_tables_from_query(self): 
- from explorer.assistant.utils import get_table_names_from_query - sql = "SELECT * FROM explorer_query" - ret = get_table_names_from_query(sql) - self.assertEqual(ret, ["explorer_query"]) - - def test_parsing_tables_from_query_bad_sql(self): - from explorer.assistant.utils import get_table_names_from_query - sql = "foo" - ret = get_table_names_from_query(sql) - self.assertEqual(ret, []) - - def test_schema_info_from_table_names(self): - from explorer.assistant.utils import tables_from_schema_info - ret = tables_from_schema_info(CONN, ["explorer_query"]) - expected = [("explorer_query", [ - ("id", "AutoField"), - ("title", "CharField"), - ("sql", "TextField"), - ("description", "TextField"), - ("created_at", "DateTimeField"), - ("last_run_date", "DateTimeField"), - ("created_by_user_id", "IntegerField"), - ("snapshot", "BooleanField"), - ("connection", "CharField")])] - self.assertEqual(ret, expected) - - -class TestAssistantUtils(TestCase): - - def test_sample_rows_from_tables(self): - from explorer.assistant.utils import sample_rows_from_tables - SimpleQueryFactory(title="First Query") - SimpleQueryFactory(title="Second Query") - ret = sample_rows_from_tables(CONN, ["explorer_query"]) - self.assertTrue("First Query" in ret) - self.assertTrue("Second Query" in ret) - - def test_sample_rows_from_tables_no_tables(self): - from explorer.assistant.utils import sample_rows_from_tables - SimpleQueryFactory(title="First Query") - SimpleQueryFactory(title="Second Query") - ret = sample_rows_from_tables(CONN, []) - self.assertEqual(ret, "") diff --git a/explorer/assistant/utils.py b/explorer/assistant/utils.py index 7f444219..448d78fe 100644 --- a/explorer/assistant/utils.py +++ b/explorer/assistant/utils.py @@ -1,11 +1,13 @@ from explorer import app_settings from explorer.schema import schema_info +from explorer.models import ExplorerValue from explorer.utils import get_valid_connection from django.db.utils import OperationalError OPENAI_MODEL = 
app_settings.EXPLORER_ASSISTANT_MODEL["name"] ROW_SAMPLE_SIZE = 2 +MAX_FIELD_SAMPLE_SIZE = 500 # characters def openai_client(): @@ -41,20 +43,46 @@ def tables_from_schema_info(connection, table_names): def sample_rows_from_tables(connection, table_names): ret = "" for table_name in table_names: - ret = f"SAMPLE FROM TABLE {table_name}:\n" - ret = ret + format_rows_from_table( + ret += f"SAMPLE FROM TABLE {table_name}:\n" + ret += format_rows_from_table( sample_rows_from_table(connection, table_name) ) + "\n\n" return ret def sample_rows_from_table(connection, table_name): + """ + Fetches a sample of rows from the specified table and ensures that any field values + exceeding 500 characters (or bytes) are truncated. This is useful for handling fields + like "description" that might contain very long strings of text or binary data. + Truncating these fields prevents issues with displaying or processing overly large values. + An ellipsis ("...") is appended to indicate that the data has been truncated. + + Args: + connection: The database connection. + table_name: The name of the table to sample rows from. + + Returns: + A list of rows with field values truncated if they exceed 500 characters/bytes. + """ conn = get_valid_connection(connection) cursor = conn.cursor() try: cursor.execute(f"SELECT * FROM {table_name} LIMIT {ROW_SAMPLE_SIZE}") ret = [[header[0] for header in cursor.description]] - ret = ret + cursor.fetchall() + rows = cursor.fetchall() + + for row in rows: + processed_row = [] + for field in row: + new_val = field + if isinstance(field, str) and len(field) > MAX_FIELD_SAMPLE_SIZE: + new_val = field[:MAX_FIELD_SAMPLE_SIZE] + "..." # Truncate and add ellipsis + elif isinstance(field, (bytes, bytearray)) and len(field) > MAX_FIELD_SAMPLE_SIZE: + new_val = field[:MAX_FIELD_SAMPLE_SIZE] + b"..." 
# Truncate binary data + processed_row.append(new_val) + ret.append(processed_row) + return ret except OperationalError as e: return [[str(e)]] @@ -83,7 +111,10 @@ def get_table_names_from_query(sql): def num_tokens_from_string(string: str) -> int: """Returns the number of tokens in a text string.""" import tiktoken - encoding = tiktoken.encoding_for_model(OPENAI_MODEL) + try: + encoding = tiktoken.encoding_for_model(OPENAI_MODEL) + except KeyError: + encoding = tiktoken.get_encoding("cl100k_base") num_tokens = len(encoding.encode(string)) return num_tokens @@ -92,3 +123,35 @@ def fits_in_window(string: str) -> bool: # Ratchet down by 5% to account for other boilerplate and system prompt # TODO make this better by actually looking at the token count of the system prompt return num_tokens_from_string(string) < (app_settings.EXPLORER_ASSISTANT_MODEL["max_tokens"] * 0.95) + + +def build_prompt(request_data, included_tables): + user_prompt = "" + + db_vendor = get_valid_connection(request_data.get("connection")).vendor + user_prompt += f"## Database Vendor / SQL Flavor is {db_vendor}\n\n" + + db_error = request_data.get("db_error") + if db_error: + user_prompt += f"## Query Error ##\n\n{db_error}\n\n" + + sql = request_data.get("sql") + if sql: + user_prompt += f"## Existing SQL ##\n\n{sql}\n\n" + + results_sample = sample_rows_from_tables(request_data["connection"], + included_tables) + if fits_in_window(user_prompt + results_sample): + user_prompt += f"## Table Structure with Sampled Data ##\n\n{results_sample}\n\n" + else: # If it's too large with sampling, then provide *just* the structure + table_struct = tables_from_schema_info(request_data["connection"], + included_tables) + user_prompt += f"## Table Structure ##\n\n{table_struct}\n\n" + + user_prompt += f"## User's Request to Assistant ##\n\n{request_data['assistant_request']}\n\n" + + prompt = { + "system": ExplorerValue.objects.get_item(ExplorerValue.ASSISTANT_SYSTEM_PROMPT).value, + "user": user_prompt + } + 
return prompt diff --git a/explorer/assistant/views.py b/explorer/assistant/views.py index 05598ab6..ff557619 100644 --- a/explorer/assistant/views.py +++ b/explorer/assistant/views.py @@ -1,53 +1,24 @@ from django.http import JsonResponse +from django.views import View from django.utils import timezone -from django.views.decorators.http import require_POST import json from explorer.telemetry import Stat, StatNames -from explorer.utils import get_valid_connection -from explorer.models import ExplorerValue from explorer.assistant.models import PromptLog from explorer.assistant.utils import ( - do_req, extract_response, tables_from_schema_info, - get_table_names_from_query, sample_rows_from_tables, - fits_in_window + do_req, extract_response, + get_table_names_from_query, + build_prompt ) def run_assistant(request_data, user): - user_prompt = "" - - db_vendor = get_valid_connection(request_data.get("connection")).vendor - user_prompt += f"## Database Vendor / SQL Flavor is {db_vendor}\n\n" - - db_error = request_data.get("db_error") - if db_error: - user_prompt += f"## Query Error ##\n\n{db_error}\n\n" - sql = request_data.get("sql") - if sql: - user_prompt += f"## Existing SQL ##\n\n{sql}\n\n" - extra_tables = request_data.get("selected_tables", []) included_tables = get_table_names_from_query(sql) + extra_tables - if included_tables: - table_struct = tables_from_schema_info(request_data["connection"], - included_tables) - user_prompt += f"## Relevant Tables' Structure ##\n\n{table_struct}\n\n" - - if fits_in_window(user_prompt): - results_sample = sample_rows_from_tables(request_data["connection"], - included_tables) - if fits_in_window(user_prompt + results_sample): - user_prompt += f"## Sample Results from Tables ##\n\n{results_sample}\n\n" - user_prompt += f"## User's Request to Assistant ##\n\n{request_data['assistant_request']}\n\n" - - prompt = { - "system": ExplorerValue.objects.get_item(ExplorerValue.ASSISTANT_SYSTEM_PROMPT).value, - "user": user_prompt - } 
+ prompt = build_prompt(request_data, included_tables) start = timezone.now() pl = PromptLog( @@ -75,19 +46,16 @@ def run_assistant(request_data, user): return response_text -@require_POST -def assistant_help(request): - try: - data = json.loads(request.body) - - resp = run_assistant(data, request.user) - - response_data = { - "status": "success", - "message": resp - } - - return JsonResponse(response_data) +class AssistantHelpView(View): - except json.JSONDecodeError: - return JsonResponse({"status": "error", "message": "Invalid JSON"}, status=400) + def post(self, request, *args, **kwargs): + try: + data = json.loads(request.body) + resp = run_assistant(data, request.user) + response_data = { + "status": "success", + "message": resp + } + return JsonResponse(response_data) + except json.JSONDecodeError: + return JsonResponse({"status": "error", "message": "Invalid JSON"}, status=400) diff --git a/explorer/connections.py b/explorer/connections.py index 03427e7a..e1921bf9 100644 --- a/explorer/connections.py +++ b/explorer/connections.py @@ -1,6 +1,40 @@ from django.db import connections as djcs +from django.db import transaction, DEFAULT_DB_ALIAS -from explorer.app_settings import EXPLORER_CONNECTIONS +from explorer.ee.db_connections.utils import create_django_style_connection +from explorer import app_settings +from explorer.models import DatabaseConnection + +# To support user-configured database connections that can be managed through the Explorer UI, *as well* as the +# 'legacy' connections that are configured in Django's normal settings.DATABASES config, we stitch together the two. + +# We allow queries to be associated with either type of connection, seamlessly. + +# The approach is to allow users to create connections with approximately the same parameters that a settings.DATABASE +# would expect. We then stitch them together into one list. 
When Explorer needs to access a connection, it coughs up a +# Django DatabaseWrapper connection in either case (natively, if it's coming from settings.DATABASES, or by taking the +# user-created connection and running it through the create_django_style_connection() function in this file). + +# In general, amazingly, this "just works" and the entire application is perfectly happy to use either type as a +# connection. The exception to this is that there are a few bits of code that ultimately (or directly) use the +# django.db.transaction.atomic context manager. For some reason that particular Django innard takes an *alias*, not a +# proper connection. Then it retrieves the connection based on that alias. But of course if we are providing a +# user-created connection alias, Django doesn't find it (because it is looking in settings.DATABASES). + +# The solution is to monkey-patch the get_connection function that transaction.atomic uses, to make it aware of the +# user-created connections. + + +def new_get_connection(using=None): + if using is None: + using = DEFAULT_DB_ALIAS + if using in djcs: + return djcs[using] + return create_django_style_connection(DatabaseConnection.objects.get(alias=using)) + + +# Monkey patch +transaction.get_connection = new_get_connection # We export valid SQL connections here so that consuming code never has to @@ -10,15 +44,19 @@ # Django insists that connections that are created in a thread are only accessed # by that thread, so here we create a dictionary-like collection of the valid # connections, but does a 'live' lookup of the connection on each item access. 
- - -_connections = {c: c for c in djcs if c in EXPLORER_CONNECTIONS.values()} - - class ExplorerConnections(dict): def __getitem__(self, item): - return djcs[item] + if item in djcs: + return djcs[item] + else: + return create_django_style_connection(DatabaseConnection.objects.get(alias=item)) + + +def connections(): + _connections = [c for c in djcs if c in app_settings.EXPLORER_CONNECTIONS.values()] + db_connections = DatabaseConnection.objects.all() + _connections += [c.alias for c in db_connections] + return ExplorerConnections(zip(_connections, _connections)) -connections = ExplorerConnections(_connections) diff --git a/explorer/ee/LICENSE b/explorer/ee/LICENSE new file mode 100644 index 00000000..4d007618 --- /dev/null +++ b/explorer/ee/LICENSE @@ -0,0 +1,21 @@ +** Additional License for "explorer/ee/" Directory ** + +All content that resides under the "explorer/ee/" directory of this repository is provided under the MIT License, +except that the following additional rights are reserved: + +** "Commons Clause" License Condition v1.0 ** + +The Software may not be used as part of any commercial offering or service, such as hosting, service, or consulting +offerings. For purposes of the foregoing, "commercial offering" means the provision of the Software to third parties +for a fee or other consideration, including without limitation, providing the Software as part of a managed service, +as part of a platform as a service, or providing it to third parties on a software as a service basis. + +This restriction does not apply to non-commercial use by any individual, nor does it prevent such individual from +providing services to third parties using the Software. + +** Exceptions ** + +SQL Explorer, Inc. may grant a Commercial License Agreement to provide exceptions to the "Commons Clause" License +Condition. If you wish to obtain such a license, please contact SQL Explorer, Inc. at support@sqlexplorer.io. 
No +exception is granted implicitly or explicitly without a written and signed Commercial License Agreement from SQL +Explorer, Inc. diff --git a/explorer/ee/__init__.py b/explorer/ee/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/explorer/ee/__init__.py @@ -0,0 +1 @@ + diff --git a/explorer/ee/db_connections/__init__.py b/explorer/ee/db_connections/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/explorer/ee/db_connections/admin.py b/explorer/ee/db_connections/admin.py new file mode 100644 index 00000000..f8b26789 --- /dev/null +++ b/explorer/ee/db_connections/admin.py @@ -0,0 +1,9 @@ +from django.contrib import admin + +from explorer.models import DatabaseConnection + + +@admin.register(DatabaseConnection) +class DatabaseConnectionAdmin(admin.ModelAdmin): + pass + diff --git a/explorer/ee/db_connections/forms.py b/explorer/ee/db_connections/forms.py new file mode 100644 index 00000000..04401efe --- /dev/null +++ b/explorer/ee/db_connections/forms.py @@ -0,0 +1,17 @@ +from django import forms +from explorer.ee.db_connections.models import DatabaseConnection + + +class DatabaseConnectionForm(forms.ModelForm): + class Meta: + model = DatabaseConnection + fields = "__all__" + widgets = { + "alias": forms.TextInput(attrs={"class": "form-control"}), + "engine": forms.Select(attrs={"class": "form-select"}), + "name": forms.TextInput(attrs={"class": "form-control"}), + "user": forms.TextInput(attrs={"class": "form-control"}), + "password": forms.PasswordInput(attrs={"class": "form-control"}), + "host": forms.TextInput(attrs={"class": "form-control"}), + "port": forms.TextInput(attrs={"class": "form-control"}), + } diff --git a/explorer/ee/db_connections/models.py b/explorer/ee/db_connections/models.py new file mode 100644 index 00000000..627a436e --- /dev/null +++ b/explorer/ee/db_connections/models.py @@ -0,0 +1,63 @@ +import os + +from django.conf import settings +from django.core.exceptions import ValidationError 
+from django.db import models +from django.db.models.signals import pre_save +from django.dispatch import receiver + +from django_cryptography.fields import encrypt + + +class DatabaseConnection(models.Model): + + SQLITE = "django.db.backends.sqlite3" + + DATABASE_ENGINES = ( + (SQLITE, "SQLite3"), + ("django.db.backends.postgresql", "PostgreSQL"), + ("django.db.backends.mysql", "MySQL"), + ("django.db.backends.oracle", "Oracle"), + ("django.db.backends.mysql", "MariaDB"), + ("django_cockroachdb", "CockroachDB"), + ("django.db.backends.sqlserver", "SQL Server (mssql-django)"), + ) + + alias = models.CharField(max_length=255, unique=True) + engine = models.CharField(max_length=255, choices=DATABASE_ENGINES) + name = models.CharField(max_length=255) + user = encrypt(models.CharField(max_length=255, blank=True)) + password = encrypt(models.CharField(max_length=255, blank=True)) + host = encrypt(models.CharField(max_length=255, blank=True)) + port = models.CharField(max_length=255, blank=True) + + def __str__(self): + return f"{self.name} ({self.alias})" + + @property + def is_upload(self): + return self.engine == self.SQLITE and self.host + + @property + def local_name(self): + if self.is_upload: + return os.path.normpath(os.path.join(os.getcwd(), f"user_dbs/{self.name}")) + + @classmethod + def from_django_connection(cls, connection_alias): + conn = settings.DATABASES.get(connection_alias) + if conn: + return DatabaseConnection( + alias=connection_alias, + engine=conn.get("ENGINE"), + name=conn.get("NAME"), + user=conn.get("USER"), + password=conn.get("PASSWORD"), + host=conn.get("HOST"), + port=conn.get("PORT"), + ) + +@receiver(pre_save, sender=DatabaseConnection) +def validate_database_connection(sender, instance, **kwargs): + if instance.name in settings.DATABASES.keys(): + raise ValidationError(f"Database name '{instance.name}' already exists.") diff --git a/explorer/ee/db_connections/utils.py b/explorer/ee/db_connections/utils.py new file mode 100644 index 
# TODO deal with uploading the same file / conflicting file again
def upload_sqlite(db_bytes, path):
    """Store ``db_bytes`` in the explorer S3 bucket under key ``path`` (AES256 server-side encryption)."""
    from explorer.utils import get_s3_bucket
    bucket = get_s3_bucket()
    bucket.put_object(Key=path, Body=db_bytes, ServerSideEncryption="AES256")


def create_connection_for_uploaded_sqlite(filename, s3_path):
    """Create and return the DatabaseConnection row for an uploaded SQLite file.

    ``filename`` is used for both alias and name; ``s3_path`` is stored in ``host``.
    """
    from explorer.models import DatabaseConnection
    return DatabaseConnection.objects.create(
        alias=filename,
        engine=DatabaseConnection.SQLITE,
        name=filename,
        host=s3_path
    )


def get_sqlite_for_connection(explorer_connection):
    """Make an uploaded SQLite connection usable locally and return it.

    Downloads the file from S3 to ``explorer_connection.local_name`` unless a file
    already exists at that path, then mutates the passed object in place:
    ``host`` is cleared and ``name`` becomes the full local path.
    """
    from explorer.utils import get_s3_bucket

    # Get the database from s3, then modify the connection to work with the downloaded file.
    # E.g. "host" should not be set, and we need to get the full path to the file
    local_name = explorer_connection.local_name
    if not os.path.exists(local_name):
        s3 = get_s3_bucket()
        s3.download_file(explorer_connection.host, local_name)
    explorer_connection.host = None
    explorer_connection.name = local_name
    return explorer_connection


def create_django_style_connection(explorer_connection):
    """Return a backend ``DatabaseWrapper`` built from a DatabaseConnection.

    Uploaded SQLite connections are first resolved to a local file via
    ``get_sqlite_for_connection``.

    Raises:
        DatabaseError: re-raised with context when the backend raises DatabaseError.
    """
    if explorer_connection.is_upload:
        explorer_connection = get_sqlite_for_connection(explorer_connection)

    connection_settings = {
        "ENGINE": explorer_connection.engine,
        "NAME": explorer_connection.name,
        "USER": explorer_connection.user,
        "PASSWORD": explorer_connection.password,
        "HOST": explorer_connection.host,
        "PORT": explorer_connection.port,
        "TIME_ZONE": None,
        "CONN_MAX_AGE": 0,
        "CONN_HEALTH_CHECKS": False,
        "OPTIONS": {},
        "TEST": {},
        "AUTOCOMMIT": True,
        "ATOMIC_REQUESTS": False,
    }

    try:
        backend = load_backend(explorer_connection.engine)
        return backend.DatabaseWrapper(connection_settings, explorer_connection.alias)
    except DatabaseError as e:
        raise DatabaseError(f"Failed to create explorer connection: {e}") from e


def pandas_to_sqlite(df, local_path="local_database.db"):
    """Serialise ``df`` as a SQLite database (single table ``data``) and return its bytes.

    The database is written to ``local_path`` as a scratch file, read back into a
    ``BytesIO`` positioned at offset 0, and the scratch file is always removed,
    including when writing the DataFrame fails.
    """
    # Write the DataFrame to a local SQLite database
    # In theory, it would be nice to write the dataframe to an in-memory SQLite DB, and then dump the bytes from that
    # but there is no way to get to the underlying bytes from an in-memory SQLite DB
    # NOTE(review): the default scratch path is shared by every caller that does not
    # pass local_path; concurrent calls would presumably overwrite each other's file.
    try:
        con = sqlite3.connect(local_path)
        try:
            df.to_sql(name="data", con=con, if_exists="replace", index=False)
        finally:
            con.close()

        with open(local_path, "rb") as f:
            return io.BytesIO(f.read())
    finally:
        # Ensure we don't litter files around, whichever step failed.
        if os.path.exists(local_path):
            os.remove(local_path)


MAX_TYPING_SAMPLE_SIZE = 10000
SHORTEST_PLAUSIBLE_DATE_STRING = 5


def _parse_number(value):
    """Parse ``value`` as a float, ignoring "," thousands separators.

    This mirrors what ``locale.atof`` does under an en_US numeric locale, without
    touching the process-wide locale or requiring that locale to be installed.
    """
    return float(str(value).replace(",", ""))


def _parse_integer(value):
    """Parse ``value`` as an int, going through float only for non-integer literals.

    Parsing "123" directly keeps integers above 2**53 exact; "3.0" falls back to float.
    """
    text = str(value).replace(",", "")
    try:
        return int(text)
    except ValueError:
        return int(float(text))


def _classify_numeric(values):
    """Return ``(is_integer, is_float)`` for the sampled distinct values of a column."""
    is_integer = True
    is_float = True
    for value in values:
        try:
            float_val = _parse_number(value)
            if float_val != int(float_val):
                is_integer = False
        except OverflowError:
            # int(inf): a valid float that is not an integer.
            is_integer = False
        except ValueError:
            # Not numeric at all (or NaN): the column is neither integer nor float.
            return False, False
    if is_integer:
        is_float = False
    return is_integer, is_float


def _all_parse_as_dates(values):
    """Return True when there is at least one long-enough value and all such values parse as dates."""
    # The dateutil parser is very aggressive and will interpret many short strings as dates.
    # For example "12a" will be interpreted as 12:00 AM on the current date.
    # That is not the behavior anyone wants. The shortest plausible date string is e.g. 1-1-23
    candidates = [v for v in values if len(str(v)) > SHORTEST_PLAUSIBLE_DATE_STRING]
    if not candidates:
        return False
    for value in candidates:
        try:
            parser.parse(str(value))
        except (ValueError, TypeError, OverflowError):
            return False
    return True


def csv_to_typed_df(csv_bytes, delimiter=",", has_headers=True):  # noqa
    """Parse CSV bytes into a DataFrame and coerce each column to an inferred type.

    Per column, the distinct non-null values (sampled down to MAX_TYPING_SAMPLE_SIZE)
    decide the type: integer -> nullable ``Int64``; float -> Python floats; date ->
    UTC datetimes (unparseable cells become NaT); anything else is left as read.

    Returns:
        The typed DataFrame, or, when pandas cannot parse the CSV, the parser error
        message as a ``str``. Callers must check for the string case.
    """
    try:

        csv_file = io.BytesIO(csv_bytes)
        df = pd.read_csv(csv_file, sep=delimiter, header=0 if has_headers else None)

        for column in df.columns:
            values = df[column].dropna().unique()
            if len(values) > MAX_TYPING_SAMPLE_SIZE:
                values = pd.Series(values).sample(MAX_TYPING_SAMPLE_SIZE, random_state=42).to_numpy()

            is_integer, is_float = _classify_numeric(values)
            is_date = not is_integer and not is_float and _all_parse_as_dates(values)

            if is_date:
                df[column] = pd.to_datetime(df[column], errors="coerce", utc=True)
            elif is_integer:
                df[column] = df[column].apply(lambda x: _parse_integer(x) if pd.notna(x) else x)
                # If there are NaN / blank values, the column will be converted to float
                # Convert it back to integer
                df[column] = df[column].astype("Int64")
            elif is_float:
                df[column] = df[column].apply(lambda x: _parse_number(x) if pd.notna(x) else x)
            else:
                inferred_type = pd.api.types.infer_dtype(values)
                if inferred_type == "integer":
                    df[column] = pd.to_numeric(df[column], errors="coerce", downcast="integer")
                elif inferred_type == "floating":
                    df[column] = pd.to_numeric(df[column], errors="coerce")

        return df

    except pd.errors.ParserError as e:
        return str(e)


def is_csv(file):
    """Return True when the uploaded file's declared content type is exactly "text/csv"."""
    return file.content_type == "text/csv"
class UploadDbView(PermissionRequiredMixin, View):
    """Accept an uploaded SQLite DB or CSV, push it to S3 and register a connection for it."""

    permission_required = "connections_permission"

    def post(self, request):
        """Handle the upload; always answers with JSON (400 plus ``error`` on any failure)."""
        try:
            file = request.FILES.get("file")
            if not file:
                # TODO handle this error client side.
                return JsonResponse({"error": "No file provided"}, status=400)

            f_name = file.name
            f_bytes = file.read()

            if is_csv(file):
                df = csv_to_typed_df(f_bytes)
                if isinstance(df, str):
                    # csv_to_typed_df reports parser failures as a message string.
                    return JsonResponse({"error": df}, status=400)
                f_bytes = pandas_to_sqlite(df)
                # Swap only a trailing ".csv" extension; str.replace would also rewrite
                # "csv" wherever it occurs inside the name itself.
                stem, dot, ext = f_name.rpartition(".")
                f_name = f"{stem}.db" if dot and ext.lower() == "csv" else f"{f_name}.db"

            s3_path = f"user_dbs/user_{request.user.id}/{f_name}"
            upload_sqlite(f_bytes, s3_path)
            create_connection_for_uploaded_sqlite(f_name, s3_path)
            return JsonResponse({"success": True})
        except Exception as e:
            return JsonResponse({"error": str(e)}, status=400)


class DatabaseConnectionsListView(PermissionRequiredMixin, ExplorerContextMixin, ListView):
    """List stored connections together with the settings-defined explorer connections."""

    context_object_name = "sqlite_uploads"
    permission_required = "connections_permission"
    template_name = "connections/connections.html"
    model = DatabaseConnection

    def get_queryset(self):
        """Return a plain list: saved rows first, then unsaved mirrors of settings connections."""
        qs = list(DatabaseConnection.objects.all())
        for alias in app_settings.EXPLORER_CONNECTIONS.values():
            django_conn = DatabaseConnection.from_django_connection(alias)
            if django_conn:
                qs.append(django_conn)
        return qs


class DatabaseConnectionDetailView(PermissionRequiredMixin, DetailView):
    """Read-only detail page for one stored connection."""

    permission_required = "connections_permission"
    model = DatabaseConnection
    template_name = "connections/database_connection_detail.html"


class DatabaseConnectionCreateView(PermissionRequiredMixin, ExplorerContextMixin, CreateView):
    """Form page for creating a connection."""

    permission_required = "connections_permission"
    model = DatabaseConnection
    form_class = DatabaseConnectionForm
    template_name = "connections/database_connection_form.html"
    success_url = reverse_lazy("explorer_connections")


class DatabaseConnectionUpdateView(PermissionRequiredMixin, UpdateView):
    """Form page for editing a stored connection."""

    permission_required = "connections_permission"
    model = DatabaseConnection
    form_class = DatabaseConnectionForm
    template_name = "connections/database_connection_form.html"
    success_url = reverse_lazy("explorer_connections")


class DatabaseConnectionDeleteView(PermissionRequiredMixin, DeleteView):
    """Confirm-and-delete page; also removes the uploaded file from S3 for upload connections."""

    permission_required = "connections_permission"
    model = DatabaseConnection
    template_name = "connections/database_connection_confirm_delete.html"
    success_url = reverse_lazy("explorer_connections")

    def _delete_upload_from_s3(self):
        """Remove the backing S3 object when the connection being deleted is an upload."""
        connection = self.get_object()
        if connection.is_upload:
            delete_from_s3(connection.host)

    def delete(self, request, *args, **kwargs):
        """Deletion entry point used by Django versions whose POST handler calls ``delete()``."""
        self._delete_upload_from_s3()
        return super().delete(request, *args, **kwargs)

    def form_valid(self, form):
        """Deletion entry point on Django >= 4.0, where POST goes through ``form_valid()``.

        Without this override the S3 cleanup in ``delete()`` is never reached on POST.
        """
        self._delete_upload_from_s3()
        return super().form_valid(form)


class DatabaseConnectionValidateView(PermissionRequiredMixin, View):
    """Check posted connection settings by opening a connection and running ``SELECT 1``."""

    permission_required = "connections_permission"

    def post(self, request):
        """Return ``{"success": bool}`` as JSON, with ``error`` set on failure."""
        # Bind to the stored row, if any, so validating an existing alias does not trip
        # the unique constraint. A missing alias simply yields an invalid form.
        alias = request.POST.get("alias")
        instance = DatabaseConnection.objects.filter(alias=alias).first() if alias else None
        form = DatabaseConnectionForm(request.POST, instance=instance)

        if not form.is_valid():
            return JsonResponse({"success": False, "error": "Invalid form data"})

        connection_data = form.cleaned_data
        explorer_connection = DatabaseConnection(
            alias=connection_data["alias"],
            engine=connection_data["engine"],
            name=connection_data["name"],
            user=connection_data["user"],
            password=connection_data["password"],
            host=connection_data["host"],
            port=connection_data["port"]
        )
        try:
            conn = create_django_style_connection(explorer_connection)
            try:
                with conn.cursor() as cursor:
                    cursor.execute("SELECT 1")
            finally:
                # The wrapper exists only for this probe; don't leave it open.
                conn.close()
            return JsonResponse({"success": True})
        except Exception as e:
            return JsonResponse({"success": False, "error": str(e)})
# Routes for the user-managed connections feature. The detail, edit and delete views
# are single-object generic views, so their routes must capture the object's pk;
# the literal routes ("upload", "new", "validate") cannot collide with <int:pk>.
ee_urls = [
    path("connections/", DatabaseConnectionsListView.as_view(), name="explorer_connections"),
    path("connections/upload/", UploadDbView.as_view(), name="explorer_upload"),
    path("connections/<int:pk>/", DatabaseConnectionDetailView.as_view(), name="explorer_connection_detail"),
    path("connections/new/", DatabaseConnectionCreateView.as_view(), name="explorer_connection_create"),
    path("connections/<int:pk>/edit/", DatabaseConnectionUpdateView.as_view(), name="explorer_connection_update"),
    path("connections/<int:pk>/delete/", DatabaseConnectionDeleteView.as_view(), name="explorer_connection_delete"),
    path("connections/validate/", DatabaseConnectionValidateView.as_view(), name="explorer_connection_validate"),
]
b/explorer/locale/ru/LC_MESSAGES/django.po @@ -211,7 +211,7 @@ msgstr "Вы уверены в удалении «%(title)s»?" #: explorer/templates/explorer/query_list.html:15 #, python-format -msgid "Your %(qlen)s Most Recently Run" +msgid "Recently Run by You" msgstr "Ваши последние запуски запросов, их %(qlen)s" #: explorer/templates/explorer/query_list.html:23 diff --git a/explorer/migrations/0017_databaseconnection.py b/explorer/migrations/0017_databaseconnection.py new file mode 100644 index 00000000..2d5c1e7c --- /dev/null +++ b/explorer/migrations/0017_databaseconnection.py @@ -0,0 +1,26 @@ +# Generated by Django 5.0.4 on 2024-05-07 18:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('explorer', '0016_alter_explorervalue_key'), + ] + + operations = [ + migrations.CreateModel( + name='DatabaseConnection', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('alias', models.CharField(max_length=255, unique=True)), + ('engine', models.CharField(choices=[('django.db.backends.sqlite3', 'SQLite3'), ('django.db.backends.postgresql_psycopg2', 'PostgreSQL'), ('django.db.backends.mysql', 'MySQL'), ('django.db.backends.oracle', 'Oracle')], max_length=255)), + ('name', models.CharField(max_length=255)), + ('user', models.CharField(blank=True, max_length=255)), + ('password', models.CharField(blank=True, max_length=255)), + ('host', models.CharField(blank=True, max_length=255)), + ('port', models.CharField(blank=True, max_length=255)), + ], + ), + ] diff --git a/explorer/migrations/0018_alter_databaseconnection_host_and_more.py b/explorer/migrations/0018_alter_databaseconnection_host_and_more.py new file mode 100644 index 00000000..329afaa7 --- /dev/null +++ b/explorer/migrations/0018_alter_databaseconnection_host_and_more.py @@ -0,0 +1,29 @@ +# Generated by Django 5.0.4 on 2024-05-14 15:55 + +import django_cryptography.fields +from django.db import migrations, 
models + + +class Migration(migrations.Migration): + + dependencies = [ + ('explorer', '0017_databaseconnection'), + ] + + operations = [ + migrations.AlterField( + model_name='databaseconnection', + name='host', + field=django_cryptography.fields.encrypt(models.CharField(blank=True, max_length=255)), + ), + migrations.AlterField( + model_name='databaseconnection', + name='password', + field=django_cryptography.fields.encrypt(models.CharField(blank=True, max_length=255)), + ), + migrations.AlterField( + model_name='databaseconnection', + name='user', + field=django_cryptography.fields.encrypt(models.CharField(blank=True, max_length=255)), + ), + ] diff --git a/explorer/models.py b/explorer/models.py index f5785747..d2d5cba0 100644 --- a/explorer/models.py +++ b/explorer/models.py @@ -15,6 +15,9 @@ shared_dict_update, swap_params, ) +# Issue #618. All models must be imported so that Django understands how to manage migrations for the app +from explorer.ee.db_connections.models import DatabaseConnection # noqa +from explorer.assistant.models import PromptLog # noqa MSG_FAILED_BLACKLIST = "Query failed the SQL blacklist: %s" @@ -64,6 +67,10 @@ def __str__(self): def get_run_count(self): return self.querylog_set.count() + def last_run_log(self): + ql = self.querylog_set.first() + return ql or QueryLog(success=True, run_at=self.created_at) + def avg_duration_display(self): d = self.avg_duration() if d: @@ -439,7 +446,7 @@ class ExplorerValue(models.Model): EXPLORER_SETTINGS_CHOICES = [ (INSTALL_UUID, "Install Unique ID"), (STARTUP_METRIC_LAST_SEND, "Startup metric last send"), - (ASSISTANT_SYSTEM_PROMPT, "System prompt for SQL Assistant") + (ASSISTANT_SYSTEM_PROMPT, "System prompt for SQL Assistant"), ] key = models.CharField(max_length=5, choices=EXPLORER_SETTINGS_CHOICES, unique=True) diff --git a/explorer/permissions.py b/explorer/permissions.py index c9d7ff38..8704decf 100644 --- a/explorer/permissions.py +++ b/explorer/permissions.py @@ -23,3 +23,7 @@ def 
def connections_permission(request, *args, **kwargs):
    """Return whether ``request`` may manage database connections.

    Delegates to the configured ``EXPLORER_PERMISSION_CONNECTIONS`` callable. Extra
    positional and keyword arguments are accepted and ignored, matching the
    signature of ``change_permission``.
    """
    return app_settings.EXPLORER_PERMISSION_CONNECTIONS(request)
'Deselect All' : 'Select All'; + }); }) .catch(error => { console.error('Error retrieving JSON schema:', error); @@ -50,8 +66,7 @@ function setupTableList() { export function setUpAssistant(expand = false) { - const connEl = document.querySelector('#id_connection'); - connEl.addEventListener('change', setupTableList); + getConnElement().addEventListener('change', setupTableList); setupTableList(); const error = getErrorMessage(); @@ -129,6 +144,13 @@ function submitAssistantAsk() { }); } + // Similarly, if there is no description, copy the prompt into the description + const prompt = document.getElementById("id_assistant_input")?.value; + const description = document.getElementById("id_description"); + if (description?.value === "") { + description.value = prompt; + } + setUpCopyButtons(); }) .catch(error => { diff --git a/explorer/src/js/codemirror-config.js b/explorer/src/js/codemirror-config.js index 157794c9..4343eb9c 100644 --- a/explorer/src/js/codemirror-config.js +++ b/explorer/src/js/codemirror-config.js @@ -50,8 +50,7 @@ function fetchAndShowSchema(view) { if (wordRange) { const tableName = state.doc.sliceString(wordRange.from, wordRange.to); - const conn = document.querySelector('#id_connection').value; - SchemaSvc.get(conn).then(schema => { + SchemaSvc.get().then(schema => { let formattedSchema; if (schema.hasOwnProperty(tableName)) { formattedSchema = JSON.stringify(schema[tableName], null, 2); diff --git a/explorer/src/js/explorer.js b/explorer/src/js/explorer.js index c0a0da01..b76d6207 100644 --- a/explorer/src/js/explorer.js +++ b/explorer/src/js/explorer.js @@ -11,14 +11,11 @@ import { toggleFavorite } from "./favorites"; import {schemaCompletionSource, StandardSQL} from "@codemirror/lang-sql"; import {StateEffect} from "@codemirror/state"; -import {SchemaSvc} from "./schemaService"; +import {getConnElement, SchemaSvc} from "./schemaService"; function updateSchema() { - - const conn = document.querySelector('#id_connection').value; - - 
SchemaSvc.get(conn).then(schema => { + SchemaSvc.get().then(schema => { window.editor.dispatch({ effects: StateEffect.appendConfig.of( StandardSQL.language.data.of({ @@ -28,7 +25,7 @@ function updateSchema() { }); }); - $("#schema_frame").attr("src", `../schema/${conn}`); + $("#schema_frame").attr("src", `../schema/${getConnElement().value}`); } @@ -46,9 +43,24 @@ function editorFromTextArea(textarea) { return view } + +function selectConnection() { + var urlParams = new URLSearchParams(window.location.search); + var connectionId = urlParams.get('connection'); + + if (connectionId) { + var connectionSelect = document.getElementById('id_connection'); + if (connectionSelect) { + connectionSelect.value = connectionId; + } + } +} + export class ExplorerEditor { constructor(queryId) { + selectConnection(); + const aa = document.getElementById('assistant_accordion'); const pa = document.getElementById('nav-preview'); if (aa) { @@ -365,8 +377,7 @@ export class ExplorerEditor { }.bind(this)); // Set up schema autocomplete in the editor. When the connection changes, load new schema. 
- const connEl = document.querySelector('#id_connection'); - connEl.addEventListener('change', updateSchema); + getConnElement().addEventListener('change', updateSchema); updateSchema(); } } diff --git a/explorer/src/js/main.js b/explorer/src/js/main.js index 2ab7188b..419b1b04 100644 --- a/explorer/src/js/main.js +++ b/explorer/src/js/main.js @@ -15,7 +15,9 @@ const route_initializers = { new ExplorerEditor(document.getElementById('queryIdGlobal').value)), query_create: () => import('./explorer').then(({ExplorerEditor}) => new ExplorerEditor('new')), explorer_playground: () => import('./explorer').then(({ExplorerEditor}) => new ExplorerEditor('new')), - explorer_schema: () => import('./schema').then(({setupSchema}) => setupSchema()) + explorer_schema: () => import('./schema').then(({setupSchema}) => setupSchema()), + explorer_connection_create: () => import('./uploads').then(({setupUploads}) => setupUploads()), + explorer_connection_update: () => import('./uploads').then(({setupUploads}) => setupUploads()) }; document.addEventListener('DOMContentLoaded', function() { diff --git a/explorer/src/js/query-list.js b/explorer/src/js/query-list.js index abe8ca5e..cd936441 100644 --- a/explorer/src/js/query-list.js +++ b/explorer/src/js/query-list.js @@ -16,7 +16,7 @@ export function setupQueryList() { }); let options = { - valueNames: ['name'], + valueNames: ['sort-name', 'sort-created', 'sort-created', 'sort-last-run', 'sort-run-count', 'sort-connection'], handlers: {'updated': [searchFocus]} }; new List('queries', options); diff --git a/explorer/src/js/schemaService.js b/explorer/src/js/schemaService.js index f0b075ec..7f5e49bf 100644 --- a/explorer/src/js/schemaService.js +++ b/explorer/src/js/schemaService.js @@ -1,6 +1,8 @@ const schemaCache = {}; -const fetchSchema = async (conn) => { +const fetchSchema = async () => { + + const conn = getConnElement().value; if (schemaCache[conn]) { return schemaCache[conn]; @@ -23,3 +25,7 @@ const fetchSchema = async (conn) => { 
export const SchemaSvc = { get: fetchSchema }; + +export function getConnElement() { + return document.querySelector('#id_connection'); +} diff --git a/explorer/src/js/uploads.js b/explorer/src/js/uploads.js new file mode 100644 index 00000000..c1b9a16e --- /dev/null +++ b/explorer/src/js/uploads.js @@ -0,0 +1,83 @@ +import { getCsrfToken } from "./csrf"; + +export function setupUploads() { + var dropArea = document.getElementById('drop-area'); + var fileElem = document.getElementById('fileElem'); + + if (dropArea) { + dropArea.onclick = function() { + fileElem.click(); + }; + + dropArea.addEventListener('dragover', function(e) { + e.preventDefault(); // Prevent default behavior (Prevent file from being opened) + dropArea.classList.add('bg-info'); // Optional: add a style when dragging over + }); + + dropArea.addEventListener('dragleave', function(e) { + dropArea.classList.remove('bg-info'); // Optional: remove style when not dragging over + }); + + dropArea.addEventListener('drop', function(e) { + e.preventDefault(); + dropArea.classList.remove('bg-info'); // Optional: remove style after dropping + + let files = e.dataTransfer.files; + if (files.length) { + handleFiles(files[0]); // Assuming only one file is dropped + } + }); + + fileElem.onchange = function() { + if (this.files.length) { + handleFiles(this.files[0]); + } + }; + } + + + + function handleFiles(file) { + uploadFile(file); + } + + function uploadFile(file) { + let formData = new FormData(); + formData.append('file', file); + + fetch("../upload/", { + method: 'POST', + headers: { + 'X-CSRFToken': getCsrfToken() + }, + body: formData + }).then(response => response.json()) + .then(() => { + window.location.reload(); + }) + .catch(error => console.error('Error:', error)); + } + + document.getElementById("test-connection-btn").addEventListener("click", function() { + var form = document.getElementById("db-connection-form"); + var formData = new FormData(form); + + fetch("../../validate/", { + method: 
# TODO tests
@shared_task
def remove_unused_sqlite_dbs():
    """Delete local copies of uploaded SQLite DBs that have not been queried recently.

    A local file is removed when the first QueryLog found for the connection's alias
    is older than ``EXPLORER_PRUNE_LOCAL_UPLOAD_COPY_DAYS_INACTIVITY`` days.
    Connections with no QueryLog at all are left alone.
    """
    # Function-scope import keeps this fix self-contained.
    from django.utils import timezone

    days = app_settings.EXPLORER_PRUNE_LOCAL_UPLOAD_COPY_DAYS_INACTIVITY
    # timezone.now() matches the awareness of stored datetimes under either USE_TZ
    # setting; a naive datetime.now() cannot be compared with an aware run_at.
    cutoff = timezone.now() - timedelta(days=days)

    uploaded_dbs = DatabaseConnection.objects.filter(engine=DatabaseConnection.SQLITE,
                                                     host__isnull=False)
    for db in uploaded_dbs:
        local_name = db.local_name
        # host is blank=True, so an empty host passes the isnull filter; such rows are
        # not uploads and have no local path (local_name is None).
        if not local_name or not os.path.exists(local_name):
            continue
        # NOTE(review): relies on QueryLog's default ordering putting the most recent
        # run first; confirm against the model's Meta.
        recent_run = QueryLog.objects.filter(connection=db.alias).first()
        if recent_run and cutoff > recent_run.run_at:
            os.remove(local_name)
import explorer try: @@ -144,7 +146,8 @@ def _gather_summary_stats(): "tasks_enabled": app_settings.ENABLE_TASKS, "unsafe_rendering": app_settings.UNSAFE_RENDERING, "transform_count": len(app_settings.EXPLORER_TRANSFORMS), - "assistant_enabled": app_settings.EXPLORER_AI_API_KEY is not None, + "assistant_enabled": app_settings.has_assistant(), + "user_dbs": DatabaseConnection.objects.count(), "version": explorer.get_version(), "charts_enabled": app_settings.EXPLORER_CHARTS_ENABLED } diff --git a/explorer/templates/connections/connections.html b/explorer/templates/connections/connections.html new file mode 100644 index 00000000..a38153cd --- /dev/null +++ b/explorer/templates/connections/connections.html @@ -0,0 +1,43 @@ +{% extends "explorer/base.html" %} +{% load explorer_tags i18n %} + +{% block sql_explorer_content %} +
+

Connections

+
+ Add New Connection + + + + + + + + + + + {% for connection in object_list %} + + + + + + + {% endfor %} + +
AliasNameEngineActions
+ {% if connection.id %} + {{ connection.alias }} + {% else %} + {{ connection.alias }} + {% endif %} + {{ connection.name }}{{ connection.get_engine_display }} + Query + {% if connection.id %} + + + {% endif %} +
+
+
+{% endblock %} diff --git a/explorer/templates/connections/database_connection_confirm_delete.html b/explorer/templates/connections/database_connection_confirm_delete.html new file mode 100644 index 00000000..bcd221f2 --- /dev/null +++ b/explorer/templates/connections/database_connection_confirm_delete.html @@ -0,0 +1,12 @@ +{% extends 'explorer/base.html' %} +{% block sql_explorer_content %} +
+

Delete Database Connection

+

Are you sure you want to delete "{{ object }}"?

+
+ {% csrf_token %} + + Cancel +
+
+{% endblock %} diff --git a/explorer/templates/connections/database_connection_detail.html b/explorer/templates/connections/database_connection_detail.html new file mode 100644 index 00000000..162904ff --- /dev/null +++ b/explorer/templates/connections/database_connection_detail.html @@ -0,0 +1,39 @@ +{% extends "explorer/base.html" %} +{% block sql_explorer_content %} +
+

Connection Details

+ + + + + + + + + + + + + + {% if not object.is_upload %} + + + + + + + + + + + + + {% endif %} +
Alias{{ object.alias }}
Name{{ object.name }}
Engine{{ object.get_engine_display }}
User{{ object.user }}
Host{{ object.host }}
Port{{ object.port }}
+ {% if not object.is_upload %} + Edit + {% else %} + The source of this connection is an uploaded file. + {% endif %} +
+{% endblock %} diff --git a/explorer/templates/connections/database_connection_form.html b/explorer/templates/connections/database_connection_form.html new file mode 100644 index 00000000..cce67638 --- /dev/null +++ b/explorer/templates/connections/database_connection_form.html @@ -0,0 +1,23 @@ +{% extends 'explorer/base.html' %} +{% block sql_explorer_content %} +
+

{% if object %}Edit{% else %}Create New{% endif %} Connection

+
+ {% csrf_token %} + {{ form.as_p }} + + Cancel + +
+ {% if not object and s3_enabled %} +
+

...or upload a SQLite DB or CSV File

+

CSV files will get parsed and typed automatically. SQLite databases must not be password protected.

+
+

Drag and drop .csv or SQLite .db or click to upload.

+ +
+
+ {% endif %} +
+{% endblock %} diff --git a/explorer/templates/explorer/assistant.html b/explorer/templates/explorer/assistant.html index 077f28f5..dce8ed0e 100644 --- a/explorer/templates/explorer/assistant.html +++ b/explorer/templates/explorer/assistant.html @@ -21,10 +21,12 @@
+ (?) +
diff --git a/explorer/templates/explorer/base.html b/explorer/templates/explorer/base.html index c014767b..0e878608 100644 --- a/explorer/templates/explorer/base.html +++ b/explorer/templates/explorer/base.html @@ -48,16 +48,22 @@

This is easy to fix, I promise!

{% if can_change %} + {% if db_connections_enabled and can_manage_connections %} + + {% endif %} {% endif %}