diff --git a/docs/features.rst b/docs/features.rst
index 30c39b23..53d7e9a5 100644
--- a/docs/features.rst
+++ b/docs/features.rst
@@ -169,7 +169,49 @@ Multiple Connections
   multi-connection setup.
 - SQL Explorer also supports user-provided connections in the form of
   standard database connection details, or uploading CSV, JSON or SQLite
-  files. See the 'User uploads' section of :doc:`settings`.
+  files.
+
+File Uploads
+------------
+
+Upload CSV or JSON files, or SQLite databases, to immediately create connections for querying.
+
+The base name of the file and the ID of the uploading user are used as the database name, to prevent collisions when
+multiple users upload files with the same name. The base name of the file is also used as the table name (e.g. user 1
+uploading customers.csv results in a database file named customers_1.db, with a table named 'customers').
+
+You can also append uploaded files to previously uploaded data sources. For example, if you had a
+'customers.csv' file and an 'orders.csv' file, you could upload customers.csv and create a new data source. You can
+then go back and upload orders.csv with the 'Append' drop-down set to your newly-created customers database, and you
+will have a resulting SQLite database connection with both tables available to be queried together. If you were to
+upload a new 'orders.csv' and append it to customers, the table 'orders' would be *fully replaced* with the new file.
+
+**How it works**
+
+1. Your file is uploaded to the web server. For CSV files, the first row is assumed to be a header.
+2. It is read into a Pandas dataframe; for JSON files, the JSON is 'normalized' during this step (e.g. nested
+   objects are flattened).
+3. At this point, many fields end up as strings that are in fact numeric or datetime values.
+4. A custom parser runs type detection on each column for richer type information.
+5. The dataframe is coerced to these more accurate types.
+6. The dataframe is written to a SQLite file, which is kept on the server and also uploaded to S3.
+7. The SQLite database is added as a new connection to SQL Explorer and is available for querying, just like any
+   other data source.
+8. If the SQLite file is not available locally, it will be pulled on-demand from S3 when needed.
+9. Local SQLite files are periodically cleaned up by a recurring task after (by default) 7 days of inactivity.
+
+Note that if the upload is a SQLite database, steps 2-5 are skipped and the database is simply uploaded to S3 and made
+available for querying.
+
+**File formats**
+
+- Supports well-formed .csv and .json files. Also supports .json files where each line of the file is a separate json
+  object. See /explorer/tests/json/ in the source for examples of what is supported.
+- Supports SQLite files with a .db or .sqlite extension. The validity of the SQLite file is not fully checked until
+  a query is attempted.
+
+**Configuration**
+
+- See the 'User uploads' section of :doc:`settings` for configuration details.
 
 Power tips
 ----------
diff --git a/docs/settings.rst b/docs/settings.rst
index c70a8332..8386cefd 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -383,7 +383,7 @@ User Uploads
 With `EXPLORER_DB_CONNECTIONS_ENABLED` set to `True`, you can also set `EXPLORER_USER_UPLOADS_ENABLED` to allow users
 to upload their own CSV and SQLite files directly to explorer as new connections.
 
-Go to connections->Add New and scroll down to see the upload interface. The uploaded files are limited in size by the
+Go to connections->Upload File. 
The uploaded files are limited in size by the `EXPLORER_MAX_UPLOAD_SIZE` setting which is set to 500mb by default (500 * 1024 * 1024). SQLite files (in either .db or -.sqlite) will simple appear as connections. CSV files get run through a parser that infers the type of each field. +.sqlite) will simply appear as connections. CSV files get run through a parser that infers the type of each field. diff --git a/explorer/charts.py b/explorer/charts.py index 6d3c9a3c..b8780433 100644 --- a/explorer/charts.py +++ b/explorer/charts.py @@ -34,7 +34,7 @@ def get_chart(result: QueryResult, chart_type: str) -> Optional[str]: bar_positions = [] for idx, col_num in enumerate(numeric_columns): if chart_type == "bar": - values = [row[col_num] for row in result.data] + values = [row[col_num] if row[col_num] is not None else 0 for row in result.data] bar_container = ax.bar([x + idx * BAR_WIDTH for x in range(len(labels))], values, BAR_WIDTH, label=result.headers[col_num]) bars.append(bar_container) diff --git a/explorer/ee/db_connections/create_sqlite.py b/explorer/ee/db_connections/create_sqlite.py index b7e111ac..e65e55f8 100644 --- a/explorer/ee/db_connections/create_sqlite.py +++ b/explorer/ee/db_connections/create_sqlite.py @@ -1,24 +1,43 @@ import os from io import BytesIO +from explorer.utils import secure_filename from explorer.ee.db_connections.type_infer import get_parser -from explorer.ee.db_connections.utils import pandas_to_sqlite +from explorer.ee.db_connections.utils import pandas_to_sqlite, uploaded_db_local_path -def parse_to_sqlite(file) -> (BytesIO, str): - f_name = file.name - f_bytes = file.read() +def get_names(file, append_conn=None, user_id=None): + s_filename = secure_filename(file.name) + table_name, _ = os.path.splitext(s_filename) + + # f_name represents the filename of both the sqlite DB on S3, and on the local filesystem. + # If we are appending to an existing data source, then we re-use the same name. + # New connections get a new database name. 
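+    # Illustrative example (hypothetical user_id of 7): uploading "Monthly Sales.csv" yields
+    # table_name "Monthly_Sales" and f_name "Monthly_Sales_7.db". Appending that same file to an
+    # existing connection named "customers_1.db" would instead re-use f_name "customers_1.db".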
+ if append_conn: + f_name = os.path.basename(append_conn.name) + else: + f_name = f"{table_name}_{user_id}.db" + + return table_name, f_name + + +def parse_to_sqlite(file, append_conn=None, user_id=None) -> (BytesIO, str): + + table_name, f_name = get_names(file, append_conn, user_id) + + # When appending, make sure the database exists locally so that we can write to it + if append_conn: + append_conn.download_sqlite_if_needed() + df_parser = get_parser(file) if df_parser: - df = df_parser(f_bytes) try: - f_bytes = pandas_to_sqlite(df, local_path=f"{f_name}_tmp_local.db") + df = df_parser(file.read()) + local_path = uploaded_db_local_path(f_name) + f_bytes = pandas_to_sqlite(df, table_name, local_path) except Exception as e: # noqa raise ValueError(f"Error while parsing {f_name}: {e}") from e - # replace the previous extension with .db, as it is now a sqlite file - name, _ = os.path.splitext(f_name) - f_name = f"{name}.db" else: - return BytesIO(f_bytes), f_name # if it's a SQLite file already, simply cough it up as a BytesIO object + # If it's a SQLite file already, simply cough it up as a BytesIO object + return BytesIO(file.read()), f_name return f_bytes, f_name - diff --git a/explorer/ee/db_connections/mime.py b/explorer/ee/db_connections/mime.py index 148d3e67..dff29939 100644 --- a/explorer/ee/db_connections/mime.py +++ b/explorer/ee/db_connections/mime.py @@ -42,7 +42,7 @@ def is_json_list(file): def is_sqlite(file): - if file.content_type != "application/x-sqlite3": + if file.content_type not in ["application/x-sqlite3", "application/octet-stream"]: return False try: # Check if the file starts with the SQLite file header diff --git a/explorer/ee/db_connections/models.py b/explorer/ee/db_connections/models.py index 2b0586dd..55e4ccaf 100644 --- a/explorer/ee/db_connections/models.py +++ b/explorer/ee/db_connections/models.py @@ -1,11 +1,10 @@ import os - from django.conf import settings from django.core.exceptions import ValidationError from django.db import models from django.db.models.signals import pre_save from django.dispatch import receiver -from explorer.ee.db_connections.utils import user_dbs_local_dir +from explorer.ee.db_connections.utils import uploaded_db_local_path, quick_hash from django_cryptography.fields import encrypt @@ -33,10 +32,32 @@ class DatabaseConnection(models.Model): host = encrypt(models.CharField(max_length=255, blank=True)) port = models.CharField(max_length=255, blank=True) extras = models.JSONField(blank=True, null=True) + upload_fingerprint = models.CharField(max_length=255, blank=True, null=True) def __str__(self): return f"{self.name} ({self.alias})" + def update_fingerprint(self): + self.upload_fingerprint = self.local_fingerprint() + self.save() + + def local_fingerprint(self): + if os.path.exists(self.local_name): + return quick_hash(self.local_name) + + def _download_sqlite(self): + from explorer.utils import get_s3_bucket + s3 = get_s3_bucket() + s3.download_file(self.host, self.local_name) + + def download_sqlite_if_needed(self): + download = not os.path.exists(self.local_name) or self.local_fingerprint() != self.upload_fingerprint + + if download: + self._download_sqlite() + self.update_fingerprint() + + @property def is_upload(self): return self.engine == self.SQLITE and self.host @@ -44,7 +65,11 @@ def is_upload(self): @property def local_name(self): if self.is_upload: - return os.path.join(user_dbs_local_dir(), self.name) + return uploaded_db_local_path(self.name) + + def delete_local_sqlite(self): + if self.is_upload and 
os.path.exists(self.local_name):
+            os.remove(self.local_name)
 
     @classmethod
     def from_django_connection(cls, connection_alias):
diff --git a/explorer/ee/db_connections/utils.py b/explorer/ee/db_connections/utils.py
index 8ce18c3b..7acae8b1 100644
--- a/explorer/ee/db_connections/utils.py
+++ b/explorer/ee/db_connections/utils.py
@@ -2,7 +2,7 @@
 from django.db.utils import load_backend
 import os
 import json
-
+import hashlib
 import sqlite3
 import io
 
@@ -21,29 +21,23 @@ def upload_sqlite(db_bytes, path):
 # to this new database connection. Oops!
 # TODO: In the future, queries should probably be FK'ed to the ID of the connection, rather than simply
 # storing the alias of the connection as a string.
-def create_connection_for_uploaded_sqlite(filename, user_id, s3_path):
+def create_connection_for_uploaded_sqlite(filename, s3_path):
     from explorer.models import DatabaseConnection
-    base, ext = os.path.splitext(filename)
-    filename = f"{base}_{user_id}{ext}"
     return DatabaseConnection.objects.create(
-        alias=f"{filename}",
+        alias=filename,
         engine=DatabaseConnection.SQLITE,
         name=filename,
-        host=s3_path
+        host=s3_path,
     )
 
 
 def get_sqlite_for_connection(explorer_connection):
-    from explorer.utils import get_s3_bucket
-
     # Get the database from s3, then modify the connection to work with the downloaded file.
     # E.g. "host" should not be set, and we need to get the full path to the file
-    local_name = explorer_connection.local_name
-    if not os.path.exists(local_name):
-        s3 = get_s3_bucket()
-        s3.download_file(explorer_connection.host, local_name)
+    explorer_connection.download_sqlite_if_needed()
+
+    # Note the order here is important: .local_name checks "is_upload", which relies on .host still being set
+    explorer_connection.name = explorer_connection.local_name
     explorer_connection.host = None
-    explorer_connection.name = local_name
     return explorer_connection
 
 
@@ -54,6 +48,10 @@ def user_dbs_local_dir():
     return d
 
 
+def uploaded_db_local_path(name):
+    return os.path.join(user_dbs_local_dir(), name)
+
+
 def create_django_style_connection(explorer_connection):
 
     if explorer_connection.is_upload:
@@ -87,24 +85,45 @@ def create_django_style_connection(explorer_connection):
         raise DatabaseError(f"Failed to create explorer connection: {e}") from e
 
 
-def pandas_to_sqlite(df, local_path="local_database.db"):
-    # Write the DataFrame to a local SQLite database
-    # In theory, it would be nice to write the dataframe to an in-memory SQLite DB, and then dump the bytes from that
-    # but there is no way to get to the underlying bytes from an in-memory SQLite DB
-    con = sqlite3.connect(local_path)
-    try:
-        df.to_sql(name="data", con=con, if_exists="replace", index=False)
-    finally:
-        con.close()
+def sqlite_to_bytesio(local_path):
+    # Read the SQLite file on disk into a BytesIO buffer. The buffer gets uploaded to S3, and the file
+    # is left on the local filesystem for querying.
+    db_file = io.BytesIO()
+    with open(local_path, "rb") as f:
+        db_file.write(f.read())
+    db_file.seek(0)
+    return db_file
+
+
+def pandas_to_sqlite(df, table_name, local_path):
+    # Write the DataFrame to a local SQLite database and return it as a BytesIO object.
+    # This intentionally leaves the sqlite db on the local disk so that it is ready to go for
+    # querying immediately after the connection has been created. Removing it would also be OK, since
+    # the system knows to re-download it if it's not available, but this saves an extra download from S3.
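+    # Note that df.to_sql below uses if_exists="replace": re-uploading a file whose base name matches
+    # an existing table fully replaces that table, which is the append behavior described in features.rst.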
+    conn = sqlite3.connect(local_path)
 
-    # Read the local SQLite database file into a BytesIO buffer
     try:
-        db_file = io.BytesIO()
-        with open(local_path, "rb") as f:
-            db_file.write(f.read())
-        db_file.seek(0)
-        return db_file
+        df.to_sql(table_name, conn, if_exists="replace", index=False)
     finally:
-        # Delete the local SQLite database file
-        # Finally block to ensure we don't litter files around
-        os.remove(local_path)
+        conn.commit()
+        conn.close()
+
+    return sqlite_to_bytesio(local_path)
+
+
+def quick_hash(file_path, num_samples=10, sample_size=1024):
+    hasher = hashlib.sha256()
+    file_size = os.path.getsize(file_path)
+
+    if file_size == 0:
+        return hasher.hexdigest()
+
+    sample_interval = file_size // num_samples
+    with open(file_path, "rb") as f:
+        for i in range(num_samples):
+            f.seek(i * sample_interval)
+            sample_data = f.read(sample_size)
+            if not sample_data:
+                break
+            hasher.update(sample_data)
+
+    return hasher.hexdigest()
diff --git a/explorer/ee/db_connections/views.py b/explorer/ee/db_connections/views.py
index 22211dac..ed1e5ed5 100644
--- a/explorer/ee/db_connections/views.py
+++ b/explorer/ee/db_connections/views.py
@@ -1,7 +1,7 @@
 import logging
-from django.views.generic import ListView, DetailView, CreateView, UpdateView, DeleteView
+from django.views.generic import ListView, DetailView, CreateView, UpdateView, DeleteView, TemplateView
 from django.views import View
-from django.http import JsonResponse
+from django.http import JsonResponse, HttpResponse
 from django.urls import reverse_lazy
 from django.db.utils import OperationalError
 from explorer.models import DatabaseConnection
@@ -11,12 +11,14 @@
 )
 from explorer.ee.db_connections.create_sqlite import parse_to_sqlite
 from explorer import app_settings
+from explorer.schema import clear_schema_cache
 from explorer.app_settings import EXPLORER_MAX_UPLOAD_SIZE
 from explorer.ee.db_connections.forms import DatabaseConnectionForm
 from explorer.utils import delete_from_s3
 from explorer.views.auth import PermissionRequiredMixin
 from explorer.views.mixins import ExplorerContextMixin
 from explorer.ee.db_connections.utils import create_django_style_connection
+from explorer.ee.db_connections.mime import is_sqlite
 
 logger = logging.getLogger(__name__)
 
@@ -26,15 +28,29 @@ class UploadDbView(PermissionRequiredMixin, View):
 
     permission_required = "connections_permission"
 
-    def post(self, request):
+    def post(self, request):  # noqa
         file = request.FILES.get("file")
         if file:
+
+            # 'append' should be None, or the ID of the DatabaseConnection to append this table to.
+            # The S3 path of the previously uploaded connection is stored in DatabaseConnection.host.
+            append = request.POST.get("append")
+            append_path = None
+            conn = None
+            if append:
+                conn = DatabaseConnection.objects.get(id=append)
+                append_path = conn.host
+
             if file.size > EXPLORER_MAX_UPLOAD_SIZE:
                 friendly = EXPLORER_MAX_UPLOAD_SIZE / (1024 * 1024)
                 return JsonResponse({"error": f"File size exceeds the limit of {friendly} MB"}, status=400)
 
+            # You can't triple stamp a double stamp!
+            if append_path and is_sqlite(file):
+                return JsonResponse({"error": "Can't append a SQLite file to a SQLite file. Only CSV and JSON."},
+                                    status=400)
+
             try:
-                f_bytes, f_name = parse_to_sqlite(file)
+                f_bytes, f_name = parse_to_sqlite(file, conn, request.user.id)
             except ValueError as e:
                 logger.error(f"Error getting bytes for {file.name}: {e}")
                 return JsonResponse({"error": "File was not csv, json, or sqlite."}, status=400)
@@ -42,14 +58,23 @@ def post(self, request):
                 logger.error(f"Error parse {file.name}: {e}")
                 return JsonResponse({"error": "Error parsing file."}, status=400)
 
-            try:
+            if append_path:
+                s3_path = append_path
+            else:
                 s3_path = f"user_dbs/user_{request.user.id}/{f_name}"
+
+            try:
                 upload_sqlite(f_bytes, s3_path)
             except Exception as e:  # noqa
                 logger.exception(f"Exception while uploading file {f_name}: {e}")
                 return JsonResponse({"error": "Error while uploading file to S3."}, status=400)
 
-            create_connection_for_uploaded_sqlite(f_name, request.user.id, s3_path)
+            # If we're not appending, then we need to create a new DatabaseConnection
+            if not append_path:
+                conn = create_connection_for_uploaded_sqlite(f_name, s3_path)
+
+            clear_schema_cache(conn.alias)
+            conn.update_fingerprint()
             return JsonResponse({"success": True})
         else:
             return JsonResponse({"error": "No file provided"}, status=400)
@@ -85,6 +110,16 @@ class DatabaseConnectionCreateView(PermissionRequiredMixin, ExplorerContextMixin
     success_url = reverse_lazy("explorer_connections")
 
 
+class DatabaseConnectionUploadCreateView(PermissionRequiredMixin, TemplateView):
+    permission_required = "connections_permission"
+    template_name = "connections/connection_upload.html"
+
+    def get_context_data(self, **kwargs):
+        context = super().get_context_data(**kwargs)
+        context["valid_connections"] = DatabaseConnection.objects.filter(engine=DatabaseConnection.SQLITE,
+                                                                         host__isnull=False)
+        return context
+
+
 class DatabaseConnectionUpdateView(PermissionRequiredMixin, ExplorerContextMixin, UpdateView):
     permission_required = "connections_permission"
     model = DatabaseConnection
@@ -106,6 +141,22 @@ def delete(self, request, *args, **kwargs):
         return super().delete(request, *args, **kwargs)
 
 
+class DatabaseConnectionRefreshView(PermissionRequiredMixin, View):
+
+    permission_required = "connections_permission"
+    success_url = reverse_lazy("explorer_connections")
+
+    def get(self, request, pk):  # noqa
+        conn = DatabaseConnection.objects.get(id=pk)
+        conn.delete_local_sqlite()
+        clear_schema_cache(conn.alias)
+        message = f"Deleted schema cache for {conn.alias}. Schema will be regenerated on next use."
+        if conn.is_upload:
+            message += "\nRemoved local SQLite DB. Will be re-downloaded from S3 on next use."
+        message += "\nPlease hit back to return to the application."
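+        # Returned as plain text rather than a redirect, so the user sees exactly what was cleared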
+        return HttpResponse(content_type="text/plain", content=message)
+
+
 class DatabaseConnectionValidateView(PermissionRequiredMixin, View):
 
     permission_required = "connections_permission"
diff --git a/explorer/ee/urls.py b/explorer/ee/urls.py
index ad4a8725..0359d628 100644
--- a/explorer/ee/urls.py
+++ b/explorer/ee/urls.py
@@ -7,7 +7,9 @@
     DatabaseConnectionDetailView,
     DatabaseConnectionUpdateView,
     DatabaseConnectionDeleteView,
-    DatabaseConnectionValidateView
+    DatabaseConnectionValidateView,
+    DatabaseConnectionUploadCreateView,
+    DatabaseConnectionRefreshView
 )
 
 ee_urls = [
@@ -15,6 +17,7 @@
     path("connections/upload/", UploadDbView.as_view(), name="explorer_upload"),
     path("connections/<int:pk>/", DatabaseConnectionDetailView.as_view(), name="explorer_connection_detail"),
     path("connections/new/", DatabaseConnectionCreateView.as_view(), name="explorer_connection_create"),
+    path("connections/create_upload/", DatabaseConnectionUploadCreateView.as_view(), name="explorer_upload_create"),
     path("connections/<int:pk>/edit/", DatabaseConnectionUpdateView.as_view(), name="explorer_connection_update"),
     path("connections/<int:pk>/delete/", DatabaseConnectionDeleteView.as_view(), name="explorer_connection_delete"),
     # There are two URLs here because the form can call validate from /connections/new/ or from /connections/<int:pk>/edit/
     path("connections/validate/", DatabaseConnectionValidateView.as_view(), name="explorer_connection_validate"),
     path("connections/<int:pk>/validate/", DatabaseConnectionValidateView.as_view(),
          name="explorer_connection_validate_with_pk"),
+    path("connections/<int:pk>/refresh/", DatabaseConnectionRefreshView.as_view(),
+         name="explorer_connection_refresh")
 ]
diff --git a/explorer/migrations/0022_databaseconnection_upload_fingerprint.py b/explorer/migrations/0022_databaseconnection_upload_fingerprint.py
new file mode 100644
index 00000000..7167424c
--- /dev/null
+++ b/explorer/migrations/0022_databaseconnection_upload_fingerprint.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.4 on 2024-07-24 20:08
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('explorer', '0021_alter_databaseconnection_password_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='databaseconnection',
+            name='upload_fingerprint',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
diff --git a/explorer/schema.py b/explorer/schema.py
index db2e7a6c..6609cc81 100644
--- a/explorer/schema.py
+++ b/explorer/schema.py
@@ -73,6 +73,14 @@ def schema_info(connection_alias):
     return build_schema_cache_async(connection_alias)
 
 
+def clear_schema_cache(connection_alias):
+    key = connection_schema_cache_key(connection_alias)
+    cache.delete(key)
+
+    key = connection_schema_json_cache_key(connection_alias)
+    cache.delete(key)
+
+
 def build_schema_info(connection_alias):
     """
     Construct schema information via engine-specific queries of the
diff --git a/explorer/src/js/main.js b/explorer/src/js/main.js
index 419b1b04..32c79947 100644
--- a/explorer/src/js/main.js
+++ b/explorer/src/js/main.js
@@ -16,7 +16,7 @@ const route_initializers = {
     query_create: () => import('./explorer').then(({ExplorerEditor}) => new ExplorerEditor('new')),
    explorer_playground: () => import('./explorer').then(({ExplorerEditor}) => new ExplorerEditor('new')),
    explorer_schema: () => import('./schema').then(({setupSchema}) => setupSchema()),
-    explorer_connection_create: () => import('./uploads').then(({setupUploads}) => setupUploads()),
+    explorer_upload_create: () => 
import('./uploads').then(({setupUploads}) => setupUploads()), explorer_connection_update: () => import('./uploads').then(({setupUploads}) => setupUploads()) }; diff --git a/explorer/src/js/uploads.js b/explorer/src/js/uploads.js index a89f40e8..6b653a04 100644 --- a/explorer/src/js/uploads.js +++ b/explorer/src/js/uploads.js @@ -45,6 +45,12 @@ export function setupUploads() { let formData = new FormData(); formData.append('file', file); + let appendElem = document.getElementById('append'); + let appendValue = appendElem.value; + if (appendValue) { + formData.append('append', appendValue); + } + let xhr = new XMLHttpRequest(); xhr.open('POST', '../upload/', true); xhr.setRequestHeader('X-CSRFToken', getCsrfToken()); @@ -63,9 +69,8 @@ export function setupUploads() { xhr.onload = function() { if (xhr.status === 200) { - let fileName = file.name; - let fileNameWithoutExt = fileName.substring(0, fileName.lastIndexOf('.')) || fileName; - window.location.href = `../?highlight=${encodeURIComponent(fileNameWithoutExt)}`; + let highlightValue = appendValue ? appendElem.options[appendElem.selectedIndex].text : file.name.substring(0, file.name.lastIndexOf('.')) || file.name; + window.location.href = `../?highlight=${encodeURIComponent(highlightValue)}`; } else { console.error('Error:', xhr.response); uploadStatus.innerHTML = xhr.response; @@ -80,25 +85,28 @@ export function setupUploads() { xhr.send(formData); } - document.getElementById("test-connection-btn").addEventListener("click", function() { - var form = document.getElementById("db-connection-form"); - var formData = new FormData(form); - - fetch("../validate/", { - method: "POST", - body: formData, - headers: { - "X-CSRFToken": getCsrfToken() - } - }) - .then(response => response.json()) - .then(data => { - if (data.success) { - alert("Connection successful!"); - } else { - alert("Connection failed: " + data.error); - } - }) - .catch(error => console.error("Error:", error)); - }); + let testConnBtn = document.getElementById("test-connection-btn"); + if (testConnBtn) { + testConnBtn.addEventListener("click", function() { + let form = document.getElementById("db-connection-form"); + let formData = new FormData(form); + + fetch("../validate/", { + method: "POST", + body: formData, + headers: { + "X-CSRFToken": getCsrfToken() + } + }) + .then(response => response.json()) + .then(data => { + if (data.success) { + alert("Connection successful!"); + } else { + alert("Connection failed: " + data.error); + } + }) + .catch(error => console.error("Error:", error)); + }); + } } diff --git a/explorer/tasks.py b/explorer/tasks.py index 84c3c004..0d4292c2 100644 --- a/explorer/tasks.py +++ b/explorer/tasks.py @@ -18,7 +18,7 @@ from celery import shared_task from celery.utils.log import get_task_logger - from explorer.utils import s3_upload + from explorer.utils import s3_csv_upload logger = get_task_logger(__name__) else: @@ -42,7 +42,7 @@ def execute_query(query_id, email_address): exporter = get_exporter_class("csv")(q) random_part = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) try: - url = s3_upload(f"{random_part}.csv", convert_csv_to_bytesio(exporter)) + url = s3_csv_upload(f"{random_part}.csv", convert_csv_to_bytesio(exporter)) subj = f'[SQL Explorer] Report "{q.title}" is ready' msg = f"Download results:\n\r{url}" except Exception as e: @@ -69,7 +69,7 @@ def snapshot_query(query_id): exporter = get_exporter_class("csv")(q) k = "query-{}/snap-{}.csv".format(q.id, date.today().strftime("%Y%m%d-%H:%M:%S")) 
logger.info(f"Uploading snapshot for query {query_id} as {k}...") - url = s3_upload(k, convert_csv_to_bytesio(exporter)) + url = s3_csv_upload(k, convert_csv_to_bytesio(exporter)) logger.info(f"Done uploading snapshot for query {query_id}. URL: {url}") except Exception as e: logger.warning(f"Failed to snapshot query {query_id} ({e}). Retrying...") diff --git a/explorer/templates/connections/connection_upload.html b/explorer/templates/connections/connection_upload.html new file mode 100644 index 00000000..4f79d1ff --- /dev/null +++ b/explorer/templates/connections/connection_upload.html @@ -0,0 +1,32 @@ +{% extends 'explorer/base.html' %} +{% block sql_explorer_content %} +
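+<!-- This page posts the selected file to ../upload/ (UploadDbView) via setupUploads() in uploads.js -->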
+<div class="container">
+  <h2>Upload a file</h2>
+  <p>
+    Supports .csv, .json, .db, and .sqlite files. JSON files with one JSON document per line are also supported.
+    CSV/JSON data will be parsed and converted to SQLite. SQLite databases must not be password protected.
+  </p>
+  <div class="mb-3">
+    <label for="append" class="form-label">Append to existing upload (optional)</label>
+    <select id="append" class="form-select">
+      <option value="" selected></option>
+      {% for connection in valid_connections %}
+        <option value="{{ connection.id }}">{{ connection.alias }}</option>
+      {% endfor %}
+    </select>
+    <div class="form-text">
+      When appending, if a table with the filename already exists, it will be replaced with the uploaded data.
+    </div>
+  </div>
+  <div id="drop-area">
+    <p>Drag and drop, or click to upload .csv, .json, .db, .sqlite.</p>
+    <input type="file" id="fileElem" accept=".csv,.json,.db,.sqlite" hidden>
+    <div class="progress">
+      <div class="progress-bar" role="progressbar" style="width: 0%">0%</div>
+    </div>
+  </div>
+</div>
+{% endblock %}
diff --git a/explorer/templates/connections/connections.html b/explorer/templates/connections/connections.html
index d9c196c6..f0e350cb 100644
--- a/explorer/templates/connections/connections.html
+++ b/explorer/templates/connections/connections.html
@@ -6,6 +6,7 @@

 <div class="d-flex justify-content-between">
   <h2>Connections</h2>
   <div>
     <a href="{% url 'explorer_connection_create' %}" class="btn btn-primary">Add New Connection</a>
+    <a href="{% url 'explorer_upload_create' %}" class="btn btn-primary">Upload File</a>
   </div>
 </div>
@@ -32,6 +33,7 @@
     <h5>Connections</h5>
     {% if connection.id %}
+      <a href="{% url 'explorer_connection_refresh' connection.id %}" title="Clear cached schema and local SQLite copy">Refresh</a>
     {% endif %}
diff --git a/explorer/templates/connections/database_connection_form.html b/explorer/templates/connections/database_connection_form.html
index d4644385..92e54a68 100644
--- a/explorer/templates/connections/database_connection_form.html
+++ b/explorer/templates/connections/database_connection_form.html
@@ -2,27 +2,9 @@
 {% block sql_explorer_content %}

 <h2>{% if object %}Edit{% else %}Create New{% endif %} Connection</h2>
 
-{% if not object %}
-  <p>SQL Explorer supports two connection methods; uploading a file, or configuring a connection to an
-    existing database.</p>
-{% endif %}
 {% if object.is_upload %}
   <p>The source of this connection is an uploaded file. In all likelihood you should not be editing it.</p>
 {% endif %}
-{% if not object and user_uploads_enabled %}
-  <div>
-    <h3>Upload a file</h3>
-    <p>Supports .csv, .json, .db, and .sqlite files. JSON files with one JSON document per line are also
-      supported. CSV/JSON data will be parsed and converted to SQLite. SQLite databases must not be
-      password protected.</p>
-    <div>
-      <p>Drag and drop, or click to upload .csv, .json, .db, .sqlite.</p>
-      <input type="file" hidden>
-      <div class="progress">
-        <div class="progress-bar">0%</div>
-      </div>
-    </div>
-  </div>
-{% endif %}
 <h3>Configure a connection to an existing database.</h3>
 <form id="db-connection-form" method="post">
   {% csrf_token %}
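+  <!-- The "Test Connection" button (id="test-connection-btn") posts this form to ../validate/; see uploads.js -->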
diff --git a/explorer/templates/explorer/schema.html b/explorer/templates/explorer/schema.html index 904fe841..c3f16f9f 100644 --- a/explorer/templates/explorer/schema.html +++ b/explorer/templates/explorer/schema.html @@ -6,7 +6,6 @@

{% translate "Schema" %}

- {% if m|length > 1 %}

@@ -20,7 +19,6 @@

{% translate "Schema" %}

- {% endif %}
    {% for m in schema %} diff --git a/explorer/tests/csvs/rc_sample.csv b/explorer/tests/csvs/rc_sample.csv new file mode 100644 index 00000000..cfc4350d --- /dev/null +++ b/explorer/tests/csvs/rc_sample.csv @@ -0,0 +1,50 @@ +name,material_type,seating_type,speed,height,length,num_inversions,manufacturer,park,status +Goudurix,Steel,Sit Down,75.0,37.0,950.0,7.0,Vekoma,Parc Asterix,status.operating +Dream catcher,Steel,Suspended,45.0,25.0,600.0,0.0,Vekoma,Bobbejaanland,status.operating +Alucinakis,Steel,Sit Down,30.0,8.0,250.0,0.0,Zamperla,Terra Mítica,status.operating +Anaconda,Wooden,Sit Down,85.0,35.0,1200.0,0.0,William J. Cobb,Walygator Parc,status.operating +Azteka,Steel,Sit Down,55.0,17.0,500.0,0.0,Soquet,Le Pal,status.operating +Bat Coaster,Steel,Inverted,70.0,20.0,400.0,2.0,Pinfari,Nigloland,status.relocated +Batman : Arkham Asylum,Steel,Inverted,80.0,32.0,823.0,5.0,B&M,Parque Warner Madrid,status.operating +Big Thunder Mountain,Steel,Sit Down,60.0,22.0,1500.0,0.0,Vekoma,Disneyland Park,status.operating +EqWalizer,Steel,Sit Down,76.0,36.0,285.0,3.0,Vekoma,Walibi Rhône Alpes,status.operating +Calamity Mine,Steel,Sit Down,48.0,14.0,785.0,0.0,Vekoma,Walibi Belgium,status.operating +"Casey Jr, le Petit Train du Cirque",Steel,Sit Down,30.0,,,0.0,Vekoma,Disneyland Park,status.operating +Cobra,Steel,Sit Down,76.0,36.0,285.0,3.0,Vekoma,Walibi Belgium,status.operating +Coccinelle,Steel,Sit Down,36.0,8.0,360.0,0.0,Zierer,Walibi Rhône Alpes,status.operating +Coleoz'Arbres,Steel,Sit Down,60.0,,540.0,0.0,Schwarzkopf,Bagatelle,status.closed.definitely +Comet,Steel,Sit Down,64.0,24.0,,3.0,Vekoma,Walygator Parc,status.operating +Course de Bobsleigh,Steel,Sit Down,65.0,15.0,450.0,0.0,Schwarzkopf,Nigloland,status.relocated +Cumbres,Steel,Sit Down,,3.0,,0.0,Miler Coaster,Parque de Atracciones de Madrid,status.closed.definitely +Le Dragon de Bei Hai,Steel,Sit Down,,,,0.0,Cavazza Diego,La Mer de Sable,status.closed.definitely +Euro Mir,Steel,Spinning,80.0,28.0,980.0,0.0,Mack,Europa Park,status.operating +Eurosat,Steel,Sit Down,60.0,26.0,877.0,0.0,Mack,Europa Park,status.retracked +Expedition Ge Force,Steel,Sit Down,120.0,53.0,1220.0,0.0,Intamin,Holiday Park,status.operating +Le Grand canyon,Steel,Sit Down,50.0,12.0,380.0,0.0,Soquet,Fraispertuis City,status.operating +Indiana Jones et le Temple du Péril,Steel,Sit Down,58.0,18.0,566.0,1.0,Intamin,Disneyland Park,status.operating +Jaguar,Steel,Inverted,83.0,34.0,689.0,5.0,Vekoma,Isla Magica,status.operating +Cop Car Chase (1),Steel,Sit Down,60.0,16.0,620.0,2.0,Intamin,Movie Park Germany,status.closed.definitely +Loup Garou,Wooden,Sit Down,80.0,28.0,1035.0,0.0,Vekoma,Walibi Belgium,status.operating +Magnus Colossus,Wooden,Sit Down,92.0,38.0,1150.0,0.0,RCCA,Terra Mítica,status.closed.temporarily +Oki Doki,Steel,Sit Down,58.0,16.0,436.0,0.0,Vekoma,Bobbejaanland,status.operating +SOS Numerobis,Steel,Sit Down,32.0,6.0,200.0,0.0,Zierer,Parc Asterix,status.operating +Poseïdon,Steel,Water Coaster,70.0,23.0,836.0,0.0,Mack,Europa Park,status.operating +Rock'n Roller Coaster avec Aerosmith,Steel,Sit Down,92.0,24.0,1037.0,3.0,Vekoma,Walt Disney Studios,status.operating +La Ronde des Rondins,Steel,Sit Down,26.0,3.0,60.0,0.0,Zierer,Parc Asterix,status.relocated +Silverstar,Steel,Sit Down,127.0,73.0,1620.0,0.0,B&M,Europa Park,status.operating +Superman la Atraccion de Acero,Steel,Floorless,105.0,50.0,1200.0,7.0,B&M,Parque Warner Madrid,status.operating +La Trace du Hourra,Steel,Bobsleigh,60.0,31.0,900.0,0.0,Mack,Parc Asterix,status.operating +Stunt Fall,Steel,Sit 
Down,106.0,58.0,367.0,3.0,Vekoma,Parque Warner Madrid,status.operating +Le Tigre de Sibérie,Steel,Sit Down,40.0,13.0,360.0,0.0,Reverchon,Le Pal,status.operating +Titánide,Steel,Inverted,80.0,33.0,689.0,5.0,Vekoma,Terra Mítica,status.operating +Tom y Jerry,Steel,Sit Down,36.0,8.0,360.0,0.0,Zierer,Parque Warner Madrid,status.operating +Psyké underground,Steel,Sit Down,85.0,42.0,260.0,1.0,Schwarzkopf,Walibi Belgium,status.operating +Tonnerre de Zeus,Wooden,Sit Down,84.0,30.0,1233.0,0.0,CCI,Parc Asterix,status.operating +Tren Bravo (Left),Steel,Sit Down,45.0,6.0,394.0,0.0,Zamperla,Terra Mítica,status.closed.temporarily +Typhoon,Steel,Sit Down,80.0,26.0,670.0,4.0,Gerstlauer,Bobbejaanland,status.operating +Schweizer Bobbahn,Steel,Bobsleigh,50.0,19.0,487.0,0.0,Mack,Europa Park,status.operating +Vampire,Steel,Inverted,80.0,33.0,689.0,5.0,Vekoma,Walibi Belgium,status.operating +Le Vol d'Icare,Steel,Sit Down,42.0,11.0,410.0,0.0,Zierer,Parc Asterix,status.operating +Wild Train,Steel,Sit Down,70.0,15.0,330.0,0.0,Pax,Parc Saint Paul,status.operating +Bandit,Wooden,Sit Down,80.0,28.0,1099.0,0.0,RCCA,Movie Park Germany,status.operating +Woodstock Express,Steel,Sit Down,,,220.0,0.0,Zamperla,Walibi Rhône Alpes,status.operating diff --git a/explorer/tests/test_create_sqlite.py b/explorer/tests/test_create_sqlite.py index 19b0f685..4efa9eb7 100644 --- a/explorer/tests/test_create_sqlite.py +++ b/explorer/tests/test_create_sqlite.py @@ -1,8 +1,8 @@ from django.test import TestCase from django.core.files.uploadedfile import SimpleUploadedFile -from unittest import skipIf +from unittest import skipIf, mock from explorer.app_settings import EXPLORER_USER_UPLOADS_ENABLED -from explorer.ee.db_connections.create_sqlite import parse_to_sqlite +from explorer.ee.db_connections.create_sqlite import parse_to_sqlite, get_names import os import sqlite3 @@ -11,13 +11,13 @@ PATH = "./test_parse_to_sqlite.db" -def write_sqlite_and_get_row(f_bytes): +def write_sqlite_and_get_row(f_bytes, table_name): os.makedirs(os.path.dirname(PATH), exist_ok=True) with open(PATH, "wb") as temp_file: temp_file.write(f_bytes.getvalue()) conn = sqlite3.connect(PATH) cursor = conn.cursor() - cursor.execute("SELECT * FROM data") + cursor.execute(f"SELECT * FROM {table_name}") rows = cursor.fetchall() cursor.close() conn.close() @@ -28,18 +28,57 @@ def write_sqlite_and_get_row(f_bytes): @skipIf(not EXPLORER_USER_UPLOADS_ENABLED, reason="User uploads disabled") class TestCreateSqlite(TestCase): + #def test_parse_to_sqlite_with_sqlite_file + def test_parse_to_sqlite(self): file = SimpleUploadedFile("name.csv", b"name, title\nchris,cto", content_type="text/csv") - sqlite_bytes, name = parse_to_sqlite(file) - rows = write_sqlite_and_get_row(sqlite_bytes) + sqlite_bytes, name = parse_to_sqlite(file, None, user_id=1) + rows = write_sqlite_and_get_row(sqlite_bytes, "name") - self.assertEqual(name, "name.db") self.assertEqual(rows[0], ("chris", "cto")) + self.assertEqual(name, "name_1.db") def test_parse_to_sqlite_with_no_parser(self): file = SimpleUploadedFile("name.db", SQLITE_BYTES, content_type="application/x-sqlite3") - sqlite_bytes, name = parse_to_sqlite(file) - rows = write_sqlite_and_get_row(sqlite_bytes) + sqlite_bytes, name = parse_to_sqlite(file, None, user_id=1) + rows = write_sqlite_and_get_row(sqlite_bytes, "data") self.assertEqual(rows[0], ("chris", "cto")) - self.assertEqual(name, "name.db") + self.assertEqual(name, "name_1.db") + + +class TestGetNames(TestCase): + def setUp(self): + # Mock file object + self.mock_file = mock.MagicMock() 
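+        # The spaces in this name exercise secure_filename, which should collapse
+        # them to underscores (see test_no_append_conn below)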
+ self.mock_file.name = "test file name.txt" + + # Mock append_conn object + self.mock_append_conn = mock.MagicMock() + self.mock_append_conn.name = "/path/to/existing_db.sqlite" + + def test_no_append_conn(self): + table_name, f_name = get_names(self.mock_file, append_conn=None, user_id=123) + self.assertEqual(table_name, "test_file_name") + self.assertEqual(f_name, "test_file_name_123.db") + + def test_with_append_conn(self): + table_name, f_name = get_names(self.mock_file, append_conn=self.mock_append_conn, user_id=123) + self.assertEqual(table_name, "test_file_name") + self.assertEqual(f_name, "existing_db.sqlite") + + def test_secure_filename(self): + self.mock_file.name = "测试文件.txt" + table_name, f_name = get_names(self.mock_file, append_conn=None, user_id=123) + self.assertEqual(table_name, "_") + self.assertEqual(f_name, "__123.db") + + def test_empty_filename(self): + self.mock_file.name = ".txt" + with self.assertRaises(ValueError): + get_names(self.mock_file, append_conn=None, user_id=123) + + def test_invalid_extension(self): + self.mock_file.name = "filename.exe" + with self.assertRaises(ValueError): + get_names(self.mock_file, append_conn=None, user_id=123) diff --git a/explorer/tests/test_db_connection_utils.py b/explorer/tests/test_db_connection_utils.py index a0ef0b54..5b85a944 100644 --- a/explorer/tests/test_db_connection_utils.py +++ b/explorer/tests/test_db_connection_utils.py @@ -14,52 +14,55 @@ ) - - @skipIf(not EXPLORER_USER_UPLOADS_ENABLED, "User uploads not enabled") class TestSQLiteConnection(TestCase): @patch("explorer.utils.get_s3_bucket") - @patch("os.path.exists") - @patch("os.getcwd") - def test_get_sqlite_for_connection_downloads_file_if_not_exists(self, mock_getcwd, mock_path_exists, - mock_get_s3_bucket): - mock_getcwd.return_value = "/tmp" - mock_path_exists.return_value = False + def test_get_sqlite_for_connection_downloads_file_if_not_exists(self, mock_get_s3_bucket): mock_s3 = MagicMock() mock_get_s3_bucket.return_value = mock_s3 - mock_explorer_connection = MagicMock() - mock_explorer_connection.name = "test_db" - mock_explorer_connection.host = "s3_bucket/test_db" - mock_explorer_connection.local_name = "/tmp/user_dbs/test_db" + conn = DatabaseConnection( + name="test_db.db", + host="s3_bucket/test_db.db", + engine=DatabaseConnection.SQLITE + ) + conn.delete_local_sqlite() + + local_name = conn.local_name - result = get_sqlite_for_connection(mock_explorer_connection) + result = get_sqlite_for_connection(conn) - mock_s3.download_file.assert_called_once_with("s3_bucket/test_db", "/tmp/user_dbs/test_db") + mock_s3.download_file.assert_called_once_with("s3_bucket/test_db.db", local_name) self.assertIsNone(result.host) - self.assertEqual(result.name, "/tmp/user_dbs/test_db") + self.assertEqual(result.name, local_name) @patch("explorer.utils.get_s3_bucket") - @patch("os.path.exists") - @patch("os.getcwd") - def test_get_sqlite_for_connection_skips_download_if_exists(self, mock_getcwd, mock_path_exists, - mock_get_s3_bucket): - mock_getcwd.return_value = "/tmp" - mock_path_exists.return_value = True + def test_get_sqlite_for_connection_skips_download_if_exists(self, mock_get_s3_bucket): mock_s3 = MagicMock() mock_get_s3_bucket.return_value = mock_s3 - mock_explorer_connection = MagicMock() - mock_explorer_connection.name = "test_db" - mock_explorer_connection.host = "s3_bucket/test_db" - mock_explorer_connection.local_name = "/tmp/user_dbs/test_db" + conn = DatabaseConnection( + name="test_db.db", + host="s3_bucket/test_db.db", + 
engine=DatabaseConnection.SQLITE + ) + conn.delete_local_sqlite() + + local_name = conn.local_name + + with open(local_name, "wb") as file: + file.write(b"\x00" * 10) - result = get_sqlite_for_connection(mock_explorer_connection) + conn.update_fingerprint() + + result = get_sqlite_for_connection(conn) mock_s3.download_file.assert_not_called() self.assertIsNone(result.host) - self.assertEqual(result.name, "/tmp/user_dbs/test_db") + self.assertEqual(result.name, local_name) + + os.remove(local_name) class TestDjangoStyleConnection(TestCase): @@ -125,7 +128,7 @@ def test_pandas_to_sqlite(self): df = pd.DataFrame(data) # Convert the DataFrame to SQLite and get the BytesIO buffer - db_buffer = pandas_to_sqlite(df) + db_buffer = pandas_to_sqlite(df, "data", "test_pandas_to_sqlite.db") # Write the buffer to a temporary file to simulate reading it back temp_db_path = "temp_test_database.db" diff --git a/explorer/tests/test_models.py b/explorer/tests/test_models.py index 5c8647ca..53e24a5b 100644 --- a/explorer/tests/test_models.py +++ b/explorer/tests/test_models.py @@ -1,5 +1,6 @@ import unittest -from unittest.mock import Mock, patch +import os +from unittest.mock import Mock, patch, MagicMock from django.core.exceptions import ValidationError from django.db import connections @@ -224,6 +225,7 @@ def test_empty_data(self): class TestDatabaseConnection(TestCase): + def test_cant_create_a_connection_with_conflicting_name(self): thrown = False try: @@ -257,3 +259,66 @@ def test_local_name_calls_user_dbs_local_dir(self, mock_getcwd, mock_exists, moc # Ensure os.makedirs was called once since the directory does not exist mock_makedirs.assert_called_once_with("/mocked/path/user_dbs") + + @patch("explorer.utils.get_s3_bucket") + def test_fingerprint_is_updated_after_download_and_download_is_not_called_again(self, mock_get_s3_bucket): + # Setup + mock_s3 = mock_get_s3_bucket.return_value + + connection = DatabaseConnection.objects.create( + alias="test", + engine=DatabaseConnection.SQLITE, + name="test_db.sqlite3", + host="some-s3-bucket", + ) + + # Define a function to mock S3 download + def mock_download_file(path, filename): + with open(filename, "w") as f: + f.write("Initial content") + + mock_s3.download_file = MagicMock(side_effect=mock_download_file) + + # First download + connection.download_sqlite_if_needed() + + # Check that the file was "downloaded" (in this case, created) + self.assertTrue(os.path.exists(connection.local_name)) + + # Check that the fingerprint was updated + self.assertIsNotNone(connection.upload_fingerprint) + initial_fingerprint = connection.upload_fingerprint + + # Mock S3 download to track calls + mock_s3.download_file.reset_mock() + + # Second attempt to download + connection.download_sqlite_if_needed() + + # Check that download was not called again + mock_s3.download_file.assert_not_called() + + # Check that the fingerprint hasn't changed + connection.refresh_from_db() + self.assertEqual(connection.upload_fingerprint, initial_fingerprint) + + # Modify the file to simulate changes + with open(connection.local_name, "w") as f: + f.write("Modified content") + + # Third attempt to download + connection.download_sqlite_if_needed() + + # Check that download was called again + mock_s3.download_file.assert_called_once() + + # Check that the fingerprint has been updated back to the original + connection.refresh_from_db() + self.assertEqual(connection.upload_fingerprint, initial_fingerprint) + + def tearDown(self): + # Clean up any files created during the test + for obj in 
DatabaseConnection.objects.all(): + if os.path.exists(obj.local_name): + os.remove(obj.local_name) + DatabaseConnection.objects.all().delete() diff --git a/explorer/tests/test_tasks.py b/explorer/tests/test_tasks.py index 0aae18b3..8be14cb4 100644 --- a/explorer/tests/test_tasks.py +++ b/explorer/tests/test_tasks.py @@ -20,7 +20,7 @@ class TestTasks(TestCase): @unittest.skipIf(not app_settings.ENABLE_TASKS, "tasks not enabled") - @patch("explorer.tasks.s3_upload") + @patch("explorer.tasks.s3_csv_upload") def test_async_results(self, mocked_upload): mocked_upload.return_value = "http://s3.com/your-file.csv" @@ -46,7 +46,7 @@ def test_async_results(self, mocked_upload): self.assertEqual(mocked_upload.call_count, 1) @unittest.skipIf(not app_settings.ENABLE_TASKS, "tasks not enabled") - @patch("explorer.tasks.s3_upload") + @patch("explorer.tasks.s3_csv_upload") def test_async_results_fails_with_message(self, mocked_upload): mocked_upload.return_value = "http://s3.com/your-file.csv" @@ -61,7 +61,7 @@ def test_async_results_fails_with_message(self, mocked_upload): self.assertEqual(mocked_upload.call_count, 0) @unittest.skipIf(not app_settings.ENABLE_TASKS, "tasks not enabled") - @patch("explorer.tasks.s3_upload") + @patch("explorer.tasks.s3_csv_upload") def test_snapshots(self, mocked_upload): mocked_upload.return_value = "http://s3.com/your-file.csv" diff --git a/explorer/tests/test_utils.py b/explorer/tests/test_utils.py index 1fa2ebfa..e160dc6b 100644 --- a/explorer/tests/test_utils.py +++ b/explorer/tests/test_utils.py @@ -6,7 +6,7 @@ from explorer.tests.factories import SimpleQueryFactory from explorer.utils import ( EXPLORER_PARAM_TOKEN, extract_params, get_params_for_url, get_params_from_request, param, passes_blacklist, - shared_dict_update, swap_params, + shared_dict_update, swap_params, secure_filename ) @@ -271,3 +271,35 @@ def test_only_registered_connections_are_in_connections(self): from explorer.connections import connections self.assertTrue(EXPLORER_DEFAULT_CONNECTION in connections()) self.assertNotEqual(len(connections()), len([c for c in djcs])) + + +class TestSecureFilename(TestCase): + def test_basic_ascii(self): + self.assertEqual(secure_filename("simple_file.txt"), "simple_file.txt") + + def test_special_characters(self): + self.assertEqual(secure_filename("file@name!.txt"), "file_name.txt") + + def test_leading_trailing_underscores(self): + self.assertEqual(secure_filename("_leading.txt"), "leading.txt") + self.assertEqual(secure_filename("trailing_.txt"), "trailing.txt") + self.assertEqual(secure_filename(".__filename__.txt"), "filename.txt") + + def test_unicode_characters(self): + self.assertEqual(secure_filename("fïléñâmé.txt"), "filename.txt") + self.assertEqual(secure_filename("测试文件.txt"), "_.txt") + + def test_empty_filename(self): + with self.assertRaises(ValueError): + secure_filename("") + + def test_bad_extension(self): + with self.assertRaises(ValueError): + secure_filename("foo.xyz") + + def test_empty_extension(self): + with self.assertRaises(ValueError): + secure_filename("foo.") + + def test_spaces(self): + self.assertEqual(secure_filename("file name.txt"), "file_name.txt") diff --git a/explorer/tests/test_views.py b/explorer/tests/test_views.py index 4ae92c3d..bc65335e 100644 --- a/explorer/tests/test_views.py +++ b/explorer/tests/test_views.py @@ -936,7 +936,7 @@ def test_post_csv_file(self): def test_upload_file(self, mock_upload_sqlite): self.assertFalse(DatabaseConnection.objects.filter(alias__contains="kings").exists()) - # Test data file + # Upload 
some JSON file_path = os.path.join(os.getcwd(), "explorer/tests/json/kings.json") with open(file_path, "rb") as f: response = self.client.post(reverse("explorer_upload"), {"file": f}) @@ -945,22 +945,31 @@ def test_upload_file(self, mock_upload_sqlite): self.assertEqual(response.status_code, 200) self.assertEqual(mock_upload_sqlite.call_count, 1) - # Now write the SQLite bytes locally, to the newly-created connection's local path - # We are going query this new data source, and writing the bytes here preempts the system's attempt to download - # it from S3 since the file already exists on disk. No need to mock get_sqlite_for_connection! + # Query it and make sure that the reign of this particular king is indeed in the results. conn = DatabaseConnection.objects.filter(alias__contains="kings").first() - os.makedirs(os.path.dirname(conn.local_name), exist_ok=True) - with open(conn.local_name, "wb") as temp_file: - temp_file.write(mock_upload_sqlite.call_args[0][0].getvalue()) - resp = self.client.post( reverse("explorer_playground"), - {"sql": "select * from data where Name = 'Athelstan';", "connection": conn.alias} + {"sql": "select * from kings where Name = 'Athelstan';", "connection": conn.alias} ) - - # Assert that the reign of this particular king is indeed in the results. self.assertIn("925-940", resp.content.decode("utf-8")) + # Append a new table to the existing connection + file_path = os.path.join(os.getcwd(), "explorer/tests/csvs/rc_sample.csv") + with open(file_path, "rb") as f: + response = self.client.post(reverse("explorer_upload"), {"file": f, "append": conn.id}) + + # Make sure it got re-uploaded + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_upload_sqlite.call_count, 2) + + # Query it and make sure a valid result is in the response. Note this is the *same* connection. 
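+        # The new table is named after the uploaded file's base name ("rc_sample"),
+        # so it can be queried alongside the earlier "kings" table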
+ resp = self.client.post( + reverse("explorer_playground"), + {"sql": "select * from rc_sample where material_type = 'Steel';", "connection": conn.alias} + ) + self.assertIn("Goudurix", resp.content.decode("utf-8")) + + # Clean up filesystem os.remove(conn.local_name) def test_post_no_file(self): @@ -1126,3 +1135,6 @@ def test_database_connection_delete_view(self): response = self.client.get(reverse("explorer_connection_delete", args=[self.connection.pk])) self.assertEqual(response.status_code, 200) + def test_database_connection_upload_view(self): + response = self.client.get(reverse("explorer_upload_create")) + self.assertEqual(response.status_code, 200) diff --git a/explorer/utils.py b/explorer/utils.py index 3737f027..b6454310 100644 --- a/explorer/utils.py +++ b/explorer/utils.py @@ -1,4 +1,6 @@ import re +import os +import unicodedata from collections import deque from typing import Iterable, Tuple @@ -228,7 +230,7 @@ def get_s3_bucket(): return s3.Bucket(name=app_settings.S3_BUCKET) -def s3_upload(key, data): +def s3_csv_upload(key, data): if app_settings.S3_DESTINATION: key = "/".join([app_settings.S3_DESTINATION, key]) bucket = get_s3_bucket() @@ -250,3 +252,18 @@ def is_xls_writer_available(): return True except ImportError: return False + + +def secure_filename(filename): + filename, ext = os.path.splitext(filename) + if not filename and not ext: + raise ValueError("Filename or extension cannot be blank") + if ext.lower() not in [".db", ".sqlite", ".sqlite3", ".csv", ".json", ".txt"]: + raise ValueError(f"Invalid extension: {ext}") + + filename = unicodedata.normalize("NFKD", filename).encode("ascii", "ignore").decode("ascii") + filename = re.sub(r"[^a-zA-Z0-9_.-]", "_", filename) + filename = filename.strip("._") + if not filename: # If filename becomes empty, replace it with an underscore + filename = "_" + return f"{filename}{ext}" diff --git a/explorer/views/utils.py b/explorer/views/utils.py index 61259f8b..a850ef2d 100644 --- a/explorer/views/utils.py +++ b/explorer/views/utils.py @@ -50,8 +50,8 @@ def query_viewmodel(request, query, title=None, form=None, message=None, try: if app_settings.EXPLORER_CHARTS_ENABLED and has_valid_results: - charts["line_chart_svg"] = get_chart(res,"line") - charts["bar_chart_svg"] = get_chart(res,"bar") + charts["line_chart_svg"] = get_chart(res, "line") + charts["bar_chart_svg"] = get_chart(res, "bar") except TypeError as e: if ql is not None: msg = f"Error generating charts for querylog {ql.id}: {e}"