Skip to content

Commit

Permalink
Userland connections and licensing
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisclark committed May 25, 2024
1 parent 5693ac3 commit 220b2b3
Show file tree
Hide file tree
Showing 71 changed files with 2,172 additions and 273 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ docs/_build/
.env
tst
tst2
user_dbs/*
tmp2
11 changes: 10 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
* All content that resides under the "explorer/ee/" directory of this repository,
if that directory exists, is licensed under the license defined in "explorer/ee/LICENSE".

* All third party components incorporated into the SQL Explorer Software are
licensed under the original license provided by the owner of the applicable component.

* Content outside of the above mentioned directories or restrictions above
is available under the "MIT" license as defined below.

The MIT License (MIT)

Copyright (c) 2013 Chris Clark, ePantry LLC
Expand All @@ -18,4 +27,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
THE SOFTWARE.
16 changes: 8 additions & 8 deletions docs/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ Snapshots
.. code-block:: python
app.conf.beat_schedule = {
'explorer.tasks.snapshot_queries': {
'task': 'explorer.tasks.snapshot_queries',
'schedule': crontab(hour=1, minute=0)
}
"explorer.tasks.snapshot_queries": {
"task": "explorer.tasks.snapshot_queries",
"schedule": crontab(hour="1", minute="0")
},
}
- Requires celery, obviously. Also uses boto3. All
Expand Down Expand Up @@ -168,10 +168,10 @@ Query Logs
.. code-block:: python
app.conf.beat_schedule = {
'explorer.tasks.truncate_querylogs': {
'task': 'explorer.tasks.truncate_querylogs',
'schedule': crontab(hour=1, minute=0),
'kwargs': {'days': 30}
"explorer.tasks.truncate_querylogs": {
"task": "explorer.tasks.truncate_querylogs",
"schedule": crontab(hour="1", minute="10"),
"kwargs": {"days": 30}
}
}
Expand Down
3 changes: 1 addition & 2 deletions explorer/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from explorer.actions import generate_report_action
from explorer.models import Query, ExplorerValue
from explorer.ee.user_connections.admin import DatabaseConnectionAdmin # noqa


@admin.register(Query)
Expand All @@ -16,12 +17,10 @@ class QueryAdmin(admin.ModelAdmin):
class ExplorerValueAdmin(admin.ModelAdmin):
list_display = ("key", "value", "display_key")
list_filter = ("key",)
readonly_fields = ("key",)
search_fields = ("key", "value")

def display_key(self, obj):
# Human-readable name for the key
return dict(ExplorerValue.EXPLORER_SETTINGS_CHOICES).get(obj.key, "")

display_key.short_description = "Setting Name"

18 changes: 15 additions & 3 deletions explorer/app_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from django.conf import settings

from explorer.ee import has_valid_license


EXPLORER_CONNECTIONS = getattr(settings, "EXPLORER_CONNECTIONS", {})
EXPLORER_DEFAULT_CONNECTION = getattr(
Expand Down Expand Up @@ -70,7 +72,7 @@
settings, "EXPLORER_PERMISSION_CHANGE", lambda r: r.user.is_staff
)
EXPLORER_RECENT_QUERY_COUNT = getattr(
settings, "EXPLORER_RECENT_QUERY_COUNT", 10
settings, "EXPLORER_RECENT_QUERY_COUNT", 5
)
EXPLORER_ASYNC_SCHEMA = getattr(settings, "EXPLORER_ASYNC_SCHEMA", False)

Expand Down Expand Up @@ -125,7 +127,7 @@
S3_REGION = getattr(settings, "EXPLORER_S3_REGION", "us-east-1")
S3_ENDPOINT_URL = getattr(settings, "EXPLORER_S3_ENDPOINT_URL", None)
S3_DESTINATION = getattr(settings, "EXPLORER_S3_DESTINATION", "")
S3_SIGNATURE_VERSION = getattr(settings, "EXPLORER_S3_SIGNATURE_VERSION", "v2")
S3_SIGNATURE_VERSION = getattr(settings, "EXPLORER_S3_SIGNATURE_VERSION", "v4")

UNSAFE_RENDERING = getattr(settings, "EXPLORER_UNSAFE_RENDERING", False)

Expand All @@ -146,8 +148,18 @@

# AI Assistant settings. Setting the first to an OpenAI key is the simplest way to enable the assistant
EXPLORER_AI_API_KEY = getattr(settings, "EXPLORER_AI_API_KEY", None)

EXPLORER_ASSISTANT_BASE_URL = getattr(settings, "EXPLORER_ASSISTANT_BASE_URL", "https://api.openai.com/v1")
EXPLORER_ASSISTANT_MODEL = getattr(settings, "EXPLORER_ASSISTANT_MODEL",
# Return the model name and max_tokens it supports
{"name": "gpt-4-0125-preview",
{"name": "gpt-4o",
"max_tokens": 128000})

EXPLORER_USER_UPLOADS_ENABLED = getattr(settings, "EXPLORER_USER_UPLOADS_ENABLED", False)
EXPLORER_PRUNE_LOCAL_UPLOAD_COPY_DAYS_INACTIVITY = getattr(settings,
"EXPLORER_PRUNE_LOCAL_UPLOAD_COPY_DAYS_INACTIVITY", 7)
EXPLORER_LICENSE_KEY = getattr(settings,"EXPLORER_LICENSE_KEY", None)


def has_assistant():
    """Report whether the AI assistant is configured.

    Returns:
        True when EXPLORER_AI_API_KEY is anything other than None, else False.
    """
    key_is_unset = EXPLORER_AI_API_KEY is None
    return not key_is_unset
def has_user_uploads():
    """Report whether user uploads are available.

    Uploads require a valid license in addition to the
    EXPLORER_USER_UPLOADS_ENABLED setting.

    Returns:
        The (falsy) result of has_valid_license() when there is no valid
        license; otherwise the value of EXPLORER_USER_UPLOADS_ENABLED.
    """
    license_status = has_valid_license()
    if not license_status:
        # Mirror `a and b`: a falsy left operand is returned as-is.
        return license_status
    return EXPLORER_USER_UPLOADS_ENABLED
5 changes: 3 additions & 2 deletions explorer/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ def _get_explorer_connections():

def _validate_connections():

# Validate connections
if _get_default() not in _get_explorer_connections().values():
# Validate connections, when using settings.EXPLORER_CONNECTIONS
# Skip if none are configured, as the app will use user-configured connections (DatabaseConnection models)
if _get_explorer_connections().values() and _get_default() not in _get_explorer_connections().values():
raise ImproperlyConfigured(
f"EXPLORER_DEFAULT_CONNECTION is {_get_default()}, "
f"but that alias is not present in the values of "
Expand Down
108 changes: 0 additions & 108 deletions explorer/assistant/tests.py

This file was deleted.

71 changes: 67 additions & 4 deletions explorer/assistant/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from explorer import app_settings
from explorer.schema import schema_info
from explorer.models import ExplorerValue
from explorer.utils import get_valid_connection
from django.db.utils import OperationalError


# Model name taken from the "name" entry of the EXPLORER_ASSISTANT_MODEL setting.
OPENAI_MODEL = app_settings.EXPLORER_ASSISTANT_MODEL["name"]
# Row count used in the LIMIT clause when sampling rows from a table.
ROW_SAMPLE_SIZE = 2
# Longest str/bytes field value kept in a sampled row; longer values are truncated.
MAX_FIELD_SAMPLE_SIZE = 500 # characters


def openai_client():
Expand Down Expand Up @@ -41,20 +43,46 @@ def tables_from_schema_info(connection, table_names):
def sample_rows_from_tables(connection, table_names):
    """Build one text blob containing sample rows for each named table.

    For every table a ``SAMPLE FROM TABLE <name>:`` header is emitted, followed
    by the formatted sample rows and a blank-line separator. The sections are
    accumulated, so the result covers *all* tables rather than only the last.

    Args:
        connection: Connection identifier, passed through to
            sample_rows_from_table.
        table_names: Iterable of table names to sample.

    Returns:
        The concatenated sections as a single string ("" when table_names is
        empty).
    """
    sections = []
    for table_name in table_names:
        header = f"SAMPLE FROM TABLE {table_name}:\n"
        rows = sample_rows_from_table(connection, table_name)
        sections.append(header + format_rows_from_table(rows) + "\n\n")
    # Join once instead of growing a string with repeated +=.
    return "".join(sections)


def sample_rows_from_table(connection, table_name):
"""
Fetches a sample of rows from the specified table and ensures that any field values
exceeding 500 characters (or bytes) are truncated. This is useful for handling fields
like "description" that might contain very long strings of text or binary data.
Truncating these fields prevents issues with displaying or processing overly large values.
An ellipsis ("...") is appended to indicate that the data has been truncated.
Args:
connection: The database connection.
table_name: The name of the table to sample rows from.
Returns:
A list of rows with field values truncated if they exceed 500 characters/bytes.
"""
conn = get_valid_connection(connection)
cursor = conn.cursor()
try:
cursor.execute(f"SELECT * FROM {table_name} LIMIT {ROW_SAMPLE_SIZE}")
ret = [[header[0] for header in cursor.description]]
ret = ret + cursor.fetchall()
rows = cursor.fetchall()

for row in rows:
processed_row = []
for field in row:
new_val = field
if isinstance(field, str) and len(field) > MAX_FIELD_SAMPLE_SIZE:
new_val = field[:MAX_FIELD_SAMPLE_SIZE] + "..." # Truncate and add ellipsis
elif isinstance(field, (bytes, bytearray)) and len(field) > MAX_FIELD_SAMPLE_SIZE:
new_val = field[:MAX_FIELD_SAMPLE_SIZE] + b"..." # Truncate binary data
processed_row.append(new_val)
ret.append(processed_row)

return ret
except OperationalError as e:
return [[str(e)]]
Expand Down Expand Up @@ -83,7 +111,10 @@ def get_table_names_from_query(sql):
def num_tokens_from_string(string: str) -> int:
    """Return the number of tokens in a text string.

    The tokenizer is looked up from OPENAI_MODEL. When tiktoken has no mapping
    for that model name (it raises KeyError), the "cl100k_base" encoding is
    used instead so that token counting still works.

    Args:
        string: Text to tokenize.

    Returns:
        The length of the encoded token sequence.
    """
    # Imported at call time, as in the original; presumably so tiktoken is only
    # needed when the assistant is actually used -- confirm before moving it.
    import tiktoken

    # The lookup must happen only inside the try: an unguarded call ahead of it
    # would raise KeyError before the fallback could apply.
    try:
        encoding = tiktoken.encoding_for_model(OPENAI_MODEL)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(string))

Expand All @@ -92,3 +123,35 @@ def fits_in_window(string: str) -> bool:
# Ratchet down by 5% to account for other boilerplate and system prompt
# TODO make this better by actually looking at the token count of the system prompt
return num_tokens_from_string(string) < (app_settings.EXPLORER_ASSISTANT_MODEL["max_tokens"] * 0.95)


def build_prompt(request_data, included_tables):
    """Assemble the system and user prompts for an assistant request.

    The user prompt is a sequence of headed sections, in this order: the
    database vendor, the query error (if present), the existing SQL (if
    present), table context, and the user's request. Table context is the
    sampled rows when the prompt so far plus the sample fits in the model
    window; otherwise only the table structure is included.

    Args:
        request_data: Mapping read for the "connection", "db_error", "sql"
            and "assistant_request" entries.
        included_tables: Table names handed to the sampling / schema helpers.

    Returns:
        A dict with a "system" entry (the stored assistant system prompt) and
        a "user" entry (the assembled sections).
    """
    vendor = get_valid_connection(request_data.get("connection")).vendor
    sections = [f"## Database Vendor / SQL Flavor is {vendor}\n\n"]

    error_text = request_data.get("db_error")
    if error_text:
        sections.append(f"## Query Error ##\n\n{error_text}\n\n")

    existing_sql = request_data.get("sql")
    if existing_sql:
        sections.append(f"## Existing SQL ##\n\n{existing_sql}\n\n")

    sampled = sample_rows_from_tables(request_data["connection"],
                                      included_tables)
    if not fits_in_window("".join(sections) + sampled):
        # Too large with sampled rows: fall back to *just* the structure.
        structure = tables_from_schema_info(request_data["connection"],
                                            included_tables)
        sections.append(f"## Table Structure ##\n\n{structure}\n\n")
    else:
        sections.append(f"## Table Structure with Sampled Data ##\n\n{sampled}\n\n")

    sections.append(f"## User's Request to Assistant ##\n\n{request_data['assistant_request']}\n\n")

    system_prompt = ExplorerValue.objects.get_item(ExplorerValue.ASSISTANT_SYSTEM_PROMPT).value
    return {"system": system_prompt, "user": "".join(sections)}
Loading

0 comments on commit 220b2b3

Please sign in to comment.