Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion datasette/templates/table.html
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ <h3>{{ extra_wheres_for_ui|length }} extra where clause{% if extra_wheres_for_ui
<p><a class="not-underlined" title="{{ query.sql }}" href="{{ urls.database(database) }}?{{ {'sql': query.sql}|urlencode|safe }}{% if query.params %}&amp;{{ query.params|urlencode|safe }}{% endif %}">&#x270e; <span class="underlined">View and edit SQL</span></a></p>
{% endif %}

<p class="export-links">This data as {% for name, url in renderers.items() %}<a href="{{ url }}">{{ name }}</a>{{ ", " if not loop.last }}{% endfor %}{% if display_rows %}, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>){% endif %}</p>
<p class="export-links">This data as {% for name, url in renderers.items() %}<a href="{{ url }}">{{ name }}</a>{{ ", " if not loop.last }}{% endfor %}{% if display_rows %}, <a href="{{ url_csv }}">CSV</a> (<a href="#export">advanced</a>), <a href="{{ url_markdown }}">Markdown</a> (<a href="#export-markdown">advanced</a>){% endif %}</p>

{% if suggested_facets %}
{% include "_suggested_facets.html" %}
Expand Down Expand Up @@ -186,6 +186,20 @@ <h3>Advanced export</h3>
{% endfor %}
</p>
</form>
<div id="export-markdown" class="advanced-export">
<form class="core" action="{{ url_markdown_path }}" method="get">
<p>
Markdown options:
{% if expandable_columns %}<label><input type="checkbox" name="_labels" checked> expand labels</label>{% endif %}
{% if next_url and settings.allow_csv_stream %}<label><input type="checkbox" name="_stream"> export all rows (unlimited)</label>{% endif %}
<label>Max rows: <input type="number" name="_max_rows" value="500" min="0" max="10000" style="width: 80px;"></label>
<input type="submit" value="Export Markdown">
{% for key, value in url_markdown_hidden_args %}
<input type="hidden" name="{{ key }}" value="{{ value }}">
{% endfor %}
</p>
</form>
</div>
</div>
{% endif %}

Expand Down
237 changes: 236 additions & 1 deletion datasette/views/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,21 @@ async def data(self, request):
async def as_csv(self, request, database):
    """Render this view's data as a streamed CSV response via stream_csv()."""
    response = await stream_csv(self.ds, self.data, request, database)
    return response

async def as_markdown(self, request, database):
    """Render this view's data as a streamed Markdown table response via stream_markdown()."""
    response = await stream_markdown(self.ds, self.data, request, database)
    return response

async def get(self, request):
db = await self.ds.resolve_database(request)
database = db.name
database_route = db.route

_format = request.url_vars["format"]
_format = request.url_vars["format"] or request.args.get("_format")
data_kwargs = {}

if _format == "csv":
return await self.as_csv(request, database_route)
elif _format == "markdown":
return await self.as_markdown(request, database_route)

if _format is None:
# HTML views default to expanding all foreign key labels
Expand Down Expand Up @@ -568,3 +573,233 @@ async def stream_fn(r):
headers["content-disposition"] = disposition

return AsgiStream(stream_fn, headers=headers, content_type=content_type)


async def stream_markdown(datasette, fetch_data, request, database):
    """Stream query or table results to the client as a Markdown table.

    Modelled on stream_csv(): fetches the first page of data via
    ``fetch_data`` and, when ``?_stream`` is set, follows "next" pagination
    tokens to emit every subsequent page.

    :param datasette: the Datasette application object (settings, URL
        helpers, CORS configuration).
    :param fetch_data: awaitable callable taking ``(request, **kwargs)`` and
        returning either a Response or a 3-/4-tuple whose first element is a
        data dict with at least "rows" and "columns" keys.
    :param request: incoming request; the ``_max_rows``, ``_stream``,
        ``_trace``, ``_header`` and ``_next`` query arguments are consulted.
    :param database: database name, used for blob URLs and the download
        filename.
    :return: an AsgiStream response with a Content-Disposition attachment
        header of ``<table-or-database>.md``.
    :raises BadRequest: when streaming is disabled or ``_next`` is combined
        with ``_stream``.
    :raises DatasetteError: when the first fetch raises invalid-SQL errors.
    """
    # Row-limit configuration.
    DEFAULT_MAX_ROWS = 500
    MAX_ALLOWED_ROWS = 10000

    # Parse the _max_rows query argument.
    max_rows_param = request.args.get("_max_rows")
    if max_rows_param:
        try:
            max_rows = int(max_rows_param)
            # Clamp to the maximum allowed value...
            max_rows = min(max_rows, MAX_ALLOWED_ROWS)
            # ...and never allow a negative limit.
            max_rows = max(max_rows, 0)
        except ValueError:
            # Invalid parameter - fall back to the default.
            max_rows = DEFAULT_MAX_ROWS
    else:
        max_rows = DEFAULT_MAX_ROWS

    # Whether to stream every row (pagination loop) rather than one page.
    use_stream = request.args.get("_stream")

    # _stream=1 ignores the row limit entirely (fetch all data).
    if use_stream:
        max_rows = None  # None means unlimited

    kwargs = {}
    # Do not calculate facets or counts:
    extra_parameters = [
        "{}=1".format(key)
        for key in ("_nofacet", "_nocount")
        if not request.args.get(key)
    ]
    if extra_parameters:
        # Replace request object with a new one with modified scope
        if not request.query_string:
            new_query_string = "&".join(extra_parameters)
        else:
            new_query_string = request.query_string + "&" + "&".join(extra_parameters)
        new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1"))
        receive = request.receive
        request = Request(new_scope, receive)

    # Streaming mode: validate settings before fetching everything.
    if use_stream:
        # Some quick soundness checks
        if not datasette.setting("allow_csv_stream"):
            raise BadRequest("Markdown streaming is disabled")
        if request.args.get("_next"):
            raise BadRequest("_next not allowed for Markdown streaming")
        kwargs["_size"] = "max"
        # NOTE(review): this kwargs entry is never passed to the
        # fetch_data() call below, and stream_fn re-initialises its own
        # kwargs inside the loop, so "_size" appears to have no effect -
        # confirm intent against stream_csv().

    # Fetch the first page
    try:
        response_or_template_contexts = await fetch_data(request)
        if isinstance(response_or_template_contexts, Response):
            return response_or_template_contexts
        elif len(response_or_template_contexts) == 4:
            data, _, _, _ = response_or_template_contexts
        else:
            data, _, _ = response_or_template_contexts
    except (sqlite3.OperationalError, InvalidSql) as e:
        raise DatasetteError(str(e), title="Invalid SQL", status=400)

    except sqlite3.OperationalError as e:
        # NOTE(review): unreachable - OperationalError is already caught by
        # the clause above (the same pattern exists in stream_csv).
        raise DatasetteError(str(e))

    except DatasetteError:
        raise

    # Convert rows and columns to Markdown table
    headings = data["columns"]
    # if there are expanded_columns we need to add additional headings
    expanded_columns = set(data.get("expanded_columns") or [])
    if expanded_columns:
        headings = []
        for column in data["columns"]:
            headings.append(column)
            if column in expanded_columns:
                headings.append(f"{column}_label")

    content_type = "text/markdown; charset=utf-8"
    preamble = ""
    postamble = ""

    # ?_trace=1 wraps the output in an HTML textarea for debugging.
    trace = request.args.get("_trace")
    if trace:
        content_type = "text/html; charset=utf-8"
        preamble = (
            "<html><head><title>Markdown debug</title></head>"
            '<body><textarea style="width: 90%; height: 70vh">'
        )
        postamble = "</textarea></body></html>"

    def escape_markdown(value):
        # Render a cell value as text that is safe inside a Markdown table
        # cell: None becomes empty, pipes are escaped, newlines flattened.
        if value is None:
            return ""
        value = str(value)
        # Escape pipe characters and newlines for Markdown table
        value = value.replace("|", "\\|")
        value = value.replace("\n", " ")
        value = value.replace("\r", " ")
        return value

    async def stream_fn(r):
        # Writer callback handed to AsgiStream: emits the header row,
        # separator row and then each data row, following pagination when
        # streaming. Output is capped by LimitedWriter at max_csv_mb.
        nonlocal data, trace, max_rows, use_stream
        limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))
        if trace:
            await limited_writer.write(preamble)

        first = True
        next = None  # NOTE(review): shadows the builtin next()
        row_count = 0
        is_truncated = False

        while first or (next and use_stream):
            try:
                kwargs = {}
                if next:
                    kwargs["_next"] = next
                if not first:
                    data, _, _ = await fetch_data(request, **kwargs)
                if first:
                    if request.args.get("_header") != "off":
                        # Write header row
                        header_row = "| " + " | ".join(escape_markdown(h) for h in headings) + " |\n"
                        await limited_writer.write(header_row)
                        # Write separator row
                        separator_row = "| " + " | ".join(["---"] * len(headings)) + " |\n"
                        await limited_writer.write(separator_row)
                first = False
                next = data.get("next")
                for row in data["rows"]:
                    # Stop once the row limit has been reached.
                    if max_rows is not None and row_count >= max_rows:
                        is_truncated = True
                        # Also break out of the pagination loop.
                        next = None
                        break

                    # BLOB cells cannot be rendered inline - replace them
                    # with an absolute URL pointing at the blob endpoint.
                    if any(isinstance(r, bytes) for r in row):
                        new_row = []
                        for column, cell in zip(headings, row):
                            if isinstance(cell, bytes):
                                # If this is a table page, use .urls.row_blob()
                                if data.get("table"):
                                    pks = data.get("primary_keys") or []
                                    cell = datasette.absolute_url(
                                        request,
                                        datasette.urls.row_blob(
                                            database,
                                            data["table"],
                                            path_from_row_pks(row, pks, not pks),
                                            column,
                                        ),
                                    )
                                else:
                                    # Otherwise generate URL for this query
                                    url = datasette.absolute_url(
                                        request,
                                        path_with_format(
                                            request=request,
                                            format="blob",
                                            extra_qs={
                                                "_blob_column": column,
                                                "_blob_hash": hashlib.sha256(
                                                    cell
                                                ).hexdigest(),
                                            },
                                            replace_format="markdown",
                                        ),
                                    )
                                    cell = url.replace("&_nocount=1", "").replace(
                                        "&_nofacet=1", ""
                                    )
                            new_row.append(cell)
                        row = new_row

                    # Build and write the data row.
                    if not expanded_columns:
                        # Simple path
                        markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in row) + " |\n"
                        await limited_writer.write(markdown_row)
                    else:
                        # Look for {"value": "label": } dicts and expand
                        new_row = []
                        for heading, cell in zip(data["columns"], row):
                            if heading in expanded_columns:
                                if cell is None:
                                    new_row.extend(("", ""))
                                else:
                                    if not isinstance(cell, dict):
                                        new_row.extend((cell, ""))
                                    else:
                                        new_row.append(cell["value"])
                                        new_row.append(cell["label"])
                            else:
                                new_row.append(cell)
                        markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in new_row) + " |\n"
                        await limited_writer.write(markdown_row)

                    # Increment the row counter.
                    row_count += 1
            except Exception as ex:
                # Best-effort error reporting: the response has already
                # started streaming, so write the error into the body.
                sys.stderr.write("Caught this error: {}\n".format(ex))
                sys.stderr.flush()
                await r.write(str(ex))
                return

        # If truncated, append a truncation note.
        # NOTE(review): this note is Chinese user-facing text
        # ("truncated, showing first {row_count} rows") while the rest of
        # the UI is English - confirm localization intent.
        if is_truncated:
            truncation_note = f"\n*已截断,仅显示前{row_count}行*\n"
            await limited_writer.write(truncation_note)

        await limited_writer.write(postamble)

    headers = {}
    if datasette.cors:
        add_cors_headers(headers)

    # Always set Content-Disposition for Markdown export
    disposition = 'attachment; filename="{}.md"'.format(
        request.url_vars.get("table", database)
    )
    headers["content-disposition"] = disposition

    return AsgiStream(stream_fn, headers=headers, content_type=content_type)
13 changes: 11 additions & 2 deletions datasette/views/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

class DatabaseView(View):
async def get(self, request, datasette):
format_ = request.url_vars.get("format") or "html"
format_ = request.url_vars.get("format") or request.args.get("_format") or "html"

await datasette.refresh_schemas()

Expand Down Expand Up @@ -591,7 +591,7 @@ async def get(self, request, datasette):
if params.get("_timelimit"):
extra_args["custom_time_limit"] = int(params["_timelimit"])

format_ = request.url_vars.get("format") or "html"
format_ = request.url_vars.get("format") or request.args.get("_format") or "html"

query_error = None
results = None
Expand Down Expand Up @@ -651,6 +651,15 @@ async def fetch_data_for_csv(request, _next=None):
return data, None, None

return await stream_csv(datasette, fetch_data_for_csv, request, db.name)
elif format_ == "markdown":

async def fetch_data_for_markdown(request, _next=None):
    """Fetch one (and only) page of query results in the shape stream_markdown() expects.

    _next is accepted for interface compatibility with stream_markdown's
    pagination loop but is unused - arbitrary SQL queries are not paginated.
    """
    results = await db.execute(sql, params, truncate=True)
    return {"rows": results.rows, "columns": results.columns}, None, None

from datasette.views.base import stream_markdown
return await stream_markdown(datasette, fetch_data_for_markdown, request, db.name)
elif format_ in datasette.renderers.keys():
# Dispatch request to the correct output format renderer
# (CSV is not handled here due to streaming)
Expand Down
42 changes: 41 additions & 1 deletion datasette/views/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ async def table_view_traced(datasette, request):
if request.method == "POST":
return Response.text("Method not allowed", status=405)

format_ = request.url_vars.get("format") or "html"
format_ = request.url_vars.get("format") or request.args.get("_format") or "html"
extra_extras = None
context_for_html_hack = False
default_labels = False
Expand Down Expand Up @@ -1024,6 +1024,33 @@ async def fetch_data(request, _next=None):
return data, None, None

return await stream_csv(datasette, fetch_data, request, resolved.db.name)
elif format_ == "markdown":

async def fetch_data(request, _next=None):
    """Fetch one page of table data, reshaped for stream_markdown().

    Delegates to table_view_data() and folds the rows, table name, columns
    and expanded_columns back into the returned data dict.
    """
    view_data = await table_view_data(
        datasette,
        request,
        resolved,
        extra_extras=extra_extras,
        context_for_html_hack=context_for_html_hack,
        default_labels=default_labels,
        _next=_next,
    )
    data, rows, columns, expanded_columns, sql, next_url = view_data
    data.update(
        {
            "rows": rows,
            "table": resolved.table,
            "columns": columns,
            "expanded_columns": expanded_columns,
        }
    )
    return data, None, None

from datasette.views.base import stream_markdown
return await stream_markdown(datasette, fetch_data, request, resolved.db.name)
elif format_ in datasette.renderers.keys():
# Dispatch request to the correct output format renderer
# (CSV is not handled here due to streaming)
Expand Down Expand Up @@ -2045,6 +2072,12 @@ async def extra_facets_timed_out(extra_facet_results):
path_with_format(request=request, format="csv", extra_qs=url_csv_args)
)
url_csv_path = url_csv.split("?")[0]
# Markdown export URL
url_markdown_args = {**url_labels_extra}
url_markdown = datasette.urls.path(
path_with_format(request=request, format="markdown", extra_qs=url_markdown_args)
)
url_markdown_path = url_markdown.split("?")[0]
data.update(
{
"url_csv": url_csv,
Expand All @@ -2055,6 +2088,13 @@ async def extra_facets_timed_out(extra_facet_results):
if key not in ("_labels", "_facet", "_size")
]
+ [("_size", "max")],
"url_markdown": url_markdown,
"url_markdown_path": url_markdown_path,
"url_markdown_hidden_args": [
(key, value)
for key, value in urllib.parse.parse_qsl(request.query_string)
if key not in ("_labels", "_facet", "_size", "_max_rows")
],
}
)
# if no sort specified AND table has a single primary key,
Expand Down
Loading
Loading