From c854ad4bf558d0519253e08566233862aa759ed5 Mon Sep 17 00:00:00 2001
From: muyusajiangtian <3024297095@qq.com>
Date: Wed, 29 Apr 2026 11:03:42 +0800
Subject: [PATCH 1/2] feat(export): add Markdown-format export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add Markdown-format export support for database queries and table views,
using a streaming mechanism modelled on the existing CSV export. The new
stream_markdown function generates the Markdown table, formatting the
header row, separator row and data rows, and also handles expanded
columns and binary data.
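
For reference, the streamed output is a standard pipe table. A sketch of
the expected shape with made-up values (note the escaped pipe):

    | id | title |
    | --- | --- |
    | 1 | hello \| world |

Pipe characters are escaped as \| and embedded newlines are replaced with
spaces, so each record stays on a single table row.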
---
datasette/views/base.py | 181 ++++++++++++++++++++++++++++++++++++
datasette/views/database.py | 9 ++
datasette/views/table.py | 27 ++++++
3 files changed, 217 insertions(+)
diff --git a/datasette/views/base.py b/datasette/views/base.py
index e4c1c7382d..135eb5d382 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -568,3 +568,184 @@ async def stream_fn(r):
headers["content-disposition"] = disposition
return AsgiStream(stream_fn, headers=headers, content_type=content_type)
+
+
+async def stream_markdown(datasette, fetch_data, request, database):
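+    """Stream query results as a Markdown pipe table; modelled on stream_csv."""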
+ kwargs = {}
+ stream = request.args.get("_stream")
+ # Do not calculate facets or counts:
+ extra_parameters = [
+ "{}=1".format(key)
+ for key in ("_nofacet", "_nocount")
+ if not request.args.get(key)
+ ]
+ if extra_parameters:
+ # Replace request object with a new one with modified scope
+ if not request.query_string:
+ new_query_string = "&".join(extra_parameters)
+ else:
+ new_query_string = request.query_string + "&" + "&".join(extra_parameters)
+ new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1"))
+ receive = request.receive
+ request = Request(new_scope, receive)
+ if stream:
+ # Some quick soundness checks
+ if not datasette.setting("allow_csv_stream"):
+ raise BadRequest("Markdown streaming is disabled")
+ if request.args.get("_next"):
+ raise BadRequest("_next not allowed for Markdown streaming")
+ kwargs["_size"] = "max"
+ # Fetch the first page
+ try:
+ response_or_template_contexts = await fetch_data(request)
+ if isinstance(response_or_template_contexts, Response):
+ return response_or_template_contexts
+ elif len(response_or_template_contexts) == 4:
+ data, _, _, _ = response_or_template_contexts
+ else:
+ data, _, _ = response_or_template_contexts
+ except (sqlite3.OperationalError, InvalidSql) as e:
+ raise DatasetteError(str(e), title="Invalid SQL", status=400)
+
+ except sqlite3.OperationalError as e:
+ raise DatasetteError(str(e))
+
+ except DatasetteError:
+ raise
+
+ # Convert rows and columns to Markdown table
+ headings = data["columns"]
+ # if there are expanded_columns we need to add additional headings
+ expanded_columns = set(data.get("expanded_columns") or [])
+ if expanded_columns:
+ headings = []
+ for column in data["columns"]:
+ headings.append(column)
+ if column in expanded_columns:
+ headings.append(f"{column}_label")
+
+ content_type = "text/markdown; charset=utf-8"
+ preamble = ""
+ postamble = ""
+
+ trace = request.args.get("_trace")
+ if trace:
+ content_type = "text/html; charset=utf-8"
+        preamble = (
+            "<html><head><title>Markdown debug</title></head>"
+            '<body><textarea style="width: 90%; height: 70vh">'
+        )
+        postamble = "</textarea></body></html>"
+
+ def escape_markdown(value):
+ if value is None:
+ return ""
+ value = str(value)
+ # Escape pipe characters and newlines for Markdown table
+ value = value.replace("|", "\\|")
+ value = value.replace("\n", " ")
+ value = value.replace("\r", " ")
+ return value
+
+ async def stream_fn(r):
+ nonlocal data, trace
+ limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))
+ if trace:
+ await limited_writer.write(preamble)
+
+ first = True
+ next = None
+ while first or (next and stream):
+ try:
+ kwargs = {}
+ if next:
+ kwargs["_next"] = next
+ if not first:
+ data, _, _ = await fetch_data(request, **kwargs)
+ if first:
+ if request.args.get("_header") != "off":
+ # Write header row
+ header_row = "| " + " | ".join(escape_markdown(h) for h in headings) + " |\n"
+ await limited_writer.write(header_row)
+ # Write separator row
+ separator_row = "| " + " | ".join(["---"] * len(headings)) + " |\n"
+ await limited_writer.write(separator_row)
+ first = False
+ next = data.get("next")
+ for row in data["rows"]:
+ if any(isinstance(r, bytes) for r in row):
+ new_row = []
+ for column, cell in zip(headings, row):
+ if isinstance(cell, bytes):
+ # If this is a table page, use .urls.row_blob()
+ if data.get("table"):
+ pks = data.get("primary_keys") or []
+ cell = datasette.absolute_url(
+ request,
+ datasette.urls.row_blob(
+ database,
+ data["table"],
+ path_from_row_pks(row, pks, not pks),
+ column,
+ ),
+ )
+ else:
+ # Otherwise generate URL for this query
+ url = datasette.absolute_url(
+ request,
+ path_with_format(
+ request=request,
+ format="blob",
+ extra_qs={
+ "_blob_column": column,
+ "_blob_hash": hashlib.sha256(
+ cell
+ ).hexdigest(),
+ },
+ replace_format="markdown",
+ ),
+ )
+ cell = url.replace("&_nocount=1", "").replace(
+ "&_nofacet=1", ""
+ )
+ new_row.append(cell)
+ row = new_row
+ if not expanded_columns:
+ # Simple path
+ markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in row) + " |\n"
+ await limited_writer.write(markdown_row)
+ else:
+                    # Look for {"value": ..., "label": ...} dicts and expand
+ new_row = []
+ for heading, cell in zip(data["columns"], row):
+ if heading in expanded_columns:
+ if cell is None:
+ new_row.extend(("", ""))
+ else:
+ if not isinstance(cell, dict):
+ new_row.extend((cell, ""))
+ else:
+ new_row.append(cell["value"])
+ new_row.append(cell["label"])
+ else:
+ new_row.append(cell)
+ markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in new_row) + " |\n"
+ await limited_writer.write(markdown_row)
+ except Exception as ex:
+ sys.stderr.write("Caught this error: {}\n".format(ex))
+ sys.stderr.flush()
+ await r.write(str(ex))
+ return
+ await limited_writer.write(postamble)
+
+ headers = {}
+ if datasette.cors:
+ add_cors_headers(headers)
+
+ # Always set Content-Disposition for Markdown export
+ disposition = 'attachment; filename="{}.md"'.format(
+ request.url_vars.get("table", database)
+ )
+ headers["content-disposition"] = disposition
+
+ return AsgiStream(stream_fn, headers=headers, content_type=content_type)
diff --git a/datasette/views/database.py b/datasette/views/database.py
index faf870d032..44d24fb9bf 100644
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@@ -651,6 +651,15 @@ async def fetch_data_for_csv(request, _next=None):
return data, None, None
return await stream_csv(datasette, fetch_data_for_csv, request, db.name)
+ elif format_ == "markdown":
+
+ async def fetch_data_for_markdown(request, _next=None):
+ results = await db.execute(sql, params, truncate=True)
+ data = {"rows": results.rows, "columns": results.columns}
+ return data, None, None
+
+ from datasette.views.base import stream_markdown
+ return await stream_markdown(datasette, fetch_data_for_markdown, request, db.name)
elif format_ in datasette.renderers.keys():
# Dispatch request to the correct output format renderer
# (CSV is not handled here due to streaming)
diff --git a/datasette/views/table.py b/datasette/views/table.py
index 7027bb10f3..0930d3a097 100644
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@@ -1024,6 +1024,33 @@ async def fetch_data(request, _next=None):
return data, None, None
return await stream_csv(datasette, fetch_data, request, resolved.db.name)
+ elif format_ == "markdown":
+
+ async def fetch_data(request, _next=None):
+ (
+ data,
+ rows,
+ columns,
+ expanded_columns,
+ sql,
+ next_url,
+ ) = await table_view_data(
+ datasette,
+ request,
+ resolved,
+ extra_extras=extra_extras,
+ context_for_html_hack=context_for_html_hack,
+ default_labels=default_labels,
+ _next=_next,
+ )
+ data["rows"] = rows
+ data["table"] = resolved.table
+ data["columns"] = columns
+ data["expanded_columns"] = expanded_columns
+ return data, None, None
+
+ from datasette.views.base import stream_markdown
+ return await stream_markdown(datasette, fetch_data, request, resolved.db.name)
elif format_ in datasette.renderers.keys():
# Dispatch request to the correct output format renderer
# (CSV is not handled here due to streaming)
From af4a24a5b221fbb7cd00bf7d2831213e1346a72d Mon Sep 17 00:00:00 2001
From: muyusajiangtian <3024297095@qq.com>
Date: Wed, 29 Apr 2026 11:32:31 +0800
Subject: [PATCH 2/2] feat(views): add a row limit to Markdown streaming output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add a _max_rows parameter to cap the number of rows in the Markdown
output; _stream=1 bypasses the cap. A note is appended when the output is
truncated, preventing performance problems with very large result sets.
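
Illustrative requests (hypothetical database and table names, assuming
Datasette's usual format resolution):

    /mydb/mytable.markdown?_max_rows=100   at most 100 data rows
    /mydb/mytable.markdown?_stream=1       no cap, full result set

When the cap is hit, the table is followed by a note of the form:

    *Truncated: showing the first 100 rows*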
---
datasette/views/base.py | 57 ++++++++++++++++++++++++++++++++++++++---
1 file changed, 53 insertions(+), 4 deletions(-)
diff --git a/datasette/views/base.py b/datasette/views/base.py
index 135eb5d382..08e9c6a52c 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -571,8 +571,33 @@ async def stream_fn(r):
async def stream_markdown(datasette, fetch_data, request, database):
+    # Row limit configuration
+    DEFAULT_MAX_ROWS = 500
+    MAX_ALLOWED_ROWS = 10000
+
+    # Parse the _max_rows parameter
+    max_rows_param = request.args.get("_max_rows")
+    if max_rows_param:
+        try:
+            max_rows = int(max_rows_param)
+            # Cap at the maximum allowed value
+            max_rows = min(max_rows, MAX_ALLOWED_ROWS)
+            # Clamp to a minimum of 0
+            max_rows = max(max_rows, 0)
+        except ValueError:
+            # Invalid value: fall back to the default
+            max_rows = DEFAULT_MAX_ROWS
+    else:
+        max_rows = DEFAULT_MAX_ROWS
+
+    # Whether to stream the full result set
+    use_stream = request.args.get("_stream")
+
+    # _stream=1 bypasses the row limit (fetch everything)
+    if use_stream:
+        max_rows = None  # None means no limit
+
kwargs = {}
- stream = request.args.get("_stream")
# Do not calculate facets or counts:
extra_parameters = [
"{}=1".format(key)
@@ -588,13 +613,16 @@ async def stream_markdown(datasette, fetch_data, request, database):
new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1"))
receive = request.receive
request = Request(new_scope, receive)
- if stream:
+
+    # Streaming mode: fetch the entire result set
+ if use_stream:
# Some quick soundness checks
if not datasette.setting("allow_csv_stream"):
raise BadRequest("Markdown streaming is disabled")
if request.args.get("_next"):
raise BadRequest("_next not allowed for Markdown streaming")
kwargs["_size"] = "max"
+
# Fetch the first page
try:
response_or_template_contexts = await fetch_data(request)
@@ -648,14 +676,17 @@ def escape_markdown(value):
return value
async def stream_fn(r):
- nonlocal data, trace
+ nonlocal data, trace, max_rows, use_stream
limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))
if trace:
await limited_writer.write(preamble)
first = True
next = None
- while first or (next and stream):
+ row_count = 0
+ is_truncated = False
+
+ while first or (next and use_stream):
try:
kwargs = {}
if next:
@@ -673,6 +704,13 @@ async def stream_fn(r):
first = False
next = data.get("next")
for row in data["rows"]:
+                    # Stop once the row limit is reached
+                    if max_rows is not None and row_count >= max_rows:
+                        is_truncated = True
+                        # Also exit the outer while loop
+                        next = None
+                        break
+
if any(isinstance(r, bytes) for r in row):
new_row = []
for column, cell in zip(headings, row):
@@ -710,6 +748,8 @@ async def stream_fn(r):
)
new_row.append(cell)
row = new_row
+
+                    # Build and write the data row
if not expanded_columns:
# Simple path
markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in row) + " |\n"
@@ -731,11 +771,20 @@ async def stream_fn(r):
new_row.append(cell)
markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in new_row) + " |\n"
await limited_writer.write(markdown_row)
+
+                    # Increment the row counter
+ row_count += 1
except Exception as ex:
sys.stderr.write("Caught this error: {}\n".format(ex))
sys.stderr.flush()
await r.write(str(ex))
return
+
+            # If the output was truncated, append a note
+            if is_truncated:
+                truncation_note = f"\n*Truncated: showing the first {row_count} rows*\n"
+ await limited_writer.write(truncation_note)
+
await limited_writer.write(postamble)
headers = {}