From c854ad4bf558d0519253e08566233862aa759ed5 Mon Sep 17 00:00:00 2001
From: muyusajiangtian <3024297095@qq.com>
Date: Wed, 29 Apr 2026 11:03:42 +0800
Subject: [PATCH 1/2] feat(export): add Markdown format export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Markdown export support for database queries and table views, with
a streaming mechanism modelled on the existing CSV export. The new
stream_markdown function handles Markdown table generation, formatting
the header row, the separator row and the data rows, and supports
expanded columns and binary data.
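For illustration, a table or query exported through this path should
render as a standard pipe-delimited Markdown table: one header row, one
separator row, then data rows. Assuming a hypothetical "mydb" database
and "cities" table, and the same _format dispatch conventions as the
CSV export (the exact URL shape is an assumption, not part of this
patch):

    GET /mydb/cities?_format=markdown

    | id | name |
    | --- | --- |
    | 1 | Paris |
    | 2 | Oslo |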
---
 datasette/views/base.py     | 181 ++++++++++++++++++++++++++++++++++++
 datasette/views/database.py |   9 ++
 datasette/views/table.py    |  27 ++++++
 3 files changed, 217 insertions(+)

diff --git a/datasette/views/base.py b/datasette/views/base.py
index e4c1c7382d..135eb5d382 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -568,3 +568,184 @@ async def stream_fn(r):
         headers["content-disposition"] = disposition
 
     return AsgiStream(stream_fn, headers=headers, content_type=content_type)
+
+
+async def stream_markdown(datasette, fetch_data, request, database):
+    kwargs = {}
+    stream = request.args.get("_stream")
+    # Do not calculate facets or counts:
+    extra_parameters = [
+        "{}=1".format(key)
+        for key in ("_nofacet", "_nocount")
+        if not request.args.get(key)
+    ]
+    if extra_parameters:
+        # Replace request object with a new one with modified scope
+        if not request.query_string:
+            new_query_string = "&".join(extra_parameters)
+        else:
+            new_query_string = request.query_string + "&" + "&".join(extra_parameters)
+        new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1"))
+        receive = request.receive
+        request = Request(new_scope, receive)
+    if stream:
+        # Some quick soundness checks
+        if not datasette.setting("allow_csv_stream"):
+            raise BadRequest("Markdown streaming is disabled")
+        if request.args.get("_next"):
+            raise BadRequest("_next not allowed for Markdown streaming")
+        kwargs["_size"] = "max"
+    # Fetch the first page
+    try:
+        response_or_template_contexts = await fetch_data(request)
+        if isinstance(response_or_template_contexts, Response):
+            return response_or_template_contexts
+        elif len(response_or_template_contexts) == 4:
+            data, _, _, _ = response_or_template_contexts
+        else:
+            data, _, _ = response_or_template_contexts
+    except (sqlite3.OperationalError, InvalidSql) as e:
+        raise DatasetteError(str(e), title="Invalid SQL", status=400)
+
+    except sqlite3.OperationalError as e:
+        raise DatasetteError(str(e))
+
+    except DatasetteError:
+        raise
+
+    # Convert rows and columns to Markdown table
+    headings = data["columns"]
+    # if there are expanded_columns we need to add additional headings
+    expanded_columns = set(data.get("expanded_columns") or [])
+    if expanded_columns:
+        headings = []
+        for column in data["columns"]:
+            headings.append(column)
+            if column in expanded_columns:
+                headings.append(f"{column}_label")
+
+    content_type = "text/markdown; charset=utf-8"
+    preamble = ""
+    postamble = ""
+
+    trace = request.args.get("_trace")
+    if trace:
+        content_type = "text/html; charset=utf-8"
+        preamble = (
+            "Markdown debug\n"
+            '<textarea style="width: 90%; height: 70vh">'
+        )
+        postamble = "</textarea>"
+
+    def escape_markdown(value):
+        if value is None:
+            return ""
+        value = str(value)
+        # Escape pipe characters and newlines for Markdown table
+        value = value.replace("|", "\\|")
+        value = value.replace("\n", " ")
+        value = value.replace("\r", " ")
+        return value
+
+    async def stream_fn(r):
+        nonlocal data, trace
+        limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))
+        if trace:
+            await limited_writer.write(preamble)
+
+        first = True
+        next = None
+        while first or (next and stream):
+            try:
+                kwargs = {}
+                if next:
+                    kwargs["_next"] = next
+                if not first:
+                    data, _, _ = await fetch_data(request, **kwargs)
+                if first:
+                    if request.args.get("_header") != "off":
+                        # Write header row
+                        header_row = "| " + " | ".join(escape_markdown(h) for h in headings) + " |\n"
+                        await limited_writer.write(header_row)
+                        # Write separator row
+                        separator_row = "| " + " | ".join(["---"] * len(headings)) + " |\n"
+                        await limited_writer.write(separator_row)
+                first = False
+                next = data.get("next")
+                for row in data["rows"]:
+                    if any(isinstance(r, bytes) for r in row):
+                        new_row = []
+                        for column, cell in zip(headings, row):
+                            if isinstance(cell, bytes):
+                                # If this is a table page, use .urls.row_blob()
+                                if data.get("table"):
+                                    pks = data.get("primary_keys") or []
+                                    cell = datasette.absolute_url(
+                                        request,
+                                        datasette.urls.row_blob(
+                                            database,
+                                            data["table"],
+                                            path_from_row_pks(row, pks, not pks),
+                                            column,
+                                        ),
+                                    )
+                                else:
+                                    # Otherwise generate URL for this query
+                                    url = datasette.absolute_url(
+                                        request,
+                                        path_with_format(
+                                            request=request,
+                                            format="blob",
+                                            extra_qs={
+                                                "_blob_column": column,
+                                                "_blob_hash": hashlib.sha256(
+                                                    cell
+                                                ).hexdigest(),
+                                            },
+                                            replace_format="markdown",
+                                        ),
+                                    )
+                                    cell = url.replace("&_nocount=1", "").replace(
+                                        "&_nofacet=1", ""
+                                    )
+                            new_row.append(cell)
+                        row = new_row
+                    if not expanded_columns:
+                        # Simple path
+                        markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in row) + " |\n"
+                        await limited_writer.write(markdown_row)
+                    else:
+                        # Look for {"value": ..., "label": ...} dicts and expand
+                        new_row = []
+                        for heading, cell in zip(data["columns"], row):
+                            if heading in expanded_columns:
+                                if cell is None:
+                                    new_row.extend(("", ""))
+                                else:
+                                    if not isinstance(cell, dict):
+                                        new_row.extend((cell, ""))
+                                    else:
+                                        new_row.append(cell["value"])
+                                        new_row.append(cell["label"])
+                            else:
+                                new_row.append(cell)
+                        markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in new_row) + " |\n"
+                        await limited_writer.write(markdown_row)
+            except Exception as ex:
+                sys.stderr.write("Caught this error: {}\n".format(ex))
+                sys.stderr.flush()
+                await r.write(str(ex))
+                return
+        await limited_writer.write(postamble)
+
+    headers = {}
+    if datasette.cors:
+        add_cors_headers(headers)
+
+    # Always set Content-Disposition for Markdown export
+    disposition = 'attachment; filename="{}.md"'.format(
+        request.url_vars.get("table", database)
+    )
+    headers["content-disposition"] = disposition
+
+    return AsgiStream(stream_fn, headers=headers, content_type=content_type)
diff --git a/datasette/views/database.py b/datasette/views/database.py
index faf870d032..44d24fb9bf 100644
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@@ -651,6 +651,15 @@ async def fetch_data_for_csv(request, _next=None):
             return data, None, None
 
         return await stream_csv(datasette, fetch_data_for_csv, request, db.name)
+    elif format_ == "markdown":
+
+        async def fetch_data_for_markdown(request, _next=None):
+            results = await db.execute(sql, params, truncate=True)
+            data = {"rows": results.rows, "columns": results.columns}
+            return data, None, None
+
+        from datasette.views.base import stream_markdown
+        return await stream_markdown(datasette, fetch_data_for_markdown, request, db.name)
     elif format_ in datasette.renderers.keys():
         # Dispatch request to the correct output format renderer
         # (CSV is not handled here due to streaming)
diff --git a/datasette/views/table.py b/datasette/views/table.py
index 7027bb10f3..0930d3a097 100644
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@@ -1024,6 +1024,33 @@ async def fetch_data(request, _next=None):
             return data, None, None
 
         return await stream_csv(datasette, fetch_data, request, resolved.db.name)
+    elif format_ == "markdown":
+
+        async def fetch_data(request, _next=None):
+            (
+                data,
+                rows,
+                columns,
+                expanded_columns,
+                sql,
+                next_url,
+            ) = await table_view_data(
+                datasette,
+                request,
+                resolved,
+                extra_extras=extra_extras,
+                context_for_html_hack=context_for_html_hack,
+                default_labels=default_labels,
+                _next=_next,
+            )
+            data["rows"] = rows
+            data["table"] = resolved.table
+            data["columns"] = columns
+            data["expanded_columns"] = expanded_columns
+            return data, None, None
+
+        from datasette.views.base import stream_markdown
+        return await stream_markdown(datasette, fetch_data, request, resolved.db.name)
     elif format_ in datasette.renderers.keys():
         # Dispatch request to the correct output format renderer
         # (CSV is not handled here due to streaming)
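Note on PATCH 1's expanded-column branch: label-expanded foreign key
cells arrive as {"value": ..., "label": ...} dicts and are split into
two cells, matching the extra "<column>_label" heading. A minimal
standalone sketch of that transformation (the helper name is
hypothetical, the logic mirrors the expanded_columns branch of
stream_markdown above):

    # Sketch: dict cells become (value, label) pairs; plain cells keep
    # an empty label so the row stays aligned with the headings.
    def expand_row(columns, row, expanded_columns):
        new_row = []
        for heading, cell in zip(columns, row):
            if heading in expanded_columns:
                if cell is None:
                    new_row.extend(("", ""))
                elif not isinstance(cell, dict):
                    new_row.extend((cell, ""))
                else:
                    new_row.extend((cell["value"], cell["label"]))
            else:
                new_row.append(cell)
        return new_row

    # expand_row(["id", "country"], [1, {"value": 5, "label": "France"}], {"country"})
    # -> [1, 5, "France"]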
From af4a24a5b221fbb7cd00bf7d2831213e1346a72d Mon Sep 17 00:00:00 2001
From: muyusajiangtian <3024297095@qq.com>
Date: Wed, 29 Apr 2026 11:32:31 +0800
Subject: [PATCH 2/2] feat(views): add a row limit to Markdown streaming
 output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a _max_rows parameter that caps the number of rows in Markdown
output, with _stream=1 bypassing the limit. A note is appended when the
output is truncated, preventing performance problems with very large
result sets.
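For example (URL shapes assumed, following the same conventions as the
CSV export):

    /mydb/cities?_format=markdown                  first 500 rows (default)
    /mydb/cities?_format=markdown&_max_rows=100    first 100 rows
    /mydb/cities?_format=markdown&_max_rows=50000  capped at 10000 rows
    /mydb/cities?_format=markdown&_stream=1        all rows, limit ignored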
---
 datasette/views/base.py | 57 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/datasette/views/base.py b/datasette/views/base.py
index 135eb5d382..08e9c6a52c 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -571,8 +571,33 @@ async def stream_fn(r):
 
 
 async def stream_markdown(datasette, fetch_data, request, database):
+    # Row limit configuration
+    DEFAULT_MAX_ROWS = 500
+    MAX_ALLOWED_ROWS = 10000
+
+    # Parse the _max_rows parameter
+    max_rows_param = request.args.get("_max_rows")
+    if max_rows_param:
+        try:
+            max_rows = int(max_rows_param)
+            # Clamp to the maximum allowed value
+            max_rows = min(max_rows, MAX_ALLOWED_ROWS)
+            # Clamp to a minimum of 0
+            max_rows = max(max_rows, 0)
+        except ValueError:
+            # Invalid parameter, fall back to the default
+            max_rows = DEFAULT_MAX_ROWS
+    else:
+        max_rows = DEFAULT_MAX_ROWS
+
+    # Whether to stream the full result set
+    use_stream = request.args.get("_stream")
+
+    # _stream=1 ignores the row limit and fetches everything
+    if use_stream:
+        max_rows = None  # None means no limit
+
     kwargs = {}
-    stream = request.args.get("_stream")
     # Do not calculate facets or counts:
     extra_parameters = [
         "{}=1".format(key)
@@ -588,13 +613,16 @@ async def stream_markdown(datasette, fetch_data, request, database):
         new_scope = dict(request.scope, query_string=new_query_string.encode("latin-1"))
         receive = request.receive
         request = Request(new_scope, receive)
-    if stream:
+
+    # Streaming mode fetches the full result set
+    if use_stream:
         # Some quick soundness checks
         if not datasette.setting("allow_csv_stream"):
             raise BadRequest("Markdown streaming is disabled")
         if request.args.get("_next"):
             raise BadRequest("_next not allowed for Markdown streaming")
         kwargs["_size"] = "max"
+
     # Fetch the first page
     try:
         response_or_template_contexts = await fetch_data(request)
@@ -648,14 +676,17 @@ def escape_markdown(value):
         return value
 
     async def stream_fn(r):
-        nonlocal data, trace
+        nonlocal data, trace, max_rows, use_stream
         limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))
         if trace:
             await limited_writer.write(preamble)
 
         first = True
         next = None
-        while first or (next and stream):
+        row_count = 0
+        is_truncated = False
+
+        while first or (next and use_stream):
             try:
                 kwargs = {}
                 if next:
@@ -673,6 +704,13 @@ async def stream_fn(r):
                 first = False
                 next = data.get("next")
                 for row in data["rows"]:
+                    # Stop once the row limit is reached
+                    if max_rows is not None and row_count >= max_rows:
+                        is_truncated = True
+                        # Exit the paging loop
+                        next = None
+                        break
+
                     if any(isinstance(r, bytes) for r in row):
                         new_row = []
                         for column, cell in zip(headings, row):
@@ -710,6 +748,8 @@ async def stream_fn(r):
                                     )
                             new_row.append(cell)
                         row = new_row
+
+                    # Build and write the data row
                     if not expanded_columns:
                         # Simple path
                         markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in row) + " |\n"
@@ -731,11 +771,20 @@ async def stream_fn(r):
                                 new_row.append(cell)
                         markdown_row = "| " + " | ".join(escape_markdown(cell) for cell in new_row) + " |\n"
                         await limited_writer.write(markdown_row)
+
+                    # Count the row
+                    row_count += 1
             except Exception as ex:
                 sys.stderr.write("Caught this error: {}\n".format(ex))
                 sys.stderr.flush()
                 await r.write(str(ex))
                 return
+
+        # If truncated, append a truncation note
+        if is_truncated:
+            truncation_note = f"\n*Truncated: showing the first {row_count} rows*\n"
+            await limited_writer.write(truncation_note)
+
         await limited_writer.write(postamble)
 
         headers = {}
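One way to exercise the truncation note end to end is Datasette's
internal client. A minimal pytest-style sketch, assuming an in-memory
database and the _format=markdown dispatch from PATCH 1 (the test
itself is not part of these patches, and the URL shape is an
assumption):

    import urllib.parse

    import pytest
    from datasette.app import Datasette


    @pytest.mark.asyncio
    async def test_markdown_truncation_note():
        ds = Datasette(memory=True)
        # 600 rows from a recursive CTE exceeds the default 500-row limit
        sql = (
            "with recursive n(x) as (select 1 union all "
            "select x + 1 from n limit 600) select x from n"
        )
        qs = urllib.parse.urlencode({"sql": sql, "_format": "markdown"})
        response = await ds.client.get("/_memory?" + qs)
        assert response.status_code == 200
        assert "*Truncated: showing the first 500 rows*" in response.text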