diff --git a/datasette/templates/table.html b/datasette/templates/table.html
index c841e1bed0..6fa1781b88 100644
--- a/datasette/templates/table.html
+++ b/datasette/templates/table.html
@@ -124,7 +124,7 @@
     {{ extra_wheres_for_ui|length }} extra where clause{% if extra_wheres_for_ui|length != 1 %}s{% endif %}:
     View and edit SQL
 {% endif %}
-
+
 {% if suggested_facets %}
     {% include "_suggested_facets.html" %}
@@ -186,6 +186,20 @@

     Advanced export

     {% endfor %}
+
+    <form action="{{ url_markdown_path }}" method="get">
+        <p>
+            Markdown options:
+            {% if expandable_columns %}<label><input type="checkbox" name="_labels" checked> expand labels</label>{% endif %}
+            {% if next_url and settings.allow_csv_stream %}<label><input type="checkbox" name="_stream"> stream all rows</label>{% endif %}
+            <label>max rows <input type="text" name="_max_rows" value="500"></label>
+            <input type="submit" value="Export Markdown">
+            {% for key, value in url_markdown_hidden_args %}
+                <input type="hidden" name="{{ key }}" value="{{ value }}">
+            {% endfor %}
+        </p>
+    </form>
 {% endif %}
diff --git a/datasette/views/base.py b/datasette/views/base.py
index e4c1c7382d..c3a36f5ae2 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -206,16 +206,21 @@ async def data(self, request):
     async def as_csv(self, request, database):
         return await stream_csv(self.ds, self.data, request, database)

+    async def as_markdown(self, request, database):
+        return await stream_markdown(self.ds, self.data, request, database)
+
     async def get(self, request):
         db = await self.ds.resolve_database(request)
         database = db.name
         database_route = db.route
-        _format = request.url_vars["format"]
+        _format = request.url_vars["format"] or request.args.get("_format")
         data_kwargs = {}

         if _format == "csv":
             return await self.as_csv(request, database_route)

+        elif _format == "markdown":
+            return await self.as_markdown(request, database_route)
+
         if _format is None:
             # HTML views default to expanding all foreign key labels
@@ -568,3 +573,233 @@ async def stream_fn(r):
         headers["content-disposition"] = disposition

     return AsgiStream(stream_fn, headers=headers, content_type=content_type)
+
+
+async def stream_markdown(datasette, fetch_data, request, database):
+    # Row limit configuration
+    DEFAULT_MAX_ROWS = 500
+    MAX_ALLOWED_ROWS = 10000
+
+    # Parse the _max_rows parameter
+    max_rows_param = request.args.get("_max_rows")
+    if max_rows_param:
+        try:
+            max_rows = int(max_rows_param)
+            # Clamp to the maximum allowed value
+            max_rows = min(max_rows, MAX_ALLOWED_ROWS)
+            # Never allow a negative limit
+            max_rows = max(max_rows, 0)
+        except ValueError:
+            # Invalid value - fall back to the default
+            max_rows = DEFAULT_MAX_ROWS
+    else:
+        max_rows = DEFAULT_MAX_ROWS
+
+    # Whether to stream the entire result set
+    use_stream = request.args.get("_stream")
+
+    # _stream=1 disables the row limit (fetch everything)
+    if use_stream:
+        max_rows = None  # None means no limit
+
+    kwargs = {}
+    # Do not calculate facets or counts:
+    extra_parameters = [
+        "{}=1".format(key)
+        for key in ("_nofacet", "_nocount")
+        if not request.args.get(key)
+    ]
+    if extra_parameters:
+        # Replace request object with a new one with modified scope
+        if not request.query_string:
+            new_query_string = "&".join(extra_parameters)
+        else:
+            new_query_string = (
+                request.query_string + "&" + "&".join(extra_parameters)
+            )
+        new_scope = dict(
+            request.scope, query_string=new_query_string.encode("latin-1")
+        )
+        receive = request.receive
+        request = Request(new_scope, receive)
+
+    # Streaming mode: fetch the entire result set
+    if use_stream:
+        # Some quick soundness checks
+        if not datasette.setting("allow_csv_stream"):
+            raise BadRequest("Markdown streaming is disabled")
+        if request.args.get("_next"):
+            raise BadRequest("_next not allowed for Markdown streaming")
+        kwargs["_size"] = "max"
+
+    # Fetch the first page
+    try:
+        response_or_template_contexts = await fetch_data(request)
+        if isinstance(response_or_template_contexts, Response):
+            return response_or_template_contexts
+        elif len(response_or_template_contexts) == 4:
+            data, _, _, _ = response_or_template_contexts
+        else:
+            data, _, _ = response_or_template_contexts
+    except (sqlite3.OperationalError, InvalidSql) as e:
+        raise DatasetteError(str(e), title="Invalid SQL", status=400)
+    except sqlite3.OperationalError as e:
+        raise DatasetteError(str(e))
+    except DatasetteError:
+        raise
+
+    # Convert rows and columns to a Markdown table
+    headings = data["columns"]
+    # If there are expanded_columns we need to add additional headings
+    expanded_columns = set(data.get("expanded_columns") or [])
+    if expanded_columns:
+        headings = []
+        for column in data["columns"]:
+            headings.append(column)
+            if column in expanded_columns:
+                headings.append(f"{column}_label")
+
+    content_type = "text/markdown; charset=utf-8"
+    preamble = ""
+    postamble = ""
+
+    trace = request.args.get("_trace")
+    if trace:
+        content_type = "text/html; charset=utf-8"
+        preamble = (
+            "<html><head><title>Markdown debug</title></head>"
+            '<body><textarea style="width: 90%; height: 70vh">'
+        )
+        postamble = "</textarea></body></html>"
+
+    def escape_markdown(value):
+        if value is None:
+            return ""
+        value = str(value)
+        # Escape pipe characters and newlines for Markdown table cells
+        value = value.replace("|", "\\|")
+        value = value.replace("\n", " ")
+        value = value.replace("\r", " ")
+        return value
+
+    async def stream_fn(r):
+        nonlocal data, trace, max_rows, use_stream
+        limited_writer = LimitedWriter(r, datasette.setting("max_csv_mb"))
+        if trace:
+            await limited_writer.write(preamble)
+
+        first = True
+        next = None
+        row_count = 0
+        is_truncated = False
+
+        while first or (next and use_stream):
+            try:
+                kwargs = {}
+                if next:
+                    kwargs["_next"] = next
+                if not first:
+                    data, _, _ = await fetch_data(request, **kwargs)
+                if first:
+                    if request.args.get("_header") != "off":
+                        # Write the header row
+                        header_row = (
+                            "| "
+                            + " | ".join(escape_markdown(h) for h in headings)
+                            + " |\n"
+                        )
+                        await limited_writer.write(header_row)
+                        # Write the separator row
+                        separator_row = (
+                            "| " + " | ".join(["---"] * len(headings)) + " |\n"
+                        )
+                        await limited_writer.write(separator_row)
+                    first = False
+                next = data.get("next")
+                for row in data["rows"]:
+                    # Stop once the row limit has been reached
+                    if max_rows is not None and row_count >= max_rows:
+                        is_truncated = True
+                        # Exit the pagination loop as well
+                        next = None
+                        break
+
+                    if any(isinstance(r, bytes) for r in row):
+                        new_row = []
+                        for column, cell in zip(headings, row):
+                            if isinstance(cell, bytes):
+                                # If this is a table page, use .urls.row_blob()
+                                if data.get("table"):
+                                    pks = data.get("primary_keys") or []
+                                    cell = datasette.absolute_url(
+                                        request,
+                                        datasette.urls.row_blob(
+                                            database,
+                                            data["table"],
+                                            path_from_row_pks(row, pks, not pks),
+                                            column,
+                                        ),
+                                    )
+                                else:
+                                    # Otherwise generate URL for this query
+                                    url = datasette.absolute_url(
+                                        request,
+                                        path_with_format(
+                                            request=request,
+                                            format="blob",
+                                            extra_qs={
+                                                "_blob_column": column,
+                                                "_blob_hash": hashlib.sha256(
+                                                    cell
+                                                ).hexdigest(),
+                                            },
+                                            replace_format="markdown",
+                                        ),
+                                    )
+                                    cell = url.replace("&_nocount=1", "").replace(
+                                        "&_nofacet=1", ""
+                                    )
+                            new_row.append(cell)
+                        row = new_row
+
+                    # Build and write the data row
+                    if not expanded_columns:
+                        # Simple path
+                        markdown_row = (
+                            "| "
+                            + " | ".join(escape_markdown(cell) for cell in row)
+                            + " |\n"
+                        )
+                        await limited_writer.write(markdown_row)
+                    else:
+                        # Look for {"value": ..., "label": ...} dicts and expand
+                        new_row = []
+                        for heading, cell in zip(data["columns"], row):
+                            if heading in expanded_columns:
+                                if cell is None:
+                                    new_row.extend(("", ""))
+                                else:
+                                    if not isinstance(cell, dict):
+                                        new_row.extend((cell, ""))
+                                    else:
+                                        new_row.append(cell["value"])
+                                        new_row.append(cell["label"])
+                            else:
+                                new_row.append(cell)
+                        markdown_row = (
+                            "| "
+                            + " | ".join(escape_markdown(cell) for cell in new_row)
+                            + " |\n"
+                        )
+                        await limited_writer.write(markdown_row)
+
+                    # Count this row
+                    row_count += 1
+            except Exception as ex:
+                sys.stderr.write("Caught this error: {}\n".format(ex))
+                sys.stderr.flush()
+                await r.write(str(ex))
+                return
+
+        # If the output was truncated, append a note
+        if is_truncated:
+            truncation_note = f"\n*Truncated: showing the first {row_count} rows*\n"
+            await limited_writer.write(truncation_note)
+
+        await limited_writer.write(postamble)
+
+    headers = {}
+    if datasette.cors:
+        add_cors_headers(headers)
+
+    # Always set Content-Disposition for Markdown export
+    disposition = 'attachment; filename="{}.md"'.format(
+        request.url_vars.get("table", database)
+    )
+    headers["content-disposition"] = disposition
+
+    return AsgiStream(stream_fn, headers=headers, content_type=content_type)
diff --git a/datasette/views/database.py b/datasette/views/database.py
index faf870d032..10018afca2 100644
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@@ -40,7 +40,7 @@ class DatabaseView(View):
     async def get(self, request, datasette):
-        format_ = request.url_vars.get("format") or "html"
+        format_ = request.url_vars.get("format") or request.args.get("_format") or "html"

         await datasette.refresh_schemas()

@@ -591,7 +591,7 @@ async def get(self, request, datasette):
         if params.get("_timelimit"):
             extra_args["custom_time_limit"] = int(params["_timelimit"])

-        format_ = request.url_vars.get("format") or "html"
+        format_ = request.url_vars.get("format") or request.args.get("_format") or "html"

         query_error = None
         results = None
@@ -651,6 +651,15 @@ async def fetch_data_for_csv(request, _next=None):
                 return data, None, None

             return await stream_csv(datasette, fetch_data_for_csv, request, db.name)
+        elif format_ == "markdown":
+
+            async def fetch_data_for_markdown(request, _next=None):
+                results = await db.execute(sql, params, truncate=True)
+                data = {"rows": results.rows, "columns": results.columns}
+                return data, None, None
+
+            from datasette.views.base import stream_markdown
+
+            return await stream_markdown(
+                datasette, fetch_data_for_markdown, request, db.name
+            )
         elif format_ in datasette.renderers.keys():
             # Dispatch request to the correct output format renderer
             # (CSV is not handled here due to streaming)
diff --git a/datasette/views/table.py b/datasette/views/table.py
index 7027bb10f3..82844774f5 100644
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@@ -976,7 +976,7 @@ async def table_view_traced(datasette, request):
     if request.method == "POST":
         return Response.text("Method not allowed", status=405)

-    format_ = request.url_vars.get("format") or "html"
+    format_ = request.url_vars.get("format") or request.args.get("_format") or "html"
     extra_extras = None
     context_for_html_hack = False
     default_labels = False
@@ -1024,6 +1024,33 @@ async def fetch_data(request, _next=None):
             return data, None, None

         return await stream_csv(datasette, fetch_data, request, resolved.db.name)
+    elif format_ == "markdown":
+
+        async def fetch_data(request, _next=None):
+            (
+                data,
+                rows,
+                columns,
+                expanded_columns,
+                sql,
+                next_url,
+            ) = await table_view_data(
+                datasette,
+                request,
+                resolved,
+                extra_extras=extra_extras,
+                context_for_html_hack=context_for_html_hack,
+                default_labels=default_labels,
+                _next=_next,
+            )
+            data["rows"] = rows
+            data["table"] = resolved.table
+            data["columns"] = columns
+            data["expanded_columns"] = expanded_columns
+            return data, None, None
+
+        from datasette.views.base import stream_markdown
+
+        return await stream_markdown(datasette, fetch_data, request, resolved.db.name)
     elif format_ in datasette.renderers.keys():
         # Dispatch request to the correct output format renderer
         # (CSV is not handled here due to streaming)
@@ -2045,6 +2072,12 @@ async def extra_facets_timed_out(extra_facet_results):
        path_with_format(request=request, format="csv", extra_qs=url_csv_args)
    )
    url_csv_path = url_csv.split("?")[0]
+   # Markdown export URL
+   url_markdown_args = {**url_labels_extra}
+   url_markdown = datasette.urls.path(
+       path_with_format(
+           request=request, format="markdown", extra_qs=url_markdown_args
+       )
+   )
+   url_markdown_path = url_markdown.split("?")[0]
    data.update(
        {
            "url_csv": url_csv,
@@ -2055,6 +2088,13 @@
                if key not in ("_labels", "_facet", "_size")
            ]
            + [("_size", "max")],
+           "url_markdown": url_markdown,
+           "url_markdown_path": url_markdown_path,
+           "url_markdown_hidden_args": [
+               (key, value)
+               for key, value in urllib.parse.parse_qsl(request.query_string)
+               if key not in ("_labels", "_facet", "_size", "_max_rows")
+           ],
        }
    )
    # if no sort specified AND table has a single primary key,
diff --git a/test.md b/test.md
new file mode 100644
index 0000000000..1f8623cfee
--- /dev/null
+++ b/test.md
@@ -0,0 +1,1564 @@
+[1,564 lines of saved HTML export output, titled "big: logs: 5,000 rows"; markup stripped during extraction and not reproduced here]
\ No newline at end of file