From 1bdc63851d2329f3fa1f07027adc862d89f24df9 Mon Sep 17 00:00:00 2001 From: aplio Date: Thu, 30 Jan 2025 21:51:11 +0900 Subject: [PATCH 1/2] feature. add feat to modify metadata via dataset api --- .../service_api/dataset/document.py | 98 +++++++++ api/services/dataset_service.py | 15 ++ .../knowledge_entities/knowledge_entities.py | 6 + .../datasets/template/template.en.mdx | 199 +++++++++++++++++ .../datasets/template/template.zh.mdx | 204 ++++++++++++++++++ 5 files changed, 522 insertions(+) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 2e148dd84c05da..b4c3a4c6075781 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -18,6 +18,7 @@ from controllers.service_api.dataset.error import ( ArchivedDocumentImmutableError, DocumentIndexingError, + InvalidMetadataError, ) from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_resource_check from core.errors.error import ProviderTokenNotInitError @@ -50,6 +51,9 @@ def post(self, tenant_id, dataset_id): "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json" ) parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json") + parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json") + parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json") + args = parser.parse_args() dataset_id = str(dataset_id) tenant_id = str(tenant_id) @@ -61,6 +65,28 @@ def post(self, tenant_id, dataset_id): if not dataset.indexing_technique and not args["indexing_technique"]: raise ValueError("indexing_technique is required.") + # Validate metadata if provided + if args.get("doc_type") or args.get("doc_metadata"): + if not args.get("doc_type") or not args.get("doc_metadata"): + raise InvalidMetadataError("Both doc_type and doc_metadata 
must be provided when adding metadata") + + if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA: + raise InvalidMetadataError( + "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys()) + ) + + if not isinstance(args["doc_metadata"], dict): + raise InvalidMetadataError("doc_metadata must be a dictionary") + + # Validate metadata schema based on doc_type + if args["doc_type"] != "others": + metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]] + for key, value in args["doc_metadata"].items(): + if key in metadata_schema and not isinstance(value, metadata_schema[key]): + raise InvalidMetadataError(f"Invalid type for metadata field {key}") + # set to MetaDataConfig + args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]} + text = args.get("text") name = args.get("name") if text is None or name is None: @@ -107,6 +133,8 @@ def post(self, tenant_id, dataset_id, document_id): "doc_language", type=str, default="English", required=False, nullable=False, location="json" ) parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json") + parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json") + parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json") args = parser.parse_args() dataset_id = str(dataset_id) tenant_id = str(tenant_id) @@ -115,6 +143,29 @@ def post(self, tenant_id, dataset_id, document_id): if not dataset: raise ValueError("Dataset is not exist.") + # Validate metadata if provided + if args.get("doc_type") or args.get("doc_metadata"): + if not args.get("doc_type") or not args.get("doc_metadata"): + raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata") + + if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA: + raise InvalidMetadataError( + "Invalid doc_type. 
Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys()) + ) + + if not isinstance(args["doc_metadata"], dict): + raise InvalidMetadataError("doc_metadata must be a dictionary") + + # Validate metadata schema based on doc_type + if args["doc_type"] != "others": + metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]] + for key, value in args["doc_metadata"].items(): + if key in metadata_schema and not isinstance(value, metadata_schema[key]): + raise InvalidMetadataError(f"Invalid type for metadata field {key}") + + # set to MetaDataConfig + args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]} + if args["text"]: text = args.get("text") name = args.get("name") @@ -161,6 +212,30 @@ def post(self, tenant_id, dataset_id): args["doc_form"] = "text_model" if "doc_language" not in args: args["doc_language"] = "English" + + # Validate metadata if provided + if args.get("doc_type") or args.get("doc_metadata"): + if not args.get("doc_type") or not args.get("doc_metadata"): + raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata") + + if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA: + raise InvalidMetadataError( + "Invalid doc_type. 
Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys()) + ) + + if not isinstance(args["doc_metadata"], dict): + raise InvalidMetadataError("doc_metadata must be a dictionary") + + # Validate metadata schema based on doc_type + if args["doc_type"] != "others": + metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]] + for key, value in args["doc_metadata"].items(): + if key in metadata_schema and not isinstance(value, metadata_schema[key]): + raise InvalidMetadataError(f"Invalid type for metadata field {key}") + + # set to MetaDataConfig + args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]} + # get dataset info dataset_id = str(dataset_id) tenant_id = str(tenant_id) @@ -228,6 +303,29 @@ def post(self, tenant_id, dataset_id, document_id): if "doc_language" not in args: args["doc_language"] = "English" + # Validate metadata if provided + if args.get("doc_type") or args.get("doc_metadata"): + if not args.get("doc_type") or not args.get("doc_metadata"): + raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata") + + if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA: + raise InvalidMetadataError( + "Invalid doc_type. 
Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys()) + ) + + if not isinstance(args["doc_metadata"], dict): + raise InvalidMetadataError("doc_metadata must be a dictionary") + + # Validate metadata schema based on doc_type + if args["doc_type"] != "others": + metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]] + for key, value in args["doc_metadata"].items(): + if key in metadata_schema and not isinstance(value, metadata_schema[key]): + raise InvalidMetadataError(f"Invalid type for metadata field {key}") + + # set to MetaDataConfig + args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]} + # get dataset info dataset_id = str(dataset_id) tenant_id = str(tenant_id) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index c4059337367c1a..38025b5213aaa3 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -42,6 +42,7 @@ from services.entities.knowledge_entities.knowledge_entities import ( ChildChunkUpdateArgs, KnowledgeConfig, + MetaDataConfig, RerankingModel, RetrievalModel, SegmentUpdateArgs, @@ -894,6 +895,9 @@ def save_document_with_dataset_id( document.data_source_info = json.dumps(data_source_info) document.batch = batch document.indexing_status = "waiting" + if knowledge_config.metadata: + document.doc_type = knowledge_config.metadata.doc_type + document.doc_metadata = knowledge_config.metadata.doc_metadata db.session.add(document) documents.append(document) duplicate_document_ids.append(document.id) @@ -910,6 +914,7 @@ def save_document_with_dataset_id( account, file_name, batch, + knowledge_config.metadata, ) db.session.add(document) db.session.flush() @@ -965,6 +970,7 @@ def save_document_with_dataset_id( account, page.page_name, batch, + knowledge_config.metadata, ) db.session.add(document) db.session.flush() @@ -1005,6 +1011,7 @@ def save_document_with_dataset_id( account, document_name, batch, + 
knowledge_config.metadata, ) db.session.add(document) db.session.flush() @@ -1042,6 +1049,7 @@ def build_document( account: Account, name: str, batch: str, + metadata: Optional[MetaDataConfig] = None, ): document = Document( tenant_id=dataset.tenant_id, @@ -1057,6 +1065,9 @@ def build_document( doc_form=document_form, doc_language=document_language, ) + if metadata is not None: + document.doc_metadata = metadata.doc_metadata + document.doc_type = metadata.doc_type return document @staticmethod @@ -1169,6 +1180,10 @@ def update_document_with_dataset_id( # update document name if document_data.name: document.name = document_data.name + # update doc_type and doc_metadata if provided + if document_data.metadata is not None: + document.doc_metadata = document_data.metadata.doc_metadata + document.doc_type = document_data.metadata.doc_type # update document to be waiting document.indexing_status = "waiting" document.completed_at = None diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py index 8d6a246b6428d0..f14c5b513a8687 100644 --- a/api/services/entities/knowledge_entities/knowledge_entities.py +++ b/api/services/entities/knowledge_entities/knowledge_entities.py @@ -93,6 +93,11 @@ class RetrievalModel(BaseModel): score_threshold: Optional[float] = None +class MetaDataConfig(BaseModel): + doc_type: str + doc_metadata: dict + + class KnowledgeConfig(BaseModel): original_document_id: Optional[str] = None duplicate: bool = True @@ -105,6 +110,7 @@ class KnowledgeConfig(BaseModel): embedding_model: Optional[str] = None embedding_model_provider: Optional[str] = None name: Optional[str] = None + metadata: Optional[MetaDataConfig] = None class SegmentUpdateArgs(BaseModel): diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index 3fa22a1620ed9e..ac57e3aef2233b 100644 --- 
a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -47,6 +47,44 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi Document content + + Type of document (optional): + - book Book + - web_page Web page + - paper Academic paper/article + - social_media_post Social media post + - wikipedia_entry Wikipedia entry + - personal_document Personal document + - business_document Business document + - im_chat_log Chat log + - synced_from_notion Notion document + - synced_from_github GitHub document + - others Other document types + + + Document metadata (required if doc_type is provided). Fields vary by doc_type: + For book: + - title Book title + - language Book language + - author Book author + - publisher Publisher name + - publication_date Publication date + - isbn ISBN number + - category Book category + + For web_page: + - title Page title + - url Page URL + - language Page language + - publish_date Publish date + - author/publisher Author or publisher + - topic/keywords Topic or keywords + - description Page description + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. 
+ + For doc_type "others", any valid JSON object is accepted + Index mode - high_quality High quality: embedding using embedding model, built as vector database index @@ -195,6 +233,68 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - hierarchical_model Parent-child mode - qa_model Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions + - doc_type Type of document (optional) + - book Book + Document records a book or publication + - web_page Web page + Document records web page content + - paper Academic paper/article + Document records academic paper or research article + - social_media_post Social media post + Content from social media posts + - wikipedia_entry Wikipedia entry + Content from Wikipedia entries + - personal_document Personal document + Documents related to personal content + - business_document Business document + Documents related to business content + - im_chat_log Chat log + Records of instant messaging chats + - synced_from_notion Notion document + Documents synchronized from Notion + - synced_from_github GitHub document + Documents synchronized from GitHub + - others Other document types + Other document types not listed above + + - doc_metadata Document metadata (required if doc_type is provided) + Fields vary by doc_type: + + For book: + - title Book title + Title of the book + - language Book language + Language of the book + - author Book author + Author of the book + - publisher Publisher name + Name of the publishing house + - publication_date Publication date + Date when the book was published + - isbn ISBN number + International Standard Book Number + - category Book category + Category or genre of the book + + For web_page: + - title Page title + Title of the web page + - url Page URL + URL address of the web page + - language Page language + Language of the web page + - publish_date Publish date + Date when the web page was published + - author/publisher Author or 
publisher + Author or publisher of the web page + - topic/keywords Topic or keywords + Topics or keywords of the web page + - description Page description + Description of the web page content + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. + For doc_type "others", any valid JSON object is accepted + - doc_language In Q&A mode, specify the language of the document, for example: English, Chinese - process_rule Processing rules @@ -307,6 +407,44 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi Knowledge description (optional) + + Type of document (optional): + - book Book + - web_page Web page + - paper Academic paper/article + - social_media_post Social media post + - wikipedia_entry Wikipedia entry + - personal_document Personal document + - business_document Business document + - im_chat_log Chat log + - synced_from_notion Notion document + - synced_from_github GitHub document + - others Other document types + + + Document metadata (required if doc_type is provided). Fields vary by doc_type: + For book: + - title Book title + - language Book language + - author Book author + - publisher Publisher name + - publication_date Publication date + - isbn ISBN number + - category Book category + + For web_page: + - title Page title + - url Page URL + - language Page language + - publish_date Publish date + - author/publisher Author or publisher + - topic/keywords Topic or keywords + - description Page description + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. 
+ + For doc_type "others", any valid JSON object is accepted + Index technique (optional) - high_quality High quality @@ -624,6 +762,67 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - separator Segmentation identifier. Currently, only one delimiter is allowed. The default is *** - max_tokens The maximum length (tokens) must be validated to be shorter than the length of the parent chunk - chunk_overlap Define the overlap between adjacent chunks (optional) + - doc_type Type of document (optional) + - book Book + Document records a book or publication + - web_page Web page + Document records web page content + - paper Academic paper/article + Document records academic paper or research article + - social_media_post Social media post + Content from social media posts + - wikipedia_entry Wikipedia entry + Content from Wikipedia entries + - personal_document Personal document + Documents related to personal content + - business_document Business document + Documents related to business content + - im_chat_log Chat log + Records of instant messaging chats + - synced_from_notion Notion document + Documents synchronized from Notion + - synced_from_github GitHub document + Documents synchronized from GitHub + - others Other document types + Other document types not listed above + + - doc_metadata Document metadata (required if doc_type is provided) + Fields vary by doc_type: + + For book: + - title Book title + Title of the book + - language Book language + Language of the book + - author Book author + Author of the book + - publisher Publisher name + Name of the publishing house + - publication_date Publication date + Date when the book was published + - isbn ISBN number + International Standard Book Number + - category Book category + Category or genre of the book + + For web_page: + - title Page title + Title of the web page + - url Page URL + URL address of the web page + - language Page language + Language of the web page + - 
publish_date Publish date + Date when the web page was published + - author/publisher Author or publisher + Author or publisher of the web page + - topic/keywords Topic or keywords + Topics or keywords of the web page + - description Page description + Description of the web page content + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. + For doc_type "others", any valid JSON object is accepted diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index 334591743f931b..7bd617b55bc731 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -47,6 +47,46 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi 文档内容 + + 文档类型(选填) + - book 图书 Book + - web_page 网页 Web page + - paper 学术论文/文章 Academic paper/article + - social_media_post 社交媒体帖子 Social media post + - wikipedia_entry 维基百科条目 Wikipedia entry + - personal_document 个人文档 Personal document + - business_document 商业文档 Business document + - im_chat_log 即时通讯记录 Chat log + - synced_from_notion Notion同步文档 Notion document + - synced_from_github GitHub同步文档 GitHub document + - others 其他文档类型 Other document types + + + + 文档元数据(如提供文档类型则必填)。字段因文档类型而异: + + 针对图书 For book: + - title 书名 Book title + - language 图书语言 Book language + - author 作者 Book author + - publisher 出版社 Publisher name + - publication_date 出版日期 Publication date + - isbn ISBN号码 ISBN number + - category 图书分类 Book category + + 针对网页 For web_page: + - title 页面标题 Page title + - url 页面网址 Page URL + - language 页面语言 Page language + - publish_date 发布日期 Publish date + - author/publisher 作者/发布者 Author or publisher + - topic/keywords 主题/关键词 Topic or keywords + - description 页面描述 Page description + + 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + + 
针对"其他"类型文档,接受任何有效的JSON对象 + 索引方式 - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 @@ -194,6 +234,68 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - text_model text 文档直接 embedding,经济模式默认为该模式 - hierarchical_model parent-child 模式 - qa_model Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding + - doc_type 文档类型(选填)Type of document (optional) + - book 图书 + 文档记录一本书籍或出版物 + - web_page 网页 + 网页内容的文档记录 + - paper 学术论文/文章 + 学术论文或研究文章的记录 + - social_media_post 社交媒体帖子 + 社交媒体上的帖子内容 + - wikipedia_entry 维基百科条目 + 维基百科的词条内容 + - personal_document 个人文档 + 个人相关的文档记录 + - business_document 商业文档 + 商业相关的文档记录 + - im_chat_log 即时通讯记录 + 即时通讯的聊天记录 + - synced_from_notion Notion同步文档 + 从Notion同步的文档内容 + - synced_from_github GitHub同步文档 + 从GitHub同步的文档内容 + - others 其他文档类型 + 其他未列出的文档类型 + + - doc_metadata 文档元数据(如提供文档类型则必填 + 字段因文档类型而异 + + 针对图书类型 For book: + - title 书名 + 书籍的标题 + - language 图书语言 + 书籍的语言 + - author 作者 + 书籍的作者 + - publisher 出版社 + 出版社的名称 + - publication_date 出版日期 + 书籍的出版日期 + - isbn ISBN号码 + 书籍的ISBN编号 + - category 图书分类 + 书籍的分类类别 + + 针对网页类型 For web_page: + - title 页面标题 + 网页的标题 + - url 页面网址 + 网页的URL地址 + - language 页面语言 + 网页的语言 + - publish_date 发布日期 + 网页的发布日期 + - author/publisher 作者/发布者 + 网页的作者或发布者 + - topic/keywords 主题/关键词 + 网页的主题或关键词 + - description 页面描述 + 网页的描述信息 + + 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + + 针对"其他"类型文档,接受任何有效的JSON对象 - doc_language 在 Q&A 模式下,指定文档的语言,例如:EnglishChinese @@ -504,6 +606,46 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi 文档内容(选填) + + 文档类型(选填) + - book 图书 Book + - web_page 网页 Web page + - paper 学术论文/文章 Academic paper/article + - social_media_post 社交媒体帖子 Social media post + - wikipedia_entry 维基百科条目 Wikipedia entry + - personal_document 个人文档 Personal document + - business_document 商业文档 Business document + - im_chat_log 即时通讯记录 Chat log + - synced_from_notion Notion同步文档 Notion document + - synced_from_github GitHub同步文档 GitHub document + - others 其他文档类型 Other document types + + + + 
文档元数据(如提供文档类型则必填)。字段因文档类型而异: + + 针对图书 For book: + - title 书名 Book title + - language 图书语言 Book language + - author 作者 Book author + - publisher 出版社 Publisher name + - publication_date 出版日期 Publication date + - isbn ISBN号码 ISBN number + - category 图书分类 Book category + + 针对网页 For web_page: + - title 页面标题 Page title + - url 页面网址 Page URL + - language 页面语言 Page language + - publish_date 发布日期 Publish date + - author/publisher 作者/发布者 Author or publisher + - topic/keywords 主题/关键词 Topic or keywords + - description 页面描述 Page description + + 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + + 针对"其他"类型文档,接受任何有效的JSON对象 + 处理规则(选填) - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 @@ -624,6 +766,68 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - separator 分段标识符,目前仅允许设置一个分隔符。默认为 *** - max_tokens 最大长度 (token) 需要校验小于父级的长度 - chunk_overlap 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填) + - doc_type 文档类型(选填)Type of document (optional) + - book 图书 + 文档记录一本书籍或出版物 + - web_page 网页 + 网页内容的文档记录 + - paper 学术论文/文章 + 学术论文或研究文章的记录 + - social_media_post 社交媒体帖子 + 社交媒体上的帖子内容 + - wikipedia_entry 维基百科条目 + 维基百科的词条内容 + - personal_document 个人文档 + 个人相关的文档记录 + - business_document 商业文档 + 商业相关的文档记录 + - im_chat_log 即时通讯记录 + 即时通讯的聊天记录 + - synced_from_notion Notion同步文档 + 从Notion同步的文档内容 + - synced_from_github GitHub同步文档 + 从GitHub同步的文档内容 + - others 其他文档类型 + 其他未列出的文档类型 + + - doc_metadata 文档元数据(如提供文档类型则必填 + 字段因文档类型而异 + + 针对图书类型 For book: + - title 书名 + 书籍的标题 + - language 图书语言 + 书籍的语言 + - author 作者 + 书籍的作者 + - publisher 出版社 + 出版社的名称 + - publication_date 出版日期 + 书籍的出版日期 + - isbn ISBN号码 + 书籍的ISBN编号 + - category 图书分类 + 书籍的分类类别 + + 针对网页类型 For web_page: + - title 页面标题 + 网页的标题 + - url 页面网址 + 网页的URL地址 + - language 页面语言 + 网页的语言 + - publish_date 发布日期 + 网页的发布日期 + - author/publisher 作者/发布者 + 网页的作者或发布者 + - topic/keywords 主题/关键词 + 网页的主题或关键词 + - description 页面描述 + 网页的描述信息 + + 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + + 针对"其他"类型文档,接受任何有效的JSON对象 From 
ef39389a480e93ffeb7ddb2783bed683e931a0f7 Mon Sep 17 00:00:00 2001 From: aplio Date: Thu, 30 Jan 2025 22:15:12 +0900 Subject: [PATCH 2/2] fix. add link to zh too --- web/app/(commonLayout)/datasets/template/template.zh.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index 7bd617b55bc731..0e5857c4464d8a 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -83,7 +83,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - topic/keywords 主题/关键词 Topic or keywords - description 页面描述 Page description - 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + 请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。 针对"其他"类型文档,接受任何有效的JSON对象 @@ -293,7 +293,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - description 页面描述 网页的描述信息 - 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + 请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。 针对"其他"类型文档,接受任何有效的JSON对象 @@ -642,7 +642,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - topic/keywords 主题/关键词 Topic or keywords - description 页面描述 Page description - 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + 请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。 针对"其他"类型文档,接受任何有效的JSON对象 @@ -825,7 +825,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi - description 页面描述 网页的描述信息 - 请查看 api/services/dataset_service.py 了解各文档类型所需字段的详细信息。 + 请查看 
[api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。 针对"其他"类型文档,接受任何有效的JSON对象