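diff --git a/apps/site/docs/en/describe-elements-by-your-own.mdx b/apps/site/docs/en/describe-elements-by-your-own.mdx new file mode 100644 index 000000000..2f2a7d0b1 --- /dev/null +++ b/apps/site/docs/en/describe-elements-by-your-own.mdx @@ -0,0 +1,37 @@ +# Describe Elements by Your Own + +In some cases, you may want to describe elements by yourself. For example, you may want to describe the content inside a `<canvas>` element, which cannot be extracted by Midscene. Or you just want to "be friendly" to the LLM. + +To do this, you can put an identifier in the `midscene-description-ref` attribute of the element, and put the description JSON in a `<script>` element whose `midscene-description-id` attribute holds the same identifier. The JSON must be an array of content items: + +```html +<canvas midscene-description-ref="my-canvas" width="400" height="300"></canvas> +<script type="application/json" midscene-description-id="my-canvas"> +[ + { + "rect": { + "x": 0, + "y": 0, + "width": 200, + "height": 40 + }, + "text": "Hello" + }, + { + "rect": { + "x": 0, + "y": 60, + "width": 120, + "height": 32 + }, + "description": "The play button" + } +] +</script> +``` + +The core fields are `rect`, `text`, and `description`. + +- `rect`: The rectangle of this content. Required. All the coordinates are relative to the upper left corner of the element. +- `text`: The text of this content. Optional. Should be the same as the text on screen. +- `description`: A short description of this content. Optional. diff --git a/apps/site/docs/zh/describe-elements-by-your-own.mdx b/apps/site/docs/zh/describe-elements-by-your-own.mdx new file mode 100644 index 000000000..e2b781dc3 --- /dev/null +++ b/apps/site/docs/zh/describe-elements-by-your-own.mdx @@ -0,0 +1,37 @@ +# 自定义页面元素的描述信息 + +在某些情况下,你可能需要自定义页面元素的描述。例如,你可能需要描述页面中无法被 Midscene 提取的 `<canvas>` 元素的内容。或者你只是单纯想对 LLM 示好。 + +为此,你可以在元素上添加 `midscene-description-ref` 属性并填入一个标识符,然后在 `<script>` 元素的 `midscene-description-id` 属性中填入相同的标识符,并将描述信息的 JSON 写入该元素。JSON 必须是一个由内容项组成的数组: + +```html +<canvas midscene-description-ref="my-canvas" width="400" height="300"></canvas> +<script type="application/json" midscene-description-id="my-canvas"> +[ + { + "rect": { + "x": 0, + "y": 0, + "width": 200, + "height": 40 + }, + "text": "Hello" + }, + { + "rect": { + "x": 0, + "y": 60, + "width": 120, + "height": 32 + }, + "description": "播放按钮" + } +] +</script> +``` + +这里的关键字段是 `rect`、`text` 和 `description`。 + +- `rect`: 该内容的坐标区域。必选。所有坐标都是相对于此元素的左上角。 +- `text`: 该内容的文本内容。可选。应与屏幕上的文本一致。 +- `description`: 该内容的简短描述。可选。 diff --git a/apps/site/rspress.config.ts b/apps/site/rspress.config.ts index f223bd17a..dd4900b0c 100644 --- a/apps/site/rspress.config.ts +++ b/apps/site/rspress.config.ts @@ -90,6 +90,10 @@ export default defineConfig({ text: 'Customize Model and Provider', link: '/model-provider', }, + { + text: 'Describe Elements by Your Own', + link: '/describe-elements-by-your-own', + }, ], }, { @@ -147,6 +151,10 @@ export default defineConfig({ text: '自定义模型和服务商', 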
link: '/zh/model-provider', }, + { + text: '自定义页面元素的描述信息', + link: '/zh/describe-elements-by-your-own', + }, ], }, { diff --git a/packages/web-integration/src/extractor/dom-util.ts b/packages/web-integration/src/extractor/dom-util.ts index 42b08e54b..29791e121 100644 --- a/packages/web-integration/src/extractor/dom-util.ts +++ b/packages/web-integration/src/extractor/dom-util.ts @@ -1,3 +1,14 @@ +/** Attribute whose value names the description that belongs to the element carrying it. */ export const USER_DESCRIBED_ELEMENT_ATTRIBUTE_REF = 'midscene-description-ref'; +/** Attribute the extractor matches against the ref value to locate the element holding the description. */ export const USER_DESCRIBED_ELEMENT_ATTRIBUTE_ID = 'midscene-description-id'; + +/** Returns true when node is an Element that has the midscene-description-ref attribute, false for any other node. */ export function isUserDescribedElement(node: Node): boolean { + if (node instanceof Element) { + return node.hasAttribute(USER_DESCRIBED_ELEMENT_ATTRIBUTE_REF); + } + + return false; +} + export function isFormElement(node: Node) { return ( node instanceof HTMLElement && @@ -54,6 +65,10 @@ export function isContainerElement(node: Node): node is HTMLElement { return false; } + if (includeUserDescribedElement(node)) { /* a node with a user-described descendant is not reported as a container */ + return false; + } + const computedStyle = window.getComputedStyle(node); const backgroundColor = computedStyle.getPropertyValue('background-color'); if (backgroundColor) { @@ -63,6 +78,17 @@ export function isContainerElement(node: Node): node is HTMLElement { return false; } +/** Returns true when node is an Element with at least one descendant carrying the midscene-description-ref attribute; querySelectorAll does not match node itself. */ function includeUserDescribedElement(node: Node) { + if (node instanceof Element) { + const selector = `[${USER_DESCRIBED_ELEMENT_ATTRIBUTE_REF}]`; + const elements = node.querySelectorAll(selector); + if (elements.length > 0) { + return true; + } + } + return false; +} + function includeBaseElement(node: Node) { if (!(node instanceof HTMLElement)) return false; @@ -72,6 +98,7 @@ function includeBaseElement(node: Node) { } const includeList = [ + 'canvas', 'svg', 'button', 'input', diff --git a/packages/web-integration/src/extractor/web-extractor.ts b/packages/web-integration/src/extractor/web-extractor.ts index 70f7fffa2..ec2279bc9 100644 --- a/packages/web-integration/src/extractor/web-extractor.ts +++ 
b/packages/web-integration/src/extractor/web-extractor.ts @@ -5,11 +5,14 @@ import { } from '@midscene/shared/constants'; import type { ElementInfo } from '.'; import { + USER_DESCRIBED_ELEMENT_ATTRIBUTE_ID, + USER_DESCRIBED_ELEMENT_ATTRIBUTE_REF, isButtonElement, isContainerElement, isFormElement, isImgElement, isTextElement, + isUserDescribedElement, } from './dom-util'; import { getDocument, @@ -40,7 +43,7 @@ function tagNameOfNode(node: Node): string { } const parentElement = node.parentElement; - if (parentElement && parentElement instanceof HTMLElement) { + if (!tagName && parentElement && parentElement instanceof HTMLElement) { /* take the parent tag name only while tagName is still empty */ tagName = parentElement.tagName.toLowerCase(); } @@ -51,7 +54,7 @@ function collectElementInfo( node: Node, nodePath: string, baseZoom = 1, -): WebElementInfo | null { +): WebElementInfo | WebElementInfo[] | null { /* an array is returned for a user-described element, one entry per described item */ const rect = visibleRect(node, baseZoom); if ( !rect || @@ -60,6 +63,88 @@ function collectElementInfo( ) { return null; } + + if (isUserDescribedElement(node)) { /* user-described element: build the infos from the JSON held by the element whose midscene-description-id equals this ref */ + console.log('isUserDescribedElement', node); /* NOTE(review): looks like a leftover debug log - confirm whether it should ship */ + const element = node as Element; + const descriptionId = element.getAttribute( + USER_DESCRIBED_ELEMENT_ATTRIBUTE_REF, + ); + const targetSelector = `[${USER_DESCRIBED_ELEMENT_ATTRIBUTE_ID}="${descriptionId}"]`; /* NOTE(review): descriptionId is interpolated unescaped; a value containing a double quote would presumably make querySelectorAll throw, and that call sits outside the try block below - confirm */ + const targetElement = document.querySelectorAll(targetSelector); + if (targetElement.length === 0) { + console.warn( + 'cannot find element for Midscene description', + targetSelector, + ); + return null; + } + + if (targetElement.length > 1) { /* only warns; the first match is the one used below */ + console.warn( + 'multiple elements found for Midscene description', + targetSelector, + ); + } + + const descriptionElement = targetElement[0] as HTMLElement; + const description = descriptionElement.innerText; + try { + const descriptionJson = JSON.parse(description); + if (!Array.isArray(descriptionJson)) { /* NOTE(review): reached when the text parsed as JSON but is not an array, so the warning text below is misleading */ + console.warn('description is not a valid JSON', description); + return null; + } + + const infoList = descriptionJson.map((item, index) => { + const 
overallRect = { /* item.rect is relative to the element, so it is offset by the left/top of the rect returned by visibleRect; NOTE(review): item.rect is not validated, a missing rect throws and is handled by the catch below */ + left: item.rect.x + rect.left, + top: item.rect.y + rect.top, + width: item.rect.width, + height: item.rect.height, + }; + const center: [number, number] = [ + Math.round(overallRect.left + overallRect.width / 2), + Math.round(overallRect.top + overallRect.height / 2), + ]; + const nodeType = item.text ? NodeType.TEXT : NodeType.CONTAINER; /* items with text become TEXT nodes, all others CONTAINER */ + const nodeHashId = midsceneGenerateHash( + null, + `${index}-${item.text || ''}`, + overallRect, + ); + return { + id: nodeHashId, + indexId: indexId++, + zoom: 1, + locator: '', + nodeType, + nodePath, + nodeHashId, + attributes: { + ...(item.description ? { description: item.description } : {}), + nodeType, + }, + content: item.text || '', + rect: overallRect, + center, + screenWidth: window.innerWidth, + screenHeight: window.innerHeight, + }; + }); + + return infoList; + } catch (e) { /* reached for a JSON.parse failure and for any throw while mapping the items */ + console.error(e); + console.warn( + 'description is not a valid JSON', + targetSelector, + description, + ); + return null; + } + } + if (isFormElement(node)) { const attributes = getNodeAttributes(node); let valueContent = @@ -266,6 +351,12 @@ export function extractTextWithPosition( } const elementInfo = collectElementInfo(node, nodePath, baseZoom); + + if (Array.isArray(elementInfo)) { /* user-described element: append every described item, then return null */ + elementInfoArray.push(...elementInfo); + return null; + } + // stop collecting if the node is a Button or Image if ( elementInfo?.nodeType === NodeType.BUTTON || diff --git a/packages/web-integration/tests/ai/web/puppeteer/utils.ts b/packages/web-integration/tests/ai/web/puppeteer/utils.ts index 1a29e85db..32f723203 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/utils.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/utils.ts @@ -44,6 +44,11 @@ export async function launchPage( const pages = await browser.pages(); await Promise.all(pages.map((page) => page.close())); await browser.close(); + await sleep(1000); /* NOTE(review): fixed one second pause after browser.close(); the reason is not visible here - confirm it is needed */ }, }; } + +/** Returns a promise that resolves after ms milliseconds. */ async function sleep(ms: number) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} 
diff --git a/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap b/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap index d373445fc..7c58bcc7c 100644 --- a/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap +++ b/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap @@ -268,7 +268,7 @@ exports[`extractor > basic 1`] = ` }, { "attributes": { - "htmlTagName": "
", + "htmlTagName": "