diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js
index e1f99c1d9..7ee0f812d 100644
--- a/maxun-core/src/browserSide/scraper.js
+++ b/maxun-core/src/browserSide/scraper.js
@@ -725,6 +725,38 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
       });
     }
 
+    /**
+     * Look up a field element inside a container, relaxing the first numeric
+     * `:nth-child(N)` step of the selector when the exact selector finds nothing.
+     *
+     * Lookup order: the selector as given, then the same selector with N
+     * replaced by N-1 down to 1, then the selector with that step removed.
+     *
+     * @param rootElement Node the lookup is scoped to (passed to queryElement).
+     * @param {string} originalSelector Selector relative to rootElement.
+     * @returns The first element found, or a falsy value when nothing matches.
+     */
+    function tryFallbackSelector(rootElement, originalSelector) {
+      const exactMatch = queryElement(rootElement, originalSelector);
+      if (exactMatch) return exactMatch;
+
+      // Only numeric positions are relaxed; keywords and formulas never match here.
+      const nthChildStep = /:nth-child\((\d+)\)/;
+      const stepMatch = originalSelector.match(nthChildStep);
+      if (!stepMatch) return exactMatch;
+
+      const position = parseInt(stepMatch[1], 10);
+      for (let i = position - 1; i >= 1; i--) {
+        const candidate = queryElement(rootElement, originalSelector.replace(nthChildStep, `:nth-child(${i})`));
+        if (candidate) return candidate;
+      }
+
+      // Last resort: drop the positional step. Skip the lookup when nothing is
+      // left to query (e.g. the selector was just `:nth-child(3)`).
+      const baseSelector = originalSelector.replace(nthChildStep, '').trim();
+      return baseSelector ? queryElement(rootElement, baseSelector) : exactMatch;
+    }
+
     // Main scraping logic with context support
     let containers = queryElementAll(document, listSelector);
     containers = Array.from(containers);
@@ -902,7 +934,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
         for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
           // Get the last part of the selector after any context delimiter
           const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
-          const element = queryElement(container, relativeSelector);
+          const element = tryFallbackSelector(container, relativeSelector);
 
           if (element) {
             record[label] = extractValue(element, attribute);
diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts
index 69dd3127a..19b977074 100644
--- a/maxun-core/src/interpret.ts
+++ b/maxun-core/src/interpret.ts
@@ -825,6 +825,8 @@ export default class Interpreter extends EventEmitter {
                     button.click()
                   ]);
                   debugLog("Navigation successful after regular click");
+                  // Fixed 2 s pause after navigation. NOTE(review): Playwright discourages waitForTimeout; prefer waiting on a concrete condition.
+                  await page.waitForTimeout(2000);
                   paginationSuccess = true;
                 } catch (navError) {
                   debugLog("Regular click with navigation failed, trying dispatch event with navigation");
@@ -839,6 +841,8 @@ export default class Interpreter extends EventEmitter {
                       button.dispatchEvent('click')
                     ]);
                     debugLog("Navigation successful after dispatch event");
+                    // Fixed 2 s pause after navigation. NOTE(review): Playwright discourages waitForTimeout; prefer waiting on a concrete condition.
+                    await page.waitForTimeout(2000);
                     paginationSuccess = true;
                   } catch (dispatchNavError) {
                     try {
diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts
index 5851e7af6..181d38a12 100644
--- a/server/src/workflow-management/selector.ts
+++ b/server/src/workflow-management/selector.ts
@@ -2299,7 +2299,13 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
             return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> ');
           }
 
-          // Regular DOM path generation
+          const elementSelector = getNonUniqueSelector(element);
+
+          // A selector containing '.' is returned as-is, without the ancestor path
+          if (elementSelector.includes('.')) {
+            return elementSelector;
+          }
+
           const path: string[] = [];
           let currentElement = element;
           const MAX_DEPTH = 2;
@@ -2656,7 +2662,13 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
             return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> ');
           }
 
-          // Regular DOM path generation
+          const elementSelector = getNonUniqueSelector(element);
+
+          // A selector containing '.' is returned as-is, without the ancestor path
+          if (elementSelector.includes('.')) {
+            return elementSelector;
+          }
+
           const path: string[] = [];
           let currentElement = element;
           const MAX_DEPTH = 2;
@@ -2753,12 +2765,15 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro
           const frameElement = ownerDocument?.defaultView?.frameElement;
           if (frameElement) {
             const frameSelector = getNonUniqueSelector(frameElement as HTMLElement);
-            const isFrame = frameElement.tagName === 'FRAME';
-            // Use the appropriate delimiter based on whether it's a frame or iframe
+            // The ' :>> ' delimiter is used whatever the frame element's tag is
             return `${frameSelector} :>> ${elementSelector}`;
           }
 
-          // Regular DOM context
+          // A selector containing '.' is returned as-is, without the parent prefix
+          if (elementSelector.includes('.')) {
+            return elementSelector;
+          }
+
           const parentSelector = getNonUniqueSelector(element.parentElement);
           return `${parentSelector} > ${elementSelector}`;
         }