Skip to content

Commit 7665340

Browse files
committed
feat: add integration in javascript
1 parent 788107b commit 7665340

File tree

7 files changed

+1192
-3
lines changed

7 files changed

+1192
-3
lines changed

scrapegraph-js/README.md

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,43 @@ const numberOfScrolls = 10; // Will scroll 10 times to load more content
131131

132132
The `numberOfScrolls` parameter accepts values between 0 and 100, allowing you to control how many times the page should be scrolled before extraction.
133133

134+
#### Interactive Steps (New!)
135+
136+
For complex websites that require user interaction, you can use the `steps` parameter to perform interactive actions before extraction:
137+
138+
```javascript
139+
import { smartScraper } from 'scrapegraph-js';
140+
141+
const apiKey = 'your-api-key';
142+
const url = 'https://github.com/';
143+
const prompt = 'Extract user profile information';
144+
145+
// Interactive steps for website navigation
146+
const steps = [
147+
'click on search bar',
148+
'wait for 500ms',
149+
'fill search with "javascript"',
150+
'wait for 1 second',
151+
'click on first user result'
152+
];
153+
154+
(async () => {
155+
try {
156+
const response = await smartScraper(apiKey, url, prompt, null, null, steps);
157+
console.log('Extracted data after navigation:', response);
158+
} catch (error) {
159+
console.error('Error:', error);
160+
}
161+
})();
162+
```
163+
164+
**Supported Step Types:**
165+
- **Navigation**: `'click on search bar'`, `'click on menu'`
166+
- **Input**: `'fill search with "term"'`, `'fill email with user@example.com'`
167+
- **Wait**: `'wait for 500ms'`, `'wait for 2 seconds'`
168+
- **Scrolling**: `'scroll to bottom'`, `'scroll to contact form'`
169+
- **Complex Actions**: `'click on first result'`, `'click on load more button'`
170+
134171
### Search Scraping
135172

136173
Search and extract information from multiple web sources using AI.
@@ -284,7 +321,7 @@ const websiteHtml = `<html>
284321
Converts a webpage into clean, well-structured markdown format.
285322

286323
```javascript
287-
import { smartScraper } from 'scrapegraph-js';
324+
import { markdownify } from 'scrapegraph-js';
288325

289326
const apiKey = 'your_api_key';
290327
const url = 'https://scrapegraphai.com/';
@@ -299,6 +336,40 @@ const url = 'https://scrapegraphai.com/';
299336
})();
300337
```
301338

339+
#### Markdownify with Interactive Steps
340+
341+
For complex websites that require navigation before conversion, you can use the `steps` parameter:
342+
343+
```javascript
344+
import { markdownify } from 'scrapegraph-js';
345+
346+
const apiKey = 'your_api_key';
347+
const url = 'https://scrapegraphai.com/';
348+
349+
// Interactive steps for website navigation before conversion
350+
const steps = [
351+
'click on accept cookies',
352+
'wait for 1 second',
353+
'click on main content',
354+
'scroll to article section'
355+
];
356+
357+
(async () => {
358+
try {
359+
const response = await markdownify(apiKey, url, steps);
360+
console.log('Markdown after navigation:', response);
361+
} catch (error) {
362+
console.error(error);
363+
}
364+
})();
365+
```
366+
367+
This is particularly useful for:
368+
- **SPA Navigation**: Navigating single-page applications
369+
- **Content Expansion**: Expanding collapsible content sections
370+
- **Cookie Consent**: Handling cookie consent dialogs
371+
- **Gated Content**: Accessing content behind forms or authentication
372+
302373
### Checking API Credits
303374

304375
```javascript
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
import { markdownify, getMarkdownifyRequest } from 'scrapegraph-js';
2+
import fs from 'fs';
3+
import 'dotenv/config';
4+
5+
/**
6+
* Example demonstrating how to use the Markdownify with interactive steps.
7+
* This example shows how to navigate websites before converting to markdown.
8+
*
9+
* Interactive steps allow you to:
10+
* - Navigate to specific sections before conversion
11+
* - Click on elements to expand content
12+
* - Fill forms to access gated content
13+
* - Wait for dynamic content to load
14+
* - Perform authentication flows
15+
*/
16+
17+
// Configuration
const apiKey = process.env.SGAI_APIKEY;
const url = 'https://scrapegraphai.com/';

// Interactive steps for website navigation before markdown conversion.
// The address below is a placeholder; replace it with a real value for your target form.
const steps = [
  'click on search bar',
  'wait for 500ms',
  'fill email input box with user@example.com',
  'wait a sec',
  'click on the first result of search',
  'wait for 2 seconds to load the result of search',
];

console.log('🚀 Starting Markdownify with Interactive Steps...');
console.log(`🌐 Website URL: ${url}`);
console.log(`🎯 Interactive Steps: ${steps.length} steps configured`);
console.log('📝 Goal: Convert website to clean markdown format after navigation');
console.log(`\n${'='.repeat(60)}`);

// Display interactive steps
console.log('🎯 Interactive Steps to Execute:');
steps.forEach((step, index) => {
  console.log(` ${index + 1}. ${step}`);
});
console.log(`\n${'='.repeat(60)}`);

// Start timer
const startTime = Date.now();
console.log(`⏱️ Timer started at: ${new Date(startTime).toLocaleTimeString()}`);
console.log('🔄 Processing markdown conversion with interactive steps...');

try {
  // Fail fast with a clear message instead of sending a request without credentials;
  // the catch block below prints the troubleshooting hints.
  if (!apiKey) {
    throw new Error('SGAI_APIKEY is not set');
  }

  // Make request with interactive steps
  const response = await markdownify(apiKey, url, steps);

  // Calculate execution time
  const endTime = Date.now();
  const executionTime = (endTime - startTime) / 1000;
  const executionMinutes = executionTime / 60;

  console.log(`⏱️ Timer stopped at: ${new Date(endTime).toLocaleTimeString()}`);
  console.log(`⚡ Total execution time: ${executionTime.toFixed(2)} seconds (${executionMinutes.toFixed(2)} minutes)`);
  console.log(`📊 Performance: ${executionTime.toFixed(1)}s (${executionMinutes.toFixed(1)}m) for markdown conversion with ${steps.length} steps`);

  // Display results
  const markdownContent = response.result || '';
  console.log(`📊 Request ID: ${response.request_id || 'N/A'}`);
  console.log(`🔄 Status: ${response.status || 'N/A'}`);

  if (response.error) {
    // Report the failure without first announcing success.
    console.log(`❌ Error: ${response.error}`);
  } else {
    console.log('✅ Request completed successfully!');
    console.log(`📝 Content Length: ${markdownContent.length} characters`);

    console.log('\n📋 MARKDOWN CONVERSION RESULTS:');
    console.log('='.repeat(60));

    // Display markdown statistics
    const lines = markdownContent.split('\n');
    const words = markdownContent.split(/\s+/).filter((word) => word.length > 0);
    console.log('📊 Statistics:');
    console.log(` - Total Lines: ${lines.length}`);
    console.log(` - Total Words: ${words.length}`);
    console.log(` - Total Characters: ${markdownContent.length}`);
    console.log(` - Processing Speed: ${Math.round(markdownContent.length / executionTime)} chars/second`);
    console.log(` - Steps Efficiency: ${(executionTime / steps.length).toFixed(2)}s per step`);

    // Display first 500 characters
    console.log('\n🔍 First 500 characters:');
    console.log('-'.repeat(50));
    console.log(markdownContent.substring(0, 500));
    if (markdownContent.length > 500) {
      console.log('...');
    }
    console.log('-'.repeat(50));

    // Save to file (timestamped so repeated runs do not overwrite each other)
    const filename = `markdownify_steps_output_${Date.now()}.md`;
    saveMarkdownToFile(markdownContent, filename);

    // Display content analysis
    analyzeMarkdownContent(markdownContent, steps);
  }
} catch (error) {
  const endTime = Date.now();
  const executionTime = (endTime - startTime) / 1000;
  const executionMinutes = executionTime / 60;

  console.log(`⏱️ Timer stopped at: ${new Date(endTime).toLocaleTimeString()}`);
  console.log(`⚡ Execution time before error: ${executionTime.toFixed(2)} seconds (${executionMinutes.toFixed(2)} minutes)`);
  console.log(`💥 Error occurred: ${error.message}`);
  console.log('\n🛠️ Troubleshooting:');
  console.log('1. Make sure your .env file contains SGAI_APIKEY');
  console.log('2. Check your internet connection');
  console.log('3. Verify the target website is accessible');
  console.log('4. Ensure you have sufficient credits in your account');
}
116+
117+
/**
 * Write markdown text to disk as UTF-8 and report the outcome on the console.
 * A write failure is logged rather than rethrown, so the caller keeps running.
 * @param {string} markdownContent - The markdown content to save
 * @param {string} filename - The name of the file to save to
 */
function saveMarkdownToFile(markdownContent, filename) {
  let outcome;
  try {
    fs.writeFileSync(filename, markdownContent, { encoding: 'utf8' });
    outcome = `💾 Markdown saved to: ${filename}`;
  } catch (writeError) {
    outcome = `❌ Error saving file: ${writeError.message}`;
  }
  console.log(outcome);
}
130+
131+
/**
 * Analyze the markdown content and print a summary to the console.
 *
 * Logs the number of headers, inline links and fenced code blocks, up to the
 * first three headers, and a coarse type label for every interactive step.
 *
 * @param {string} [markdownContent=''] - The markdown content to analyze
 * @param {string[]} [steps=[]] - The interactive steps that were executed
 * @returns {void}
 */
function analyzeMarkdownContent(markdownContent = '', steps = []) {
  // Keyword → label pairs, checked in order; the first keyword found in a step wins.
  const stepTypeRules = [
    ['click', 'Navigation'],
    ['wait', 'Wait'],
    ['fill', 'Input'],
    ['scroll', 'Action'],
  ];
  const classifyStep = (step) =>
    stepTypeRules.find(([keyword]) => step.includes(keyword))?.[1] ?? 'Other';

  console.log('\n🔍 CONTENT ANALYSIS:');
  console.log('-'.repeat(50));

  // Count different markdown elements
  const lines = markdownContent.split('\n');
  // ATX headers need one to six '#' followed by whitespace or end of line,
  // so a line such as "#hashtag" is not counted as a header.
  const headers = lines.filter((line) => /^#{1,6}(\s|$)/.test(line.trim()));
  // Count every "[text](target)" occurrence rather than lines containing one,
  // so several links on one line are all counted (image references match too).
  const linkCount = (markdownContent.match(/\[[^\]]*\]\([^)]*\)/g) || []).length;
  // Each fenced block has an opening and a closing fence; an unpaired fence is ignored.
  const codeBlockCount = Math.floor((markdownContent.match(/```/g) || []).length / 2);

  console.log(`📑 Headers found: ${headers.length}`);
  console.log(`🔗 Links found: ${linkCount}`);
  console.log(`💻 Code blocks: ${codeBlockCount}`);
  console.log(`🎯 Interactive steps executed: ${steps.length}`);

  // Show first few headers if they exist
  if (headers.length > 0) {
    console.log('\n📋 First few headers:');
    headers.slice(0, 3).forEach((header, index) => {
      console.log(` ${index + 1}. ${header.trim()}`);
    });
    if (headers.length > 3) {
      console.log(` ... and ${headers.length - 3} more`);
    }
  }

  // Show which steps might have contributed to content
  console.log('\n🔧 Steps Analysis:');
  steps.forEach((step, index) => {
    console.log(` ${index + 1}. ${step} [${classifyStep(step)}]`);
  });
}
172+
173+
// Multiple scenarios demonstration: prints sample step lists only, no requests are made.
const demoSectionRule = '='.repeat(60);
const demoItemRule = '-'.repeat(40);

// Maps a step description to the label printed next to it; the first matching keyword wins.
const labelForDemoStep = (step) => {
  if (step.includes('click')) return 'Navigation';
  if (step.includes('wait')) return 'Wait';
  if (step.includes('fill')) return 'Input';
  if (step.includes('scroll')) return 'Action';
  return 'Other';
};

console.log('\n🎯 MULTIPLE SCENARIOS DEMONSTRATION');
console.log(demoSectionRule);

const scenarios = [
  {
    name: 'Documentation Navigation',
    url: 'https://docs.scrapegraphai.com/',
    steps: [
      'click on getting started menu',
      'wait for 1 second',
      'scroll down to examples section',
      'wait for 500ms',
      'click on first example',
    ],
  },
  {
    name: 'Blog Content Access',
    url: 'https://scrapegraphai.com/blog/',
    steps: [
      'click on latest blog post',
      'wait for 2 seconds',
      'scroll to full content',
      'wait for 1 second',
      'click read more if available',
    ],
  },
  {
    name: 'Product Information',
    url: 'https://scrapegraphai.com/',
    steps: [
      'click on features menu',
      'wait for 500ms',
      'scroll to pricing section',
      'wait for 1 second',
      'click on enterprise plan',
    ],
  },
];

for (const [scenarioIndex, scenario] of scenarios.entries()) {
  const { name: scenarioName, url: scenarioUrl, steps: scenarioSteps } = scenario;
  console.log(`\n📋 Scenario ${scenarioIndex + 1}: ${scenarioName}`);
  console.log(`🌐 URL: ${scenarioUrl}`);
  console.log(`📝 Steps: ${scenarioSteps.length}`);
  for (const [stepIndex, step] of scenarioSteps.entries()) {
    console.log(` ${stepIndex + 1}. ${step}`);
  }
  console.log(demoItemRule);
}

// Step patterns demonstration
console.log('\n🎯 MARKDOWNIFY STEP PATTERNS DEMONSTRATION');
console.log(demoSectionRule);

const stepPatterns = [
  {
    name: 'SPA Navigation',
    description: 'Steps for navigating Single Page Applications',
    steps: [
      'wait for 2 seconds for page load',
      'click on main menu',
      'wait for 1 second',
      'click on about section',
      'wait for 1 second',
      'scroll to footer',
    ],
  },
  {
    name: 'Content Expansion',
    description: 'Steps for expanding collapsible content',
    steps: [
      'click on show more button',
      'wait for 500ms',
      'click on expand all',
      'wait for 1 second',
      'click on details tab',
      'wait for 1 second',
    ],
  },
  {
    name: 'Cookie Consent & Content Access',
    description: 'Steps for handling cookie consent before content access',
    steps: [
      'wait for 1 second',
      'click on accept cookies',
      'wait for 500ms',
      'click on continue reading',
      'wait for 2 seconds',
      'scroll to main content',
    ],
  },
];

for (const [patternIndex, pattern] of stepPatterns.entries()) {
  const { name: patternName, description, steps: patternSteps } = pattern;
  console.log(`\n📋 Pattern ${patternIndex + 1}: ${patternName}`);
  console.log(`📝 Description: ${description}`);
  console.log(`🎯 Steps (${patternSteps.length}):`);
  for (const [stepIndex, step] of patternSteps.entries()) {
    console.log(` ${stepIndex + 1}. ${step} [${labelForDemoStep(step)}]`);
  }
  console.log(demoItemRule);
}

console.log('\n🎯 MARKDOWNIFY INTERACTIVE STEPS EXAMPLE COMPLETED');
console.log(demoSectionRule);
console.log('This example demonstrates how to use interactive steps with Markdownify.');
console.log('Interactive steps allow you to navigate to specific content before conversion.');
console.log('This is useful for accessing gated content, expanding sections, or navigating SPAs.');

0 commit comments

Comments
 (0)