Skip to content

Commit a8de72f

Browse files
authored
Merge pull request #45 from ScrapeGraphAI/42-i-cant-manage-to-extract-the-contents-of-a-page-behind-cookies
feat: add cookies integration
2 parents d563b0b + 06b6137 commit a8de72f

15 files changed

+1280
-381
lines changed

scrapegraph-js/README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,65 @@ const numberOfScrolls = 10; // Will scroll 10 times to load more content
131131

132132
The `numberOfScrolls` parameter accepts values between 0 and 100, allowing you to control how many times the page should be scrolled before extraction.
133133

#### Scraping with Cookies

Use cookies for authentication and session management when scraping websites that require login or have user-specific content:

```javascript
import { smartScraper } from 'scrapegraph-js';

const apiKey = 'your-api-key';
const url = 'https://example.com/dashboard';
const prompt = 'Extract user profile information';

// Define cookies for authentication
const cookies = {
  session_id: 'abc123def456',
  auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...',
  user_preferences: 'dark_mode,usd'
};

(async () => {
  try {
    const response = await smartScraper(apiKey, url, prompt, null, null, null, cookies);
    console.log(response.result);
  } catch (error) {
    console.error('Error:', error);
  }
})();
```

**Common Use Cases:**
- **E-commerce sites**: User authentication, shopping cart persistence
- **Social media**: Session management, user preferences
- **Banking/Financial**: Secure authentication, transaction history
- **News sites**: User preferences, subscription content
- **API endpoints**: Authentication tokens, API keys

#### Advanced Scraping with Cookies, Scrolling, and Pagination

Combine cookies with infinite scrolling and pagination for comprehensive data extraction:

```javascript
import { smartScraper } from 'scrapegraph-js';

const apiKey = 'your-api-key';
const url = 'https://example.com/feed';
const prompt = 'Extract all posts from the feed';
const cookies = { session_token: 'xyz789abc123' };
const numberOfScrolls = 10; // Scroll 10 times
const totalPages = 5; // Scrape 5 pages

(async () => {
  try {
    const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls, totalPages, cookies);
    console.log('Extracted data:', response);
  } catch (error) {
    console.error('Error:', error);
  }
})();
```
134193
### Search Scraping
135194

136195
Search and extract information from multiple web sources using AI.
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
/**
 * Comprehensive example demonstrating cookies integration for web scraping.
 *
 * This example shows various real-world scenarios where cookies are essential:
 * 1. E-commerce site scraping with authentication
 * 2. Social media scraping with session cookies
 * 3. Banking/financial site scraping with secure cookies
 * 4. News site scraping with user preferences
 * 5. API endpoint scraping with authentication tokens
 *
 * Requirements:
 * - Node.js 16+
 * - scrapegraph-js
 * - A .env file with your SGAI_APIKEY
 *
 * Example .env file:
 * SGAI_APIKEY=your_api_key_here
 */
20+
import { smartScraper } from 'scrapegraph-js';
21+
import { z } from 'zod';
22+
import 'dotenv/config';
23+
24+
// Zod schemas describing the structured output expected from each scenario.
// All fields are strings because the API returns extracted text verbatim.

// Product listing fields for the e-commerce example.
const ProductInfoSchema = z.object({
  name: z.string().describe('Product name'),
  price: z.string().describe('Product price'),
  availability: z.string().describe('Product availability status'),
  rating: z.string().optional().describe('Product rating'),
});

// Feed post fields for the social-media example.
const SocialMediaPostSchema = z.object({
  author: z.string().describe('Post author'),
  content: z.string().describe('Post content'),
  likes: z.string().optional().describe('Number of likes'),
  comments: z.string().optional().describe('Number of comments'),
  timestamp: z.string().optional().describe('Post timestamp'),
});

// Article fields for the news example.
const NewsArticleSchema = z.object({
  title: z.string().describe('Article title'),
  summary: z.string().describe('Article summary'),
  author: z.string().optional().describe('Article author'),
  publish_date: z.string().optional().describe('Publish date'),
});

// Transaction fields for the banking example.
const BankTransactionSchema = z.object({
  date: z.string().describe('Transaction date'),
  description: z.string().describe('Transaction description'),
  amount: z.string().describe('Transaction amount'),
  type: z.string().describe('Transaction type (credit/debit)'),
});
53+
54+
/**
 * Scrapes an e-commerce product listing while authenticated.
 * Sends session/auth cookies so the API can reach user-specific content,
 * and scrolls to trigger lazy-loading of additional products.
 */
async function scrapeEcommerceWithAuth() {
  const banner = '='.repeat(60);
  console.log(banner);
  console.log('E-COMMERCE SITE SCRAPING WITH AUTHENTICATION');
  console.log(banner);

  // Example session/authentication cookies for a fictional store.
  const authCookies = {
    session_id: 'abc123def456',
    user_id: 'user789',
    cart_id: 'cart101112',
    preferences: 'dark_mode,usd',
    auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...',
  };

  try {
    // 5 scrolls to load more products; no pagination (totalPages = null).
    const response = await smartScraper(
      process.env.SGAI_APIKEY,
      'https://example-ecommerce.com/products',
      'Extract product information including name, price, availability, and rating',
      ProductInfoSchema,
      5,
      null,
      authCookies
    );
    console.log('✅ E-commerce scraping completed successfully');
    console.log(JSON.stringify(response, null, 2));
  } catch (error) {
    console.error(`❌ Error in e-commerce scraping: ${error.message}`);
  }
}
89+
90+
/**
 * Scrapes a social-media feed using session cookies.
 * Session/CSRF cookies let the request act as a logged-in user; scrolling
 * loads additional posts into the feed before extraction.
 */
async function scrapeSocialMediaWithSession() {
  const banner = '='.repeat(60);
  console.log('\n' + banner);
  console.log('SOCIAL MEDIA SCRAPING WITH SESSION COOKIES');
  console.log(banner);

  // Example session cookies for a fictional social network.
  const sessionCookies = {
    session_token: 'xyz789abc123',
    user_session: 'def456ghi789',
    csrf_token: 'jkl012mno345',
    remember_me: 'true',
    language: 'en_US',
  };

  try {
    // 10 scrolls to pull more posts into the feed; no pagination.
    const response = await smartScraper(
      process.env.SGAI_APIKEY,
      'https://example-social.com/feed',
      'Extract posts from the feed including author, content, likes, and comments',
      SocialMediaPostSchema,
      10,
      null,
      sessionCookies
    );
    console.log('✅ Social media scraping completed successfully');
    console.log(JSON.stringify(response, null, 2));
  } catch (error) {
    console.error(`❌ Error in social media scraping: ${error.message}`);
  }
}
125+
126+
/**
 * Scrapes a news site with user-preference cookies.
 * Preference cookies shape which articles the site serves; pagination is
 * used instead of scrolling to cover multiple result pages.
 */
async function scrapeNewsWithPreferences() {
  const banner = '='.repeat(60);
  console.log('\n' + banner);
  console.log('NEWS SITE SCRAPING WITH USER PREFERENCES');
  console.log(banner);

  // Example preference cookies for a fictional news site.
  const preferenceCookies = {
    user_preferences: 'technology,science,ai',
    reading_level: 'advanced',
    region: 'US',
    subscription_tier: 'premium',
    theme: 'dark',
  };

  try {
    // No scrolling; walk 3 pages of articles instead.
    const response = await smartScraper(
      process.env.SGAI_APIKEY,
      'https://example-news.com/technology',
      'Extract news articles including title, summary, author, and publish date',
      NewsArticleSchema,
      null,
      3,
      preferenceCookies
    );
    console.log('✅ News scraping completed successfully');
    console.log(JSON.stringify(response, null, 2));
  } catch (error) {
    console.error(`❌ Error in news scraping: ${error.message}`);
  }
}
161+
162+
/**
 * Scrapes a banking transaction history using secure session cookies.
 * The cookies model an already-authenticated, MFA-verified session;
 * pagination covers multiple pages of transactions.
 */
async function scrapeBankingWithSecureCookies() {
  const banner = '='.repeat(60);
  console.log('\n' + banner);
  console.log('BANKING SITE SCRAPING WITH SECURE COOKIES');
  console.log(banner);

  // Example secure-session cookies for a fictional bank.
  const secureCookies = {
    secure_session: 'pqr678stu901',
    auth_token: 'vwx234yz567',
    mfa_verified: 'true',
    device_id: 'device_abc123',
    last_activity: '2024-01-15T10:30:00Z',
  };

  try {
    // No scrolling; walk 5 pages of transaction history.
    const response = await smartScraper(
      process.env.SGAI_APIKEY,
      'https://example-bank.com/transactions',
      'Extract recent transactions including date, description, amount, and type',
      BankTransactionSchema,
      null,
      5,
      secureCookies
    );
    console.log('✅ Banking scraping completed successfully');
    console.log(JSON.stringify(response, null, 2));
  } catch (error) {
    console.error(`❌ Error in banking scraping: ${error.message}`);
  }
}
197+
198+
/**
 * Scrapes an API endpoint using token cookies for authentication.
 * No schema is supplied, so the response shape is left to the API;
 * no scrolling or pagination applies to a plain endpoint.
 */
async function scrapeApiWithAuthTokens() {
  const banner = '='.repeat(60);
  console.log('\n' + banner);
  console.log('API ENDPOINT SCRAPING WITH AUTH TOKENS');
  console.log(banner);

  // Example OAuth-style token cookies for a fictional API.
  const tokenCookies = {
    api_token: 'api_abc123def456',
    client_id: 'client_789',
    access_token: 'access_xyz789',
    refresh_token: 'refresh_abc123',
    scope: 'read:all',
  };

  try {
    // schema, numberOfScrolls, and totalPages are all intentionally null.
    const response = await smartScraper(
      process.env.SGAI_APIKEY,
      'https://api.example.com/data',
      'Extract data from the API response',
      null,
      null,
      null,
      tokenCookies
    );
    console.log('✅ API scraping completed successfully');
    console.log(JSON.stringify(response, null, 2));
  } catch (error) {
    console.error(`❌ Error in API scraping: ${error.message}`);
  }
}
233+
234+
/**
 * Entry point: validates that an API key is configured, then runs every
 * cookie-integration scenario in sequence.
 */
async function main() {
  // Guard clause: without an API key none of the examples can run.
  if (!process.env.SGAI_APIKEY) {
    console.error('Error: SGAI_APIKEY not found in .env file');
    console.log('Please create a .env file with your API key:');
    console.log('SGAI_APIKEY=your_api_key_here');
    return;
  }

  console.log('🍪 COOKIES INTEGRATION EXAMPLES');
  console.log('This demonstrates various real-world scenarios where cookies are essential for web scraping.');

  // Run the scenarios one at a time so their console output stays grouped.
  const scenarios = [
    scrapeEcommerceWithAuth,
    scrapeSocialMediaWithSession,
    scrapeNewsWithPreferences,
    scrapeBankingWithSecureCookies,
    scrapeApiWithAuthTokens,
  ];
  for (const scenario of scenarios) {
    await scenario();
  }

  const banner = '='.repeat(60);
  console.log('\n' + banner);
  console.log('✅ All examples completed!');
  console.log(banner);
}

// Run the example; surface any unexpected rejection on stderr.
main().catch(console.error);

0 commit comments

Comments
 (0)