|
| 1 | +import unittest |
| 2 | +from unittest.mock import patch |
| 3 | +from scrapegraph_py.local_scraper import scrape_text |
| 4 | +from pydantic import BaseModel, Field |
| 5 | +import requests |
| 6 | + |
# Minimal output schema handed to ``scrape_text`` via its ``schema`` argument.
# Documented with comments rather than a class docstring so that the schema
# pydantic generates from this model is unchanged.
class TestSchema(BaseModel):
    # Both fields are required strings; only a description is attached to each.
    title: str = Field(description="The title")
    content: str = Field(description="The content")
| 10 | + |
class TestLocalScraper(unittest.TestCase):
    """Tests for ``scrape_text`` with ``requests.post`` replaced by a mock.

    Each test patches ``scrapegraph_py.local_scraper.requests.post`` and
    checks the string returned by ``scrape_text``.
    """

    # Raw body served by the mocked endpoint in the success scenarios.
    _BODY = '{"title": "Test", "content": "Content"}'

    def _scrape(self, **kwargs):
        """Call ``scrape_text`` with the fixture arguments shared by all tests.

        Args:
            **kwargs: Extra keyword arguments forwarded unchanged
                (for example ``schema=TestSchema``).

        Returns:
            Whatever ``scrape_text`` returns.
        """
        return scrape_text(
            "test_api_key",
            "Sample website text",
            "Extract information",
            **kwargs,
        )

    def _serve_body(self, mock_post):
        """Make the mocked ``post`` return a 200 response carrying ``_BODY``."""
        mock_post.return_value.status_code = 200
        mock_post.return_value.text = self._BODY

    @staticmethod
    def _fail_with_status(mock_post, status_code, message):
        """Make the mocked response fail the way a real HTTP error does.

        ``requests.post`` itself returns normally for 4xx/5xx replies; the
        ``HTTPError`` is raised by ``Response.raise_for_status()`` and carries
        the response. Mirroring that keeps ``status_code`` reachable both via
        the returned response and via ``error.response``.

        Args:
            mock_post: The patched ``requests.post`` mock.
            status_code: Integer HTTP status to expose on the response.
            message: Text of the raised ``HTTPError``.
        """
        mock_response = mock_post.return_value
        # A real int (not a MagicMock attribute) so comparisons such as
        # ``status_code == 403`` and any serialisation behave as in production.
        mock_response.status_code = status_code
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            message, response=mock_response
        )

    @patch('scrapegraph_py.local_scraper.requests.post')
    def test_scrape_text_success(self, mock_post):
        """Without a schema, the response body is returned unchanged."""
        self._serve_body(mock_post)

        response = self._scrape()

        self.assertEqual(response, self._BODY)
        # Guard against a result that never went through the HTTP call.
        mock_post.assert_called_once()

    @patch('scrapegraph_py.local_scraper.requests.post')
    def test_scrape_text_with_schema(self, mock_post):
        """Passing a pydantic schema still returns the response body."""
        self._serve_body(mock_post)

        response = self._scrape(schema=TestSchema)

        self.assertEqual(response, self._BODY)
        mock_post.assert_called_once()

    @patch('scrapegraph_py.local_scraper.requests.post')
    def test_scrape_text_http_error(self, mock_post):
        """A non-403 HTTP failure is reported as "HTTP error occurred"."""
        self._fail_with_status(mock_post, 404, "404 Client Error")

        response = self._scrape()

        self.assertIn("HTTP error occurred", response)

    @patch('scrapegraph_py.local_scraper.requests.post')
    def test_scrape_text_forbidden(self, mock_post):
        """A 403 reply is reported as "Access forbidden (403)"."""
        self._fail_with_status(mock_post, 403, "403 Forbidden")

        response = self._scrape()

        self.assertIn("Access forbidden (403)", response)

    @patch('scrapegraph_py.local_scraper.requests.post')
    def test_scrape_text_general_error(self, mock_post):
        """A transport-level failure is reported as "An error occurred"."""
        # Connection-level problems are raised by ``post`` itself, so the
        # side effect belongs on the patched function here.
        mock_post.side_effect = requests.exceptions.RequestException("Connection error")

        response = self._scrape()

        self.assertIn("An error occurred", response)
| 77 | + |
# Run the suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments