Skip to content

I can't manage to extract the contents of a page behind cookies #42

@tropxy

Description

@tropxy
async def scrapegraphai_test():
    """Request a cookie-protected page as markdown via the ScrapeGraphAI client.

    Steps:
      1. Await ``get_login_cookies()`` (defined elsewhere; presumably it logs
         in and writes ``cookies.json`` -- confirm against that helper).
      2. Read ``cookies.json`` and fold every entry under its ``"cookies"``
         key into a single ``Cookie`` request header.
      3. Pass that header, together with a browser ``User-Agent``, to
         ``AsyncClient.markdownify`` and print the response.

    Raises:
        scrapegraph_py.exceptions.APIError: propagated from ``markdownify``
            when the API rejects the request.
        OSError: if ``cookies.json`` cannot be opened.
        KeyError: if the cookie file lacks the ``cookies`` / ``name`` /
            ``value`` keys read below.
    """
    # Local imports keep the snippet self-contained when pasted on its own.
    import json
    from scrapegraph_py import AsyncClient

    print("Start crawling...")
    await get_login_cookies()

    # Load cookies from JSON file. JSON is UTF-8 by specification, so do not
    # rely on the platform's default text encoding.
    with open('cookies.json', 'r', encoding='utf-8') as f:
        cookie_data = json.load(f)

    # Convert cookies to Cookie header format: "name1=value1; name2=value2".
    cookie_header = "; ".join(
        f"{cookie['name']}={cookie['value']}" for cookie in cookie_data['cookies']
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Cookie": cookie_header,
    }
    # NOTE(review): this prints live session cookies; redact before sharing
    # the output publicly.
    print(headers)

    async with AsyncClient(api_key="sgai-key") as client:
        response = await client.markdownify(
            website_url="https://poc18.demo.dev.charge.ampeco.tech/admin/resources/charge-points/4",
            headers=headers,
        )
        print(response)
        
if __name__ == "__main__":
    # Script entry point: create the coroutine, then let asyncio.run() drive
    # it to completion on a fresh event loop.
    entry_coro = scrapegraphai_test()
    asyncio.run(entry_coro)
Start crawling...
Navigating to login page....
Filling login form...
Locating login button...
Waiting for dashboard redirect...
{'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'Cookie': 'XSRF-TOKEN=eyJpdiI6IjBydUlVUEVmY2EyUUF4cmpIRzFHR0E9PSIsInZhbHVlIjoiYjcvejNld2g2YXNxT0FvUTZVWkZpTlJTV0UxSFhkRlorb3VsVGVSeGhPeTlVcnRHS2ZpcFVkMUVLQlVhTWlhSHpMT3FWK3pTTXNGRU14Vlp0UjdsOCtOSHQvRldWS3BYMGhZQW40c0xiVjExODN3UVRFNmF0K3d4bEhWZFIwcVQiLCJtYWMiOiI0YmJlY2Q2ZDJkZDQxMjZhMjc5YjA0MWFjODM1MzI2YTM4YWVhNzU0ODk2NDE2ZDkzZmMzYzFlNWYxNWY2NTg3IiwidGFnIjoiIn0%3D; laravel_session=eyJpdiI6IlFlOFU4a1FvTEM3ejVKNVpvTUEyVlE9PSIsInZhbHVlIjoib0ZJemRmMlppd0k5WERSeEVEWEVMcnFRQjNYcktkM1FOT1oycUx6dnFvMjBsTlhrQUdqV2J1K0ppVW0vTCtubm1Ta0tzbThTa0IvM1dRemRhRkt3UGNhMVFWMEZZSnhsbWh5SmpmZVMybmo2WnFLcS9zK3NVeUtwU2N3Rlp6QWoiLCJtYWMiOiJlMTg0M2I0MGY2NmZjZmE0MjQxYzliYWJkYmMwOTMzMTQzNTg0OGJlMzc4MWQxMTk3ZWRhMjEzM2E5NGRiNWEyIiwidGFnIjoiIn0%3D'}
Traceback (most recent call last):
  File "/Users/andre/Devel/amp_assist_backend_api/results_markdown.py", line 415, in <module>
    asyncio.run(scrapegraphai_test())
    ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.5/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 195, in run
    return runner.run(main)
           ~~~~~~~~~~^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.5/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.5/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/base_events.py", line 725, in run_until_complete
    return future.result()
           ~~~~~~~~~~~~~^^
  File "/Users/andre/Devel/amp_assist_backend_api/results_markdown.py", line 406, in scrapegraphai_test
    response = await client.markdownify(
               ^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<2 lines>...
    )
    ^
  File "/Users/andre/.virtualenvs/amp_assist_backend_api/lib/python3.13/site-packages/scrapegraph_py/async_client.py", line 151, in markdownify
    result = await self._make_request(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
        "POST", f"{API_BASE_URL}/markdownify", json=request.model_dump()
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "/Users/andre/.virtualenvs/amp_assist_backend_api/lib/python3.13/site-packages/scrapegraph_py/async_client.py", line 113, in _make_request
    result = await handle_async_response(response)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/andre/.virtualenvs/amp_assist_backend_api/lib/python3.13/site-packages/scrapegraph_py/utils/helpers.py", line 40, in handle_async_response
    raise APIError(error_msg, status_code=response.status)
scrapegraph_py.exceptions.APIError: [400] Website is not accessible

However, I know this should work, because I was able to use crawl4AI with those same cookies and it works.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions