Skip to content

Latest commit

 

History

History
44 lines (37 loc) · 1.71 KB

webscraping.org

File metadata and controls

44 lines (37 loc) · 1.71 KB
import httpx
from parser import Selector

# Fetch the remotepython.com job listing page and print, for every listing,
# its title followed by the absolute URL of its detail page.
#
# Raises RuntimeError if the server does not answer with HTTP 200.
#
# NOTE(review): the selector API used below (`Selector(text=...)`,
# `.css(...).get()`) looks like the `parsel` package, but this file imports
# `Selector` from `parser` -- confirm the import is the intended one.
response = httpx.get("https://www.remotepython.com/jobs")
# Explicit check instead of `assert`: assertions are stripped when Python runs
# with -O, which would let a failed request fall through to the parsing below.
if response.status_code != 200:
    raise RuntimeError(
        f"unexpected HTTP status {response.status_code} for {response.url}"
    )

selector = Selector(text=response.text)
for job in selector.css('.box-list .item'):
    title = job.css('h3 a::text').get()
    print(title)
    relative_url = job.css('h3 a::attr(href)').get()
    # Presumably `.get()` returns None when nothing matches (TODO confirm
    # against the selector library); joining None onto the base URL would
    # fail, so skip the link for such items instead of aborting the run.
    if relative_url is None:
        continue
    # Resolve the (possibly relative) href against the URL actually fetched.
    print(response.url.join(relative_url))
  • Parse a URL into its components
    # Split a URL string into its six components using only the standard library.
    from urllib.parse import urlparse
    urlparse("http://www.domain.com/path/to/resource?arg1=true&arg2=false") # ParseResult()
    # -> ParseResult(scheme='http', netloc='www.domain.com',
    #                path='/path/to/resource', params='',
    #                query='arg1=true&arg2=false', fragment='')