This repository was archived by the owner on Mar 14, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
[General] 爬蟲問題-getting none type while parsing #389
Copy link
Copy link
Open
Labels
Description
提交連結
程式碼
#爬取輸入地址後以評分排列的店家資訊
#ubereat地址輸入中山區並在載入下一頁後選取「評分」標籤後開始爬取店家名稱
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
from lxml import etree
url='https://www.ubereats.com/api/getFeedV1?localeCode=tw'
head={
'cookie':'uev2.id.xp=b2d90d43-a0df-4283-95e8-e7fa74c7346f; dId=65f906c3-72c2-4fda-8573-2f006b27f53c; uev2.id.session=7557caec-a5c2-4a47-83cc-ad0cc2f14f9b; uev2.ts.session=1639113207407; marketing_vistor_id=e1779e3e-545e-40b5-8057-73e9611af4e2; jwt-session=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2MzkxMTMyMDcsImV4cCI6MTYzOTE5OTYwN30.NyZZ8r2-8tqP1KVnUbAEhOh4Sc_l0m5X4qHrUtpxTiU; uev2.gg=true; utm_medium=undefined; CONSENTMGR=c1:1%7Cc2:1%7Cc3:1%7Cc4:1%7Cc5:1%7Cc6:1%7Cc7:1%7Cc8:1%7Cc9:1%7Cc10:1%7Cc11:1%7Cc12:1%7Cc13:1%7Cc14:1%7Cc15:1%7Cts:1639113221142%7Cconsent:true; _userUuid=; _gcl_au=1.1.1164597726.1639113223; _fbp=fb.1.1639113224102.1810193959; _ga=GA1.2.22554046.1639113225; _gid=GA1.2.1601263129.1639113225; uev2.loc=%7B%22address%22%3A%7B%22address1%22%3A%22%E4%B8%AD%E5%B1%B1%E5%8D%80%22%2C%22address2%22%3A%22%E5%8F%B0%E5%8C%97%E5%B8%82%22%2C%22aptOrSuite%22%3A%22%22%2C%22eaterFormattedAddress%22%3A%2210491%E5%8F%B0%E7%81%A3%E5%8F%B0%E5%8C%97%E5%B8%82%E4%B8%AD%E5%B1%B1%E5%8D%80%22%2C%22subtitle%22%3A%22%E5%8F%B0%E5%8C%97%E5%B8%82%22%2C%22title%22%3A%22%E4%B8%AD%E5%B1%B1%E5%8D%80%22%2C%22uuid%22%3A%22%22%7D%2C%22latitude%22%3A25.0792018%2C%22longitude%22%3A121.5427093%2C%22reference%22%3A%22ChIJAR6jgvmrQjQRKbIugg0XTUY%22%2C%22referenceType%22%3A%22google_places%22%2C%22type%22%3A%22google_places%22%2C%22source%22%3A%22manual_auto_complete%22%2C%22addressComponents%22%3A%7B%22countryCode%22%3A%22%22%2C%22firstLevelSubdivisionCode%22%3A%22%22%2C%22city%22%3A%22%22%2C%22postalCode%22%3A%22%22%7D%2C%22originType%22%3A%22user_autocomplete%22%7D; _ts_yjad=1639113226667; _rdt_uuid=1639113227545.cb3112f6-6c44-4ae3-b94c-9b63716ad7bf; _gat_tealium_0=1; _scid=d3602dfa-a411-40c1-a315-40785f6a93c5; utag_main=v_id:017da2c2e3d3002a1ec46c87162c05072001b06a00978$_sn:1$_se:10$_ss:0$_st:1639115059285$ses_id:1639113221080%3Bexp-session$_pn:1%3Bexp-session; _uetsid=f2fd78d0597711ecb14b614bc96ad804; _uetvid=f2ff03a0597711ec8b14014eae4cdf46',
'x-csrf-token':'x'
} #其實只需要cookie和最後一個
#json格式
list_req=requests.post(url,headers=head)
#使用xpath定位元素,也抓到空值none
#tree=etree.HTML(list_req.text)
#getdata=tree.xpath("//*[@id='main-content']/div")
#使用bs4格式定位元素,也抓到空值none
soup=BeautifulSoup(list_req.text,"html.parser")
getdata=soup.find("class","kj") #getdata=soup.find("div","class")的結果是none
#利用條件式判斷發現getdata抓到的是空值
if getdata is not None:
print(getdata.text)
else:
print("None")
#補充:
#print(soup)
#{"status":"success","data":{"countdowns":[],"diningModes":[{"mode":"DELIVERY","title":"外送","isAvailable":true,"isSelected":true},{"mode":"PICKUP","title":"外帶","isAvailable":true,"isSelected":false}],"sortAndFilters":[{"uuid":"1c7cf7ef-730f-431f-9072-26bc39f7c021","type":"sort","label":"分類","maxPermitted":1,"minPermitted":1,"options":[{"uuid":"3c7cf7ef-730f-431f-9072-26bc39f7c022","value":"Recommended","isDefault":true,"label":" 您專屬的推薦餐點 (預設)","iconUrl":"https://duyt4h9nfnj50.cloudfront.net/sort_and_filters/filter-ic-recommended-v2.png"},{"uuid":"4c7cf7ef-730f-431f-9072-26bc39f7c023","value":"Most popular","isDefault":false,"label":" 熱門餐點","iconUrl":"https://duyt4h9nfnj50.cloudfront.net/sort_and_filters/filter-ic-mostpop.png"},{"uuid":"5991d63c-4d42-46bc-8301-ce224557e615","value":"Rating","isDefault":false,"label":" 評分","iconUrl":"http://duyt4h9nfnj50.cloudfront.net/sort_and_filters/rating_icn.png"},{"uuid":"5c7cf7ef-730f-431f-9072-26bc39f7c024","value":"Delivery time","isDefault":false,"label":" 配送時間","iconUrl":"https://duyt4h9nfnj50.cloudfront.net/sort_and_filters/filter-ic-etd.pn...(後面太長略
#print(list_req)
#<Response [200]>錯誤訊息
問題描述
在靜態爬取ubereat 網站時,所定位的元素明明不是空值,卻一直回傳none,有使用bs4 find 方法及etree的xpath定位元素,都回傳none