# fb.py
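"""Scrape Facebook's public fan-page search for each keyword with headless
Firefox (Selenium), collect page names and links via BeautifulSoup, and
append them to fb_vape.csv."""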
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from urllib.parse import quote
import os
import time
import csv

def writecsv(keyword, list_of_dictionaries):
    # append rows; write the header only when the file is new/empty so it
    # is not repeated before every keyword's rows
    write_header = not os.path.isfile("fb_vape.csv") or os.path.getsize("fb_vape.csv") == 0
    with open("fb_vape.csv", "a", newline='', encoding='utf-8') as csvfile:
        write = csv.writer(csvfile, delimiter=',')
        if write_header:
            write.writerow(['關鍵字', '頁面名稱', '連結網址'])  # keyword / page name / page URL
        for entry in list_of_dictionaries:
            write.writerow([keyword, entry['title'], entry['link']])

def scrape_fb_fanpage(keyword):
    # percent-encode the keyword so non-ASCII (Chinese) terms form a valid URL
    url = "https://facebook.com/public?query=" + quote(keyword) + "&type=pages&init=dir&nomc=0"
    # use selenium to simulate user behavior
    # run firefox in headless (no display) mode, or it errors in my current environment
    # reference: https://stackoverflow.com/questions/52534658/webdriverexception-message-invalid-argument-cant-kill-an-exited-process-with (2nd answer)
    options = Options()
    options.add_argument("--headless")  # the Options.headless attribute was removed in newer Selenium
    driver = webdriver.Firefox(options=options)  # open firefox
    driver.get(url)
    last_height = driver.execute_script("return document.body.scrollHeight")  # current page height
    # scroll until the bottom has been reached, i.e. the height stops growing
    pause_time = 3
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause_time)  # give new results time to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        print(new_height)
        if new_height == last_height:
            break
        last_height = new_height
    data = driver.page_source
    driver.quit()  # close the browser once the full page source is captured
    # parse the rendered html; "_32mo" is the fan-page link class on this results page
    soup = BeautifulSoup(data, "html.parser")
    fanpages = soup.find_all(class_="_32mo")
    content = []
    for entry in fanpages:
        info = {
            'link': entry['href'],
            'title': entry.text
        }
        print(info)
        content.append(info)
    return content
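
# note: scrape_fb_fanpage returns a list of dicts shaped like
#   {'link': 'https://www.facebook.com/SomePage/', 'title': 'Some Page'}
# (illustrative values; actual results depend on the live search page)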

def main():
    # two Chinese spellings of "e-cigarette" plus the English term
    search_list = ["電子菸", "電子煙", "vape"]
    for key_word in search_list:
        data = scrape_fb_fanpage(key_word)
        writecsv(key_word, data)


if __name__ == '__main__':
    main()
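
# Usage (assumes Firefox plus a matching geckodriver on PATH, and the
# selenium and beautifulsoup4 packages installed):
#   python fb.py
# Results accumulate in fb_vape.csv in the working directory.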