-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathvisibility-project-python.py
More file actions
84 lines (68 loc) · 2.19 KB
/
visibility-project-python.py
File metadata and controls
84 lines (68 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import requests
from bs4 import BeautifulSoup
import re
from urllib.request import urlopen
from gtts import gTTS
from playsound import playsound
import os
import sys
import json
import urllib
# Base URL of the site to scrape; passed to program() by the call at the bottom of this file.
url = 'http://localhost:3000'
def text_extractor():
    ''' Returns the "text" field of the JSON object passed as the first CLI argument.

    The script reads sys.argv[1], parses it as JSON and hands back the value
    stored under the "text" key (presumably the sentence supplied by the
    JavaScript caller -- confirm against the invoking code).

    Raises IndexError when no argument was given, json.JSONDecodeError when
    the argument is not valid JSON, and KeyError when "text" is absent.
    '''
    raw_payload = str(sys.argv[1])
    payload = json.loads(raw_payload)
    return payload['text']
def text_to_speech(text):
    ''' Converts `text` to slow English speech with gTTS and saves the audio.

    The result is written to the relative path "public/audio/sample.mp3",
    so the output location depends on the current working directory.
    '''
    speech = gTTS(text=text, lang='en', slow=True)
    speech.save("public/audio/sample.mp3")
def scrape(urls):
    ''' Scrapes the page at `urls` and maps link names to absolute URLs.

    Every <a> tag on the page is inspected.  For each href longer than one
    character the last "/"-separated segment, lower-cased, becomes the key
    and `urls` + href (with a leading "/" added when missing) the value.
    The key "home" is pre-seeded with `urls` itself.

    urls: base URL of the site, e.g. "http://localhost:3000".
    Returns a dict of str -> str.
    '''
    grab = requests.get(urls)
    soup = BeautifulSoup(grab.text, 'html.parser')

    # A set collapses duplicate hrefs.  Anchors without an href attribute
    # make Tag.get() return None, which used to crash len() below with a
    # TypeError, so those entries are dropped here.
    hrefs = {anchor.get('href') for anchor in soup.find_all('a')}
    hrefs.discard(None)

    links = {'home': urls}
    for site in hrefs:
        if len(site) <= 1:
            # Empty and single-character hrefs (such as "/" or "#") are skipped.
            continue
        if site[0] != '/':
            site = '/' + site
        links[site.split('/')[-1].lower()] = urls + site
    return links
def find_link(links):
    ''' Finds link to open from input sentence.

    The sentence comes from text_extractor(); everything except ASCII letters
    is removed and the rest is lower-cased before matching, so a key can only
    match if it consists of lower-case letters.

    links: dict mapping a link name to its URL.
    Returns "/" when the "home" key matches, the mapped URL when another key
    matches, and the string "stop" when nothing matches.  Keys are tried in
    the dict's iteration order and the first match wins.
    '''
    sentence = re.sub('[^a-zA-Z]', '', text_extractor()).lower()
    for key, target in links.items():
        # The empty string is a substring of every string, so an empty key
        # would match every sentence; skip it.
        if not key:
            continue
        if key in sentence:
            return '/' if key == 'home' else target
    return 'stop'
def _route_of(link):
    ''' Returns `link` without its scheme and host; links lacking either are returned unchanged. '''
    from urllib.parse import urlsplit

    parts = urlsplit(link)
    if not (parts.scheme and parts.netloc):
        return link
    # Slice by length so the remainder (path, query, fragment) is kept verbatim.
    return link[len(parts.scheme) + len('://') + len(parts.netloc):]


def program(urls):
    ''' Resolves the spoken request to a page, reports it as JSON and narrates it.

    Steps:
      1. Scrape `urls` for links and pick the one named in the input sentence.
      2. Print one JSON object on stdout with the keys "str" (route of the
         chosen page) and "src" (path of the generated audio file).
      3. Download the chosen page, extract its text and convert it to speech.

    When no link matches, {"str":"stop","src":"stop"} is printed and the
    process exits via SystemExit without fetching or synthesising anything.

    urls: base URL of the site, e.g. "http://localhost:3000".
    '''
    links = scrape(urls)
    target = find_link(links)

    # json.dumps escapes quotes and backslashes, so the output stays valid JSON
    # for any route; compact separators keep the bytes identical to the
    # previous hand-built string for ordinary input.
    if target == 'stop':
        print(json.dumps({'str': 'stop', 'src': 'stop'}, separators=(',', ':')))
        # sys.exit() instead of exit(): the latter is a helper installed by
        # the site module and is not guaranteed to exist.
        sys.exit()

    # Strip scheme and host rather than splitting on the literal port "3000",
    # which broke for other ports and for paths that contain "3000".
    route = _route_of(target)
    print(json.dumps({'str': route, 'src': "/audio/sample.mp3"},
                     separators=(',', ':'), ensure_ascii=False))

    # find_link() answers "/" for the home page; that maps back to the base URL.
    page_url = urls if route == '/' else target
    # The context manager closes the HTTP response once the body has been read.
    with urllib.request.urlopen(page_url) as page:
        html = page.read().decode("utf-8")

    text = BeautifulSoup(html, "html.parser").get_text()
    # Only the text after the last "Technologies" is narrated (presumably
    # to skip the site's navigation header -- confirm against the pages).
    text = text.replace('\n', ' ').split('Technologies')[-1]
    text_to_speech(text)
# Runs the whole pipeline against the module-level `url` whenever this file is executed or imported.
program(url)