# Project-Visibility-ExpressJS/visibility-project-python.py (master) - Anunay-Anand/Project-Visibility-ExpressJS on GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import requests
from bs4 import BeautifulSoup
import re
from urllib.request import urlopen
from gtts import gTTS
from playsound import playsound
import os
import sys
import json
import urllib

# Base URL of the site to scrape; handed to program() at the bottom of the file.
url = 'http://localhost:3000'

def text_extractor():
    ''' Returns the 'text' field of the JSON document passed as the first command-line argument '''
    # The caller supplies a JSON string in argv[1]; decode it and pick out 'text'.
    payload = json.loads(str(sys.argv[1]))
    return payload['text']


def text_to_speech(text):
    ''' Converts `text` to slow English speech and writes it to public/audio/sample.mp3 '''
    speech = gTTS(text=text, lang='en', slow=True)
    # The output path is relative to the current working directory.
    speech.save("public/audio/sample.mp3")


def scrape(urls):
    ''' Scrapes the anchor links of a page into a keyword -> URL dict.

    Downloads `urls`, collects the href of every <a> tag and returns a dict
    whose keys are the lower-cased last path segment of each href and whose
    values are `urls` concatenated with that href (a leading '/' is added
    when the href does not start with one). The page itself is always
    registered under the key 'home'.

    Skipped entries: anchors without an href attribute, hrefs of one
    character or less (e.g. '/' or '#'), and hrefs that yield no keyword.
    '''
    grab = requests.get(urls)
    soup = BeautifulSoup(grab.text, 'html.parser')

    # A set drops duplicate hrefs. link.get() returns None for <a> tags that
    # carry no href attribute; those would crash len() below, so drop them.
    websites = {
        href
        for href in (link.get('href') for link in soup.find_all('a'))
        if href is not None
    }

    links = {'home': urls}

    for site in websites:
        if len(site) <= 1:
            continue
        if site[0] != '/':
            # NOTE: this also prefixes absolute hrefs ('http://...'), which
            # then get appended to `urls` like any relative path.
            site = '/' + site
        # Ignore trailing slashes when choosing the keyword so '/about/' is
        # keyed as 'about'. An empty keyword must never be stored: it is a
        # substring of every sentence and would match any input in find_link.
        key = site.rstrip('/').split('/')[-1].lower()
        if not key:
            continue
        links[key] = urls + site
    return links


def find_link(links):
    ''' Returns the link whose keyword appears in the input sentence, or 'stop' when none does '''
    # Keep letters only and lower-case them, so keywords are matched
    # regardless of spacing, punctuation, digits or capitalisation.
    sentence = re.sub('[^a-zA-Z]', '', text_extractor()).lower()
    for key, target in links.items():
        if key in sentence:
            # The home page is reported as the root path rather than its full URL.
            return '/' if key == 'home' else target
    return 'stop'


def program(urls):
    ''' Resolves the input sentence to a page, reports it as JSON and records its audio.

    Steps:
      1. Scrape the links of `urls` and pick the one named in the input
         sentence (see scrape / find_link).
      2. Print one JSON object on stdout with the keys 'str' (the part of
         the chosen link after '3000', or 'stop') and 'src' (the audio path,
         or 'stop').
      3. Unless nothing matched, download the chosen page, keep the text
         after the last occurrence of 'Technologies' and convert it to
         speech with text_to_speech.

    Exits the process via SystemExit right after printing when no link
    matched the sentence.
    '''
    def as_json(path, audio_src):
        # json.dumps escapes quotes and backslashes, so the printed line is
        # always valid JSON. Compact separators and ensure_ascii=False keep
        # the output identical to the previous hand-built string for
        # ordinary values.
        return json.dumps(
            {'str': path, 'src': audio_src},
            separators=(',', ':'),
            ensure_ascii=False,
        )

    links = scrape(urls)
    input_speech = find_link(links)
    # Keep only what follows the port number, i.e. the path on the site.
    temp_input = input_speech.split('3000')[-1]

    if temp_input == 'stop':
        print(as_json('stop', 'stop'))
        # sys.exit() is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit()

    # The JSON line is emitted before the audio is generated, as before.
    print(as_json(temp_input, "/audio/sample.mp3"))

    if temp_input != '/':
        urls = input_speech
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(urls) as page:
        html = page.read().decode("utf-8")
    soup = BeautifulSoup(html, "html.parser")
    text = soup.get_text()
    # Read aloud only the text following the last 'Technologies' marker.
    text = text.replace('\n', ' ').split('Technologies')[-1]
    text_to_speech(text)


# Entry point: runs at module load (there is no __main__ guard), using the base URL defined above.
program(url)