-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjio.py
50 lines (36 loc) · 1.27 KB
/
jio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import urllib.request
import xmltodict
from bs4 import BeautifulSoup
# from . import models
import re
# from models import *
import settings
# Article address that the script statements at the end of this file fetch
# and scrape. Split across lines for readability; the value is unchanged.
url = (
    'https://hindi.gadgets360.com/mobiles/'
    'cubot-kingkong-5-pro-rugged-smartphone-launch-soon-'
    'massive-8000mah-battery-4gb-ram-waterproof-dustproof-'
    'specifications-8000-news-2389286'
)
def getArticleWebpage(url):
    """Open *url* with a browser-style User-Agent header and return the response.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response object produced by ``urllib.request.urlopen`` on
        success, or ``None`` when the request could not be built or opened.
        The response is returned open; closing it is left to the caller.
    """
    # Local import: the file's import section does not bring ``http`` into
    # scope, and this block must not rely on it doing so.
    from http.client import HTTPException

    try:
        req = urllib.request.Request(
            url,
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36',
            },
        )
        return urllib.request.urlopen(req)
    except (OSError, HTTPException, ValueError) as exc:
        # OSError covers URLError/HTTPError and raw socket failures;
        # ValueError is raised for a malformed or scheme-less URL.
        # Anything else (KeyboardInterrupt, programming errors) propagates
        # instead of being silently swallowed.
        print('pa fourth', exc)
        return None
def scrapArticle(web_page):
    """Return the article text contained in a fetched page.

    The article container is the ``div`` whose class attribute is
    ``content_text row description``; when that container holds another
    ``div`` with the same class, the inner one is used instead. Every
    ``div`` and ``blockquote`` nested in the chosen container is removed
    before the remaining text is collected.

    Args:
        web_page: Markup (or an open file-like object) to hand to
            ``BeautifulSoup``. ``None`` is tolerated so that the result of
            a failed fetch can be passed straight through.

    Returns:
        The container's text, or ``None`` when there is no input, the
        container is not present, or parsing fails.
    """
    if web_page is None:
        # Nothing was fetched; report it the same way as any other failure.
        print('pa fifth')
        return None

    try:
        soup = BeautifulSoup(web_page, 'html.parser')
        content = soup.find("div", {"class": "content_text row description"})
        if content is None:
            # Page layout does not contain the expected article container.
            print('pa fifth')
            return None

        nested = content.find("div", {"class": "content_text row description"})
        if nested is not None:
            content = nested

        # find_all returns a snapshot list, so detaching tags while
        # looping over it is safe.
        for tag in content.find_all(["div", "blockquote"]):
            tag.extract()
        return content.get_text()
    except Exception as exc:
        # Best-effort scraping: reading or parsing third-party markup can
        # fail in many ways, so report and return None rather than crash.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        print('pa fifth', exc)
        return None
# Fetch the article and print its extracted text.
web_page = getArticleWebpage(url)
try:
    print(scrapArticle(web_page))
finally:
    # The fetch returns an open HTTP response (or None on failure);
    # close it so the underlying connection is not leaked.
    if web_page is not None:
        web_page.close()