-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetTabs.py
72 lines (58 loc) · 1.84 KB
/
getTabs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 12 15:12:54 2016
@author: matt
"""
import shelve
import requests
from lxml import html
def getTab(url):
    """Download a tab page and extract the text of its second ``<pre>`` block.

    Args:
        url: Address of the tab page to fetch with ``requests.get``.

    Returns:
        The list produced by the XPath query: the text nodes of the second
        ``<pre>`` child of the element whose id is ``cont``. The list is
        empty when the page has no such node.
    """
    response = requests.get(url)
    document = html.fromstring(response.content)
    # Text nodes of the second <pre> directly under the element with id="cont".
    return document.xpath('//*[@id="cont"]/pre[2]/text()')
# Search parameters passed to getTree() by the module-level scraping script.
band = 'radiohead'  # band name placed in the band_name query parameter
page = '1'  # results page requested first; kept as a string
def getTree(band, page):
    """Fetch one page of Ultimate Guitar search results for a band and parse it.

    Args:
        band: Raw (not yet URL-encoded) band name. It is percent-encoded here,
            so pass ``'simon & garfunkel'`` rather than a pre-encoded value.
        page: Number of the results page to request, as an int or a str.

    Returns:
        The root element returned by ``lxml.html.fromstring`` for the body of
        the search response.
    """
    # Imported locally so this function does not depend on the file's import block.
    from urllib.parse import quote_plus

    # Encode both values: an unescaped '&', '#', '+' or '=' in a band name
    # would otherwise terminate or alter the band_name parameter.
    # str() accepts an int or a str page, replacing the former type(page) == int test.
    bandURL = (
        'https://www.ultimate-guitar.com/search.php?band_name=' + quote_plus(band)
        + '&type%5B1%5D=200&type2%5B0%5D=40000&rating%5B4%5D=5&tuning%5Bstandard%5D=standard&page='
        + quote_plus(str(page))
        + '&view_state=advanced&tab_type_group=text&app_name=ugt&order=myweight'
    )
    pageBand = requests.get(bandURL)
    return html.fromstring(pageBand.content)
#urlBand = 'https://www.ultimate-guitar.com/search.php?search_type=title&order=&value=metallica+'
#pageBand = requests.get(getURL(band, page))
#tree1 = html.fromstring(pageBand.content)
# Scrape script: gather every tab link for `band`, download each tab, then
# snapshot the module globals into a shelve file.
#
# NOTE: the snapshot at the bottom stores every picklable global under its own
# name, so the variable names in this section double as the keys of the saved
# shelf. Rename them only if nothing reads 'shelve.out' by the old names.
tree1 = getTree(band, page)

# The page count is taken as the number of <a> links in the first element with
# class "paging". When the result page has no such element, pages is empty and
# pages[0] would raise IndexError; fall back to a single page instead.
# NOTE(review): assumes a missing pager means the results fit on one page - confirm.
pages = tree1.find_class('paging')
maxPage = len(list(pages[0].iter('a'))) if pages else 1
print('Max Page: '+ str(maxPage))

# Links found on the first results page.
songs = tree1.find_class('song result-link')
songLinks = []
for i in songs:
    songLinks.append(i.get('href'))

# Links found on the remaining result pages (2 .. maxPage).
for i in range(maxPage -1):
    looppage = i + 2
    looptree = getTree(band, str(looppage))
    loopsongs = looptree.find_class('song result-link')
    for song in loopsongs:
        songLinks.append(song.get('href'))
print('No of tabs: ' + str(len(songLinks)))

# Download every tab; j counts the tabs fetched so far and is saved with the
# rest of the globals below.
myTabs = []
j = 0
for i in songLinks:
    myTabs.append(getTab(i))
    j += 1

# Persist the session: write each global that can be pickled into a new shelf.
filename='shelve.out'
my_shelf = shelve.open(filename,'n')
try:
    for key in dir():
        try:
            my_shelf[key] = globals()[key]
        except TypeError:
            #
            # __builtins__, my_shelf, and imported modules can not be shelved.
            #
            print('ERROR shelving: {0}'.format(key))
finally:
    # Close the shelf even if an unexpected error escapes the loop, so the
    # entries already written are flushed and the file handle is released.
    my_shelf.close()