forked from gutenbergtools/autocat3
-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
CloudStorage.py
316 lines (230 loc) · 9.48 KB
/
CloudStorage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!/usr/bin/env python
# -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*-
"""
CloudStorage.py
Copyright 2013-15 by Marcello Perathoner
Distributable under the GNU General Public License Version 3 or newer.
Base classes for uploads to file hosting services.
"""
from __future__ import unicode_literals
from contextlib import closing
from six.moves import urllib
import logging
import re
import os
import cherrypy
import routes
import requests
import requests_oauthlib
from requests import RequestException
from oauthlib.oauth2.rfc6749.errors import OAuth2Error
from i18n_tool import ugettext as _
import BaseSearcher
# pylint: disable=R0921
# Module-level transport adapters.  Every CloudOAuth2Session mounts these
# same two instances in its constructor, so they are shared by all sessions.
http_adapter = requests.adapters.HTTPAdapter ()
https_adapter = requests.adapters.HTTPAdapter ()
# Google Drive 'bug', see:
# https://github.com/idan/oauthlib/commit/ca4811b3087f9d34754d3debf839e247593b8a39
# NOTE(review): this flag is understood to make oauthlib accept a token
# response whose scope differs from the requested one -- confirm against
# the oauthlib documentation.
os.environ['OAUTHLIB_RELAX_TOKEN_SCOPE'] = '1'
# Shorthand for the cherrypy configuration, read throughout this module.
config = cherrypy.config
# Url generator used for the OAuth callback url and for the redirect back to
# the bibrec page.  It is seeded with the host and https settings from the
# configuration.
urlgen = routes.URLGenerator (cherrypy.routes_mapper, {
'HTTP_HOST': config['file_host'],
'HTTPS': config['host_https']
})
def log (msg):
    """ Write an informational message to the cherrypy log.

    :param msg: The message text.

    The entry is tagged with the context 'CLOUDSTORAGE' and logged with
    severity logging.INFO.
    """
    cherrypy.log (msg, severity = logging.INFO, context = 'CLOUDSTORAGE')
def error_log (msg):
    """ Write an error message to the cherrypy log.

    :param msg: The message text; it is concatenated to the prefix
                'Error: ', so it must be a string.

    The entry is tagged with the context 'CLOUDSTORAGE' and logged with
    severity logging.ERROR.
    """
    text = 'Error: ' + msg
    cherrypy.log (text, severity = logging.ERROR, context = 'CLOUDSTORAGE')
class CloudOAuth2Session (requests_oauthlib.OAuth2Session): # pylint: disable=R0904
    """ An OAuth2 session that configures itself from the cherrypy config.

    The class attributes below are all None here; a usable session class
    has to provide real values for them.
    """

    # Prefix of the config keys '<prefix>_client_id' and
    # '<prefix>_client_secret' and of the route name '<prefix>_callback'.
    name_prefix = None
    # Url handed to authorization_url () in step 1 of the flow.
    oauth2_auth_endpoint = None
    # Url handed to fetch_token () in step 2 of the flow.
    oauth2_token_endpoint = None
    # Scope handed to the OAuth2Session constructor.
    oauth2_scope = None

    def __init__ (self, **kwargs):
        """ Initialize the session from the cherrypy config.

        :param kwargs: Extra keyword arguments, handed on unchanged to the
                       requests_oauthlib.OAuth2Session constructor.
        """

        id_key = self.name_prefix + '_client_id'
        secret_key = self.name_prefix + '_client_secret'
        callback_route = self.name_prefix + '_callback'

        super (CloudOAuth2Session, self).__init__ (
            client_id = config[id_key],
            redirect_uri = urlgen (callback_route, host = config['file_host']),
            scope = self.oauth2_scope,
            **kwargs
        )

        self.client_secret = config[secret_key]
        # the ebook to upload; filled in by CloudStorage.index ()
        self.ebook = None

        # use the module-wide adapters for both schemes
        for scheme, adapter in (("http://", http_adapter),
                                ("https://", https_adapter)):
            self.mount (scheme, adapter)

    def oauth_dance (self, kwargs):
        """ Run whichever step of the OAuth2 flow is due.

        :param kwargs: The request parameters as a dict.  If it contains
                       'code', that value is used as authorization code.

        Does nothing if the session already holds a token.  Without a
        token and without a code it raises cherrypy.HTTPRedirect to the
        authorization endpoint.  With a code it fetches the access token.
        """

        #
        # OAuth 2.0 flow see:
        # http://tools.ietf.org/html/rfc6749
        #

        if self.token:
            # already authorized
            return

        if 'code' not in kwargs:
            # oauth step 1:
            # send the user to the Authorization Endpoint
            log ('Building auth url ...')
            auth_url, _state = self.authorization_url (
                self.oauth2_auth_endpoint)
            log ('Redirecting user to auth endpoint ...')
            raise cherrypy.HTTPRedirect (auth_url)

        # oauth step 2:
        # the user's browser came back with an authorization code,
        # trade it for an access_token at the Token Endpoint
        log ('Fetching access token ...')
        self.fetch_token (
            self.oauth2_token_endpoint,
            client_secret = self.client_secret,
            code = kwargs['code'])
        log ('Got access token.')

    def unauthorized (self, msg = 'Unauthorized'):
        """ Hook called on OAuth2 failure.  Does nothing in this class.

        :param msg: The failure message or exception; ignored here.
        """
class CloudStorage (object):
    """ Base class for uploads to cloud storage providers.

    upload_file () is not implemented here and must be provided by a
    subclass.

    :param name: The name of the cloud service, eg. 'Dropbox'.
    :param session_class: The class to use for the oauth session.
    :param user_agent: The user agent to make requests to www.gutenberg.org.
    :param upload_endpoint: Not used by this class itself; presumably the
                            url that upload_file () sends the file to.
    """

    name = None
    session_class = CloudOAuth2Session
    user_agent = None
    upload_endpoint = None

    # Characters replaced by fix_filename ():  / \ < > : " | ? * and space.
    # The backslash has to be doubled.  Inside a character class a single
    # backslash followed by '<' is only an escaped '<', which leaves the
    # backslash itself out of the set.
    re_filename = re.compile (r'[/\\<>:"|?* ]')

    def __init__ (self):
        """ Remember the configured host and the module-wide url generator. """
        self.host = cherrypy.config['host']
        self.urlgen = urlgen


    def index (self, **kwargs):
        """ Output the page.

        Fetches the selected ebook and uploads it to the cloud service,
        going through the OAuth2 flow first if needed.

        :param kwargs: The request parameters.  'id' (with 'filetype')
                       selects the ebook, 'code' carries the OAuth2
                       authorization code, 'not_approved' or 'error' signal
                       that the upload should be abandoned.

        :raises cherrypy.HTTPError: 400 if no ebook is selected, 401 if the
                                    authorization or the transfer failed.
        :raises cherrypy.HTTPRedirect: to the authorization endpoint, or
                                       back to the bibrec page when done.
        """

        #
        # OAuth 2.0 flow see:
        # http://tools.ietf.org/html/rfc6749
        #

        session = self.get_or_create_session ()

        # The ebook is stored in the session so that it is still known
        # when the browser comes back from the authorization endpoint.
        if 'id' in kwargs:
            session.ebook = EbookMetaData (kwargs)
        if session.ebook is None:
            raise cherrypy.HTTPError (400, "No ebook selected. Are your cookies enabled?")

        name = self.name

        if 'not_approved' in kwargs or 'error' in kwargs:
            self._dialog (
                _('Sorry. The file could not be sent to {name}.').format (name = name),
                _('Error'))
            self.redirect_done (session)

        try:
            session.oauth_dance (kwargs)
            log ("Sending file %s to %s" % (
                session.ebook.get_source_url (), name))
            # closing () releases the streamed download even if the
            # upload fails
            with closing (self.request_ebook (session)) as r:
                r.raise_for_status ()
                self.upload_file (session, r)
                log ("File %s sent to %s" % (
                    session.ebook.get_source_url (), name))
            self._dialog (
                _('The file has been sent to {name}.').format (name = name),
                _('Sent to {name}').format (name = name))
            self.redirect_done (session)

        except (OAuth2Error, ) as what:
            session.unauthorized (what)
            self.unauthorized ('OAuthError: ' + str (what.urlencoded))

        except (RequestException, IOError, ValueError) as what:
            session.unauthorized (what)
            self.unauthorized ('RequestError: ' + str (what))
            # only reached if an overridden unauthorized () returns
            # instead of raising
            raise cherrypy.HTTPError (500, str (what))


    def upload_file (self, oauth_session, response):
        """ Upload the file.

        :param oauth_session: The authorized cloud session.
        :param response: The open, streamed response holding the ebook.

        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError


    def get_or_create_session (self):
        """ Retrieve an ongoing cloud session or create a new one.

        The session object is stored in the cherrypy session under the
        key '<name_prefix>_session'.  A new object is only constructed if
        none is stored yet.

        :returns: The cloud session.
        """
        session_name = self.session_class.name_prefix + '_session'
        session = cherrypy.session.get (session_name)
        if session is None:
            session = self.session_class ()
        # (re)assign in any case, as the original code did
        cherrypy.session[session_name] = session
        return session


    def delete_session (self):
        """ Delete cloud session.

        Does nothing if no cloud session is stored, so that calling this
        during error handling cannot raise a KeyError of its own.
        """
        session_name = self.session_class.name_prefix + '_session'
        cherrypy.session.pop (session_name, None)


    def request_ebook (self, session):
        """ Return an open request object for the ebook file.

        :param session: The cloud session; its ebook supplies the url.

        :returns: The streamed response.  The caller must close it.
        """

        url = session.ebook.get_source_url ()
        # Caveat: use requests.get, not session.get, because it is an insecure
        # transport. session.get would raise InsecureTransportError
        # turn off server encoding since we're going to re-stream the bytes
        return requests.get (
            url,
            headers = {'user-agent': self.user_agent, 'accept-encoding': ''},
            stream = True
        )


    def fix_filename (self, filename):
        """ Replace characters unsupported by many OSs.

        :param filename: The proposed filename.

        :returns: The filename with every character matched by
                  re_filename replaced by an underscore.
        """
        return self.re_filename.sub ('_', filename)


    def redirect_done (self, session):
        """ Redirect user back to bibrec page.

        :param session: The cloud session; its ebook supplies the id.

        :raises cherrypy.HTTPRedirect: always.
        """
        raise cherrypy.HTTPRedirect (self.urlgen (
            'bibrec', id = session.ebook.id, host = self.host))


    def unauthorized (self, msg = 'Unauthorized'):
        """ Call on OAuth failure.

        Logs the message, deletes the cloud session and answers with 401.

        :param msg: The failure message or exception.

        :raises cherrypy.HTTPError: 401, always.
        """
        msg = str (msg) # msg may be exception class
        error_log (msg)
        self.delete_session ()
        raise cherrypy.HTTPError (401, msg)


    @staticmethod
    def _dialog (message, title):
        """ Open a user-visible dialog on the next page.

        :param message: The dialog text.
        :param title: The dialog title.

        Both are stored as a tuple under 'user_dialog' in the cherrypy
        session.
        """
        cherrypy.session['user_dialog'] = (message, title)
class EbookMetaData (object):
    """ Helper class that holds ebook metadata.

    Holds the ebook id and the requested filetype, and derives the
    filename and the source url from them.
    """

    # the only values accepted for the 'filetype' parameter
    accepted_filetypes = (
        'epub.images',
        'epub.noimages',
        'kindle.images',
        'kindle.noimages',
        'pdf')

    def __init__ (self, kwargs):
        """ Validate the request parameters and store them.

        :param kwargs: The request parameters as a dict; must contain an
                       integer 'id' and a 'filetype' out of
                       accepted_filetypes.

        :raises cherrypy.HTTPError: 400 if a parameter is missing or
                                    invalid.
        """
        self.id = None
        self.filetype = None

        try:
            self.id = int (kwargs['id'])
            filetype = kwargs['filetype']
            if filetype not in self.accepted_filetypes:
                raise ValueError
            self.filetype = filetype
        # TypeError: int () got something that is neither string nor
        # number, eg. the list that results from a repeated 'id' parameter.
        # That is a bad request too, not a server error.
        except (KeyError, TypeError, ValueError):
            raise cherrypy.HTTPError (400, 'Bad Request. Invalid parameters')


    def get_dc (self):
        """ Get a DublinCore struct for the ebook.

        :returns: A BaseSearcher.DC loaded from the database for this id.
        """
        dc = BaseSearcher.DC (cherrypy.engine.pool)
        dc.load_from_database (self.id)
        return dc


    def get_extension (self):
        """ Get the ebook filename extension.

        :returns: The part of the filetype before the first dot, except
                  that 'kindle' becomes 'mobi'.
        """
        ext = self.filetype.split ('.', 1)[0]
        if ext == 'kindle':
            ext = 'mobi'
        return ext


    def get_filename (self):
        """ Get a suitable filename to store the ebook.

        :returns: The pretty title of the ebook plus extension, with
                  colons replaced by underscores.
        """
        filename = self.get_dc ().make_pretty_title () + '.' + self.get_extension ()
        return filename.replace (':', '_')


    def get_source_url (self):
        """ Return the url of the ebook file on gutenberg.org.

        :returns: The url on the configured file host; https if the config
                  value 'host_https' is true, else http.
        """

        protocol = 'https://' if cherrypy.config['host_https'] else 'http://'
        host = cherrypy.config['file_host']

        if self.id == 99999:
            # test filename
            # NOTE(review): only this branch coerces the host with str ();
            # kept as it was.
            return urllib.parse.urljoin (protocol + str (host), 'test.pdf')

        if self.filetype == 'pdf':
            path = 'files/%d/%d-pdf.pdf' % (self.id, self.id)
        else:
            path = 'ebooks/%d.%s' % (self.id, self.filetype)

        return urllib.parse.urljoin (protocol + host, path)