@@ -79,7 +79,8 @@ def __del__(self):
         self.db.close()
         self.db_env.close()

-    def store(self, name, data, version, size=0, compressed=False, digest=None):
+    def store(self, name, data, version, size=0,
+              compressed=False, digest=None, logical_size=None):
         """Adds a new file to the storage.

         If the file with the same name existed before, it's not
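For context, a sketch of how a caller might exercise the widened `store()` signature. The helper name and the `storage` instance are hypothetical, and `file_digest` is assumed to return a hex SHA256 digest, matching the docstring below:

```python
import gzip
import hashlib
import io

def upload_precompressed(storage, name, raw_bytes, version):
    """Hypothetical helper: store() call for data the client already gzipped.

    `storage` is assumed to be an instance of the storage class patched here.
    """
    compressed = io.BytesIO(gzip.compress(raw_bytes))
    return storage.store(
        name=name,
        data=compressed,
        version=version,
        size=len(compressed.getvalue()),
        compressed=True,
        # Passing both digest and logical_size lets store() skip the
        # slow decompress-and-rescan path entirely.
        digest=hashlib.sha256(raw_bytes).hexdigest(),
        logical_size=len(raw_bytes),
    )
```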
@@ -108,62 +109,53 @@ def store(self, name, data, version, size=0, compressed=False, digest=None):
             digest: SHA256 digest of the file before compression
                 If specified, the digest will not be computed again, saving
                 resources.
+            logical_size: if ``data`` is gzip-compressed, this parameter
+                has to be set to decompressed file size.
         """
         with _exclusive_lock(self._lock_path('links', name)):
             link_path = self._link_path(name)
             if _path_exists(link_path) and _file_version(link_path) > version:
                 return _file_version(link_path)

-            # Path to temporary file that may be created in some cases.
-            temp_file_path = None
-
-            if digest is None:
-                # Write data to temp file and calculate hash.
-                temp_file_fd, temp_file_path = tempfile.mkstemp()
-                temp_file = os.fdopen(temp_file_fd, 'wb')
-                _copy_stream(data, temp_file, size)
-                temp_file.close()
-
-                if compressed:
-                    # If data was already compressed, we have to decompress it
-                    # before calculating the digest.
-                    with gzip.open(temp_file_path, 'rb') as compressed_file:
-                        digest = file_digest(compressed_file)
-                else:
-                    digest = file_digest(temp_file_path)
+            # data is managed by contents now, and shouldn't be used directly
+            with _InputStreamWrapper(data, size) as contents:
+                if digest is None or logical_size is None:
+                    contents.save()
+                    if compressed:
+                        # This shouldn't occur if the request came from a proper
+                        # filetracker client, so we don't care if it's slow.
+                        with gzip.open(
+                                contents.current_path, 'rb') as decompressed:
+                            digest = file_digest(decompressed)
+                        with gzip.open(
+                                contents.current_path, 'rb') as decompressed:
+                            logical_size = _read_stream_for_size(decompressed)
+                    else:
+                        digest = file_digest(contents.current_path)
+                        logical_size = os.stat(contents.current_path).st_size
+
+                blob_path = self._blob_path(digest)
+
+                with self._lock_blob_with_txn(digest) as txn:
+                    digest_bytes = digest.encode()

-            blob_path = self._blob_path(digest)
-
-            with self._lock_blob_with_txn(digest) as txn:
-                digest_bytes = digest.encode('utf8')
-                try:
                     link_count = int(self.db.get(digest_bytes, 0, txn=txn))
-                except KeyError:
-                    link_count = 0
+                    new_count = str(link_count + 1).encode()
+                    self.db.put(digest_bytes, new_count, txn=txn)

-                new_count = str(link_count + 1).encode('utf8')
-                self.db.put(digest_bytes, new_count, txn=txn)
+                    # Create a new blob if this isn't a duplicate.
+                    if link_count == 0:
+                        _create_file_dirs(blob_path)
+                        self.db.put('{}:logical_size'.format(digest).encode(),
+                                    str(logical_size).encode())

-                if link_count == 0:
-                    # Create a new blob.
-                    _create_file_dirs(blob_path)
-                    if compressed:
-                        if temp_file_path:
-                            shutil.move(temp_file_path, blob_path)
+                        if compressed:
+                            contents.save(blob_path)
                         else:
-                            with open(blob_path, 'wb') as blob:
-                                _copy_stream(data, blob, size)
-                    else:
-                        if temp_file_path:
-                            with open(temp_file_path, 'rb') as raw,\
+                            contents.save()
+                            with open(contents.current_path, 'rb') as raw,\
                                     gzip.open(blob_path, 'wb') as blob:
                                 shutil.copyfileobj(raw, blob)
-                        else:
-                            with gzip.open(blob_path, 'wb') as blob:
-                                _copy_stream(data, blob, size)
-
-            if temp_file_path and os.path.exists(temp_file_path):
-                os.unlink(temp_file_path)

             if _path_exists(link_path):
                 # Lend the link lock to delete().
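The transaction-protected block above is the whole deduplication story: the db maps each digest to a link count, and the blob file is only written when the count goes from 0 to 1. A standalone sketch of just that bookkeeping, with a plain dict standing in for the BerkeleyDB handle and transactions omitted:

```python
db = {}

def add_link(digest, logical_size):
    """Sketch of the refcount bookkeeping; returns True for a new blob."""
    key = digest.encode()
    link_count = int(db.get(key, 0))
    db[key] = str(link_count + 1).encode()
    if link_count == 0:
        # First link to this content: record its pre-compression size.
        # The real code also creates the blob file at this point.
        meta_key = '{}:logical_size'.format(digest).encode()
        db[meta_key] = str(logical_size).encode()
    return link_count == 0
```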
@@ -206,29 +198,34 @@ def delete(self, name, version, _lock=True):
             digest = self._digest_for_link(name)
             with self._lock_blob_with_txn(digest) as txn:
                 os.unlink(link_path)
-                digest_bytes = digest.encode('utf8')
+                digest_bytes = digest.encode()
                 link_count = self.db.get(digest_bytes, txn=txn)
                 if link_count is None:
                     raise RuntimeError("File exists but has no key in db")
                 link_count = int(link_count)
                 if link_count == 1:
                     self.db.delete(digest_bytes, txn=txn)
+                    self.db.delete(
+                            '{}:logical_size'.format(digest).encode(), txn=txn)
                     os.unlink(self._blob_path(digest))
                 else:
-                    new_count = str(link_count - 1).encode('utf8')
+                    new_count = str(link_count - 1).encode()
                     self.db.put(digest_bytes, new_count, txn=txn)
                 return True

     def stored_version(self, name):
-        """
-        Returns the version of file `name` that is currently stored
-        or None if it doesn't exist.
-        """
+        """Returns the version of file `name` or None if it doesn't exist."""
         link_path = self._link_path(name)
         if not _path_exists(link_path):
             return None
         return _file_version(link_path)

+    def logical_size(self, name):
+        """Returns the logical size (before compression) of file `name`."""
+        digest = self._digest_for_link(name)
+        return int(self.db.get('{}:logical_size'
+                               .format(digest).encode()).decode())
+
     def _link_path(self, name):
         return os.path.join(self.links_dir, name)

@@ -261,6 +258,44 @@ def _digest_for_link(self, name):
         return digest


+class _InputStreamWrapper(object):
+    """A wrapper for lazy reading and moving contents of 'wsgi.input'.
+
+    Should be used as a context manager.
+    """
+    def __init__(self, data, size):
+        self._data = data
+        self._size = size
+        self.current_path = None
+        self.saved_in_temp = False
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, _exc_type, _exc_value, _traceback):
+        """Removes file if it was last saved as a temporary file."""
+        if self.saved_in_temp:
+            os.unlink(self.current_path)
+
+    def save(self, new_path=None):
+        """Moves or creates the file with stream contents to a new location.
+
+        Args:
+            new_path: path to move to, if None a temporary file is created.
+        """
+        self.saved_in_temp = new_path is None
+        if new_path is None:
+            fd, new_path = tempfile.mkstemp()
+            os.close(fd)
+
+        if self.current_path:
+            shutil.move(self.current_path, new_path)
+        else:
+            with open(new_path, 'wb') as dest:
+                _copy_stream(self._data, dest, self._size)
+        self.current_path = new_path
+
+
 _BUFFER_SIZE = 64 * 1024


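A hypothetical driver showing the `_InputStreamWrapper` contract: the stream is spilled to disk at most once, `save(path)` moves the already-saved temp file rather than re-reading the stream, and `__exit__` only cleans up files still in a temporary location:

```python
import io

def demo_wrapper_lifecycle(blob_path):
    """Sketch (hypothetical driver) of the _InputStreamWrapper lifecycle."""
    stream = io.BytesIO(b'payload')
    with _InputStreamWrapper(stream, size=7) as contents:
        contents.save()               # spills the stream to a temp file
        contents.save(blob_path)      # moves that same file into place
        assert contents.current_path == blob_path
    # saved_in_temp is False after save(blob_path), so __exit__ leaves
    # the final file alone; a temp-only file would be unlinked here.
```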
@@ -292,6 +327,17 @@ def _copy_stream(src, dest, length=0):
         bytes_left -= buf_size


+def _read_stream_for_size(stream):
+    """Reads a stream discarding the data read and returns its size."""
+    size = 0
+    while True:
+        buf = stream.read(_BUFFER_SIZE)
+        size += len(buf)
+        if not buf:
+            break
+    return size
+
+
 def _create_file_dirs(file_path):
     """Creates directory tree to file if it doesn't exist."""
     dir_name = os.path.dirname(file_path)
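A quick sanity check of `_read_stream_for_size` under the same assumption `store()` makes, namely that it is handed an already-opened gzip stream; `_BUFFER_SIZE` comes from the module above:

```python
import gzip
import io

# The reported size matches the decompressed payload length without
# ever buffering the whole stream in memory.
payload = b'x' * 100000
stream = gzip.GzipFile(fileobj=io.BytesIO(gzip.compress(payload)))
assert _read_stream_for_size(stream) == len(payload)
```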