1
+ from io import StringIO
1
2
import os
2
3
import posixpath
3
4
import secrets
@@ -25,17 +26,39 @@ def remote_dir(base_remote_dir):
25
26
return base_remote_dir + "/" + str (uuid .uuid4 ())
26
27
27
28
28
- @pytest .fixture
29
- def fs ( tmpdir , base_remote_dir ):
29
+ @pytest .fixture ( scope = "module" )
30
+ def service_auth ( tmp_path_factory ):
30
31
setup_credentials ()
31
- auth = GoogleAuth (settings_file_path ("default.yaml" , tmpdir / "" ))
32
+ tmpdir = tmp_path_factory .mktemp ("settings" )
33
+ auth = GoogleAuth (settings_file_path ("default.yaml" , wkdir = tmpdir ))
32
34
auth .ServiceAuth ()
35
+ return auth
36
+
37
+
38
+ @pytest .fixture (scope = "module" )
39
+ def fs_factory (base_remote_dir , service_auth ):
40
+ base_item = None
41
+ GDriveFileSystem .cachable = False
42
+
43
+ def _create_fs ():
44
+ nonlocal base_item
45
+ _ , base = base_remote_dir .split ("/" , 1 )
46
+ fs = GDriveFileSystem (base_remote_dir , service_auth )
47
+ if base_item is None :
48
+ base_item = fs ._gdrive_create_dir ("root" , base )
49
+
50
+ return fs , base_item
51
+
52
+ yield _create_fs
53
+
54
+ GDriveFileSystem .cachable = True
55
+ fs = GDriveFileSystem (base_remote_dir , service_auth )
56
+ fs .rm_file (base_remote_dir )
33
57
34
- bucket , base = base_remote_dir .split ("/" , 1 )
35
- fs = GDriveFileSystem (base_remote_dir , auth )
36
- fs ._gdrive_create_dir ("root" , base )
37
58
38
- return fs
59
+ @pytest .fixture
60
+ def fs (fs_factory ):
61
+ return fs_factory ()[0 ]
39
62
40
63
41
64
@pytest .mark .manual
@@ -66,7 +89,7 @@ def test_fs_service_json(base_remote_dir):
66
89
)
67
90
68
91
69
- def test_info (fs , tmpdir , remote_dir ):
92
+ def test_info (fs , remote_dir ):
70
93
fs .touch (remote_dir + "/info/a.txt" )
71
94
fs .touch (remote_dir + "/info/b.txt" )
72
95
details = fs .info (remote_dir + "/info/a.txt" )
@@ -116,20 +139,20 @@ def test_rm(fs, remote_dir):
116
139
assert not fs .exists (remote_dir + "/dir/c/a" )
117
140
118
141
119
- def test_ls (fs : GDriveFileSystem , remote_dir ):
120
- _ , base = fs .split_path (remote_dir + "dir/" )
142
+ def test_ls (fs , remote_dir ):
143
+ _ , base = fs .split_path (remote_dir + "/ dir/" )
121
144
fs ._path_to_item_ids (base , create = True )
122
- assert fs .ls (remote_dir + "dir/" ) == []
145
+ assert fs .ls (remote_dir + "/ dir/" ) == []
123
146
124
147
files = set ()
125
148
for no in range (8 ):
126
- file = remote_dir + f"dir/test_{ no } "
149
+ file = remote_dir + f"/ dir/test_{ no } "
127
150
fs .touch (file )
128
151
files .add (file )
129
152
130
- assert set (fs .ls (remote_dir + "dir/" )) == files
153
+ assert set (fs .ls (remote_dir + "/ dir/" )) == files
131
154
132
- dirs = fs .ls (remote_dir + "dir/" , detail = True )
155
+ dirs = fs .ls (remote_dir + "/ dir/" , detail = True )
133
156
expected = [fs .info (file ) for file in files ]
134
157
135
158
def by_name (details ):
@@ -141,12 +164,95 @@ def by_name(details):
141
164
assert dirs == expected
142
165
143
166
167
+ def test_basic_ops_caching (fs_factory , remote_dir , mocker ):
168
+ # Internally we have to dereference names into IDs to call GDrive APIs
169
+ # we are trying hard to cache those and make sure that operations like
170
+ # exists, ls, find, etc. don't hit the API more than once per path
171
+
172
+ # ListFile (_gdrive_list) is the main operation that we use to retrieve file
173
+ # metadata in all operations like find/ls/exists, etc. It should be fine as
174
+ # a basic benchmark to count those.
175
+ # Note: we can't count direct API calls since we have retries, also can't
176
+ # count even direct calls to the GDrive client - for the same reason
177
+ fs , _ = fs_factory ()
178
+ spy = mocker .spy (fs , "_gdrive_list" )
179
+
180
+ dir_path = remote_dir + "/a/b/c/"
181
+ file_path = dir_path + "test.txt"
182
+ fs .touch (file_path )
183
+
184
+ assert spy .call_count == 5
185
+ spy .reset_mock ()
186
+
187
+ fs .exists (file_path )
188
+ assert spy .call_count == 1
189
+ spy .reset_mock ()
190
+
191
+ fs .ls (remote_dir )
192
+ assert spy .call_count == 1
193
+ spy .reset_mock ()
194
+
195
+ fs .ls (dir_path )
196
+ assert spy .call_count == 1
197
+ spy .reset_mock ()
198
+
199
+ fs .find (dir_path )
200
+ assert spy .call_count == 1
201
+ spy .reset_mock ()
202
+
203
+ fs .find (remote_dir )
204
+ assert spy .call_count == 1
205
+ spy .reset_mock ()
206
+
207
+
208
+ def test_ops_work_with_duplicate_names (fs_factory , remote_dir ):
209
+ fs , base_item = fs_factory ()
210
+
211
+ remote_dir_item = fs ._gdrive_create_dir (
212
+ base_item ["id" ], remote_dir .split ("/" )[- 1 ]
213
+ )
214
+ dir_name = str (uuid .uuid4 ())
215
+ dir1 = fs ._gdrive_create_dir (remote_dir_item ["id" ], dir_name )
216
+ dir2 = fs ._gdrive_create_dir (remote_dir_item ["id" ], dir_name )
217
+
218
+ # Two directories were created with the same name
219
+ assert dir1 ["id" ] != dir2 ["id" ]
220
+
221
+ dir_path = remote_dir + "/" + dir_name + "/"
222
+
223
+ # ls returns both of them, even though the names are the same
224
+ test_fs = fs
225
+ result = test_fs .ls (remote_dir )
226
+ assert len (result ) == 2
227
+ assert set (result ) == {dir_path }
228
+
229
+ # a newly created fs instance also returns both of them from ls
230
+ test_fs , _ = fs_factory ()
231
+ result = test_fs .ls (remote_dir )
232
+ assert len (result ) == 2
233
+ assert set (result ) == {dir_path }
234
+
235
+ for test_fs in [fs , fs_factory ()[0 ]]:
236
+ # find by default doesn't return dirs at all
237
+ result = test_fs .find (remote_dir )
238
+ assert len (result ) == 0
239
+
240
+ fs ._gdrive_upload_fobj ("a.txt" , dir1 ["id" ], StringIO ("" ))
241
+ fs ._gdrive_upload_fobj ("b.txt" , dir2 ["id" ], StringIO ("" ))
242
+
243
+ for test_fs in [fs , fs_factory ()[0 ]]:
244
+ # now we should have both files
245
+ result = test_fs .find (remote_dir )
246
+ assert len (result ) == 2
247
+ assert set (result ) == {dir_path + file for file in ["a.txt" , "b.txt" ]}
248
+
249
+
144
250
def test_ls_non_existing_dir (fs , remote_dir ):
145
251
with pytest .raises (FileNotFoundError ):
146
252
fs .ls (remote_dir + "dir/" )
147
253
148
254
149
- def test_find (fs , remote_dir ):
255
+ def test_find (fs , fs_factory , remote_dir ):
150
256
fs .mkdir (remote_dir + "/dir" )
151
257
152
258
files = [
@@ -169,12 +275,24 @@ def test_find(fs, remote_dir):
169
275
for file in files :
170
276
fs .touch (file )
171
277
172
- assert set (fs .find (remote_dir )) == set (files )
278
+ for test_fs in [fs , fs_factory ()[0 ]]:
279
+ # Test for https://github.com/iterative/PyDrive2/issues/229
280
+ # It must go first, so that we test with a cache miss as well
281
+ assert set (test_fs .find (remote_dir + "/dir/c/d/" )) == set (
282
+ [
283
+ file
284
+ for file in files
285
+ if file .startswith (remote_dir + "/dir/c/d/" )
286
+ ]
287
+ )
288
+
289
+ # General find test
290
+ assert set (test_fs .find (remote_dir )) == set (files )
173
291
174
- find_results = fs .find (remote_dir , detail = True )
175
- info_results = [fs .info (file ) for file in files ]
176
- info_results = {content ["name" ]: content for content in info_results }
177
- assert find_results == info_results
292
+ find_results = test_fs .find (remote_dir , detail = True )
293
+ info_results = [test_fs .info (file ) for file in files ]
294
+ info_results = {content ["name" ]: content for content in info_results }
295
+ assert find_results == info_results
178
296
179
297
180
298
def test_exceptions (fs , tmpdir , remote_dir ):
@@ -199,15 +317,20 @@ def test_open_rw(fs, remote_dir):
199
317
assert stream .read () == data
200
318
201
319
202
- def test_concurrent_operations (fs , remote_dir ):
320
+ def test_concurrent_operations (fs , fs_factory , remote_dir ):
321
+ # Include an extra dir name to force upload operations to create it
322
+ # this way we can also test that only a single directory is created
323
+ # even if multiple threads are uploading files into the same dir
324
+ dir_name = secrets .token_hex (16 )
325
+
203
326
def create_random_file ():
204
327
name = secrets .token_hex (16 )
205
- with fs .open (remote_dir + " /" + name , "w" ) as stream :
328
+ with fs .open (remote_dir + f"/ { dir_name } /" + name , "w" ) as stream :
206
329
stream .write (name )
207
330
return name
208
331
209
332
def read_random_file (name ):
210
- with fs .open (remote_dir + " /" + name , "r" ) as stream :
333
+ with fs .open (remote_dir + f"/ { dir_name } /" + name , "r" ) as stream :
211
334
return stream .read ()
212
335
213
336
with futures .ThreadPoolExecutor () as executor :
@@ -225,6 +348,11 @@ def read_random_file(name):
225
348
226
349
assert write_names == read_names
227
350
351
+ # Test that only a single dir is created
352
+ for test_fs in [fs , fs_factory ()[0 ]]:
353
+ results = test_fs .ls (remote_dir )
354
+ assert results == [remote_dir + f"/{ dir_name } /" ]
355
+
228
356
229
357
def test_put_file (fs , tmpdir , remote_dir ):
230
358
src_file = tmpdir / "a.txt"
0 commit comments