Skip to content

Commit 7074aba

Browse files
authored
chore(fs): add tests to cover recent PRs (#328)
1 parent 1a8cd72 commit 7074aba

File tree

3 files changed

+170
-41
lines changed

3 files changed

+170
-41
lines changed

pydrive2/fs/spec.py

+16-17
Original file line numberDiff line numberDiff line change
@@ -245,17 +245,18 @@ def split_path(self, path):
245245
@wrap_prop(threading.RLock())
246246
@cached_property
247247
def _ids_cache(self):
248-
cache = {
249-
"dirs": defaultdict(list),
250-
"ids": {},
251-
"root_id": self._get_item_id(
252-
self.path,
253-
use_cache=False,
254-
hint="Confirm the directory exists and you can access it.",
255-
),
256-
}
248+
cache = {"dirs": defaultdict(list), "ids": {}}
249+
250+
base_item_ids = self._path_to_item_ids(self.base, use_cache=False)
251+
if not base_item_ids:
252+
raise FileNotFoundError(
253+
errno.ENOENT,
254+
os.strerror(errno.ENOENT),
255+
f"Confirm {self.path} exists and you can access it",
256+
)
257+
258+
self._cache_path_id(self.base, *base_item_ids, cache=cache)
257259

258-
self._cache_path_id(self.base, cache["root_id"], cache=cache)
259260
return cache
260261

261262
def _cache_path_id(self, path, *item_ids, cache=None):
@@ -366,7 +367,7 @@ def _path_to_item_ids(self, path, create=False, use_cache=True):
366367
[self._create_dir(min(parent_ids), title, path)] if create else []
367368
)
368369

369-
def _get_item_id(self, path, create=False, use_cache=True, hint=None):
370+
def _get_item_id(self, path, create=False, use_cache=True):
370371
bucket, base = self.split_path(path)
371372
assert bucket == self.root
372373

@@ -375,9 +376,7 @@ def _get_item_id(self, path, create=False, use_cache=True, hint=None):
375376
return min(item_ids)
376377

377378
assert not create
378-
raise FileNotFoundError(
379-
errno.ENOENT, os.strerror(errno.ENOENT), hint or path
380-
)
379+
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
381380

382381
@_gdrive_retry
383382
def _gdrive_create_dir(self, parent_id, title):
@@ -427,9 +426,9 @@ def info(self, path):
427426

428427
def ls(self, path, detail=False):
429428
bucket, base = self.split_path(path)
429+
assert bucket == self.root
430430

431431
dir_ids = self._path_to_item_ids(base)
432-
433432
if not dir_ids:
434433
raise FileNotFoundError(
435434
errno.ENOENT, os.strerror(errno.ENOENT), path
@@ -465,14 +464,14 @@ def ls(self, path, detail=False):
465464

466465
def find(self, path, detail=False, **kwargs):
467466
bucket, base = self.split_path(path)
468-
469-
seen_paths = set()
467+
assert bucket == self.root
470468

471469
# Make sure the base path is cached and dir_ids below has some
472470
# dirs revelant to this call
473471
self._path_to_item_ids(base)
474472

475473
dir_ids = [self._ids_cache["ids"].copy()]
474+
seen_paths = set()
476475
contents = []
477476
while dir_ids:
478477
query_ids = {

pydrive2/test/test_fs.py

+151-23
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from io import StringIO
12
import os
23
import posixpath
34
import secrets
@@ -25,17 +26,39 @@ def remote_dir(base_remote_dir):
2526
return base_remote_dir + "/" + str(uuid.uuid4())
2627

2728

28-
@pytest.fixture
29-
def fs(tmpdir, base_remote_dir):
29+
@pytest.fixture(scope="module")
30+
def service_auth(tmp_path_factory):
3031
setup_credentials()
31-
auth = GoogleAuth(settings_file_path("default.yaml", tmpdir / ""))
32+
tmpdir = tmp_path_factory.mktemp("settings")
33+
auth = GoogleAuth(settings_file_path("default.yaml", wkdir=tmpdir))
3234
auth.ServiceAuth()
35+
return auth
36+
37+
38+
@pytest.fixture(scope="module")
39+
def fs_factory(base_remote_dir, service_auth):
40+
base_item = None
41+
GDriveFileSystem.cachable = False
42+
43+
def _create_fs():
44+
nonlocal base_item
45+
_, base = base_remote_dir.split("/", 1)
46+
fs = GDriveFileSystem(base_remote_dir, service_auth)
47+
if base_item is None:
48+
base_item = fs._gdrive_create_dir("root", base)
49+
50+
return fs, base_item
51+
52+
yield _create_fs
53+
54+
GDriveFileSystem.cachable = True
55+
fs = GDriveFileSystem(base_remote_dir, service_auth)
56+
fs.rm_file(base_remote_dir)
3357

34-
bucket, base = base_remote_dir.split("/", 1)
35-
fs = GDriveFileSystem(base_remote_dir, auth)
36-
fs._gdrive_create_dir("root", base)
3758

38-
return fs
59+
@pytest.fixture
60+
def fs(fs_factory):
61+
return fs_factory()[0]
3962

4063

4164
@pytest.mark.manual
@@ -66,7 +89,7 @@ def test_fs_service_json(base_remote_dir):
6689
)
6790

6891

69-
def test_info(fs, tmpdir, remote_dir):
92+
def test_info(fs, remote_dir):
7093
fs.touch(remote_dir + "/info/a.txt")
7194
fs.touch(remote_dir + "/info/b.txt")
7295
details = fs.info(remote_dir + "/info/a.txt")
@@ -116,20 +139,20 @@ def test_rm(fs, remote_dir):
116139
assert not fs.exists(remote_dir + "/dir/c/a")
117140

118141

119-
def test_ls(fs: GDriveFileSystem, remote_dir):
120-
_, base = fs.split_path(remote_dir + "dir/")
142+
def test_ls(fs, remote_dir):
143+
_, base = fs.split_path(remote_dir + "/dir/")
121144
fs._path_to_item_ids(base, create=True)
122-
assert fs.ls(remote_dir + "dir/") == []
145+
assert fs.ls(remote_dir + "/dir/") == []
123146

124147
files = set()
125148
for no in range(8):
126-
file = remote_dir + f"dir/test_{no}"
149+
file = remote_dir + f"/dir/test_{no}"
127150
fs.touch(file)
128151
files.add(file)
129152

130-
assert set(fs.ls(remote_dir + "dir/")) == files
153+
assert set(fs.ls(remote_dir + "/dir/")) == files
131154

132-
dirs = fs.ls(remote_dir + "dir/", detail=True)
155+
dirs = fs.ls(remote_dir + "/dir/", detail=True)
133156
expected = [fs.info(file) for file in files]
134157

135158
def by_name(details):
@@ -141,12 +164,95 @@ def by_name(details):
141164
assert dirs == expected
142165

143166

167+
def test_basic_ops_caching(fs_factory, remote_dir, mocker):
168+
# Internally we have to derefence names into IDs to call GDrive APIs
169+
# we are trying hard to cache those and make sure that operations like
170+
# exists, ls, find, etc. don't hit the API more than once per path
171+
172+
# ListFile (_gdrive_list) is the main operation that we use to retieve file
173+
# metadata in all operations like find/ls/exist - etc. It should be fine as
174+
# a basic benchmark to count those.
175+
# Note: we can't count direct API calls since we have retries, also can't
176+
# count even direct calls to the GDrive client - for the same reason
177+
fs, _ = fs_factory()
178+
spy = mocker.spy(fs, "_gdrive_list")
179+
180+
dir_path = remote_dir + "/a/b/c/"
181+
file_path = dir_path + "test.txt"
182+
fs.touch(file_path)
183+
184+
assert spy.call_count == 5
185+
spy.reset_mock()
186+
187+
fs.exists(file_path)
188+
assert spy.call_count == 1
189+
spy.reset_mock()
190+
191+
fs.ls(remote_dir)
192+
assert spy.call_count == 1
193+
spy.reset_mock()
194+
195+
fs.ls(dir_path)
196+
assert spy.call_count == 1
197+
spy.reset_mock()
198+
199+
fs.find(dir_path)
200+
assert spy.call_count == 1
201+
spy.reset_mock()
202+
203+
fs.find(remote_dir)
204+
assert spy.call_count == 1
205+
spy.reset_mock()
206+
207+
208+
def test_ops_work_with_duplicate_names(fs_factory, remote_dir):
209+
fs, base_item = fs_factory()
210+
211+
remote_dir_item = fs._gdrive_create_dir(
212+
base_item["id"], remote_dir.split("/")[-1]
213+
)
214+
dir_name = str(uuid.uuid4())
215+
dir1 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)
216+
dir2 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)
217+
218+
# Two directories were created with the same name
219+
assert dir1["id"] != dir2["id"]
220+
221+
dir_path = remote_dir + "/" + dir_name + "/"
222+
223+
# ls returns both of them, even though the names are the same
224+
test_fs = fs
225+
result = test_fs.ls(remote_dir)
226+
assert len(result) == 2
227+
assert set(result) == {dir_path}
228+
229+
# ls returns both of them, even though the names are the same
230+
test_fs, _ = fs_factory()
231+
result = test_fs.ls(remote_dir)
232+
assert len(result) == 2
233+
assert set(result) == {dir_path}
234+
235+
for test_fs in [fs, fs_factory()[0]]:
236+
# find by default doesn't return dirs at all
237+
result = test_fs.find(remote_dir)
238+
assert len(result) == 0
239+
240+
fs._gdrive_upload_fobj("a.txt", dir1["id"], StringIO(""))
241+
fs._gdrive_upload_fobj("b.txt", dir2["id"], StringIO(""))
242+
243+
for test_fs in [fs, fs_factory()[0]]:
244+
# now we should have both files
245+
result = test_fs.find(remote_dir)
246+
assert len(result) == 2
247+
assert set(result) == {dir_path + file for file in ["a.txt", "b.txt"]}
248+
249+
144250
def test_ls_non_existing_dir(fs, remote_dir):
145251
with pytest.raises(FileNotFoundError):
146252
fs.ls(remote_dir + "dir/")
147253

148254

149-
def test_find(fs, remote_dir):
255+
def test_find(fs, fs_factory, remote_dir):
150256
fs.mkdir(remote_dir + "/dir")
151257

152258
files = [
@@ -169,12 +275,24 @@ def test_find(fs, remote_dir):
169275
for file in files:
170276
fs.touch(file)
171277

172-
assert set(fs.find(remote_dir)) == set(files)
278+
for test_fs in [fs, fs_factory()[0]]:
279+
# Test for https://github.com/iterative/PyDrive2/issues/229
280+
# It must go first, so that we test with a cache miss as well
281+
assert set(test_fs.find(remote_dir + "/dir/c/d/")) == set(
282+
[
283+
file
284+
for file in files
285+
if file.startswith(remote_dir + "/dir/c/d/")
286+
]
287+
)
288+
289+
# General find test
290+
assert set(test_fs.find(remote_dir)) == set(files)
173291

174-
find_results = fs.find(remote_dir, detail=True)
175-
info_results = [fs.info(file) for file in files]
176-
info_results = {content["name"]: content for content in info_results}
177-
assert find_results == info_results
292+
find_results = test_fs.find(remote_dir, detail=True)
293+
info_results = [test_fs.info(file) for file in files]
294+
info_results = {content["name"]: content for content in info_results}
295+
assert find_results == info_results
178296

179297

180298
def test_exceptions(fs, tmpdir, remote_dir):
@@ -199,15 +317,20 @@ def test_open_rw(fs, remote_dir):
199317
assert stream.read() == data
200318

201319

202-
def test_concurrent_operations(fs, remote_dir):
320+
def test_concurrent_operations(fs, fs_factory, remote_dir):
321+
# Include an extra dir name to force upload operations creating it
322+
# this way we can also test that only a single directory is created
323+
# enven if multiple threads are uploading files into the same dir
324+
dir_name = secrets.token_hex(16)
325+
203326
def create_random_file():
204327
name = secrets.token_hex(16)
205-
with fs.open(remote_dir + "/" + name, "w") as stream:
328+
with fs.open(remote_dir + f"/{dir_name}/" + name, "w") as stream:
206329
stream.write(name)
207330
return name
208331

209332
def read_random_file(name):
210-
with fs.open(remote_dir + "/" + name, "r") as stream:
333+
with fs.open(remote_dir + f"/{dir_name}/" + name, "r") as stream:
211334
return stream.read()
212335

213336
with futures.ThreadPoolExecutor() as executor:
@@ -225,6 +348,11 @@ def read_random_file(name):
225348

226349
assert write_names == read_names
227350

351+
# Test that only a single dir is cretead
352+
for test_fs in [fs, fs_factory()[0]]:
353+
results = test_fs.ls(remote_dir)
354+
assert results == [remote_dir + f"/{dir_name}/"]
355+
228356

229357
def test_put_file(fs, tmpdir, remote_dir):
230358
src_file = tmpdir / "a.txt"

pydrive2/test/test_util.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from pathlib import Path
12
import random
23
import re
34
import os
@@ -29,7 +30,8 @@ def setup_credentials(credentials_path=DEFAULT_USER_CREDENTIALS_FILE):
2930

3031
def settings_file_path(settings_file, wkdir=LOCAL_PATH):
3132
template_path = SETTINGS_PATH + settings_file
32-
local_path = wkdir + settings_file
33+
wkdir = Path(wkdir)
34+
local_path = wkdir / settings_file
3335
assert os.path.exists(template_path)
3436
if not os.path.exists(wkdir):
3537
os.makedirs(wkdir, exist_ok=True)

0 commit comments

Comments
 (0)