Skip to content

Commit 98b13a6

Browse files
beeenderWauplin
andcommitted
Keep lock files to prevent rare concurrency issue
The lock file could be removed when the 2nd process gets the lock. And then the 3rd process will lock on a different lock file handle. Although the lock path stays the same. 1st proc gets the lock 2nd proc waits for the lock 1st proc releases the lock 1st proc removes the lock file 2nd proc gets the lock 3rd proc creates a new lock file and gets the lock Windows doesn't have this problem. This commit moves the lock files to a subdirectory of the hub cache, and don't remove it after downloading. The lock files are named with their 'etag' and '.lock' extension, placed in the 'HUGGINGFACE_HUB_CACHE/.locks/repo_folder_name' directory. The repo_folder_name is generated from 'repo_id' and 'repo_type', to avoid same 'etag' in different repos. Co-authored-by: Lucain <[email protected]>
1 parent 209c1ea commit 98b13a6

File tree

2 files changed

+18
-6
lines changed

2 files changed

+18
-6
lines changed

src/huggingface_hub/file_download.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,7 @@ def hf_hub_download(
11971197
cache_dir = str(cache_dir)
11981198
if isinstance(local_dir, Path):
11991199
local_dir = str(local_dir)
1200+
locks_dir = os.path.join(cache_dir, ".locks")
12001201

12011202
if subfolder == "":
12021203
subfolder = None
@@ -1410,7 +1411,8 @@ def hf_hub_download(
14101411
return pointer_path
14111412

14121413
# Prevent parallel downloads of the same file with a lock.
1413-
lock_path = blob_path + ".lock"
1414+
# etag could be duplicated across repos,
1415+
lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
14141416

14151417
# Some Windows versions do not allow for paths longer than 255 characters.
14161418
# In this case, we must specify it is an extended path by using the "\\?\" prefix.
@@ -1420,6 +1422,7 @@ def hf_hub_download(
14201422
if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
14211423
blob_path = "\\\\?\\" + os.path.abspath(blob_path)
14221424

1425+
Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
14231426
with FileLock(lock_path):
14241427
# If the download just completed while the lock was activated.
14251428
if os.path.exists(pointer_path) and not force_download:
@@ -1494,11 +1497,6 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]:
14941497
_chmod_and_replace(temp_file.name, local_dir_filepath)
14951498
pointer_path = local_dir_filepath # for return value
14961499

1497-
try:
1498-
os.remove(lock_path)
1499-
except OSError:
1500-
pass
1501-
15021500
return pointer_path
15031501

15041502

tests/test_file_download.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,20 @@ def test_download_from_a_gated_repo_with_hf_hub_download(self):
653653
cache_dir=tmpdir,
654654
)
655655

656+
@unittest.skipIf(os.name == "nt", "Lock files are always deleted on Windows.")
657+
def test_keep_lock_file(self):
658+
"""Lock files should not be deleted on Linux."""
659+
with SoftTemporaryDirectory() as tmpdir:
660+
hf_hub_download(DUMMY_MODEL_ID, filename=CONFIG_NAME, cache_dir=tmpdir)
661+
lock_file_exist = False
662+
locks_dir = os.path.join(tmpdir, ".locks")
663+
for subdir, dirs, files in os.walk(locks_dir):
664+
for file in files:
665+
if file.endswith(".lock"):
666+
lock_file_exist = True
667+
break
668+
self.assertTrue(lock_file_exist, "no lock file can be found")
669+
656670

657671
@with_production_testing
658672
@pytest.mark.usefixtures("fx_cache_dir")

0 commit comments

Comments
 (0)