@@ -89,7 +89,7 @@ async def _get_chunks(url: str, chunk_size: int) -> Generator[bytes, None, None]
89
89
pbar .update (len (value ))
90
90
pbar .close ()
91
91
except JsException :
92
- raise Exception (f"Failed to read dataset at { url } " ) from None
92
+ raise Exception (f"Failed to read dataset at ' { url } '. " ) from None
93
93
else :
94
94
import requests # pyright: ignore
95
95
from requests .exceptions import ConnectionError # pyright: ignore
@@ -99,7 +99,7 @@ async def _get_chunks(url: str, chunk_size: int) -> Generator[bytes, None, None]
99
99
# If requests.get fails, it will return a readable error
100
100
if response .status_code >= 400 :
101
101
raise Exception (
102
- f"received status code { response .status_code } from { url } "
102
+ f"received status code { response .status_code } from ' { url } '. "
103
103
)
104
104
pbar = tqdm (
105
105
miniters = 1 ,
@@ -111,7 +111,7 @@ async def _get_chunks(url: str, chunk_size: int) -> Generator[bytes, None, None]
111
111
pbar .update (len (chunk ))
112
112
pbar .close ()
113
113
except ConnectionError :
114
- raise Exception (f"Failed to read dataset at { url } " ) from None
114
+ raise Exception (f"Failed to read dataset at ' { url } '. " ) from None
115
115
116
116
117
117
def _rmrf (path : Path ) -> None :
@@ -126,7 +126,7 @@ def _verify_files_dont_exist(
126
126
) -> None :
127
127
"""
128
128
Verifies all paths in 'paths' don't exist.
129
- :param paths: A iterable of pathlib.Paths .
129
+ :param paths: An iterable of pathlib.Path objects.
130
130
:param remove_if_exist=False: Remove each file at each path in paths if they already exist.
131
131
:returns: None
132
132
:raises FileExistsError: On the first path found that already exists if remove_if_exist is False.
@@ -232,9 +232,9 @@ async def prepare(
232
232
path = Path .cwd () if path is None else Path (path )
233
233
# Check if path is located inside /tmp
234
234
if Path ("/tmp" ) in path .parents :
235
- raise ValueError ("path must not be in /tmp" )
235
+ raise ValueError ("path must not be in /tmp. " )
236
236
elif path .is_file ():
237
- raise ValueError ("Datasets must be prepared to directories, not files" )
237
+ raise ValueError ("Datasets must be prepared to directories, not files. " )
238
238
# Create the target path if it doesn't exist yet
239
239
path .mkdir (exist_ok = True )
240
240
@@ -254,39 +254,45 @@ async def prepare(
254
254
shutil .rmtree (extract_dir )
255
255
extract_dir .mkdir ()
256
256
257
- if tarfile .is_tarfile (tmp_download_file ):
258
- with tarfile .open (tmp_download_file ) as tf :
259
- _verify_files_dont_exist (
260
- [
261
- path / child .name
262
- for child in map (Path , tf .getnames ())
263
- if len (child .parents ) == 1 and _is_file_to_symlink (child )
264
- ], # Only check if top-level fileobject
265
- remove_if_exist = overwrite ,
266
- )
267
- pbar = tqdm (iterable = tf .getmembers (), total = len (tf .getmembers ()))
268
- pbar .set_description (f"Extracting { filename } " )
269
- for member in pbar :
270
- tf .extract (member = member , path = extract_dir )
271
- tmp_download_file .unlink ()
272
- elif zipfile .is_zipfile (tmp_download_file ):
273
- with zipfile .ZipFile (tmp_download_file ) as zf :
274
- _verify_files_dont_exist (
275
- [
276
- path / child .name
277
- for child in map (Path , zf .namelist ())
278
- if len (child .parents ) == 1 and _is_file_to_symlink (child )
279
- ], # Only check if top-level fileobject
280
- remove_if_exist = overwrite ,
281
- )
282
- pbar = tqdm (iterable = zf .infolist (), total = len (zf .infolist ()))
283
- pbar .set_description (f"Extracting { filename } " )
284
- for member in pbar :
285
- zf .extract (member = member , path = extract_dir )
286
- tmp_download_file .unlink ()
287
- else :
288
- _verify_files_dont_exist ([path / filename ], remove_if_exist = overwrite )
289
- shutil .move (tmp_download_file , extract_dir / filename )
257
+ try :
258
+ if tarfile .is_tarfile (tmp_download_file ):
259
+ with tarfile .open (tmp_download_file ) as tf :
260
+ _verify_files_dont_exist (
261
+ [
262
+ path / child .name
263
+ for child in map (Path , tf .getnames ())
264
+ if len (child .parents ) == 1 and _is_file_to_symlink (child )
265
+ ], # Only check if top-level fileobject
266
+ remove_if_exist = overwrite ,
267
+ )
268
+ pbar = tqdm (iterable = tf .getmembers (), total = len (tf .getmembers ()))
269
+ pbar .set_description (f"Extracting { filename } " )
270
+ for member in pbar :
271
+ tf .extract (member = member , path = extract_dir )
272
+ tmp_download_file .unlink ()
273
+ elif zipfile .is_zipfile (tmp_download_file ):
274
+ with zipfile .ZipFile (tmp_download_file ) as zf :
275
+ _verify_files_dont_exist (
276
+ [
277
+ path / child .name
278
+ for child in map (Path , zf .namelist ())
279
+ if len (child .parents ) == 1 and _is_file_to_symlink (child )
280
+ ], # Only check if top-level fileobject
281
+ remove_if_exist = overwrite ,
282
+ )
283
+ pbar = tqdm (iterable = zf .infolist (), total = len (zf .infolist ()))
284
+ pbar .set_description (f"Extracting { filename } " )
285
+ for member in pbar :
286
+ zf .extract (member = member , path = extract_dir )
287
+ tmp_download_file .unlink ()
288
+ else :
289
+ _verify_files_dont_exist ([path / filename ], remove_if_exist = overwrite )
290
+ shutil .move (tmp_download_file , extract_dir / filename )
291
+ except FileExistsError as e :
292
+ raise FileExistsError (
293
+ str (e )
294
+ + "\n If you want to overwrite any existing files, use prepare(..., overwrite=True)."
295
+ ) from None
290
296
291
297
# If in jupyterlite environment, the extract_dir = path, so the files are already there.
292
298
if not _is_jupyterlite ():
@@ -304,29 +310,6 @@ def setup() -> None:
304
310
if _is_jupyterlite ():
305
311
tqdm .monitor_interval = 0
306
312
307
- try :
308
- import sys # pyright: ignore
309
-
310
- ipython = get_ipython ()
311
-
312
- def hide_traceback (
313
- exc_tuple = None ,
314
- filename = None ,
315
- tb_offset = None ,
316
- exception_only = False ,
317
- running_compiled_code = False ,
318
- ):
319
- etype , value , tb = sys .exc_info ()
320
- value .__cause__ = None # suppress chained exceptions
321
- return ipython ._showtraceback (
322
- etype , value , ipython .InteractiveTB .get_exception_only (etype , value )
323
- )
324
-
325
- ipython .showtraceback = hide_traceback
326
-
327
- except NameError :
328
- pass
329
-
330
313
331
314
setup ()
332
315
0 commit comments