Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/together/lib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
# Download defaults
DOWNLOAD_BLOCK_SIZE = 10 * 1024 * 1024 # 10 MB
DISABLE_TQDM = False
MAX_DOWNLOAD_RETRIES = 5 # Maximum retries for download failures
DOWNLOAD_RETRY_DELAY = 1.0 # Initial retry delay in seconds

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd name DOWNLOAD_INITIAL_RETRY_DELAY according to the comment for consistency

DOWNLOAD_MAX_RETRY_DELAY = 30.0 # Maximum retry delay in seconds

# Upload defaults
MAX_CONCURRENT_PARTS = 4 # Maximum concurrent parts for multipart upload
Expand Down
73 changes: 67 additions & 6 deletions src/together/lib/resources/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import math
import stat
import time
import uuid
import shutil
import asyncio
Expand All @@ -28,13 +29,16 @@
DOWNLOAD_BLOCK_SIZE,
MAX_MULTIPART_PARTS,
TARGET_PART_SIZE_MB,
DOWNLOAD_RETRY_DELAY,
MAX_CONCURRENT_PARTS,
MAX_DOWNLOAD_RETRIES,
MULTIPART_THRESHOLD_GB,
DOWNLOAD_MAX_RETRY_DELAY,
MULTIPART_UPLOAD_TIMEOUT,
)
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..types.error import DownloadError, FileTypeError
from ..._exceptions import APIStatusError, AuthenticationError
from ..._exceptions import APIStatusError, APIConnectionError, AuthenticationError

log: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -198,21 +202,78 @@ def download(

assert file_size != 0, "Unable to retrieve remote file."

# Download with retry logic
bytes_downloaded = 0
retry_count = 0
retry_delay = DOWNLOAD_RETRY_DELAY

with tqdm(
total=file_size,
unit="B",
unit_scale=True,
desc=f"Downloading file {file_path.name}",
disable=bool(DISABLE_TQDM),
) as pbar:
for chunk in response.iter_bytes(DOWNLOAD_BLOCK_SIZE):
pbar.update(len(chunk))
temp_file.write(chunk) # type: ignore
while bytes_downloaded < file_size:
try:
# If this is a retry, close the previous response and create a new one with Range header
if bytes_downloaded > 0:
if 'response' in locals():

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do
response = None above the with tqdm to avoid using "locals" doing if response is None instead?

response.close()

log.info(f"Resuming download from byte {bytes_downloaded}")
response = self._client.get(
path=url,
cast_to=httpx.Response,
stream=True,
options=RequestOptions(
headers={"Range": f"bytes={bytes_downloaded}-"},
),
)

# Download chunks
for chunk in response.iter_bytes(DOWNLOAD_BLOCK_SIZE):
temp_file.write(chunk) # type: ignore
bytes_downloaded += len(chunk)
pbar.update(len(chunk))

# Successfully completed download
break

except (httpx.RequestError, httpx.StreamError, APIConnectionError) as e:
if retry_count >= MAX_DOWNLOAD_RETRIES:
log.error(f"Download failed after {retry_count} retries")
raise DownloadError(
f"Download failed after {retry_count} retries. Last error: {str(e)}"
) from e

retry_count += 1
log.warning(
f"Download interrupted at {bytes_downloaded}/{file_size} bytes. "
f"Retry {retry_count}/{MAX_DOWNLOAD_RETRIES} in {retry_delay}s..."
)
time.sleep(retry_delay)

# Exponential backoff with max delay cap
retry_delay = min(retry_delay * 2, DOWNLOAD_MAX_RETRY_DELAY)

except APIStatusError as e:
# For API errors, don't retry
log.error(f"API error during download: {e}")
raise APIStatusError(
"Error downloading file",
response=e.response,
body=e.response,
) from e

# Close the response
if 'response' in locals():
response.close()

# Raise exception if remote file size does not match downloaded file size
if os.stat(temp_file.name).st_size != file_size:
DownloadError(
f"Downloaded file size `{pbar.n}` bytes does not match remote file size `{file_size}` bytes."
raise DownloadError(
f"Downloaded file size `{bytes_downloaded}` bytes does not match remote file size `{file_size}` bytes."
)

# Moves temp file to output file path
Expand Down