@@ -257,20 +257,21 @@ Given a resource and a list of storage servers, return the storage server that r
257257most quickly to a HEAD request for that storage server, as well as the HEAD response
258258itself so that metadata such as the content-length of the resource can be inspected.
259259"""
260- function select_server (resource:: AbstractString , servers:: Vector{<:AbstractString} ; timeout = 5 , retries = 2 )
261- function head_req (server, resource)
260+ function select_server (resource:: AbstractString , servers:: Vector{<:AbstractString} , accept_encoding :: AbstractString = " gzip " ; timeout = 5 , retries = 2 )
261+ function head_req (server, resource, accept_encoding )
262262 @try_printerror begin
263263 response = HTTP. head (
264264 string (server, resource);
265265 status_exception = false ,
266266 timeout = timeout,
267267 retries = retries,
268+ headers = [" Accept-Encoding" => accept_encoding],
268269 )
269270 return server, response
270271 end
271272 end
272273 # Launch one task per server, performing a HEAD request
273- tasks = [@spawn head_req (server, resource) for server in servers]
274+ tasks = [@spawn head_req (server, resource, accept_encoding ) for server in servers]
274275
275276 # Wait for the first Task that gives us an HTTP 200 OK, returning that server.
276277 # If none have it, we return `nothing`. :(
@@ -314,12 +315,26 @@ are recorded and future downloads of that same resource will be skipped, until
314315`forget_failures()` is called. The `DownloadState` object contains within it enough
315316information to still serve a resource as it is being downloaded in the background task.
316317"""
317- function fetch_resource (resource:: AbstractString , request_id:: AbstractString ; servers:: Vector{String} = config. storage_servers)
318+ function fetch_resource (resource:: AbstractString , request_id:: AbstractString , http :: Union{HTTP.Stream,Nothing} = nothing ; servers:: Vector{String} = config. storage_servers)
318319 if isempty (servers)
319320 @error (" fetch called with no servers" , resource)
320321 error (" fetch called with no servers" )
321322 end
322323
324+ # Determine what encoding to request from storage servers based on client preferences
325+ # For backwards compatibility, if no Accept-Encoding header, default to gzip
326+ if http != = nothing
327+ client_accept = HTTP. header (http, " Accept-Encoding" , " " )
328+ if isempty (client_accept)
329+ accept_encoding = " gzip" # no header means gzip
330+ else
331+ accept_encoding = client_accept
332+ end
333+ else
334+ # No client context, prefer zstd for storage efficiency
335+ accept_encoding = " zstd, gzip"
336+ end
337+
323338 # with_fetch_state() will wait for a lock
324339 with_fetch_state (resource) do state
325340 # check if this has failed to download recently
@@ -335,15 +350,15 @@ function fetch_resource(resource::AbstractString, request_id::AbstractString; se
335350 end
336351
337352 # If not, let's figure out which storage server we're going to download from:
338- server, response = select_server (resource, servers)
353+ server, response = select_server (resource, servers, accept_encoding )
339354 if response === nothing
340355 @debug (" no upstream server" , resource, servers)
341356 return nothing
342357 end
343358
344359 # Launch download process in a separate task:
345360 dl_task = @async begin
346- success = download (server, resource, content_length (response), request_id)
361+ success = download (server, resource, content_length (response), request_id, accept_encoding )
347362 lock (state. lock) do
348363 if success
349364 global fetch_hits += 1
@@ -422,8 +437,8 @@ function stream_file(io_in::IO, start_byte::Int, length::Int, dl_task::Task, io_
422437 return transmitted
423438end
424439
425- function download (server:: AbstractString , resource:: AbstractString , content_length:: Int , request_id:: AbstractString )
426- @info (" downloading resource" , server, resource, request_id)
440+ function download (server:: AbstractString , resource:: AbstractString , content_length:: Int , request_id:: AbstractString , accept_encoding :: AbstractString = " gzip " )
441+ @info (" downloading resource" , server, resource, request_id, accept_encoding )
427442 t_start = time ()
428443 hash = basename (resource)
429444
@@ -465,6 +480,8 @@ function download(server::AbstractString, resource::AbstractString, content_leng
465480 req = HTTP. get (server * resource,
466481 status_exception = false ,
467482 response_stream = file_io,
483+ headers = [" Accept-Encoding" => accept_encoding],
484+ decompress = false ,
468485 )
469486 close (file_io)
470487 return req
0 commit comments