44
55module PlatformEngines
66
7- using SHA, Downloads, Tar
7+ using SHA, Downloads, Tar, Dates, Printf
88import ... Pkg: Pkg, TOML, pkg_server, depots1, can_fancyprint, stderr_f, atomic_toml_write
99using .. MiniProgressBars
10- using Base. BinaryPlatforms, p7zip_jll
10+ using Base. BinaryPlatforms, p7zip_jll, Zstd_jll
1111
12- export verify, unpack, package, download_verify_unpack
12+ export verify, unpack, package, download_verify_unpack, get_extract_cmd, detect_archive_format
1313
1414const EXE7Z_LOCK = ReentrantLock ()
1515const EXE7Z = Ref {String} ()
16+ const EXEZSTD_LOCK = ReentrantLock ()
17+ const EXEZSTD = Ref {String} ()
1618
1719function exe7z ()
1820 # If the JLL is available, use the wrapper function defined in there
@@ -28,6 +30,20 @@ function exe7z()
2830 end
2931end
3032
function exezstd()
    # Prefer the wrapper function shipped by the JLL whenever it is usable.
    Zstd_jll.is_available() && return Zstd_jll.zstd()

    # Otherwise fall back to a binary found on disk. The located path is stored
    # in EXEZSTD so that `findzstd()` is only run while the slot is unassigned;
    # EXEZSTD_LOCK is held around the check-and-assign.
    lock(EXEZSTD_LOCK)
    try
        isassigned(EXEZSTD) || (EXEZSTD[] = findzstd())
        return Cmd([EXEZSTD[]])
    finally
        unlock(EXEZSTD_LOCK)
    end
end
46+
3147function find7z ()
3248 name = " 7z"
3349 Sys. iswindows () && (name = " $name .exe" )
@@ -40,6 +56,18 @@ function find7z()
4056 error (" 7z binary not found" )
4157end
4258
"""
    findzstd()

Locate a `zstd` executable and return its path.

Candidates are checked in `../libexec` and `.` relative to `Sys.BINDIR` first, then
`Sys.which` is consulted. Raises an error when no binary is found.
"""
function findzstd()
    exe = Sys.iswindows() ? "zstd.exe" : "zstd"
    bindir = Sys.BINDIR::String

    for subdir in (joinpath("..", "libexec"), ".")
        candidate = normpath(bindir, subdir, exe)
        if isfile(candidate)
            return candidate
        end
    end

    on_path = Sys.which(exe)
    on_path === nothing && error("zstd binary not found")
    return on_path
end
70+
"""
    is_secure_url(url::AbstractString)

Return `true` when `url` begins with `https://`, or uses any scheme to address
`127.0.0.1` or `localhost` (optionally followed by a port) with the host ending at
the end of the string or at a `/`. Matching is case-insensitive.
"""
function is_secure_url(url::AbstractString)
    secure_pattern = r"^(https://|\w+://(127\.0\.0\.1|localhost)(:\d+)?($|/))"i
    return occursin(secure_pattern, url)
end
4573
@@ -232,6 +260,13 @@ function get_metadata_headers(url::AbstractString)
232260 end
233261 push! (headers, " Julia-CI-Variables" => join (ci_info, ' ;' ))
234262 push! (headers, " Julia-Interactive" => string (isinteractive ()))
263+
264+ # Add Accept-Encoding header only for compressed archive resources
265+ # (registries, packages, artifacts - not for metadata endpoints like /registries or /meta)
266+ if occursin (r" /(registry|package|artifact)/" , url)
267+ push! (headers, " Accept-Encoding" => " zstd, gzip" )
268+ end
269+
235270 for (key, val) in ENV
236271 m = match (r" ^JULIA_PKG_SERVER_([A-Z0-9_]+)$" i , key)
237272 m === nothing && continue
@@ -403,22 +438,76 @@ function copy_symlinks()
403438 lowercase (var) in (" false" , " f" , " no" , " n" , " 0" ) ? false : nothing
404439end
405440
"""
    detect_archive_format(tarball_path::AbstractString)

Identify the compression format of `tarball_path` from its leading magic bytes.
Returns "zstd" or "gzip"; raises an error if the file is empty or starts with
neither magic number.

Note: This is primarily used for determining the correct file extension after download.
For decompression, we always use zstd as it can efficiently handle both formats.
"""
function detect_archive_format(tarball_path::AbstractString)
    nbytes = filesize(tarball_path)
    nbytes == 0 && error("cannot detect compression format: $tarball_path is empty")

    # At most four bytes are needed to recognise either signature.
    header = open(io -> read(io, min(4, nbytes)), tarball_path, "r")

    # Each entry pairs a format name with its magic number; zstd is tested first.
    signatures = (
        "zstd" => UInt8[0x28, 0xB5, 0x2F, 0xFD],
        "gzip" => UInt8[0x1F, 0x8B],
    )
    for (format, magic) in signatures
        if length(header) >= length(magic) && view(header, 1:length(magic)) == magic
            return format
        end
    end

    # Nothing matched: include a hex dump of the bytes that were read for debugging.
    hex_dump = isempty(header) ? "none" : join((@sprintf("0x%02X", b) for b in header), " ")
    error("unknown compression format for $tarball_path (magic bytes: $hex_dump, expected zstd [0x28 0xB5 0x2F 0xFD] or gzip [0x1F 0x8B])")
end
474+
"""
    get_extract_cmd(tarball_path::AbstractString)

Build the command that decompresses `tarball_path` to standard output.

The command runs the executable returned by `exezstd()` with `-d -c`, regardless of
the archive's compression format.
"""
function get_extract_cmd(tarball_path::AbstractString)
    # One decompressor is used for every archive: per the original author, zstd can
    # decompress both zstd and gzip formats and is ~3x faster than 7z for gzip.
    # NOTE(review): gzip input presumably requires a zstd build with gzip support — confirm.
    zstd = exezstd()
    return `$zstd -d -c $tarball_path`
end
485+
"""
    unpack(tarball_path::AbstractString, dest::AbstractString; verbose::Bool = false)

Decompress the archive at `tarball_path` with the command from `get_extract_cmd` and
extract its contents into `dest` via `Tar.extract`, returning that call's result.

The `verbose` keyword is accepted but not used by this function body.
"""
function unpack(tarball_path::AbstractString, dest::AbstractString; verbose::Bool = false)
    extract_cmd = get_extract_cmd(tarball_path)
    return Tar.extract(extract_cmd, dest; copy_symlinks = copy_symlinks())
end
413493
414494"""
415495 package(src_dir::AbstractString, tarball_path::AbstractString)
416496
417497Compress `src_dir` into a tarball located at `tarball_path`.
498+ Supports both gzip and zstd compression based on file extension.
418499"""
419500function package (src_dir:: AbstractString , tarball_path:: AbstractString ; io = stderr_f ())
420501 rm (tarball_path, force = true )
421- cmd = ` $(exe7z ()) a -si -tgzip -mx9 $tarball_path `
502+ # Choose compression based on file extension (case-insensitive)
503+ tarball_lower = lowercase (tarball_path)
504+ if endswith (tarball_lower, " .zst" ) || endswith (tarball_lower, " .tar.zst" )
505+ # Use zstd compression (level 19 for good compression)
506+ cmd = ` $(exezstd ()) -19 -c -T -o $tarball_path `
507+ else
508+ # Use gzip compression (default)
509+ cmd = ` $(exe7z ()) a -si -tgzip -mx9 $tarball_path `
510+ end
422511 return open (pipeline (cmd, stdout = devnull , stderr = io), write = true ) do io
423512 Tar. create (src_dir, io)
424513 end
@@ -496,8 +585,8 @@ function download_verify_unpack(
496585 end
497586
498587 # If extension of url contains a recognized extension, use it, otherwise use ".gz"
499- ext = url_ext (url)
500- if ! (ext in [" tar" , " gz" , " tgz" , " bz2" , " xz" ])
588+ ext = lowercase ( url_ext (url) )
589+ if ! (ext in [" tar" , " gz" , " tgz" , " bz2" , " xz" , " zst " ])
501590 ext = " gz"
502591 end
503592
@@ -538,7 +627,7 @@ function download_verify_unpack(
538627 @info (" Unpacking $(tarball_path) into $(dest) ..." )
539628 end
540629 isnothing (progress) || progress (10000 , 10000 ; status = " unpacking" )
541- open (` $( exe7z ()) x $ tarball_path -so ` ) do io
630+ open (get_extract_cmd ( tarball_path) ) do io
542631 Tar. extract (io, dest, copy_symlinks = copy_symlinks ())
543632 end
544633 finally
@@ -690,7 +779,7 @@ function verify_archive_tree_hash(tar_gz::AbstractString, expected_hash::Base.SH
690779 # tarball, tree hash verification requires that the file can i) be
691780 # decompressed and ii) is a proper archive.
692781 calc_hash = try
693- Base. SHA1 (open (Tar. tree_hash, ` $( exe7z ()) x $ tar_gz -so ` ))
782+ Base. SHA1 (open (Tar. tree_hash, get_extract_cmd ( tar_gz) ))
694783 catch err
695784 @warn " unable to decompress and read archive" exception = err
696785 return false
0 commit comments