Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
Zstd_jll = "3161d3a3-bdf6-5164-811a-617609db77b4"
p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"

[weakdeps]
Expand All @@ -41,11 +42,12 @@ Libdl = "1.11"
Logging = "1.11"
Markdown = "1.11"
Printf = "1.11"
Random = "1.11"
REPL = "1.11"
Random = "1.11"
SHA = "0.7, 1"
TOML = "1"
Tar = "1.10"
UUIDs = "1.11"
Zstd_jll = "1.5.7"
julia = "1.12"
p7zip_jll = "17.5"
13 changes: 13 additions & 0 deletions docs/src/protocol.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,19 @@ The client can make GET or HEAD requests to the following resources:

Only the `/registries` resource changes - all other resources can be cached forever and the server will indicate this with the appropriate HTTP headers.

### Compression Negotiation

The Pkg protocol supports multiple compression formats.

- **Zstd compression** (current): Modern clients send `Accept-Encoding: zstd, gzip` to request Zstandard-compressed resources with gzip as a fallback.
- **Gzip compression** (legacy): Older clients that only support gzip send `Accept-Encoding: gzip` or omit the header entirely.

Clients verify the actual compression format by reading file magic bytes after download:

- **Zstd format**: Magic bytes `0x28 0xB5 0x2F 0xFD` (4 bytes) - decompressed with `zstd` (significantly faster)
- **Gzip format**: Magic bytes `0x1F 0x8B` (2 bytes) - decompressed with `7z`


### Reference Implementation

A reference implementation of the Pkg Server protocol is available at [PkgServer.jl](https://github.com/JuliaPackaging/PkgServer.jl).
Expand Down
120 changes: 111 additions & 9 deletions src/PlatformEngines.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@

module PlatformEngines

using SHA, Downloads, Tar
using SHA, Downloads, Tar, Dates, Printf
import ...Pkg: Pkg, TOML, pkg_server, depots1, can_fancyprint, stderr_f, atomic_toml_write
using ..MiniProgressBars
using Base.BinaryPlatforms, p7zip_jll
using Base.BinaryPlatforms, p7zip_jll, Zstd_jll

export verify, unpack, package, download_verify_unpack
export verify, unpack, package, download_verify_unpack, get_extract_cmd, detect_archive_format

# NOTE(review): presumably the lock and cached path used by `exe7z()`, mirroring how
# `exezstd()` uses the zstd pair below - the body of `exe7z()` is not visible here; confirm.
const EXE7Z_LOCK = ReentrantLock()
const EXE7Z = Ref{String}()
# Held by `exezstd()` while it checks and lazily fills `EXEZSTD`.
const EXEZSTD_LOCK = ReentrantLock()
# Path returned by `findzstd()`; left unassigned until `exezstd()` first needs the
# fallback binary (i.e. when `Zstd_jll` is not available).
const EXEZSTD = Ref{String}()

function exe7z()
# If the JLL is available, use the wrapper function defined in there
Expand All @@ -28,6 +30,20 @@ function exe7z()
end
end

# Return a `Cmd` that invokes the `zstd` executable.
#
# The wrapper shipped with `Zstd_jll` is preferred whenever that JLL reports itself as
# available. Otherwise the binary located by `findzstd()` is used; its path is looked up
# once, under `EXEZSTD_LOCK`, and cached in `EXEZSTD` for subsequent calls.
function exezstd()
    Zstd_jll.is_available() && return Zstd_jll.zstd()

    @lock EXEZSTD_LOCK begin
        # Only the first caller pays for the filesystem search.
        isassigned(EXEZSTD) || (EXEZSTD[] = findzstd())
        return Cmd([EXEZSTD[]])
    end
end

function find7z()
name = "7z"
Sys.iswindows() && (name = "$name.exe")
Expand All @@ -40,6 +56,18 @@ function find7z()
error("7z binary not found")
end

# Locate a `zstd` executable and return its path.
#
# Search order: `../libexec` relative to `Sys.BINDIR`, then `Sys.BINDIR` itself, then
# whatever `Sys.which` finds. Throws an error if none of these yields a binary.
function findzstd()
    exe = Sys.iswindows() ? "zstd.exe" : "zstd"
    bindir = Sys.BINDIR::String
    for rel in (joinpath("..", "libexec"), ".")
        candidate = normpath(bindir, rel, exe)
        isfile(candidate) && return candidate
    end
    found = Sys.which(exe)
    found === nothing && error("zstd binary not found")
    return found
end

"""
    is_secure_url(url::AbstractString)

Return `true` when `url` uses the `https` scheme, or when it uses any scheme but points
at `127.0.0.1` or `localhost` (optionally with a port). Matching is case-insensitive.
"""
function is_secure_url(url::AbstractString)
    return occursin(r"^(https://|\w+://(127\.0\.0\.1|localhost)(:\d+)?($|/))"i, url)
end

Expand Down Expand Up @@ -232,6 +260,13 @@ function get_metadata_headers(url::AbstractString)
end
push!(headers, "Julia-CI-Variables" => join(ci_info, ';'))
push!(headers, "Julia-Interactive" => string(isinteractive()))

# Add Accept-Encoding header only for compressed archive resources
# (registries, packages, artifacts - not for metadata endpoints like /registries or /meta)
if occursin(r"/(registry|package|artifact)/", url)
push!(headers, "Accept-Encoding" => "zstd, gzip")
end

for (key, val) in ENV
m = match(r"^JULIA_PKG_SERVER_([A-Z0-9_]+)$"i, key)
m === nothing && continue
Expand Down Expand Up @@ -403,22 +438,89 @@ function copy_symlinks()
lowercase(var) in ("false", "f", "no", "n", "0") ? false : nothing
end

"""
    detect_archive_format(tarball_path::AbstractString)

Detect the archive/compression format of the file at `tarball_path` by reading its
magic bytes.

Returns one of: "zstd", "gzip", "bzip2", "xz", "lz4", "tar", or "unknown".
"tar" is reported for uncompressed archives carrying the POSIX `ustar` marker;
anything that matches no known signature is "unknown".

Throws an `ErrorException` if the file is empty.

Note: This is used both for determining file extensions after download
and for selecting the appropriate decompression tool.
"""
function detect_archive_format(tarball_path::AbstractString)
    if filesize(tarball_path) == 0
        error("cannot detect compression format: $tarball_path is empty")
    end

    # 262 bytes covers every signature below, including the `ustar` marker that a
    # tar header stores at byte offset 257. `read` simply returns fewer bytes when
    # the file is shorter than that.
    header = open(io -> read(io, 262), tarball_path, "r")

    # True when `sig` appears in `header` starting `offset` bytes into the file.
    has_magic(sig, offset) =
        length(header) >= offset + length(sig) &&
        view(header, (offset + 1):(offset + length(sig))) == sig

    # Zstd: 0x28 0xB5 0x2F 0xFD (4 bytes)
    has_magic(UInt8[0x28, 0xB5, 0x2F, 0xFD], 0) && return "zstd"
    # Gzip: 0x1F 0x8B (2 bytes)
    has_magic(UInt8[0x1F, 0x8B], 0) && return "gzip"
    # Bzip2: 0x42 0x5A 0x68 (BZh) (3 bytes)
    has_magic(UInt8[0x42, 0x5A, 0x68], 0) && return "bzip2"
    # XZ: 0xFD 0x37 0x7A 0x58 0x5A 0x00 (6 bytes)
    has_magic(UInt8[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00], 0) && return "xz"
    # LZ4: 0x04 0x22 0x4D 0x18 (4 bytes)
    has_magic(UInt8[0x04, 0x22, 0x4D, 0x18], 0) && return "lz4"
    # Uncompressed tar: "ustar" at offset 257 of the first header block
    has_magic(b"ustar", 257) && return "tar"
    return "unknown"
end

"""
    get_extract_cmd(tarball_path::AbstractString)

Build the command that decompresses `tarball_path` to standard output. The format is
taken from the file's magic bytes: zstd archives are handled by `zstd`, everything
else is handed to 7z.
"""
function get_extract_cmd(tarball_path::AbstractString)
    is_zstd = detect_archive_format(tarball_path) == "zstd"
    is_zstd && return `$(exezstd()) -d -c $tarball_path`
    return `$(exe7z()) x $tarball_path -so`
end

"""
    unpack(tarball_path::AbstractString, dest::AbstractString; verbose::Bool = false)

Decompress the archive at `tarball_path` and extract its tar contents into `dest`.
The decompression command is chosen by `get_extract_cmd`, so the archive's actual
format (detected from its magic bytes) decides which tool is used.

`verbose` is accepted for interface compatibility; it is not used in this function.
"""
function unpack(
        tarball_path::AbstractString,
        dest::AbstractString;
        verbose::Bool = false,
    )
    # A single return: the previous 7z-only return made the format-aware one unreachable.
    return Tar.extract(get_extract_cmd(tarball_path), dest, copy_symlinks = copy_symlinks())
end

"""
package(src_dir::AbstractString, tarball_path::AbstractString)

Compress `src_dir` into a tarball located at `tarball_path`.
Supports both gzip and zstd compression based on file extension.
"""
function package(src_dir::AbstractString, tarball_path::AbstractString; io = stderr_f())
rm(tarball_path, force = true)
cmd = `$(exe7z()) a -si -tgzip -mx9 $tarball_path`
# Choose compression based on file extension (case-insensitive)
tarball_lower = lowercase(tarball_path)
if endswith(tarball_lower, ".zst") || endswith(tarball_lower, ".tar.zst")
# Use zstd compression (level 19 for good compression)
cmd = `$(exezstd()) -19 -c -T -o $tarball_path`
else
# Use gzip compression (default)
cmd = `$(exe7z()) a -si -tgzip -mx9 $tarball_path`
end
return open(pipeline(cmd, stdout = devnull, stderr = io), write = true) do io
Tar.create(src_dir, io)
end
Expand Down Expand Up @@ -497,7 +599,7 @@ function download_verify_unpack(

# If extension of url contains a recognized extension, use it, otherwise use ".gz"
ext = url_ext(url)
if !(ext in ["tar", "gz", "tgz", "bz2", "xz"])
if !(ext in ["tar", "gz", "tgz", "bz2", "xz", "zst"])
ext = "gz"
end

Expand Down Expand Up @@ -538,7 +640,7 @@ function download_verify_unpack(
@info("Unpacking $(tarball_path) into $(dest)...")
end
isnothing(progress) || progress(10000, 10000; status = "unpacking")
open(`$(exe7z()) x $tarball_path -so`) do io
open(get_extract_cmd(tarball_path)) do io
Tar.extract(io, dest, copy_symlinks = copy_symlinks())
end
finally
Expand Down Expand Up @@ -685,12 +787,12 @@ function verify(
end

# Verify the git-tree-sha1 hash of a compressed archive.
function verify_archive_tree_hash(tar_gz::AbstractString, expected_hash::Base.SHA1)
function verify_archive_tree_hash(compressed_tar::AbstractString, expected_hash::Base.SHA1)
# This can fail because unlike sha256 verification of the downloaded
# tarball, tree hash verification requires that the file can i) be
# decompressed and ii) is a proper archive.
calc_hash = try
Base.SHA1(open(Tar.tree_hash, `$(exe7z()) x $tar_gz -so`))
Base.SHA1(open(Tar.tree_hash, get_extract_cmd(compressed_tar)))
catch err
@warn "unable to decompress and read archive" exception = err
return false
Expand Down
34 changes: 29 additions & 5 deletions src/Registry/Registry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ module Registry
import ..Pkg
using ..Pkg: depots, depots1, printpkgstyle, stderr_f, isdir_nothrow, pathrepr, pkg_server,
GitTools, atomic_toml_write, create_cachedir_tag
using ..Pkg.PlatformEngines: download_verify_unpack, download, download_verify, exe7z, verify_archive_tree_hash
using ..Pkg.PlatformEngines: download_verify_unpack, download, download_verify, verify_archive_tree_hash, get_extract_cmd, detect_archive_format
using UUIDs, LibGit2, TOML, Dates
import FileWatching

Expand Down Expand Up @@ -240,6 +240,25 @@ function check_registry_state(reg)
return nothing
end

# Return the file extension (e.g. ".tar.zst") matching the format that
# `detect_archive_format` reports for `filepath`.
function archive_format_to_extension(filepath::AbstractString)::String
    detected = detect_archive_format(filepath)
    known = (
        "zstd" => ".tar.zst",
        "gzip" => ".tar.gz",
        "bzip2" => ".tar.bz2",
        "xz" => ".tar.xz",
        "lz4" => ".tar.lz4",
    )
    for (fmt, ext) in known
        detected == fmt && return ext
    end
    # Default to .tar.gz for tar or unknown formats
    return ".tar.gz"
end

function download_registries(io::IO, regs::Vector{RegistrySpec}, depots::Union{String, Vector{String}} = depots())
# Use the first depot as the target
target_depot = depots1(depots)
Expand Down Expand Up @@ -282,8 +301,10 @@ function download_registries(io::IO, regs::Vector{RegistrySpec}, depots::Union{S
reg_unc = uncompress_registry(tmp)
reg.name = TOML.parse(reg_unc["Registry.toml"])["name"]::String
end
mv(tmp, joinpath(regdir, reg.name * ".tar.gz"); force = true)
reg_info = Dict("uuid" => string(reg.uuid), "git-tree-sha1" => string(_hash), "path" => reg.name * ".tar.gz")
# Detect what we actually got from the server (defensive against servers that don't support zstd yet)
ext = archive_format_to_extension(tmp)
mv(tmp, joinpath(regdir, reg.name * ext); force = true)
reg_info = Dict("uuid" => string(reg.uuid), "git-tree-sha1" => string(_hash), "path" => reg.name * ext)
atomic_toml_write(joinpath(regdir, reg.name * ".toml"), reg_info)
registry_update_log[string(reg.uuid)] = now()
printpkgstyle(io, :Added, "`$(reg.name)` registry to $(Base.contractuser(regdir))")
Expand Down Expand Up @@ -546,8 +567,11 @@ function update(regs::Vector{RegistrySpec}; io::IO = stderr_f(), force::Bool = t
Base.rm(reg.path; recursive = true, force = true)
end
registry_path = dirname(reg.path)
mv(tmp, joinpath(registry_path, reg.name * ".tar.gz"); force = true)
reg_info = Dict("uuid" => string(reg.uuid), "git-tree-sha1" => string(hash), "path" => reg.name * ".tar.gz")
# Detect what we actually got from the server (defensive against servers that don't support zstd yet)
format = detect_archive_format(tmp)
ext = format == "zstd" ? ".tar.zst" : ".tar.gz"
mv(tmp, joinpath(registry_path, reg.name * ext); force = true)
reg_info = Dict("uuid" => string(reg.uuid), "git-tree-sha1" => string(hash), "path" => reg.name * ext)
atomic_toml_write(joinpath(registry_path, reg.name * ".toml"), reg_info)
registry_update_log[string(reg.uuid)] = now()
@label done_tarball_read
Expand Down
8 changes: 4 additions & 4 deletions src/Registry/registry_instance.jl
Original file line number Diff line number Diff line change
Expand Up @@ -272,14 +272,14 @@ function init_package_info!(pkg::PkgEntry)
end


function uncompress_registry(tar_gz::AbstractString)
if !isfile(tar_gz)
error("$(repr(tar_gz)): No such file")
function uncompress_registry(compressed_tar::AbstractString)
if !isfile(compressed_tar)
error("$(repr(compressed_tar)): No such file")
end
data = Dict{String, String}()
buf = Vector{UInt8}(undef, Tar.DEFAULT_BUFFER_SIZE)
io = IOBuffer()
open(`$(exe7z()) x $tar_gz -so`) do tar
open(get_extract_cmd(compressed_tar)) do tar
Tar.read_tarball(x -> true, tar; buf = buf) do hdr, _
if hdr.type == :file
Tar.read_data(tar, io; size = hdr.size, buf = buf)
Expand Down