Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 28 additions & 17 deletions src/create.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
"""
    RewriteEntry

Leaf record in the tree that `rewrite_tarball` assembles while scanning the old tarball.

# Fields
- `hdr::Header`: the header kept for this entry.
- `pos::Int64`: the value of `position(old_tar)` captured when the entry was recorded
  (or, for a hardlink, the position copied from the entry it links to); it is later
  paired with `old_tar` as the data source when the entry is written to the new tarball.
"""
struct RewriteEntry
    hdr::Header
    pos::Int64
end

"""
    RewriteTree

Directory node in the tree that `rewrite_tarball` assembles. `children` maps each
path component to either a nested `RewriteTree` (a subdirectory) or a `RewriteEntry`
(a recorded tarball entry).
"""
struct RewriteTree
    children::Dict{String, Union{RewriteTree, RewriteEntry}}
end

# Zero-argument constructor: a node that has no children yet.
function RewriteTree()
    return RewriteTree(Dict{String, Union{RewriteTree, RewriteEntry}}())
end

function create_tarball(
predicate::Function,
tar::IO,
Expand Down Expand Up @@ -47,42 +57,43 @@ function rewrite_tarball(
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
portable::Bool = false,
)
tree = Dict{String,Any}()
tree = RewriteTree()
read_tarball(predicate, old_tar; buf=buf) do hdr, parts
portable && check_windows_path(hdr.path, parts)
isempty(parts) && return
node = tree
name = pop!(parts)
for part in parts
node′ = get(node, part, nothing)
if !(node′ isa Dict)
node′ = node[part] = Dict{String,Any}()
child = get(node.children, part, nothing)
if !(child isa RewriteTree)
child = node.children[part] = RewriteTree()
end
node = node′
node = child
end
if hdr.type == :hardlink
node′ = tree
linked = tree
for part in split(hdr.link, '/')
node′ = node′[part]
linked = linked.children[part]
end
hdr′ = Header(node′[1], path=hdr.path, mode=hdr.mode)
node[name] = (hdr′, node′[2])
entry = linked::RewriteEntry
hdr′ = Header(entry.hdr, path=hdr.path, mode=hdr.mode)
node.children[name] = RewriteEntry(hdr′, entry.pos)
else
if !(hdr.type == :directory && get(node, name, nothing) isa Dict)
node[name] = (hdr, position(old_tar))
if !(hdr.type == :directory && get(node.children, name, nothing) isa RewriteTree)
node.children[name] = RewriteEntry(hdr, position(old_tar))
end
skip_data(old_tar, hdr.size)
end
end
write_tarball(new_tar, tree, buf=buf) do node, tar_path
if node isa Dict
if node isa RewriteTree
hdr = Header(tar_path, :directory, 0o755, 0, "")
return hdr, node
return hdr, node.children
else
hdr, pos = node
mode = hdr.type == :file && iszero(hdr.mode & 0o100) ? 0o644 : 0o755
hdr′ = Header(hdr; path=tar_path, mode=mode)
data = hdr.type == :directory ? nothing : (old_tar, pos)
entry = node::RewriteEntry
mode = entry.hdr.type == :file && iszero(entry.hdr.mode & 0o100) ? 0o644 : 0o755
hdr′ = Header(entry.hdr; path=tar_path, mode=mode)
data = entry.hdr.type == :directory ? nothing : (old_tar, entry.pos)
return hdr′, data
end
end
Expand Down
80 changes: 48 additions & 32 deletions src/extract.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# Value type of the `paths` dictionary that `read_tarball` creates and `link_target`
# accepts. Per the comment in `read_tarball`, a `Symbol` records the type of a path,
# while a symlink stores its link instead (presumably as a `String`).
# NOTE(review): the role of the `Int64` member is not visible in this excerpt — confirm.
const PathInfo = Union{String, Int64, Symbol}

"""
    GitLeaf

Leaf of the tree that `git_tree_hash` builds from a tarball.

# Fields
- `mode::String`: mode string printed ahead of the entry name when the enclosing tree
  object is hashed (e.g. `"100644"` or `"100755"` for files and hardlinks).
- `hash::String`: hash of the entry's content; it is passed through `hex2bytes` when
  the enclosing tree object is written.
"""
struct GitLeaf
    mode::String
    hash::String
end

"""
    GitTree

Directory node of the tree that `git_tree_hash` builds from a tarball. `children` maps
each entry name to either a nested `GitTree` (a subdirectory) or a `GitLeaf`.
"""
struct GitTree
    children::Dict{String, Union{GitTree, GitLeaf}}
end

# Zero-argument constructor: a directory node with no entries yet.
function GitTree()
    return GitTree(Dict{String, Union{GitTree, GitLeaf}}())
end

function iterate_headers(
callback::Function,
tar::IO;
Expand Down Expand Up @@ -168,7 +180,7 @@ end

# resolve symlink target or nothing if not valid
function link_target(
paths::Dict{String},
paths::Dict{String, PathInfo},
path::AbstractString,
link::AbstractString,
)
Expand Down Expand Up @@ -214,21 +226,21 @@ function git_tree_hash(
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
) where HashType <: SHA.SHA_CTX
# build tree with leaves for files and symlinks
tree = Dict{String,Any}()
tree = GitTree()
read_tarball(predicate, tar; buf=buf) do hdr, parts
isempty(parts) && return
name = pop!(parts)
node = tree
for part in parts
node′ = get(node, part, nothing)
if !(node′ isa Dict)
node′ = node[part] = Dict{String,Any}()
child = get(node.children, part, nothing)
if !(child isa GitTree)
child = node.children[part] = GitTree()
end
node = node′
node = child
end
if hdr.type == :directory
if !(get(node, name, nothing) isa Dict)
node[name] = Dict{String,Any}()
if !(get(node.children, name, nothing) isa GitTree)
node.children[name] = GitTree()
end
return
elseif hdr.type == :symlink
Expand All @@ -238,47 +250,51 @@ function git_tree_hash(
end
elseif hdr.type == :hardlink
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
node′ = tree
linked = tree
for part in split(hdr.link, '/')
node′ = node′[part]
linked = linked.children[part]
end
hash = node′[2] # hash of linked file
hash = (linked::GitLeaf).hash
elseif hdr.type == :file
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
hash = git_file_hash(tar, hdr.size, HashType, buf=buf)
else
error("unsupported type for git tree hashing: $(hdr.type)")
end
node[name] = (mode, hash)
node.children[name] = GitLeaf(mode, hash)
end

# prune directories that don't contain any files
if skip_empty
prune_empty!(node::Tuple) = true
function prune_empty!(node::Dict)
filter!(node) do (name, child)
prune_empty!(child)
end
return !isempty(node)
end
prune_empty!(tree)
end

# reduce the tree to a single hash value
hash_tree(node::Tuple) = node
function hash_tree(node::Dict)
by((name, child)) = child isa Dict ? "$name/" : name
hash = git_object_hash("tree", HashType) do io
for (name, child) in sort!(collect(node), by=by)
mode, hash = hash_tree(child)
print(io, mode, ' ', name, '\0')
write(io, hex2bytes(hash))
end
end
return "40000", hash
return hash_git_tree(tree, HashType)[end]
end

"""
    prune_empty!(node) -> Bool

Remove, in place, every subtree of `node` that contains no leaves, and report whether
`node` itself should be kept. A `GitLeaf` is always kept; a `GitTree` is kept only if
it still has at least one child after its own children have been pruned.
"""
prune_empty!(::GitLeaf) = true

function prune_empty!(node::GitTree)
    kids = node.children
    # the predicate recurses first, so nested empty directories collapse bottom-up
    filter!(entry -> prune_empty!(last(entry)), kids)
    return !isempty(kids)
end

return hash_tree(tree)[end]
"""
    hash_git_tree(node::GitLeaf, ::Type{HashType}) -> Tuple{String,String}

Return the `(mode, hash)` pair stored in the leaf. `HashType` is not used by this
method; it is accepted so that leaves and trees can be hashed through the same call.
"""
hash_git_tree(leaf::GitLeaf, ::Type{HashType}) where {HashType <: SHA.SHA_CTX} =
    (leaf.mode, leaf.hash)

"""
    hash_git_tree(node::GitTree, ::Type{HashType}) -> Tuple{String,String}

Reduce a directory node to a `(mode, hash)` pair, where `mode` is always `"40000"` and
`hash` is whatever `git_object_hash("tree", HashType)` returns for the serialized
listing of the node's children.

Children are visited in sorted order, with subtrees compared as if their name ended in
`/`. For each child, its mode, a space, its name and a NUL byte are printed, followed
by the raw bytes of its hash (`hex2bytes`). Subtrees are hashed recursively.
"""
function hash_git_tree(node::GitTree, ::Type{HashType}) where HashType <: SHA.SHA_CTX
    # subtrees are ordered as though their name carried a trailing slash
    sort_key((name, child)) = child isa GitTree ? "$name/" : name
    tree_hash = git_object_hash("tree", HashType) do io
        for (name, child) in sort!(collect(node.children), by=sort_key)
            # Names distinct from `tree_hash`: assigning to a local of the enclosing
            # function from inside this closure would force that variable to be boxed.
            child_mode, child_hash = hash_git_tree(child, HashType)
            print(io, child_mode, ' ', name, '\0')
            write(io, hex2bytes(child_hash))
        end
    end
    return ("40000", tree_hash)
end

function git_object_hash(
Expand Down Expand Up @@ -350,7 +366,7 @@ function read_tarball(
)
write_skeleton_header(skeleton, buf=buf)
# symbols for path types except symlinks store the link
paths = Dict{String,Any}()
paths = Dict{String, PathInfo}()
globals = Dict{String,String}()
while !eof(tar)
hdr = read_header(tar, globals=globals, buf=buf, tee=skeleton)
Expand Down
Loading