Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add zip64 support #7

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 65 additions & 38 deletions lib/zipflow/spec/cdh.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,49 +22,76 @@ defmodule Zipflow.Spec.CDH do
"""
@spec encode((binary -> any), [{Zipflow.Spec.LFH.t, Zipflow.Spec.Entry.t}]) :: any
def encode(printer, contents) do
ctx = Enum.reduce(contents, %{entries: 0, offset: 0, size: 0}, fn {hframe, dframe}, acc ->
hdr = header(printer, acc, hframe, dframe)
acc
|> Map.update!(:size, & &1 + hdr[:size])
|> Map.update!(:offset, & &1 + hdr[:offset])
|> Map.update!(:entries, & &1 + 1)
end)
frame = << 0x06054b50 :: size(32)-little, # signature
0 :: size(16)-little, # number of this disk
0 :: size(16)-little, # number of the disk w/ ECD
ctx[:entries] :: size(16)-little, # total number of entries in this disk
ctx[:entries] :: size(16)-little, # total number of entries in the ECD
ctx[:size] :: size(32)-little, # size central directory
ctx[:offset] :: size(32)-little, # offset central directory
0 :: size(16)-little
>>
printer.(frame)
{entries, offset, size} =
Enum.reduce(contents, {0, 0, 0}, fn {hframe, dframe}, {entries, offset, size} ->
{file_header_offset, file_header_size} = header(printer, offset, hframe, dframe)
{entries + 1, offset + file_header_offset, size + file_header_size}
end)

frame = <<
# ZIP64 end of central directory record
0x06064b50 :: size(32)-little, # signature
44 :: size(64)-little, # size of ZIP64 end of central directory record
20 :: size(16)-little, # version made by
0x0a :: size(16)-little, # version needed to extract
0 :: size(32)-little, # number of this disk
0 :: size(32)-little, # number of disks w/ the start of the CD
entries :: size(64)-little, # total number of entries in the CD on this disk
entries :: size(64)-little, # total number of entries in the CD
size :: size(64)-little, # size of the CD
offset :: size(64)-little, # offset of the CD
# ZIP64 end of central directory locator
0x07064b50 :: size(32)-little, # signature
0 :: size(32)-little, # number of the disk w/ the start of the ZIP64 ECD
(offset + size) :: size(64)-little, # relative offset of the ZIP64 ECD
1 :: size(32)-little, # total number of disks
# end of central directory record
0x06054b50 :: size(32)-little, # signature
0 :: size(16)-little, # number of this disk
0 :: size(16)-little, # number of the disk w/ ECD
min(entries, 0xffff) :: size(16)-little, # total number of entries in this disk
min(entries, 0xffff) :: size(16)-little, # total number of entries in the ECD
min(size, 0xffffffff) :: size(32)-little, # size central directory
min(offset, 0xffffffff) :: size(32)-little, # offset central directory
0 :: size(16)-little # zip file comment length
>>

printer.(frame)
end

defp header(printer, ctx, hframe, dframe) do
defp header(printer, offset, hframe, dframe) do
frame = <<
0x02014b50 :: size(32)-little, # central file header signature
20 :: size(16)-little, # version made by
0x0a :: size(16)-little, # version to extract
8 :: size(16)-little, # general purpose flag
0 :: size(16)-little, # compression method
0 :: size(16)-little, # last mod file time
0 :: size(16)-little, # last mod file date
dframe[:crc] :: size(32)-little, # crc-32
dframe[:csize] :: size(32)-little, # compressed size
dframe[:usize] :: size(32)-little, # uncompressed size
hframe[:n_size] :: size(16)-little, # file name length
0 :: size(16)-little, # extra field length
0 :: size(16)-little, # file comment length
0 :: size(16)-little, # disk number start
0 :: size(16)-little, # internal file attribute
0 :: size(32)-little, # external file attribute
ctx[:offset] :: size(32)-little, # relative offset header
0x02014b50 :: size(32)-little, # central file header signature
20 :: size(16)-little, # version made by
0x0a :: size(16)-little, # version to extract
8 :: size(16)-little, # general purpose flag
0 :: size(16)-little, # compression method
0 :: size(16)-little, # last mod file time
0 :: size(16)-little, # last mod file date
dframe[:crc] :: size(32)-little, # crc-32
min(dframe[:csize], 0xffffffff) :: size(32)-little, # compressed size
min(dframe[:usize], 0xffffffff) :: size(32)-little, # uncompressed size
hframe[:n_size] :: size(16)-little, # file name length
32 :: size(16)-little, # extra field length
0 :: size(16)-little, # file comment length
0 :: size(16)-little, # disk number start
0 :: size(16)-little, # internal file attribute
0 :: size(32)-little, # external file attribute
min(offset, 0xffffffff) :: size(32)-little, # relative offset header
>>

extra_field = <<
0x0001 :: size(16)-little, # ZIP64 extended information
28 :: size(16)-little, # data size
dframe[:usize] :: size(64)-little, # uncompressed file size
dframe[:csize] :: size(64)-little, # compressed file size
offset :: size(64)-little, # relative offset of local header
0 :: size(32)-little # disk start number
>>

printer.(frame)
printer.(hframe[:name])
%{size: byte_size(frame) + hframe[:n_size],
offset: hframe[:size] + dframe[:size]}
printer.(extra_field)
{hframe[:size] + dframe[:size], byte_size(frame) + hframe[:n_size] + byte_size(extra_field)}
end

end
4 changes: 2 additions & 2 deletions lib/zipflow/spec/store_entry.ex
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ defmodule Zipflow.Spec.StoreEntry do
:zlib.close(ctx[:private][:z])
frame = << 0x08074b50 :: size(32)-little,
ctx[:crc] :: size(32)-little,
ctx[:csize] :: size(32)-little,
ctx[:usize] :: size(32)-little
ctx[:csize] :: size(64)-little,
ctx[:usize] :: size(64)-little
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT

 4.3.9.2 When compressing files, compressed and uncompressed sizes 
 SHOULD be stored in ZIP64 format (as 8 byte values) when a 
 file's size exceeds 0xFFFFFFFF.   However ZIP64 format MAY be 
 used regardless of the size of a file.  When extracting, if 
 the zip64 extended information extra field is present for 
 the file the compressed and uncompressed sizes will be 8
 byte values.  

When streaming, we don't know up front whether the archive will contain any large files: the first file may be smaller than 4 GiB while a later one is larger. However, we have to decide whether to use the 64-bit extensions before we start streaming the first file, so this code always uses the ZIP64 format, regardless of file size.

>>
ctx[:private][:printer].(frame)
Map.delete(ctx, :private)
Expand Down
6 changes: 2 additions & 4 deletions test/zipflow/spec/cdh_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ defmodule Zipflow.Spec.CDHTest do

alias Zipflow.Spec.CDH

test "encode returns ()" do
ans = CDH.encode(fn x -> assert is_binary(x); nil end, [])
assert () == ans
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

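The empty-parentheses expression `()` used in this assertion causes a compiler warning in modern Elixir versions, so the test now checks for a `nil` result with `refute` instead.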

test "encode returns nil" do
refute CDH.encode(fn x -> assert is_binary(x); nil end, [])
end

end