Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revise a decoder and encoder, being pure #140

Merged
merged 14 commits into from
Aug 4, 2024
3 changes: 2 additions & 1 deletion bin/otar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)

(*
let () = Printexc.record_backtrace true

module Tar_gz = Tar_gz.Make
Expand Down Expand Up @@ -129,3 +129,4 @@ let () = match Sys.argv with
| _ ->
let cmd = Filename.basename Sys.argv.(0) in
Format.eprintf "%s <directory> [<filename.tar.gz>]\n%s list <filename.tar.gz>\n" cmd cmd
*)
371 changes: 149 additions & 222 deletions lib/tar.ml

Large diffs are not rendered by default.

75 changes: 31 additions & 44 deletions lib/tar.mli
Original file line number Diff line number Diff line change
Expand Up @@ -139,47 +139,34 @@ module Header : sig
val to_sectors: t -> int64
end

module type ASYNC = sig
type 'a t
val ( >>= ): 'a t -> ('a -> 'b t) -> 'b t
val return: 'a -> 'a t
end

module type READER = sig
type in_channel
type 'a io
val really_read: in_channel -> bytes -> unit io
val skip: in_channel -> int -> unit io
end

module type WRITER = sig
type out_channel
type 'a io
val really_write: out_channel -> string -> unit io
end

module type HEADERREADER = sig
type in_channel
type 'a io

(** Returns the next header block or error [`Eof] if two consecutive
zero-filled blocks are discovered. Assumes stream is positioned at the
possible start of a header block.
@param global Holds the current global pax extended header, if
any. Needs to be given to the next call to [read]. *)
val read : global:Header.Extended.t option -> in_channel ->
(Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result io
end

module type HEADERWRITER = sig
type out_channel
type 'a io
val write : ?level:Header.compatibility -> Header.t -> out_channel -> (unit, [> `Msg of string ]) result io
val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io
end

module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) :
HEADERREADER with type in_channel = Reader.in_channel and type 'a io = 'a Async.t

module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) :
HEADERWRITER with type out_channel = Writer.out_channel and type 'a io = 'a Async.t
(** {1 Decoding and encoding of a whole archive} *)

(** The type of the decode state. *)
type decode_state

(** [decode_state ~global ()] constructs a decode_state. *)
val decode_state : ?global:Header.Extended.t -> unit -> decode_state

(** [decode t data] decodes [data] taking the current state [t] into account.
It may result on success in a new state, optionally some action that should
be done ([`Read] or [`Skip]), or a decoded [`Header]. Possibly a new global
PAX header is provided as well.

If no [`Read] or [`Skip] is returned, the new state should be used with
[decode] with the next [Header.length] sized string, which will lead to
further decoding until [`Eof] (or an error) occurs. *)
val decode : decode_state -> string ->
(decode_state * [ `Read of int | `Skip of int | `Header of Header.t ] option * Header.Extended.t option,
[ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ])
result

(** [encode_header ~level hdr] encodes the header with the provided [level]
(defaults to [V7]) into a list of strings to be written to the disk.
Once a header is written, the payload (padded to multiples of
[Header.length]) should follow. *)
val encode_header : ?level:Header.compatibility ->
Header.t -> (string list, [> `Msg of string ]) result

(** [encode_global_extended_header hdr] encodes the global extended header as
a list of strings. *)
val encode_global_extended_header : ?level:Header.compatibility -> Header.Extended.t -> (string list, [> `Msg of string ]) result
3 changes: 2 additions & 1 deletion lib_test/dune
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
alcotest-lwt
lwt
tar-unix
tar-mirage))
tar-mirage
))
163 changes: 53 additions & 110 deletions lib_test/global_extended_headers_test.ml
Original file line number Diff line number Diff line change
@@ -1,37 +1,5 @@
let level = Tar.Header.Ustar

module Writer = struct
type out_channel = Stdlib.out_channel
type 'a io = 'a
let really_write oc str =
output_string oc str
end

module HW = Tar.HeaderWriter
(struct type 'a t = 'a
let ( >>= ) x f = f x
let return x = x end)
(Writer)

module Reader = struct
type in_channel = Stdlib.in_channel
type 'a io = 'a
let really_read ic buf =
really_input ic buf 0 (Bytes.length buf)
let skip ic len =
let cur = pos_in ic in
seek_in ic (cur + len)
let read ic buf =
let max = Bytes.length buf in
input ic buf 0 max
end

module HR = Tar.HeaderReader
(struct type 'a t = 'a
let ( >>= ) x f = f x
let return x = x end)
(Reader)

let make_extended user_id =
Tar.Header.Extended.make ~user_id ()

Expand All @@ -41,92 +9,67 @@ let make_file =
let name = "file" ^ string_of_int !gen in
incr gen;
let hdr = Tar.Header.make name 0L in
hdr, fun cout ->
Tar.Header.zero_padding hdr
|> output_string cout
hdr

let ( let* ) = Result.bind

(* Tests that global and per-file extended headers correctly override
each other. *)
let use_global_extended_headers _test_ctxt =
(* Write an archive using global and per-file pax extended headers *)
begin try Sys.remove "test.tar" with _ -> () end;
let cout = open_out_bin "test.tar" in
let cout = Unix.openfile "test.tar" [ Unix.O_CREAT ; Unix.O_WRONLY ] 0o644 in
let g0 = make_extended 1000 in
let hdr, f = make_file () in
match HW.write_global_extended_header g0 cout with
| Error `Msg msg -> Alcotest.failf "failed to write header %s" msg
let g1 = make_extended 3000 in
match
Fun.protect ~finally:(fun () -> Unix.close cout)
(fun () ->
let* () = Tar_unix.write_global_extended_header ~level g0 cout in
let hdr = make_file () in
let* () = Tar_unix.write_header ~level hdr cout in
let hdr = make_file () in
let hdr = { hdr with Tar.Header.extended = Some (make_extended 2000) } in
let* () = Tar_unix.write_header ~level hdr cout in
let hdr = make_file () in
let* () = Tar_unix.write_header ~level hdr cout in
let hdr = make_file () in
let* () = Tar_unix.write_global_extended_header ~level g1 cout in
let* () = Tar_unix.write_header ~level hdr cout in
Tar_unix.write_end cout)
with
| Error `Msg msg -> Alcotest.failf "failed to write something: %s" msg
| Error `Unix (err, f, a) ->
Alcotest.failf "failed to write: unix error %s %s %s" (Unix.error_message err) f a
| Ok () ->
match HW.write ~level hdr cout with
| Error `Msg msg -> Alcotest.failf "failed to write header %s" msg
| Ok () ->
f cout;
let hdr, f = make_file () in
let hdr = { hdr with Tar.Header.extended = Some (make_extended 2000) } in
match HW.write ~level hdr cout with
| Error `Msg msg -> Alcotest.failf "failed to write header %s" msg
| Ok () ->
f cout;
let hdr, f = make_file () in
match HW.write ~level hdr cout with
| Error `Msg msg -> Alcotest.failf "failed to write header %s" msg
| Ok () ->
f cout;
let g1 = make_extended 3000 in
let hdr, f = make_file () in
match HW.write_global_extended_header g1 cout with
| Error `Msg msg -> Alcotest.failf "failed to write header %s" msg
| Ok () ->
match HW.write ~level hdr cout with
| Error `Msg msg -> Alcotest.failf "failed to write header %s" msg
| Ok () ->
f cout;
Writer.really_write cout Tar.Header.zero_block;
Writer.really_write cout Tar.Header.zero_block;
close_out cout;
(* Read the same archive, testing that headers have been squashed. *)
let cin = open_in_bin "test.tar" in
let global = ref None in
let header =
let pp ppf hdr = Fmt.pf ppf "%s" (Tar.Header.Extended.to_detailed_string hdr) in
Alcotest.testable (fun ppf hdr -> Fmt.pf ppf "%a" Fmt.(option pp) hdr) ( = )
in
( match HR.read ~global:!global cin with
| Ok (hdr, global') ->
Alcotest.check header "expected global header" (Some g0) global';
global := global';
Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id;
let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in
Reader.skip cin to_skip;
| Error `Eof -> failwith "Couldn't read header, end of file"
| Error (`Fatal err) -> Fmt.failwith "Couldn't read header: %a" Tar.pp_error err );
( match HR.read ~global:!global cin with
| Ok (hdr, global') ->
Alcotest.check header "expected global header" (Some g0) global';
global := global';
Alcotest.(check int) "expected user" 2000 hdr.Tar.Header.user_id;
let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in
Reader.skip cin to_skip;
| Error _ -> failwith "Couldn't read header" );
( match HR.read ~global:!global cin with
| Ok (hdr, global') ->
Alcotest.check header "expected global header" (Some g0) global';
global := global';
Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id;
let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in
Reader.skip cin to_skip;
| Error _ -> failwith "Couldn't read header" );
( match HR.read ~global:!global cin with
| Ok (hdr, global') ->
Alcotest.check header "expected global header" (Some g1) global';
global := global';
Alcotest.(check int) "expected user" 3000 hdr.Tar.Header.user_id;
let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in
Reader.skip cin to_skip;
| Error _ -> failwith "Couldn't read header" );
( match HR.read ~global:!global cin with
| Error `Eof -> ()
| _ -> failwith "Should have found EOF");
()
(* Read the same archive, testing that headers have been squashed. *)
let header =
let pp ppf hdr = Fmt.pf ppf "%s" (Tar.Header.Extended.to_detailed_string hdr) in
Alcotest.testable (fun ppf hdr -> Fmt.pf ppf "%a" Fmt.(option pp) hdr) ( = )
in
let f _fd ?global hdr idx =
match idx with
| 0 ->
Alcotest.check header "expected global header" (Some g0) global;
Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id;
Ok 1
| 1 ->
Alcotest.check header "expected global header" (Some g0) global;
Alcotest.(check int) "expected user" 2000 hdr.Tar.Header.user_id;
Ok 2
| 2 ->
Alcotest.check header "expected global header" (Some g0) global;
Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id;
Ok 3
| 3 ->
Alcotest.check header "expected global header" (Some g1) global;
Alcotest.(check int) "expected user" 3000 hdr.Tar.Header.user_id;
Ok 4
| _ -> Alcotest.fail "too many headers"
in
match Tar_unix.fold f "test.tar" 0 with
| Ok 4 -> ()
| Ok n -> Alcotest.failf "early abort, expected 4, received %u" n
| Error e -> Alcotest.failf "failed to read: %a" Tar_unix.pp_decode_error e

let () =
let suite = "tar - pax global extended headers", [
Expand Down
Loading
Loading