Skip to content

Commit

Permalink
Merge pull request #127 from mirage/next
Browse files Browse the repository at this point in the history
- Remove Archive modules
- Add `Tar.HEADERREADER` and `Tar.HEADERWRITER` module types, and rename
  `Tar.{READER,WRITER}.t` to `io`.
- When reading the compatibility level is no longer considered. In most
  cases the compatibility level was not enforced, and the semantics were
  very unclear.
- Better support for GNU LongLink/LongName
- Add a separate `write_global` function for writing a global
  `Tar.Header.Extended.t`. This allows writing an archive with a PAX
  comment and nothing else.
- Simplify zero padding and to_sectors computations
- Rework IO-specific modules (tar-unix etc) and harmonizing them
- Remove debug code
- Remove camlp-streams
- Remove unused tar_cstruct module
- Remove unused End_of_stream exception
- Remove Checksum_mismatch exception, use result
- read/unmarshal: avoid exceptions, use Error instead
- Extend authors: add reynir and hannes
- Distinguish Eof from other errors, as suggested by @kit-ty-kate

---------

Co-authored-by: Robur <[email protected]>
Co-authored-by: Hannes Mehnert <[email protected]>
Co-authored-by: Kate <[email protected]>
  • Loading branch information
4 people authored Jan 10, 2024
2 parents 3953bc1 + 5a17082 commit 814a11d
Show file tree
Hide file tree
Showing 23 changed files with 738 additions and 1,265 deletions.
16 changes: 16 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@

- `tar`: support pax Global Extended Headers. This adds state to tar parsing.
(#119, #120, @MisterDA)
- Support GNU LongLink and LongName. Prior, `Tar.HeaderWriter` and
`Tar.HeaderReader` supported both, but `Tar.Header.Link` only had `LongLink`
and (de)serialized to (from) GNU LongName.
- Compatibility level when reading / parsing is removed. Only GNU
LongLink/LongName extensions were affected by the compatibility level when
reading.
- Add module types `Tar.HEADERREADER` and `Tar.HEADERWRITER` describing the
output of `Tar.HeaderReader` and `Tar.HeaderWriter` respectively.
- Types `Tar.READER.t` and `Tar.WRITER.t` are renamed to `io`.
- Add `write_global` function for writing a global PAX extended header.
- Rework IO-specific modules (tar-unix etc.) harmonizing them.
- Avoid exceptions in tar and use result instead. The exceptions
`End_of_stream` and `Checksum_mismatch` are removed.
- Remove the `Tar_cstruct` module as it was unused.
- Remove debug printers.
- Finally remove the unused camlp-streams dependency.

## v2.5.1 (2023-06-20)

Expand Down
24 changes: 16 additions & 8 deletions bin/otar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ module Tar_gz = Tar_gz.Make
let ( >>= ) x f = f x
let return x = x end)
(struct type out_channel = Stdlib.out_channel
type 'a t = 'a
type 'a io = 'a
let really_write oc cs =
let str = Cstruct.to_string cs in
output_string oc str end)
(struct type in_channel = Stdlib.in_channel
type 'a t = 'a
type 'a io = 'a
let really_read ic cs =
let len = Cstruct.length cs in
let buf = Bytes.create len in
Expand Down Expand Up @@ -61,8 +61,10 @@ let create_tarball directory oc =
let mtime = Unix.gettimeofday () in
let out_channel = Tar_gz.of_out_channel ~level:4 ~mtime:(Int32.of_float mtime) os oc in
let hdr = Tar.Header.make ~file_mode:0o755
~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in
Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel (always None) ;
~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in
(match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel (always None) with
| Ok () -> ()
| Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg);
Array.iter begin fun filename ->
let fd = Unix.openfile (directory / filename) Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in
let stat = Unix.LargeFile.lstat (directory / filename) in
Expand All @@ -76,7 +78,9 @@ let create_tarball directory oc =
let hdr = Tar.Header.make
~file_mode ~mod_time ~user_id ~group_id
(directory / filename) stat.Unix.LargeFile.st_size in
Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel stream ;
(match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel stream with
| Ok () -> ()
| Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg);
Unix.close fd ;
| _ ->
Format.eprintf "Skipping non-regular file %s\n" (Filename.concat directory filename)
Expand Down Expand Up @@ -104,8 +108,8 @@ let bytes_to_size ?(decimals = 2) ppf = function
let list filename =
let ic = open_in filename in
let ic = Tar_gz.of_in_channel ~internal:(Cstruct.create 0x1000) ic in
let rec go global () = match Tar_gz.get_next_header ~level:Tar.Header.Ustar ~global ic with
| (hdr, global) ->
let rec go global () = match Tar_gz.HeaderReader.read ~global ic with
| Ok (hdr, global) ->
Format.printf "%s (%s, %a)\n%!"
hdr.Tar.Header.file_name
(Tar.Header.Link.to_string hdr.link_indicator)
Expand All @@ -117,7 +121,11 @@ let list filename =
let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in
Tar_gz.skip ic to_skip ;
go global ()
| exception Tar.Header.End_of_stream -> () in
| Error `Eof -> ()
| Error `Fatal e ->
Format.eprintf "Error listing archive: %a\n%!" Tar.pp_error e;
exit 2
in
go None ()

let () = match Sys.argv with
Expand Down
3 changes: 2 additions & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
"Thomas Gazagnaire"
"David Allsopp"
"Antonin Décimo"
"Reynir Björnsson"
"Hannes Mehnert"
)
(maintainers
"Reynir Björnsson <[email protected]>"
Expand All @@ -29,7 +31,6 @@
)
(tags ("org:xapi-project" "org:mirage"))
(depends
camlp-streams
(ocaml (>= 4.08.0))
(cstruct (>= 6.0.0))
(decompress (>= 1.5.1))
Expand Down
119 changes: 6 additions & 113 deletions eio/tar_eio.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ module Monad = struct
let return = Fun.id
end

module Reader = struct
module Io = struct
type in_channel = Flow.source
type 'a t = 'a
type 'a io = 'a
let really_read f b = Flow.read_exact f b
let skip f (n: int) =
let buffer_size = 32768 in
Expand All @@ -39,36 +39,14 @@ module Reader = struct
really_read f block;
loop (n - amount) in
loop n
end
let really_read = Reader.really_read

module Writer = struct
type out_channel = Flow.sink
type 'a t = 'a
let really_write f b = Flow.write f [ b ]
end
let really_write = Writer.really_write

let copy_n ifd ofd n =
let block_size = 32768 in
let buffer = Cstruct.create block_size in
let rec loop remaining =
if remaining = 0L then () else begin
let this = Int64.(to_int (min (of_int block_size) remaining)) in
let block = Cstruct.sub buffer 0 this in
really_read ifd block;
really_write ofd block;
loop (Int64.(sub remaining (of_int this)))
end in
loop n

module HR = Tar.HeaderReader(Monad)(Reader)
module HW = Tar.HeaderWriter(Monad)(Writer)

let get_next_header ?level ~global ic =
match HR.read ?level ~global (ic :> Flow.source) with
| Error `Eof -> None
| Ok hdrs -> Some hdrs
include Io
module HeaderReader = Tar.HeaderReader(Monad)(Io)
module HeaderWriter = Tar.HeaderWriter(Monad)(Io)

(* Eio needs a non-file-opening stat. *)
let stat path =
Expand All @@ -77,7 +55,7 @@ let stat path =

(** Return the header needed for a particular file on disk *)
let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t =
let level = match level with None -> Tar.Header.V7 | Some level -> level in
let level = match level with None -> !Tar.Header.compatibility_level | Some level -> level in
let stat = stat filepath in
let pwent = Option.map (fun f -> f stat.uid) getpwuid in
let grent = Option.map (fun f -> f stat.gid) getgrgid in
Expand All @@ -94,88 +72,3 @@ let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t =
let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in
Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name
?uname ?gname ~devmajor ~devminor (snd filepath) file_size

let write_block ?level ?global (header: Tar.Header.t) (body: #Flow.sink -> unit) sink =
HW.write ?level ?global header (sink :> Flow.sink);
body sink;
really_write sink (Tar.Header.zero_padding header)

let write_end sink =
really_write sink Tar.Header.zero_block;
really_write sink Tar.Header.zero_block

(** Utility functions for operating over whole tar archives *)
module Archive = struct

(** Read the next header, apply the function 'f' to the fd and the header. The function
should leave the fd positioned immediately after the datablock. Finally the function
skips past the zero padding to the next header *)
let with_next_file src ~(global: Tar.Header.Extended.t option)
(f: Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) =
match get_next_header ~global src with
| Some (hdr, global) ->
let result = f src global hdr in
Reader.skip src (Tar.Header.compute_zero_padding_length hdr);
Some result
| None ->
None

(** List the contents of a tar *)
let list ?level fd =
let rec loop global acc =
match get_next_header ?level ~global (fd :> Flow.source) with
| None -> List.rev acc
| Some (hdr, global) ->
Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size);
Reader.skip fd (Tar.Header.compute_zero_padding_length hdr);
loop global (hdr :: acc) in
loop None []

(** Extract the contents of a tar to directory 'dest' *)
let extract dest ifd =
let rec loop global () =
match get_next_header ~global ifd with
| None -> ()
| Some (hdr, global) ->
let filename = dest hdr.Tar.Header.file_name in
Eio.Path.(with_open_out ~create:(`Exclusive 0) filename) @@ fun ofd ->
copy_n ifd ofd hdr.Tar.Header.file_size;
Reader.skip ifd (Tar.Header.compute_zero_padding_length hdr);
loop global ()
in
loop None ()

let transform ?level f (ifd : #Flow.source) (ofd : #Flow.sink) =
let rec loop global () =
match get_next_header ~global ifd with
| None -> ()
| Some (header', global') ->
let header = f header' in
let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in
write_block ?level ?global:(if global <> global' then global' else None) header body ofd;
Reader.skip ifd (Tar.Header.compute_zero_padding_length header');
loop global' ()
in
loop None ();
write_end ofd

(** Create a tar on file descriptor fd from the filename list
'files' *)
let create ?getpwuid ?getgrgid files ofd =
let file filename =
let stat = stat filename in
if stat.kind <> `Regular_file then
(* Skipping, not a regular file. *)
()
else begin
let hdr = header_of_file ?getpwuid ?getgrgid filename in
write_block hdr (fun ofd ->
Eio.Path.with_open_in filename @@ fun ifd ->
copy_n ifd ofd hdr.Tar.Header.file_size
) ofd
end in
List.iter file files;
(* Add two empty blocks *)
write_end ofd

end
53 changes: 13 additions & 40 deletions eio/tar_eio.mli
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@

(** I/O for tar-formatted data *)

(** Returns the next header block or None if two consecutive
zero-filled blocks are discovered. Assumes stream is positioned at the
possible start of a header block.
@raise End_of_file if the stream unexpectedly fails. *)
val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> Eio.Flow.source ->
(Tar.Header.t * Tar.Header.Extended.t option) option
val really_read: Eio.Flow.source -> Cstruct.t -> unit
(** [really_read fd buf] fills [buf] with data from [fd] or fails
with {!Stdlib.End_of_file}. *)

val really_write: Eio.Flow.sink -> Cstruct.t -> unit
(** [really_write fd buf] writes the full contents of [buf] to
[fd] or fails with {!Stdlib.End_of_file}. *)

val skip : Eio.Flow.source -> int -> unit
(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you
should use [Lwt_unix.lseek fd n Lwt_unix.SEEK_CUR] instead. *)

(** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional
functions that should take the uid and gid respectively and return the passwd and group entry
Expand All @@ -33,37 +38,5 @@ val header_of_file :
Eio.Fs.dir Eio.Path.t ->
Tar.Header.t

module Archive : sig
(** Utility functions for operating over whole tar archives *)

(** Read the next header, apply the function 'f' to the source and the header. The function
should leave the source positioned immediately after the datablock. Finally the function
skips past the zero padding to the next header. *)
val with_next_file : Eio.Flow.source -> global:Tar.Header.Extended.t option ->
(Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a option

(** List the contents of a tar to stdout. *)
val list : ?level:Tar.Header.compatibility -> #Eio.Flow.source -> Tar.Header.t list

(** [extract dest] extract the contents of a tar.
Apply [dest] on each source filename to change the destination
filename. It only supports extracting regular files from the
top-level of the archive. *)
val extract : (string -> Eio.Fs.dir Eio.Path.t) -> Eio.Flow.source -> unit

(** [transform f src sink] applies [f] to the header of each
file in the tar inputted in [src], and writes the resulting
headers to [sink] preserving the content and structure of the
archive. *)
val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> #Eio.Flow.source -> #Eio.Flow.sink -> unit

(** Create a tar in the sink from a list of file paths. It only supports regular files.
See {! header_of_file} for the meaning of [getpwuid] and [getgrgid]. *)
val create :
?getpwuid:(int64 -> string) ->
?getgrgid:(int64 -> string) ->
Eio.Fs.dir Eio.Path.t list ->
#Eio.Flow.sink ->
unit
end
module HeaderReader : Tar.HEADERREADER with type in_channel = Eio.Flow.source and type 'a io = 'a
module HeaderWriter : Tar.HEADERWRITER with type out_channel = Eio.Flow.sink and type 'a io = 'a
4 changes: 2 additions & 2 deletions lib/dune
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
(library
(name tar)
(modules tar tar_cstruct)
(modules tar)
(public_name tar)
(wrapped false)
(libraries cstruct camlp-streams))
(libraries cstruct))

(library
(name tar_gz)
Expand Down
Loading

0 comments on commit 814a11d

Please sign in to comment.