diff --git a/CHANGES.md b/CHANGES.md index 85daefa..701fff5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,22 @@ - `tar`: support pax Global Extended Headers. This adds state to tar parsing. (#119, #120, @MisterDA) +- Support GNU LongLink and LongName. Prior, `Tar.HeaderWriter` and + `Tar.HeaderReader` supported both, but `Tar.Header.Link` only had `LongLink` + and (de)serialized to (from) GNU LongName. +- Compatibility level when reading / parsing is removed. Only GNU + LongLink/LongName extensions were affected by the compatibility level when + reading. +- Add module types `Tar.HEADERREADER` and `Tar.HEADERWRITER` describing the + output of `Tar.HeaderReader` and `Tar.HeaderWriter` respectively. +- Types `Tar.READER.t` and `Tar.WRITER.t` are renamed to `io`. +- Add `write_global` function for writing a global PAX extended header. +- Rework IO-specific modules (tar-unix etc.) harmonizing them. +- Avoid exceptions in tar and use result instead. The exceptions + `End_of_stream` and `Checksum_mismatch` are removed. +- Remove the `Tar_cstruct` module as it was unused. +- Remove debug printers. +- Finally remove the unused camlp-streams dependency. ## v2.5.1 (2023-06-20) diff --git a/bin/otar.ml b/bin/otar.ml index fbc8a37..4c40592 100644 --- a/bin/otar.ml +++ b/bin/otar.ml @@ -21,12 +21,12 @@ module Tar_gz = Tar_gz.Make let ( >>= ) x f = f x let return x = x end) (struct type out_channel = Stdlib.out_channel - type 'a t = 'a + type 'a io = 'a let really_write oc cs = let str = Cstruct.to_string cs in output_string oc str end) (struct type in_channel = Stdlib.in_channel - type 'a t = 'a + type 'a io = 'a let really_read ic cs = let len = Cstruct.length cs in let buf = Bytes.create len in @@ -61,8 +61,10 @@ let create_tarball directory oc = let mtime = Unix.gettimeofday () in let out_channel = Tar_gz.of_out_channel ~level:4 ~mtime:(Int32.of_float mtime) os oc in let hdr = Tar.Header.make ~file_mode:0o755 - ~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in - Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel (always None) ; + ~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in + (match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel (always None) with + | Ok () -> () + | Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg); Array.iter begin fun filename -> let fd = Unix.openfile (directory / filename) Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in let stat = Unix.LargeFile.lstat (directory / filename) in @@ -76,7 +78,9 @@ let create_tarball directory oc = let hdr = Tar.Header.make ~file_mode ~mod_time ~user_id ~group_id (directory / filename) stat.Unix.LargeFile.st_size in - Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel stream ; + (match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel stream with + | Ok () -> () + | Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg); Unix.close fd ; | _ -> Format.eprintf "Skipping non-regular file %s\n" (Filename.concat directory filename) @@ -104,8 +108,8 @@ let bytes_to_size ?(decimals = 2) ppf = function let list filename = let ic = open_in filename in let ic = Tar_gz.of_in_channel ~internal:(Cstruct.create 0x1000) ic in - let rec go global () = match Tar_gz.get_next_header ~level:Tar.Header.Ustar ~global ic with - | (hdr, global) -> + let rec go global () = match Tar_gz.HeaderReader.read ~global ic with + | Ok (hdr, global) -> Format.printf "%s (%s, %a)\n%!" hdr.Tar.Header.file_name (Tar.Header.Link.to_string hdr.link_indicator) @@ -117,7 +121,11 @@ let list filename = let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in Tar_gz.skip ic to_skip ; go global () - | exception Tar.Header.End_of_stream -> () in + | Error `Eof -> () + | Error `Fatal e -> + Format.eprintf "Error listing archive: %a\n%!" Tar.pp_error e; + exit 2 + in go None () let () = match Sys.argv with diff --git a/dune-project b/dune-project index 3e43e53..a34106a 100644 --- a/dune-project +++ b/dune-project @@ -12,6 +12,8 @@ "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo" + "Reynir Björnsson" + "Hannes Mehnert" ) (maintainers "Reynir Björnsson " @@ -29,7 +31,6 @@ ) (tags ("org:xapi-project" "org:mirage")) (depends - camlp-streams (ocaml (>= 4.08.0)) (cstruct (>= 6.0.0)) (decompress (>= 1.5.1)) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml index b1d2c7f..27adba2 100644 --- a/eio/tar_eio.ml +++ b/eio/tar_eio.ml @@ -24,9 +24,9 @@ module Monad = struct let return = Fun.id end -module Reader = struct +module Io = struct type in_channel = Flow.source - type 'a t = 'a + type 'a io = 'a let really_read f b = Flow.read_exact f b let skip f (n: int) = let buffer_size = 32768 in @@ -39,36 +39,14 @@ module Reader = struct really_read f block; loop (n - amount) in loop n -end -let really_read = Reader.really_read -module Writer = struct type out_channel = Flow.sink - type 'a t = 'a let really_write f b = Flow.write f [ b ] end -let really_write = Writer.really_write - -let copy_n ifd ofd n = - let block_size = 32768 in - let buffer = Cstruct.create block_size in - let rec loop remaining = - if remaining = 0L then () else begin - let this = Int64.(to_int (min (of_int block_size) remaining)) in - let block = Cstruct.sub buffer 0 this in - really_read ifd block; - really_write ofd block; - loop (Int64.(sub remaining (of_int this))) - end in - loop n -module HR = Tar.HeaderReader(Monad)(Reader) -module HW = Tar.HeaderWriter(Monad)(Writer) - -let get_next_header ?level ~global ic = - match HR.read ?level ~global (ic :> Flow.source) with - | Error `Eof -> None - | Ok hdrs -> Some hdrs +include Io +module HeaderReader = Tar.HeaderReader(Monad)(Io) +module HeaderWriter = Tar.HeaderWriter(Monad)(Io) (* Eio needs a non-file-opening stat. *) let stat path = @@ -77,7 +55,7 @@ let stat path = (** Return the header needed for a particular file on disk *) let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = - let level = match level with None -> Tar.Header.V7 | Some level -> level in + let level = match level with None -> !Tar.Header.compatibility_level | Some level -> level in let stat = stat filepath in let pwent = Option.map (fun f -> f stat.uid) getpwuid in let grent = Option.map (fun f -> f stat.gid) getgrgid in @@ -94,88 +72,3 @@ let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ?uname ?gname ~devmajor ~devminor (snd filepath) file_size - -let write_block ?level ?global (header: Tar.Header.t) (body: #Flow.sink -> unit) sink = - HW.write ?level ?global header (sink :> Flow.sink); - body sink; - really_write sink (Tar.Header.zero_padding header) - -let write_end sink = - really_write sink Tar.Header.zero_block; - really_write sink Tar.Header.zero_block - -(** Utility functions for operating over whole tar archives *) -module Archive = struct - - (** Read the next header, apply the function 'f' to the fd and the header. The function - should leave the fd positioned immediately after the datablock. Finally the function - skips past the zero padding to the next header *) - let with_next_file src ~(global: Tar.Header.Extended.t option) - (f: Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) = - match get_next_header ~global src with - | Some (hdr, global) -> - let result = f src global hdr in - Reader.skip src (Tar.Header.compute_zero_padding_length hdr); - Some result - | None -> - None - - (** List the contents of a tar *) - let list ?level fd = - let rec loop global acc = - match get_next_header ?level ~global (fd :> Flow.source) with - | None -> List.rev acc - | Some (hdr, global) -> - Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size); - Reader.skip fd (Tar.Header.compute_zero_padding_length hdr); - loop global (hdr :: acc) in - loop None [] - - (** Extract the contents of a tar to directory 'dest' *) - let extract dest ifd = - let rec loop global () = - match get_next_header ~global ifd with - | None -> () - | Some (hdr, global) -> - let filename = dest hdr.Tar.Header.file_name in - Eio.Path.(with_open_out ~create:(`Exclusive 0) filename) @@ fun ofd -> - copy_n ifd ofd hdr.Tar.Header.file_size; - Reader.skip ifd (Tar.Header.compute_zero_padding_length hdr); - loop global () - in - loop None () - - let transform ?level f (ifd : #Flow.source) (ofd : #Flow.sink) = - let rec loop global () = - match get_next_header ~global ifd with - | None -> () - | Some (header', global') -> - let header = f header' in - let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in - write_block ?level ?global:(if global <> global' then global' else None) header body ofd; - Reader.skip ifd (Tar.Header.compute_zero_padding_length header'); - loop global' () - in - loop None (); - write_end ofd - - (** Create a tar on file descriptor fd from the filename list - 'files' *) - let create ?getpwuid ?getgrgid files ofd = - let file filename = - let stat = stat filename in - if stat.kind <> `Regular_file then - (* Skipping, not a regular file. *) - () - else begin - let hdr = header_of_file ?getpwuid ?getgrgid filename in - write_block hdr (fun ofd -> - Eio.Path.with_open_in filename @@ fun ifd -> - copy_n ifd ofd hdr.Tar.Header.file_size - ) ofd - end in - List.iter file files; - (* Add two empty blocks *) - write_end ofd - -end diff --git a/eio/tar_eio.mli b/eio/tar_eio.mli index c604b7d..3c44aea 100644 --- a/eio/tar_eio.mli +++ b/eio/tar_eio.mli @@ -16,12 +16,17 @@ (** I/O for tar-formatted data *) -(** Returns the next header block or None if two consecutive - zero-filled blocks are discovered. Assumes stream is positioned at the - possible start of a header block. - @raise End_of_file if the stream unexpectedly fails. *) -val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> Eio.Flow.source -> - (Tar.Header.t * Tar.Header.Extended.t option) option +val really_read: Eio.Flow.source -> Cstruct.t -> unit +(** [really_read fd buf] fills [buf] with data from [fd] or fails + with {!Stdlib.End_of_file}. *) + +val really_write: Eio.Flow.sink -> Cstruct.t -> unit +(** [really_write fd buf] writes the full contents of [buf] to + [fd] or fails with {!Stdlib.End_of_file}. *) + +val skip : Eio.Flow.source -> int -> unit +(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you + should use [Lwt_unix.lseek fd n Lwt_unix.SEEK_CUR] instead. *) (** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional functions that should take the uid and gid respectively and return the passwd and group entry @@ -33,37 +38,5 @@ val header_of_file : Eio.Fs.dir Eio.Path.t -> Tar.Header.t -module Archive : sig - (** Utility functions for operating over whole tar archives *) - - (** Read the next header, apply the function 'f' to the source and the header. The function - should leave the source positioned immediately after the datablock. Finally the function - skips past the zero padding to the next header. *) - val with_next_file : Eio.Flow.source -> global:Tar.Header.Extended.t option -> - (Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a option - - (** List the contents of a tar to stdout. *) - val list : ?level:Tar.Header.compatibility -> #Eio.Flow.source -> Tar.Header.t list - - (** [extract dest] extract the contents of a tar. - Apply [dest] on each source filename to change the destination - filename. It only supports extracting regular files from the - top-level of the archive. *) - val extract : (string -> Eio.Fs.dir Eio.Path.t) -> Eio.Flow.source -> unit - - (** [transform f src sink] applies [f] to the header of each - file in the tar inputted in [src], and writes the resulting - headers to [sink] preserving the content and structure of the - archive. *) - val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> #Eio.Flow.source -> #Eio.Flow.sink -> unit - - (** Create a tar in the sink from a list of file paths. It only supports regular files. - - See {! header_of_file} for the meaning of [getpwuid] and [getgrgid]. *) - val create : - ?getpwuid:(int64 -> string) -> - ?getgrgid:(int64 -> string) -> - Eio.Fs.dir Eio.Path.t list -> - #Eio.Flow.sink -> - unit -end +module HeaderReader : Tar.HEADERREADER with type in_channel = Eio.Flow.source and type 'a io = 'a +module HeaderWriter : Tar.HEADERWRITER with type out_channel = Eio.Flow.sink and type 'a io = 'a diff --git a/lib/dune b/lib/dune index bf62cb9..1718831 100644 --- a/lib/dune +++ b/lib/dune @@ -1,9 +1,9 @@ (library (name tar) - (modules tar tar_cstruct) + (modules tar) (public_name tar) (wrapped false) - (libraries cstruct camlp-streams)) + (libraries cstruct)) (library (name tar_gz) diff --git a/lib/tar.ml b/lib/tar.ml index c44e6cb..08e9b61 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -15,56 +15,60 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) +type error = [ `Checksum_mismatch | `Corrupt_pax_header | `Zero_block | `Unmarshal of string ] + +let pp_error ppf = function + | `Checksum_mismatch -> Format.fprintf ppf "checksum mismatch" + | `Corrupt_pax_header -> Format.fprintf ppf "corrupt PAX header" + | `Zero_block -> Format.fprintf ppf "zero block" + | `Unmarshal e -> Format.fprintf ppf "unmarshal %s" e + +let ( let* ) = Result.bind + (** Process and create tar file headers *) module Header = struct (** Map of field name -> (start offset, length) taken from wikipedia: http://en.wikipedia.org/w/index.php?title=Tar_%28file_format%29&oldid=83554041 *) - (** For debugging: pretty-print a string as hex *) - let to_hex (x: string) : string = - let result = Bytes.make (String.length x * 3) ' ' in - for i = 0 to String.length x - 1 do - let byte = Printf.sprintf "%02x" (int_of_char x.[i]) in - Bytes.blit_string byte 0 result (i * 3) 2 - done; - Bytes.unsafe_to_string result - let trim_numerical s = String.(trim (map (function '\000' -> ' ' | x -> x) s)) (** Unmarshal an integer field (stored as 0-padded octal) *) - let unmarshal_int (x: string) : int = + let unmarshal_int x = let tmp = "0o0" ^ (trim_numerical x) in try - int_of_string tmp + Ok (int_of_string tmp) with Failure msg -> - failwith (Printf.sprintf "%s: failed to parse integer [%s] == %s" msg tmp (to_hex tmp)) + Error (`Unmarshal (Printf.sprintf "%s: failed to parse integer %S" msg tmp)) (** Unmarshal an int64 field (stored as 0-padded octal) *) - let unmarshal_int64 (x: string) : int64 = + let unmarshal_int64 x = let tmp = "0o0" ^ (trim_numerical x) in - Int64.of_string tmp + try + Ok (Int64.of_string tmp) + with Failure msg -> + Error (`Unmarshal (Printf.sprintf "%s: failed to parse int64 %S" msg tmp)) (** Unmarshal a string *) - let unmarshal_string (x: string) : string = + let unmarshal_string x = try let first_0 = String.index x '\000' in - String.sub x 0 first_0 - with - Not_found -> x (* TODO should error *) + Ok (String.sub x 0 first_0) + with Not_found -> + Ok x (** Marshal an integer field of size 'n' *) - let marshal_int (x: int) (n: int) = + let marshal_int x n = let octal = Printf.sprintf "%0*o" (n - 1) x in octal ^ "\000" (* space or NULL allowed *) (** Marshal an int64 field of size 'n' *) - let marshal_int64 (x: int64) (n: int) = + let marshal_int64 x n = let octal = Printf.sprintf "%0*Lo" (n - 1) x in octal ^ "\000" (* space or NULL allowed *) (** Marshal an string field of size 'n' *) - let marshal_string (x: string) (n: int) = + let marshal_string x n = if String.length x < n then let bytes = Bytes.make n '\000' in Bytes.blit_string x 0 bytes 0 (String.length x); @@ -75,11 +79,14 @@ module Header = struct (** Unmarshal a pax Extended Header File time It can contain a ( '.' ) for sub-second granularity, that we ignore. https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_05 *) - let unmarshal_pax_time (x:string ) : int64 = - match String.split_on_char '.' x with - | [seconds] -> Int64.of_string seconds - | [seconds; _subseconds] -> Int64.of_string seconds - | _ -> raise (Failure "Wrong pax Extended Header File Times format") + let unmarshal_pax_time x = + try + match String.split_on_char '.' x with + | [seconds] -> Ok (Int64.of_string seconds) + | [seconds; _subseconds] -> Ok (Int64.of_string seconds) + | _ -> raise (Failure "Wrong pax Extended Header File time format (at most one . allowed)") + with Failure msg -> + Error (`Unmarshal (Printf.sprintf "Failed to parse pax time %S (%s)" x msg)) let hdr_file_name_off = 0 let sizeof_hdr_file_name = 100 @@ -262,6 +269,7 @@ module Header = struct | GlobalExtendedHeader | PerFileExtendedHeader | LongLink + | LongName (* Strictly speaking, v7 supports Normal (as \0) and Hard only *) let to_char ?level = @@ -275,22 +283,21 @@ module Header = struct | FIFO -> '6' | GlobalExtendedHeader -> 'g' | PerFileExtendedHeader -> 'x' - | LongLink -> 'L' - - let of_char ?level = - let level = get_level level in function - | '1' -> Hard - | '2' -> Symbolic - | 'g' -> GlobalExtendedHeader - | 'x' -> PerFileExtendedHeader - | 'L' -> LongLink - (* All other types returned as Normal in V7 for compatibility with older versions of ocaml-tar *) - | _ when level = V7 -> Normal (* if value is malformed, treat as a normal file *) - | '3' -> Character - | '4' -> Block - | '5' -> Directory - | '6' -> FIFO - | _ -> Normal (* if value is malformed, treat as a normal file *) + | LongLink -> 'K' + | LongName -> 'L' + + let of_char = function + | '1' -> Hard + | '2' -> Symbolic + | 'g' -> GlobalExtendedHeader + | 'x' -> PerFileExtendedHeader + | 'K' -> LongLink + | 'L' -> LongName + | '3' -> Character + | '4' -> Block + | '5' -> Directory + | '6' -> FIFO + | _ -> Normal (* if value is malformed, treat as a normal file *) let to_string = function | Normal -> "Normal" @@ -303,6 +310,7 @@ module Header = struct | GlobalExtendedHeader -> "GlobalExtendedHeader" | PerFileExtendedHeader -> "PerFileExtendedHeader" | LongLink -> "LongLink" + | LongName -> "LongName" end module Extended = struct @@ -387,7 +395,19 @@ module Header = struct user_id; uname } | None -> extended - let unmarshal ~(global: t option) (c: Cstruct.t) : t = + let decode_int x = + try + Ok (int_of_string x) + with Failure msg -> + Error (`Unmarshal (Printf.sprintf "%s: failed to parse integer %S" msg x)) + + let decode_int64 x = + try + Ok (Int64.of_string x) + with Failure msg -> + Error (`Unmarshal (Printf.sprintf "%s: failed to parse integer %S" msg x)) + + let unmarshal ~(global: t option) c = (* "%d %s=%s\n", , , with constraints that - the cannot contain an equals sign - the is the number of octets of the record, including \n @@ -398,48 +418,59 @@ module Header = struct then None else if Cstruct.get_char buffer i = char then Some i - else loop (i + 1) in - loop 0 in + else loop (i + 1) + in + loop 0 + in let rec loop remaining = if Cstruct.length remaining = 0 - then [] + then Ok [] else begin (* Find the space, then decode the length *) match find remaining ' ' with - | None -> failwith "Failed to decode pax extended header record" + | None -> Error (`Unmarshal "Failed to decode pax extended header record") | Some i -> let length = int_of_string @@ Cstruct.to_string @@ Cstruct.sub remaining 0 i in let record = Cstruct.sub remaining 0 length in let remaining = Cstruct.shift remaining length in begin match find record '=' with - | None -> failwith "Failed to decode pax extended header record" + | None -> Error (`Unmarshal "Failed to decode pax extended header record") | Some j -> let keyword = Cstruct.to_string @@ Cstruct.sub record (i + 1) (j - i - 1) in let v = Cstruct.to_string @@ Cstruct.sub record (j + 1) (Cstruct.length record - j - 2) in - (keyword, v) :: (loop remaining) + let* rem = loop remaining in + Ok ((keyword, v) :: rem) end - end in - let pairs = loop c in + end + in + let* pairs = loop c in let option name f = if List.mem_assoc name pairs - then Some (f (List.assoc name pairs)) - else None in + then + let* v = f (List.assoc name pairs) in + Ok (Some v) + else + Ok None + in (* integers are stored as decimal, not octal here *) - let access_time = option "atime" unmarshal_pax_time in - let charset = option "charset" unmarshal_string in - let comment = option "comment" unmarshal_string in - let group_id = option "gid" int_of_string in - let gname = option "group_name" unmarshal_string in - let header_charset = option "hdrcharset" unmarshal_string in - let link_path = option "linkpath" unmarshal_string in - let mod_time = option "mtime" unmarshal_pax_time in - let path = option "path" unmarshal_string in - let file_size = option "size" Int64.of_string in - let user_id = option "uid" int_of_string in - let uname = option "uname" unmarshal_string in - { access_time; charset; comment; group_id; gname; - header_charset; link_path; mod_time; path; file_size; - user_id; uname } |> merge global + let* access_time = option "atime" unmarshal_pax_time in + let* charset = option "charset" unmarshal_string in + let* comment = option "comment" unmarshal_string in + let* group_id = option "gid" decode_int in + let* gname = option "group_name" unmarshal_string in + let* header_charset = option "hdrcharset" unmarshal_string in + let* link_path = option "linkpath" unmarshal_string in + let* mod_time = option "mtime" unmarshal_pax_time in + let* path = option "path" unmarshal_string in + let* file_size = option "size" decode_int64 in + let* user_id = option "uid" decode_int in + let* uname = option "uname" unmarshal_string in + let g = + { access_time; charset; comment; group_id; gname; + header_charset; link_path; mod_time; path; file_size; + user_id; uname } + in + Ok (merge global g) end @@ -463,7 +494,7 @@ module Header = struct let make ?(file_mode=0o400) ?(user_id=0) ?(group_id=0) ?(mod_time=0L) ?(link_indicator=Link.Normal) ?(link_name="") ?(uname="") ?(gname="") ?(devmajor=0) ?(devminor=0) file_name file_size = (* If some fields are too big, we must use a pax header *) let need_pax_header = - file_size > 0o077777777777L + Int64.unsigned_compare file_size 0o077777777777L > 0 || user_id > 0x07777777 || group_id > 0x07777777 in let extended = @@ -490,12 +521,6 @@ module Header = struct (** A blank header block (two of these in series mark the end of the tar) *) let zero_block = Cstruct.create length - (** [allzeroes buf] is true if [buf] contains only zero bytes *) - let allzeroes buf = - let rec loop i = - (i >= Cstruct.length buf) || (Cstruct.get_uint8 buf i = 0 && (loop (i + 1))) in - loop 0 - (** Pretty-print the header record *) let to_detailed_string (x: t) = let table = [ "file_name", x.file_name; @@ -508,9 +533,6 @@ module Header = struct "link_name", x.link_name ] in "{\n\t" ^ (String.concat "\n\t" (List.map (fun (k, v) -> k ^ ": " ^ v) table)) ^ "}" - (** Thrown when unmarshalling a header if the checksums don't match *) - exception Checksum_mismatch - (** From an already-marshalled block, compute what the checksum should be *) let checksum (x: Cstruct.t) : int64 = (* Sum of all the byte values of the header with the checksum field taken @@ -531,120 +553,131 @@ module Header = struct Int64.of_int !result (** Unmarshal a header block, returning None if it's all zeroes *) - let unmarshal ?level ?(extended = Extended.make ()) (c: Cstruct.t) : t option = - let level = get_level level in - if allzeroes c then None + let unmarshal ?(extended = Extended.make ()) (c: Cstruct.t) + : (t, [>`Zero_block | `Checksum_mismatch]) result = + if Cstruct.length c <> length then Error (`Unmarshal "buffer is not of block size") + else if Cstruct.equal zero_block c then Error `Zero_block else - let chksum = get_hdr_chksum c in - if checksum c <> chksum then raise Checksum_mismatch - else let ustar = - let magic = get_hdr_magic c in + let* chksum = get_hdr_chksum c in + if checksum c <> chksum then Error `Checksum_mismatch + else let* ustar = + let* magic = get_hdr_magic c in (* GNU tar and Posix differ in interpretation of the character following ustar. For Posix, it should be '\0' but GNU tar uses ' ' *) - String.length magic >= 5 && (String.sub magic 0 5 = "ustar") in - let prefix = if ustar then get_hdr_prefix c else "" in - let file_name = match extended.Extended.path with - | Some path -> path + Ok (String.length magic >= 5 && (String.sub magic 0 5 = "ustar")) in + let* prefix = if ustar then get_hdr_prefix c else Ok "" in + let* file_name = match extended.Extended.path with + | Some path -> Ok path | None -> - let file_name = get_hdr_file_name c in - if file_name = "" then prefix - else if prefix = "" then file_name - else Filename.concat prefix file_name in - let file_mode = get_hdr_file_mode c in - let user_id = match extended.Extended.user_id with + let* file_name = get_hdr_file_name c in + if file_name = "" then Ok prefix + else if prefix = "" then Ok file_name + else Ok (Filename.concat prefix file_name) in + let* file_mode = get_hdr_file_mode c in + let* user_id = match extended.Extended.user_id with | None -> get_hdr_user_id c - | Some x -> x in - let group_id = match extended.Extended.group_id with + | Some x -> Ok x in + let* group_id = match extended.Extended.group_id with | None -> get_hdr_group_id c - | Some x -> x in - let file_size = match extended.Extended.file_size with + | Some x -> Ok x in + let* file_size = match extended.Extended.file_size with | None -> get_hdr_file_size c - | Some x -> x in - let mod_time = match extended.Extended.mod_time with + | Some x -> Ok x in + let* mod_time = match extended.Extended.mod_time with | None -> get_hdr_mod_time c - | Some x -> x in - let link_indicator = Link.of_char ~level (get_hdr_link_indicator c) in - let uname = match extended.Extended.uname with - | None -> if ustar then get_hdr_uname c else "" - | Some x -> x in - let gname = match extended.Extended.gname with - | None -> if ustar then get_hdr_gname c else "" - | Some x -> x in - let devmajor = if ustar then get_hdr_devmajor c else 0 in - let devminor = if ustar then get_hdr_devminor c else 0 in - - let link_name = match extended.Extended.link_path with - | Some link_path -> link_path + | Some x -> Ok x in + let link_indicator = Link.of_char (get_hdr_link_indicator c) in + let* uname = match extended.Extended.uname with + | None -> if ustar then get_hdr_uname c else Ok "" + | Some x -> Ok x in + let* gname = match extended.Extended.gname with + | None -> if ustar then get_hdr_gname c else Ok "" + | Some x -> Ok x in + let* devmajor = if ustar then get_hdr_devmajor c else Ok 0 in + let* devminor = if ustar then get_hdr_devminor c else Ok 0 in + + let* link_name = match extended.Extended.link_path with + | Some link_path -> Ok link_path | None -> get_hdr_link_name c in - Some (make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator + Ok (make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ~uname ~gname ~devmajor ~devminor file_name file_size) (** Marshal a header block, computing and inserting the checksum *) - let imarshal ~level c link_indicator (x: t) = + let marshal ?level c (x: t) = + let level = get_level level in (* The caller (e.g. write_block) is expected to insert the extra ././@LongLink header *) - if String.length x.file_name > sizeof_hdr_file_name && level <> GNU then - if level = Ustar then - if String.length x.file_name > 256 then failwith "file_name too long" - else let (prefix, file_name) = - let is_directory = if x.file_name.[String.length x.file_name - 1] = '/' then "/" else "" in - let rec split prefix file_name = - if String.length file_name > sizeof_hdr_file_name then failwith "file_name can't be split" - else if String.length prefix > sizeof_hdr_prefix then split (Filename.dirname prefix) (Filename.concat (Filename.basename prefix) file_name ^ is_directory) - else (prefix, file_name) in - split (Filename.dirname x.file_name) (Filename.basename x.file_name ^ is_directory) in - set_hdr_file_name c file_name; - set_hdr_prefix c prefix - else failwith "file_name too long" - else set_hdr_file_name c x.file_name; + let* () = + if String.length x.file_name > sizeof_hdr_file_name && level <> GNU then + if level = Ustar then + if String.length x.file_name > 256 then + Error (`Msg "file_name too long") + else + let* (prefix, file_name) = + let is_directory = if x.file_name.[String.length x.file_name - 1] = '/' then "/" else "" in + let rec split prefix file_name = + if String.length file_name > sizeof_hdr_file_name then + Error (`Msg "file_name can't be split") + else if String.length prefix > sizeof_hdr_prefix then + split (Filename.dirname prefix) (Filename.concat (Filename.basename prefix) file_name ^ is_directory) + else Ok (prefix, file_name) + in + split (Filename.dirname x.file_name) (Filename.basename x.file_name ^ is_directory) + in + set_hdr_file_name c file_name; + set_hdr_prefix c prefix; + Ok () + else Error (`Msg "file_name too long") + else (set_hdr_file_name c x.file_name; Ok ()) + in (* This relies on the fact that the block was initialised to null characters *) - if level = Ustar || (level = GNU && x.devmajor = 0 && x.devminor = 0) then begin - if level = Ustar then begin - set_hdr_magic c "ustar"; - set_hdr_version c "00"; - end else begin - (* OLD GNU MAGIC: use "ustar " as magic, and another " " in the version *) - set_hdr_magic c "ustar "; - set_hdr_version c " "; - end; - set_hdr_uname c x.uname; - set_hdr_gname c x.gname; - if level = Ustar then begin - set_hdr_devmajor c x.devmajor; - set_hdr_devminor c x.devminor; - end - end else begin - if x.devmajor <> 0 then failwith "devmajor not supported in this format"; - if x.devminor <> 0 then failwith "devminor not supported in this format"; - if x.uname <> "" then failwith "uname not supported in this format"; - if x.gname <> "" then failwith "gname not supported in this format"; - end; + let* () = + if level = Ustar || (level = GNU && x.devmajor = 0 && x.devminor = 0) then begin + if level = Ustar then begin + set_hdr_magic c "ustar"; + set_hdr_version c "00"; + end else begin + (* OLD GNU MAGIC: use "ustar " as magic, and another " " in the version *) + set_hdr_magic c "ustar "; + set_hdr_version c " "; + end; + set_hdr_uname c x.uname; + set_hdr_gname c x.gname; + if level = Ustar then begin + set_hdr_devmajor c x.devmajor; + set_hdr_devminor c x.devminor; + end; + Ok () + end else + if x.devmajor <> 0 then Error (`Msg "devmajor not supported in this format") + else if x.devminor <> 0 then Error (`Msg "devminor not supported in this format") + else if x.uname <> "" then Error (`Msg "uname not supported in this format") + else if x.gname <> "" then Error (`Msg "gname not supported in this format") + else Ok () + in set_hdr_file_mode c x.file_mode; set_hdr_user_id c x.user_id; set_hdr_group_id c x.group_id; set_hdr_file_size c x.file_size; set_hdr_mod_time c x.mod_time; - set_hdr_link_indicator c link_indicator; + set_hdr_link_indicator c (Link.to_char ~level x.link_indicator); (* The caller (e.g. write_block) is expected to insert the extra ././@LongLink header *) - if String.length x.link_name > sizeof_hdr_link_name && level <> GNU then failwith "link_name too long"; + let* () = + if String.length x.link_name > sizeof_hdr_link_name && level <> GNU then + Error (`Msg "link_name too long") + else + Ok () + in set_hdr_link_name c x.link_name; (* Finally, compute the checksum *) let chksum = checksum c in - set_hdr_chksum c chksum - - let marshal ?level c (x: t) = - let level = get_level level in - imarshal ~level c (Link.to_char ~level x.link_indicator) x - - (** Thrown if we detect the end of the tar (at least two zero blocks in sequence) *) - exception End_of_stream + set_hdr_chksum c chksum; + Ok () (** Compute the amount of zero-padding required to round up the file size to a whole number of blocks *) let compute_zero_padding_length (x: t) : int = (* round up to next whole number of block lengths *) - let length = Int64.of_int length in - let lenm1 = Int64.sub length Int64.one in - let next_block_length = (Int64.mul length (Int64.div (Int64.add x.file_size lenm1) length)) in - Int64.to_int (Int64.sub next_block_length x.file_size) + let last_block_size = Int64.to_int (Int64.rem x.file_size (Int64.of_int length)) in + if last_block_size = 0 then 0 else length - last_block_size (** Return the required zero-padding as a string *) let zero_padding (x: t) = @@ -652,8 +685,7 @@ module Header = struct Cstruct.sub zero_block 0 zero_padding_len let to_sectors (x: t) = - let bytes = Int64.(add x.file_size (of_int (compute_zero_padding_length x))) in - Int64.div bytes 512L + Int64.(div (add (pred (of_int length)) x.file_size) (of_int length)) end module type ASYNC = sig @@ -662,182 +694,198 @@ module type ASYNC = sig val return: 'a -> 'a t end -(* If we aren't using Lwt/Async style threads, instantiate the functor with - this. *) -module Direct = struct - type 'a t = 'a - let return x = x - let ( >>= ) m f = f m -end - module type READER = sig type in_channel - type 'a t - val really_read: in_channel -> Cstruct.t -> unit t - val skip: in_channel -> int -> unit t + type 'a io + val really_read: in_channel -> Cstruct.t -> unit io + val skip: in_channel -> int -> unit io end module type WRITER = sig type out_channel - type 'a t - val really_write: out_channel -> Cstruct.t -> unit t + type 'a io + val really_write: out_channel -> Cstruct.t -> unit io +end + +module type HEADERREADER = sig + type in_channel + type 'a io + val read : global:Header.Extended.t option -> in_channel -> + (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result io +end + +module type HEADERWRITER = sig + type out_channel + type 'a io + val write : ?level:Header.compatibility -> Header.t -> out_channel -> (unit, [> `Msg of string ]) result io + val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io end let longlink = "././@LongLink" -module HeaderReader(Async: ASYNC)(Reader: READER with type 'a t = 'a Async.t) = struct +module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) = struct open Async open Reader + type in_channel = Reader.in_channel + type 'a io = 'a t + + (* This is not a bind, but more a lift and bind combined. *) + let ( let^* ) x f = + match x with + | Ok x -> f x + | Error _ as e -> return e + + let fix_link_indicator x = + (* For backward compatibility we treat normal files ending in slash as + directories. Because [Link.of_char] treats unrecognized link indicator + values as normal files we check directly. This is not completely correct + as [Header.Link.of_char] turns unknown link indicators into + [Header.Link.Normal]. Ideally, it should only be done for '0' and + '\000'. *) + if String.length x.Header.file_name > 0 + && x.file_name.[String.length x.file_name - 1] = '/' + && x.link_indicator = Header.Link.Normal then + { x with link_indicator = Header.Link.Directory } + else + x - let read ?level ~global (ifd: Reader.in_channel) : (Header.t * Header.Extended.t option, [ `Eof ]) result t = - let level = Header.get_level level in + let read ~global (ifd: Reader.in_channel) : (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result t = (* We might need to read 2 headers at once if we encounter a Pax header *) let buffer = Cstruct.create Header.length in let real_header_buf = Cstruct.create Header.length in let next_block global () = - really_read ifd buffer - >>= fun () -> - match Header.unmarshal ?extended:global ~level buffer with - | None -> return None - | Some hdr -> return (Some hdr) - in + really_read ifd buffer >>= fun () -> + return (Header.unmarshal ?extended:global buffer) + in - let rec get_hdr global () : (Header.t * Header.Extended.t option, [> `Eof ]) result t = - next_block global () - >>= function - | Some x when x.Header.link_indicator = Header.Link.GlobalExtendedHeader -> + let rec get_hdr ~next_longname ~next_longlink global () : (Header.t * Header.Extended.t option, [> `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result t = + next_block global () >>= function + | Ok x when x.Header.link_indicator = Header.Link.GlobalExtendedHeader -> let extra_header_buf = Cstruct.create (Int64.to_int x.Header.file_size) in - really_read ifd extra_header_buf - >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) - >>= fun () -> + really_read ifd extra_header_buf >>= fun () -> + skip ifd (Header.compute_zero_padding_length x) >>= fun () -> (* unmarshal merges the previous global (if any) with the discovered global (if any) and returns the new global. *) - let global = Header.Extended.unmarshal ~global extra_header_buf in - get_hdr (Some global) () - | Some x when x.Header.link_indicator = Header.Link.PerFileExtendedHeader -> + let^* global = + Result.map_error + (fun e -> `Fatal e) + (Header.Extended.unmarshal ~global extra_header_buf) + in + get_hdr ~next_longname ~next_longlink (Some global) () + | Ok x when x.Header.link_indicator = Header.Link.PerFileExtendedHeader -> let extra_header_buf = Cstruct.create (Int64.to_int x.Header.file_size) in - really_read ifd extra_header_buf - >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) - >>= fun () -> - let extended = Header.Extended.unmarshal ~global extra_header_buf in - really_read ifd real_header_buf - >>= fun () -> - begin match Header.unmarshal ~level ~extended real_header_buf with - | None -> - (* Corrupt pax headers *) - return (Error `Eof) - | Some x -> return (Ok (x, global)) - end - | Some x when x.Header.link_indicator = Header.Link.LongLink && x.Header.file_name = longlink -> + really_read ifd extra_header_buf >>= fun () -> + skip ifd (Header.compute_zero_padding_length x) >>= fun () -> + let^* extended = + Result.map_error + (fun e -> `Fatal e) + (Header.Extended.unmarshal ~global extra_header_buf) + in + really_read ifd real_header_buf >>= fun () -> + let^* x = + Result.map_error + (fun _ -> `Fatal `Corrupt_pax_header) + (Header.unmarshal ~extended real_header_buf) + in + let x = fix_link_indicator x in + return (Ok (x, global)) + | Ok ({ Header.link_indicator = Header.Link.LongLink | Header.Link.LongName; _ } as x) when x.Header.file_name = longlink -> let extra_header_buf = Cstruct.create (Int64.to_int x.Header.file_size) in - really_read ifd extra_header_buf - >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) - >>= fun () -> - let file_name = Cstruct.(to_string @@ sub extra_header_buf 0 (length extra_header_buf - 1)) in - begin next_block global () - >>= function - | None -> return (Error `Eof) - | Some x -> return (Ok ({ x with file_name }, global)) - end - | Some x -> return (Ok (x, global)) - | None -> - begin - next_block global () - >>= function - | Some x -> return (Ok (x, global)) - | None -> return (Error `Eof) - end in - - let true_link_indicator link_indicator file_name = - (* For backward compatibility we treat normal files ending in slash - as directories. Because [Link.of_char] treats unrecognized link - indicator values as normal files we check directly. This is not - completely correct as [Header.Link.of_char] turns unknown link - indicators into [Header.Link.Normal]. Ideally, it should only be - done for '0' and '\000'. *) - if String.length file_name > 0 - && file_name.[String.length file_name - 1] = '/' - && link_indicator = Header.Link.Normal then - Header.Link.Directory - else - link_indicator + really_read ifd extra_header_buf >>= fun () -> + skip ifd (Header.compute_zero_padding_length x) >>= fun () -> + let name = Cstruct.to_string ~len:(Cstruct.length extra_header_buf - 1) extra_header_buf in + let next_longlink = if x.Header.link_indicator = Header.Link.LongLink then Some name else next_longlink in + let next_longname = if x.Header.link_indicator = Header.Link.LongName then Some name else next_longname in + get_hdr ~next_longname ~next_longlink global () + | Ok x -> + (* XXX: unclear how/if pax headers should interact with gnu extensions *) + let x = match next_longname with + | None -> x + | Some file_name -> { x with file_name } in - - let rec read_header global (file_name, link_name, hdr) : (Header.t * Header.Extended.t option, [`Eof]) result Async.t = - let raw_link_indicator = Header.get_hdr_link_indicator buffer in - if (raw_link_indicator = 'K' || raw_link_indicator = 'L') && level = Header.GNU then - let data = Cstruct.create (Int64.to_int hdr.Header.file_size) in - let pad = Cstruct.create (Header.compute_zero_padding_length hdr) in - really_read ifd data - >>= fun () -> - really_read ifd pad - >>= fun () -> - let data = Header.unmarshal_string (Cstruct.to_string data) in - get_hdr global () - >>= function - | Error `Eof -> return (Error `Eof) - | Ok (hdr, global) -> - if raw_link_indicator = 'K' - then read_header global (file_name, data, hdr) - else read_header global (data, link_name, hdr) - else begin - let link_name = if link_name = "" then hdr.Header.link_name else link_name in - let file_name = if file_name = "" then hdr.Header.file_name else file_name in - let link_indicator = true_link_indicator hdr.Header.link_indicator file_name in - return (Ok ({hdr with Header.link_name; file_name; link_indicator }, global)) - end in - - get_hdr global () - >>= function - | Error `Eof -> return (Error `Eof) - | Ok (hdr, global) -> - read_header global ("", "", hdr) + let x = match next_longlink with + | None -> x + | Some link_name -> { x with link_name } + in + let x = fix_link_indicator x in + return (Ok (x, global)) + | Error `Zero_block -> + begin + next_block global () >>= function + | Ok x -> return (Ok (x, global)) + | Error `Zero_block -> return (Error `Eof) + | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> return (Error (`Fatal e)) + end + | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> + return (Error (`Fatal e)) + in + get_hdr ~next_longname:None ~next_longlink:None global () end -module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a t = 'a Async.t) = struct +module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) = struct open Async open Writer + + type out_channel = Writer.out_channel + type 'a io = 'a t + let write_unextended ?level header fd = let level = Header.get_level level in let buffer = Cstruct.create Header.length in let blank = {Header.file_name = longlink; file_mode = 0; user_id = 0; group_id = 0; mod_time = 0L; file_size = 0L; link_indicator = Header.Link.LongLink; link_name = ""; uname = "root"; gname = "root"; devmajor = 0; devminor = 0; extended = None} in - ( if (String.length header.Header.link_name > Header.sizeof_hdr_link_name || String.length header.Header.file_name > Header.sizeof_hdr_file_name) && level = Header.GNU then begin - ( if String.length header.Header.link_name > Header.sizeof_hdr_link_name then begin + (if level = Header.GNU then begin + begin + if String.length header.Header.link_name > Header.sizeof_hdr_link_name then begin let file_size = String.length header.Header.link_name + 1 in let blank = {blank with Header.file_size = Int64.of_int file_size} in - Header.imarshal ~level buffer 'K' blank; - really_write fd buffer - >>= fun () -> - let payload = Cstruct.of_string (header.Header.link_name ^ "\000") in - really_write fd payload - >>= fun () -> - really_write fd (Header.zero_padding blank) - end else return () ) - >>= fun () -> - ( if String.length header.Header.file_name > Header.sizeof_hdr_file_name then begin - let file_size = String.length header.Header.file_name + 1 in - let blank = {blank with Header.file_size = Int64.of_int file_size} in - Header.imarshal ~level buffer 'L' blank; - really_write fd buffer - >>= fun () -> - let payload = Cstruct.of_string (header.Header.file_name ^ "\000") in - really_write fd payload - >>= fun () -> - really_write fd (Header.zero_padding blank) - end else return () ) - >>= fun () -> - Cstruct.memset buffer 0; - return () - end else return () ) - >>= fun () -> - Header.marshal ~level buffer header; - really_write fd buffer + match + Header.marshal ~level buffer { blank with link_indicator = Header.Link.LongLink } + with + | Error _ as e -> return e + | Ok () -> + really_write fd buffer >>= fun () -> + let payload = Cstruct.of_string (header.Header.link_name ^ "\000") in + really_write fd payload >>= fun () -> + really_write fd (Header.zero_padding blank) >>= fun () -> + return (Ok ()) + end else + return (Ok ()) + end >>= function + | Error _ as e -> return e + | Ok () -> + begin + if String.length header.Header.file_name > Header.sizeof_hdr_file_name then begin + let file_size = String.length header.Header.file_name + 1 in + let blank = {blank with Header.file_size = Int64.of_int file_size} in + match + Header.marshal ~level buffer { blank with link_indicator = Header.Link.LongName } + with + | Error _ as e -> return e + | Ok () -> + really_write fd buffer >>= fun () -> + let payload = Cstruct.of_string (header.Header.file_name ^ "\000") in + really_write fd payload >>= fun () -> + really_write fd (Header.zero_padding blank) >>= fun () -> + return (Ok ()) + end else + return (Ok ()) + end >>= function + | Error _ as e -> return e + | Ok () -> + Cstruct.memset buffer 0; + return (Ok ()) + end else + return (Ok ())) >>= function + | Error _ as e -> return e + | Ok () -> + match Header.marshal ~level buffer header with + | Error _ as e -> return e + | Ok () -> + really_write fd buffer >>= fun () -> + return (Ok ()) let write_extended ?level ~link_indicator hdr fd = let link_indicator_name = match link_indicator with @@ -848,168 +896,22 @@ module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a t = 'a Async.t) = let pax_payload = Header.Extended.marshal hdr in let pax = Header.make ~link_indicator link_indicator_name (Int64.of_int @@ Cstruct.length pax_payload) in - write_unextended ?level pax fd - >>= fun () -> - really_write fd pax_payload - >>= fun () -> - really_write fd (Header.zero_padding pax) - - let write ?level ?global header fd = - ( match global with - | None -> return () - | Some g -> - write_extended ?level ~link_indicator:Header.Link.GlobalExtendedHeader g fd ) - >>= fun () -> + write_unextended ?level pax fd >>= function + | Error _ as e -> return e + | Ok () -> + really_write fd pax_payload >>= fun () -> + really_write fd (Header.zero_padding pax) >>= fun () -> + return (Ok ()) + + let write ?level header fd = ( match header.Header.extended with - | None -> return () + | None -> return (Ok ()) | Some e -> write_extended ?level ~link_indicator:Header.Link.PerFileExtendedHeader e fd ) - >>= fun () -> - write_unextended ?level header fd -end - -module type IO = sig - type in_channel - type out_channel - - val really_input : in_channel -> bytes -> int -> int -> unit - val input : in_channel -> bytes -> int -> int -> int - val output : out_channel -> bytes -> int -> int -> unit - val close_out : out_channel -> unit -end - -module Make (IO : IO) = struct - module Reader = struct - type in_channel = IO.in_channel - type 'a t = 'a Direct.t - (* XXX: there's no function to read directly into a bigarray *) - let really_read (ifd: IO.in_channel) buffer : unit t = - let s = Bytes.create (Cstruct.length buffer) in - IO.really_input ifd s 0 (Cstruct.length buffer); - Cstruct.blit_from_bytes s 0 buffer 0 (Cstruct.length buffer) - - let skip (ifd: in_channel) (n: int) = - let buffer = Cstruct.create 4096 in - let rec loop (n: int) = - if n <= 0 then () - else - let amount = min n (Cstruct.length buffer) in - really_read ifd (Cstruct.sub buffer 0 amount); - loop (n - amount) in - loop n - end - module Writer = struct - type out_channel = IO.out_channel - type 'a t = 'a Direct.t - (* XXX: there's no function to write directly from a bigarray *) - let really_write fd buffer = - let s = Cstruct.to_string buffer |> Bytes.of_string in - if Bytes.length s > 0 - then IO.output fd s 0 (Bytes.length s) - end - let really_read = Reader.really_read - let really_write = Writer.really_write - - module HW = HeaderWriter(Direct)(Writer) - - let write_block ?level ?global (header: Header.t) (body: IO.out_channel -> unit) (fd : IO.out_channel) = - HW.write ?level ?global header fd; - body fd; - really_write fd (Header.zero_padding header) - - let write_end (fd: IO.out_channel) = - really_write fd Header.zero_block; - really_write fd Header.zero_block - - module HR = HeaderReader(Direct)(Reader) - - let get_next_header ?level ~global ic = match HR.read ?level ~global ic with - | Ok hdrs -> hdrs - | Error `Eof -> raise Header.End_of_stream - - (** Utility functions for operating over whole tar archives *) - module Archive = struct - - let skip = Reader.skip - - (** Read the next header, apply the function [f] to the fd, the global pax extended - header (if any), and the current header. The function should leave the fd positioned - immediately after the datablock. Finally the function skips past the zero padding to - the next header *) - let with_next_file (fd: IO.in_channel) ~(global: Header.Extended.t option) (f: IO.in_channel -> Header.Extended.t option -> Header.t -> 'a) = - match HR.read ~global fd with - | Ok (hdr, global) -> - (* NB if the function 'f' fails we're boned *) - Fun.protect (fun () -> f fd global hdr) - ~finally:(fun () -> Reader.skip fd (Header.compute_zero_padding_length hdr)) - | Error `Eof -> raise Header.End_of_stream - - (** List the contents of a tar *) - let list ?level fd = - let level = Header.get_level level in - let global = ref None in - let list = ref [] in - try - while true do - match HR.read ~level ~global:!global fd with - | Ok (hdr, global') -> - global := global'; - list := hdr :: !list; - Reader.skip fd (Int64.to_int hdr.Header.file_size); - Reader.skip fd (Header.compute_zero_padding_length hdr) - | Error `Eof -> raise Header.End_of_stream - done; - List.rev !list; - with - | End_of_file -> failwith "Unexpected end of file while reading stream" - | Header.End_of_stream -> List.rev !list - - let copy_n ifd ofd n = - let buffer = Bytes.create 16384 in - let rec loop remaining = - if remaining = 0L then () else begin - let this = Int64.(to_int (min (of_int (Bytes.length buffer)) remaining)) in - let n = IO.input ifd buffer 0 this in - if n = 0 then raise End_of_file; - begin - try - IO.output ofd buffer 0 n - with Failure _ -> raise End_of_file - end; - loop (Int64.(sub remaining (of_int n))) - end in - loop n - - (** [extract_gen dest] extract the contents of a tar. - Apply 'dest' on each header to get a handle to the file to write to *) - let extract_gen dest ifd = - let global = ref None in - try - while true do - match HR.read ~global:!global ifd with - | Ok (hdr, global') -> - global := global'; - let size = hdr.Header.file_size in - let padding = Header.compute_zero_padding_length hdr in - let ofd = dest hdr in - copy_n ifd ofd size; - IO.close_out ofd; - Reader.skip ifd padding - | Error `Eof -> raise Header.End_of_stream - done - with - | End_of_file -> failwith "Unexpected end of file while reading stream" - | Header.End_of_stream -> () - - (** Create a tar on file descriptor fd from the stream of headers. *) - let create_gen ?level files ofd = - let level = Header.get_level level in - let file (hdr, write) = - write_block ~level hdr write ofd; - in - Stream.iter file files; - (* Add two empty blocks *) - write_end ofd + >>= function + | Error _ as e -> return e + | Ok () -> write_unextended ?level header fd - end + let write_global_extended_header global fd = + write_extended ~link_indicator:Header.Link.GlobalExtendedHeader global fd end diff --git a/lib/tar.mli b/lib/tar.mli index 3a07911..d05eddd 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -18,6 +18,12 @@ {e %%VERSION%% - {{:%%PKG_HOMEPAGE%% }homepage}} *) +(** The type of errors that may occur. *) +type error = [`Checksum_mismatch | `Corrupt_pax_header | `Zero_block | `Unmarshal of string] + +(** [pp_error ppf e] pretty prints the error [e] on the formatter [ppf]. *) +val pp_error : Format.formatter -> [< error] -> unit + module Header : sig (** Process and create tar file headers. *) @@ -45,7 +51,8 @@ module Header : sig | FIFO (** a FIFO node *) | GlobalExtendedHeader (** a PaxExtension global header *) | PerFileExtendedHeader (** a PaxExtension per-file header *) - | LongLink (** a LongLink i.e. a very long filename *) + | LongLink (** a GNU LongLink i.e. a very long link name *) + | LongName (** a GNU LongName i.e. a very long filename *) val to_string: t -> string end @@ -75,7 +82,7 @@ module Header : sig (** Unmarshal a pax Extended Header block. This header block may be preceded by [global] blocks which will override some fields. *) - val unmarshal : global:t option -> Cstruct.t -> t + val unmarshal : global:t option -> Cstruct.t -> (t, [> error ]) result end (** Represents a standard archive (note checksum not stored). *) @@ -113,22 +120,13 @@ module Header : sig (** Pretty-print the header record. *) val to_detailed_string : t -> string - (** For debugging: pretty-print a string as hex. *) - val to_hex : string -> string - - (** Thrown when unmarshalling a header if the checksums don't match. *) - exception Checksum_mismatch - - (** Thrown if we detect the end of the tar (at least two zero blocks in sequence). *) - exception End_of_stream - (** Unmarshal a header block, returning [None] if it's all zeroes. This header block may be preceded by an [?extended] block which will override some fields. *) - val unmarshal : ?level:compatibility -> ?extended:Extended.t -> Cstruct.t -> t option + val unmarshal : ?extended:Extended.t -> Cstruct.t -> (t, [`Zero_block | `Checksum_mismatch | `Unmarshal of string]) result (** Marshal a header block, computing and inserting the checksum. *) - val marshal : ?level:compatibility -> Cstruct.t -> t -> unit + val marshal : ?level:compatibility -> Cstruct.t -> t -> (unit, [> `Msg of string ]) result (** Compute the amount of zero-padding required to round up the file size to a whole number of blocks. *) @@ -149,92 +147,39 @@ end module type READER = sig type in_channel - type 'a t - val really_read: in_channel -> Cstruct.t -> unit t - val skip: in_channel -> int -> unit t + type 'a io + val really_read: in_channel -> Cstruct.t -> unit io + val skip: in_channel -> int -> unit io end module type WRITER = sig type out_channel - type 'a t - val really_write: out_channel -> Cstruct.t -> unit t + type 'a io + val really_write: out_channel -> Cstruct.t -> unit io end -module HeaderReader(Async: ASYNC)(Reader: READER with type 'a t = 'a Async.t) : - sig - (** Returns the next header block or throws {!Header.End_of_stream} if two consecutive +module type HEADERREADER = sig + type in_channel + type 'a io + + (** Returns the next header block or error [`Eof] if two consecutive zero-filled blocks are discovered. Assumes stream is positioned at the possible start of a header block. @param global Holds the current global pax extended header, if - any. Needs to be given to the next call to [read]. - @raise Header.End_of_stream if the stream unexpectedly fails. *) - val read : ?level:Header.compatibility -> global:Header.Extended.t option -> Reader.in_channel -> (Header.t * Header.Extended.t option, [`Eof]) result Async.t -end - -module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a t = 'a Async.t) : sig - val write : ?level:Header.compatibility -> ?global:Header.Extended.t -> Header.t -> Writer.out_channel -> unit Async.t + any. Needs to be given to the next call to [read]. *) + val read : global:Header.Extended.t option -> in_channel -> + (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result io end -module type IO = sig - type in_channel +module type HEADERWRITER = sig type out_channel - - val really_input : in_channel -> bytes -> int -> int -> unit - val input : in_channel -> bytes -> int -> int -> int - val output : out_channel -> bytes -> int -> int -> unit - val close_out : out_channel -> unit + type 'a io + val write : ?level:Header.compatibility -> Header.t -> out_channel -> (unit, [> `Msg of string ]) result io + val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io end -module Make (IO : IO) : sig - val really_read: IO.in_channel -> Cstruct.t -> unit - (** [really_read fd buf] fills [buf] with data from [fd] or raises - {!Stdlib.End_of_file}. *) - - val really_write: IO.out_channel -> Cstruct.t -> unit - (** [really_write fd buf] writes the full contents of [buf] to [fd] - or raises {!Stdlib.End_of_file}. *) +module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) : + HEADERREADER with type in_channel = Reader.in_channel and type 'a io = 'a Async.t - (** Returns the next header block or fails with [`Eof] if two consecutive - zero-filled blocks are discovered. Assumes stream is positioned at the - possible start of a header block. - @raise Stdlib.End_of_file if the stream unexpectedly fails. *) - val get_next_header : ?level:Header.compatibility -> global:Header.Extended.t option -> IO.in_channel -> Header.t * Header.Extended.t option - - val write_block: ?level:Header.compatibility -> ?global:Header.Extended.t -> Header.t -> (IO.out_channel -> unit) -> IO.out_channel -> unit - [@@ocaml.deprecated "Deprecated: use Tar.HeaderWriter"] - (** Write [hdr], then call [write_body fd] to write the body, - then zero-pads so the stream is positioned for the next block. *) - - val write_end: IO.out_channel -> unit - [@@ocaml.deprecated "Deprecated: use Tar.HeaderWriter"] - (** Writes a stream terminator to [fd]. *) - - module Archive : sig - (** Utility functions for operating over whole tar archives. *) - - (** Read the next header, apply the function [f] to the fd, the global pax extended - header (if any), and the current header. The function should leave the fd positioned - immediately after the datablock. Finally the function skips past the zero padding to - the next header *) - val with_next_file : IO.in_channel -> global:Header.Extended.t option -> - (IO.in_channel -> Header.Extended.t option -> Header.t -> 'a) -> 'a - - (** List the contents of a tar. *) - val list : ?level:Header.compatibility -> IO.in_channel -> Header.t list - - (** [extract_gen dest] extract the contents of a tar. - Apply [dest] on each header to get a handle to the file to write to. *) - val extract_gen : (Header.t -> IO.out_channel) -> IO.in_channel -> unit - - (** Create a tar on file descriptor fd from the stream of headers. *) - val create_gen : ?level:Header.compatibility -> (Header.t * (IO.out_channel -> unit)) Stream.t -> IO.out_channel -> unit - - (** [copy_n ifd odf n] copies exactly [n] bytes from [ifd] to [ofd]. *) - val copy_n : IO.in_channel -> IO.out_channel -> int64 -> unit - [@@ocaml.deprecated "Deprecated: use your own helper function"] - - (** [skip fd n] reads and throws away [n] bytes from [fd]. *) - val skip : IO.in_channel -> int -> unit - [@@ocaml.deprecated "Deprecated: use your own helper function"] - end -end +module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) : + HEADERWRITER with type out_channel = Writer.out_channel and type 'a io = 'a Async.t diff --git a/lib/tar_cstruct.ml b/lib/tar_cstruct.ml deleted file mode 100644 index 051e4a2..0000000 --- a/lib/tar_cstruct.ml +++ /dev/null @@ -1,50 +0,0 @@ -module Cstruct_io = struct - (* Input from a single Cstruct.t value *) - - type in_channel = { - mutable pos : int; - data : Cstruct.t; - } - - let make_in_channel data = - { pos = 0; data } - - let check_available ch len = - min (Cstruct.length ch.data - ch.pos) len - - let really_input ic buf pos len = - if check_available ic len <> len then raise End_of_file; - Cstruct.blit_to_bytes ic.data ic.pos buf pos len; - ic.pos <- ic.pos + len - - let input ic buf pos len = - let available = check_available ic len in - Cstruct.blit_to_bytes ic.data ic.pos buf pos available; - ic.pos <- ic.pos + available; - available - - (* Output to a list of Cstruct.t values *) - - type out_channel = { - mutable data : Cstruct.t list; - } - - let make_out_channel () = { data = [] } - - let output oc buf pos len = - let elt = Cstruct.create len in - Cstruct.blit_from_bytes buf pos elt 0 len; - oc.data <- elt :: oc.data - - let close_out (_ : out_channel) = () - - let to_string oc = - Cstruct.copyv (List.rev oc.data) - - let to_cstruct oc = - Cstruct.concat (List.rev oc.data) -end - -include Cstruct_io - -include Tar.Make(Cstruct_io) diff --git a/lib/tar_cstruct.mli b/lib/tar_cstruct.mli deleted file mode 100644 index 93fd3e0..0000000 --- a/lib/tar_cstruct.mli +++ /dev/null @@ -1,44 +0,0 @@ -(** {1 Processing tar content with cstruct buffers} *) - -type in_channel -type out_channel - -val make_in_channel : Cstruct.t -> in_channel -(** [make_in_channel buf] uses [buf] as a source of raw tar content. *) - -val make_out_channel : unit -> out_channel -(** [make_out_channel ()] returns a buffer to hold serialized tar content. *) - -val to_string : out_channel -> string -(** [to_string oc] returns the contents of [oc] as a string of bytes. *) - -val to_cstruct : out_channel -> Cstruct.t -(** [to_cstruct oc] returns the contents of [oc] as a {!Cstruct.t}. *) - -val really_read : in_channel -> Cstruct.t -> unit -(** [really_read ic buf] fills [buf] with data from [ic] or raises - {!Stdlib.End_of_file} *) - -val really_write : out_channel -> Cstruct.t -> unit -(** [really_write oc buf] writes the full contents of [buf] to [oc] - or raises {!Stdlib.End_of_file}. *) - -val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> in_channel -> - Tar.Header.t * Tar.Header.Extended.t option -(** [get_next_header ?level ic] returns the next header block or fails with - [`Eof] if two consecutive zero-filled blocks are discovered. Assumes [ic] - is positioned at the possible start of a header block. - @raise Stdlib.End_of_file if the stream unexpectedly fails. *) - -module Archive : sig - val with_next_file : in_channel -> global:Tar.Header.Extended.t option -> - (in_channel -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a - (** [with_next_file ic f] Read the next header, apply the function [f] to - [ic] and the header. The function should leave [ic] positioned - immediately after the datablock. {!really_read} can be used for this - purpose. Finally the function skips past the zero padding to the next - header. *) - - val list : ?level:Tar.Header.compatibility -> in_channel -> Tar.Header.t list - (** List the contents of a tar. *) -end diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index c6c496b..1f4239c 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -17,13 +17,13 @@ module type READER = sig include Tar.READER - val read : in_channel -> Cstruct.t -> int t + val read : in_channel -> Cstruct.t -> int io end module Make (Async : Tar.ASYNC) - (Writer : Tar.WRITER with type 'a t = 'a Async.t) - (Reader : READER with type 'a t = 'a Async.t) + (Writer : Tar.WRITER with type 'a io = 'a Async.t) + (Reader : READER with type 'a io = 'a Async.t) = struct open Async @@ -34,7 +34,7 @@ module Make ; oc_buffer : Cstruct.t ; out_channel : Writer.out_channel } - type 'a t = 'a Async.t + type 'a io = 'a Async.t let really_write ({ gz; oc_buffer; out_channel; _ } as state) cs = let rec until_await gz = @@ -61,10 +61,10 @@ module Make ; in_channel : Reader.in_channel ; mutable pos : int } - type 'a t = 'a Async.t + type 'a io = 'a Async.t let really_read - : in_channel -> Cstruct.t -> unit t + : in_channel -> Cstruct.t -> unit io = fun ({ ic_buffer; oc_buffer; in_channel; _ } as state) res -> let rec until_full_or_end gz res = match Gz.Inf.decode gz with @@ -103,14 +103,14 @@ module Make until_full_or_end (Gz.Inf.flush state.gz) res ) let skip - : in_channel -> int -> unit t + : in_channel -> int -> unit io = fun state len -> let oc_buffer = Cstruct.create len in really_read state oc_buffer end - module TarGzHeaderWriter = Tar.HeaderWriter (Async) (Gz_writer) - module TarGzHeaderReader = Tar.HeaderReader (Async) (Gz_reader) + module HeaderWriter = Tar.HeaderWriter (Async) (Gz_writer) + module HeaderReader = Tar.HeaderReader (Async) (Gz_reader) type in_channel = Gz_reader.in_channel @@ -123,11 +123,6 @@ module Make ; in_channel ; pos= 0 } - let get_next_header ?level ~global ic = - TarGzHeaderReader.read ?level ~global ic >>= function - | Ok hdrs -> Async.return hdrs - | Error `Eof -> raise Tar.Header.End_of_stream - let really_read = Gz_reader.really_read let skip = Gz_reader.skip @@ -144,30 +139,33 @@ module Make let gz = Gz.Def.dst gz buffer off len in { Gz_writer.gz; ic_buffer; oc_buffer; out_channel; } - let write_block ?level ?global hdr ({ Gz_writer.ic_buffer= buf; oc_buffer; out_channel; _ } as state) block = - TarGzHeaderWriter.write ?level ?global hdr state >>= fun () -> - (* XXX(dinosaure): we can refactor this code with [Gz_writer.really_write] - but this loop saves and uses [ic_buffer]/[buf] to avoid extra - allocations on the case between [string] and [Cstruct.t]. *) - let rec deflate (str, off, len) gz = match Gz.Def.encode gz with - | `Await gz -> - if len = 0 - then block () >>= function - | None -> state.gz <- gz ; Async.return () - | Some str -> deflate (str, 0, String.length str) gz - else ( let len' = min len (Cstruct.length buf) in - Cstruct.blit_from_string str off buf 0 len' ; - let { Cstruct.buffer; off= cs_off; len= _; } = buf in - deflate (str, off + len', len - len') - (Gz.Def.src gz buffer cs_off len') ) - | `Flush gz -> - let max = Cstruct.length oc_buffer - Gz.Def.dst_rem gz in - Writer.really_write out_channel (Cstruct.sub oc_buffer 0 max) >>= fun () -> - let { Cstruct.buffer; off= cs_off; len= cs_len; } = oc_buffer in - deflate (str, off, len) (Gz.Def.dst gz buffer cs_off cs_len) - | `End _gz -> assert false in - deflate ("", 0, 0) state.gz >>= fun () -> - Gz_writer.really_write state (Tar.Header.zero_padding hdr) + let write_block ?level hdr ({ Gz_writer.ic_buffer= buf; oc_buffer; out_channel; _ } as state) block = + HeaderWriter.write ?level hdr state >>= function + | Error _ as e -> return e + | Ok () -> + (* XXX(dinosaure): we can refactor this codec with [Gz_writer.really_write] + but this loop saves and uses [ic_buffer]/[buf] to avoid extra + allocations on the case between [string] and [Cstruct.t]. *) + let rec deflate (str, off, len) gz = match Gz.Def.encode gz with + | `Await gz -> + if len = 0 + then block () >>= function + | None -> state.gz <- gz ; Async.return () + | Some str -> deflate (str, 0, String.length str) gz + else ( let len' = min len (Cstruct.length buf) in + Cstruct.blit_from_string str off buf 0 len' ; + let { Cstruct.buffer; off= cs_off; len= _; } = buf in + deflate (str, off + len', len - len') + (Gz.Def.src gz buffer cs_off len') ) + | `Flush gz -> + let max = Cstruct.length oc_buffer - Gz.Def.dst_rem gz in + Writer.really_write out_channel (Cstruct.sub oc_buffer 0 max) >>= fun () -> + let { Cstruct.buffer; off= cs_off; len= cs_len; } = oc_buffer in + deflate (str, off, len) (Gz.Def.dst gz buffer cs_off cs_len) + | `End _gz -> assert false in + deflate ("", 0, 0) state.gz >>= fun () -> + Gz_writer.really_write state (Tar.Header.zero_padding hdr) >>= fun () -> + return (Ok ()) let write_end ({ Gz_writer.oc_buffer; out_channel; _ } as state) = Gz_writer.really_write state Tar.Header.zero_block >>= fun () -> diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index 1b039dc..b4198b0 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -17,26 +17,18 @@ module type READER = sig include Tar.READER - val read : in_channel -> Cstruct.t -> int t + val read : in_channel -> Cstruct.t -> int io end module Make (Async : Tar.ASYNC) - (Writer : Tar.WRITER with type 'a t = 'a Async.t) - (Reader : READER with type 'a t = 'a Async.t) + (Writer : Tar.WRITER with type 'a io = 'a Async.t) + (Reader : READER with type 'a io = 'a Async.t) : sig type in_channel val of_in_channel : internal:Cstruct.t -> Reader.in_channel -> in_channel - (** Returns the next header block or fails with {!Tar.Header.End_of_stream} - if two consecutive zero-filled blocks are discovered. Assumes stream is - positioned at the possible start of a header block. - - @raise Stdlib.End_of_file if the stream unexpectedly fails. *) - val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option - -> in_channel -> (Tar.Header.t * Tar.Header.Extended.t option) Async.t - val really_read : in_channel -> Cstruct.t -> unit Async.t (** [really_read fd buf] fills [buf] with data from [fd] or raises {!Stdlib.End_of_file}. *) @@ -48,8 +40,8 @@ module Make val of_out_channel : ?bits:int -> ?q:int -> level:int -> mtime:int32 -> Gz.os -> Writer.out_channel -> out_channel - val write_block : ?level:Tar.Header.compatibility -> ?global:Tar.Header.Extended.t -> - Tar.Header.t -> out_channel -> (unit -> string option Async.t) -> unit Async.t + val write_block : ?level:Tar.Header.compatibility -> Tar.Header.t -> + out_channel -> (unit -> string option Async.t) -> (unit, [> `Msg of string ]) result Async.t (** [write_block hdr oc stream] writes [hdr], then {i deflate} the given [stream], then zero-pads so the stream is positionned for the next block. @@ -66,10 +58,17 @@ module Make let add_file oc filename = let fd = Unix.openfile filename Unix.[ O_RDONLY ] 0o644 in let hdr = Tar.Header.make ... in - write_block hdr oc (stream_of_fd fd) ; + (match write_block hdr oc (stream_of_fd fd) with + | Ok () -> () + | Error `Msg msg -> print_endline ("error: " ^ msg)); Unix.close fd ]} *) val write_end : out_channel -> unit Async.t (** [write_end oc] writes a stream terminator to [oc]. *) + + module HeaderReader : + Tar.HEADERREADER with type in_channel = in_channel and type 'a io = 'a Async.t + module HeaderWriter : + Tar.HEADERWRITER with type out_channel = out_channel and type 'a io = 'a Async.t end diff --git a/lib_test/global_extended_headers_test.ml b/lib_test/global_extended_headers_test.ml index 70e37c2..2affcde 100644 --- a/lib_test/global_extended_headers_test.ml +++ b/lib_test/global_extended_headers_test.ml @@ -2,7 +2,7 @@ let level = Tar.Header.Ustar module Writer = struct type out_channel = Stdlib.out_channel - type 'a t = 'a + type 'a io = 'a let really_write oc cs = let str = Cstruct.to_string cs in output_string oc str @@ -16,7 +16,7 @@ module HW = Tar.HeaderWriter module Reader = struct type in_channel = Stdlib.in_channel - type 'a t = 'a + type 'a io = 'a let really_read ic cs = let len = Cstruct.length cs in let buf = Bytes.create len in @@ -58,65 +58,79 @@ let use_global_extended_headers _test_ctxt = let cout = open_out_bin "test.tar" in let g0 = make_extended 1000 in let hdr, f = make_file () in - HW.write ~level ~global:g0 hdr cout; - f cout; - let hdr, f = make_file () in - let hdr = { hdr with Tar.Header.extended = Some (make_extended 2000) } in - HW.write ~level hdr cout; - f cout; - let hdr, f = make_file () in - HW.write ~level hdr cout; - f cout; - let g1 = make_extended 3000 in - let hdr, f = make_file () in - HW.write ~level ~global:g1 hdr cout; - f cout; - Writer.really_write cout Tar.Header.zero_block; - Writer.really_write cout Tar.Header.zero_block; - close_out cout; - (* Read the same archive, testing that headers have been squashed. *) - let cin = open_in_bin "test.tar" in - let global = ref None in - let header = - let pp ppf hdr = Fmt.pf ppf "%s" (Tar.Header.Extended.to_detailed_string hdr) in - Alcotest.testable (fun ppf hdr -> Fmt.pf ppf "%a" Fmt.(option pp) hdr) ( = ) - in - ( match HR.read ~level ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g0) global'; - global := global'; - Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error `Eof -> failwith "Couldn't read header" ); - ( match HR.read ~level ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g0) global'; - global := global'; - Alcotest.(check int) "expected user" 2000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error `Eof -> failwith "Couldn't read header" ); - ( match HR.read ~level ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g0) global'; - global := global'; - Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error `Eof -> failwith "Couldn't read header" ); - ( match HR.read ~level ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g1) global'; - global := global'; - Alcotest.(check int) "expected user" 3000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error `Eof -> failwith "Couldn't read header" ); - ( match HR.read ~level ~global:!global cin with - | Ok _ -> failwith "Should have found EOF" - | Error `Eof -> () ); - () + match HW.write_global_extended_header g0 cout with + | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg + | Ok () -> + match HW.write ~level hdr cout with + | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg + | Ok () -> + f cout; + let hdr, f = make_file () in + let hdr = { hdr with Tar.Header.extended = Some (make_extended 2000) } in + match HW.write ~level hdr cout with + | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg + | Ok () -> + f cout; + let hdr, f = make_file () in + match HW.write ~level hdr cout with + | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg + | Ok () -> + f cout; + let g1 = make_extended 3000 in + let hdr, f = make_file () in + match HW.write_global_extended_header g1 cout with + | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg + | Ok () -> + match HW.write ~level hdr cout with + | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg + | Ok () -> + f cout; + Writer.really_write cout Tar.Header.zero_block; + Writer.really_write cout Tar.Header.zero_block; + close_out cout; + (* Read the same archive, testing that headers have been squashed. *) + let cin = open_in_bin "test.tar" in + let global = ref None in + let header = + let pp ppf hdr = Fmt.pf ppf "%s" (Tar.Header.Extended.to_detailed_string hdr) in + Alcotest.testable (fun ppf hdr -> Fmt.pf ppf "%a" Fmt.(option pp) hdr) ( = ) + in + ( match HR.read ~global:!global cin with + | Ok (hdr, global') -> + Alcotest.check header "expected global header" (Some g0) global'; + global := global'; + Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; + let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in + Reader.skip cin to_skip; + | Error _ -> failwith "Couldn't read header" ); + ( match HR.read ~global:!global cin with + | Ok (hdr, global') -> + Alcotest.check header "expected global header" (Some g0) global'; + global := global'; + Alcotest.(check int) "expected user" 2000 hdr.Tar.Header.user_id; + let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in + Reader.skip cin to_skip; + | Error _ -> failwith "Couldn't read header" ); + ( match HR.read ~global:!global cin with + | Ok (hdr, global') -> + Alcotest.check header "expected global header" (Some g0) global'; + global := global'; + Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; + let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in + Reader.skip cin to_skip; + | Error _ -> failwith "Couldn't read header" ); + ( match HR.read ~global:!global cin with + | Ok (hdr, global') -> + Alcotest.check header "expected global header" (Some g1) global'; + global := global'; + Alcotest.(check int) "expected user" 3000 hdr.Tar.Header.user_id; + let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in + Reader.skip cin to_skip; + | Error _ -> failwith "Couldn't read header" ); + ( match HR.read ~global:!global cin with + | Error `Eof -> () + | _ -> failwith "Should have found EOF"); + () let () = let suite = "tar - pax global extended headers", [ diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index e54b660..c0ae596 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -12,8 +12,6 @@ * GNU Lesser General Public License for more details. *) -[@@@warning "-3-27"] (* FIXME: deprecation from the tar library *) - open Lwt.Infix let convert_path os path = @@ -33,12 +31,27 @@ module Unix = struct if Sys.win32 then truncate (convert_path `Windows path) else truncate path end -let cstruct = Alcotest.testable - (fun f x -> Fmt.pf f "%a" Cstruct.hexdump_pp x) - Cstruct.equal +let list fd = + let rec loop global acc = + match Tar_unix.HeaderReader.read ~global fd with + | Ok (hdr, global) -> + print_endline hdr.Tar.Header.file_name; + Tar_unix.skip fd + (Int64.to_int hdr.Tar.Header.file_size + Tar.Header.compute_zero_padding_length hdr); + loop global (hdr :: acc) + | Error `Eof -> + List.rev acc + | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e + in + let r = loop None [] in + List.iter (fun h -> print_endline h.Tar.Header.file_name) r; + r + +let cstruct = Alcotest.testable Cstruct.hexdump_pp Cstruct.equal let pp_header f x = Fmt.pf f "%s" (Tar.Header.to_detailed_string x) -let header = - Alcotest.testable (fun f x -> Fmt.pf f "%a" (Fmt.option pp_header) x) ( = ) +let header = Alcotest.testable pp_header ( = ) + +let error = Alcotest.testable Tar.pp_error ( = ) let link = Alcotest.testable (Fmt.of_to_string Tar.Header.Link.to_string) ( = ) @@ -50,10 +63,13 @@ let header () = Cstruct.blit_from_string txt 0 c 0 (String.length txt); let c' = Cstruct.create Tar.Header.length in for i = 0 to Tar.Header.length - 1 do Cstruct.set_uint8 c' i 0 done; - Tar.Header.marshal c' h; - Alcotest.(check cstruct) "marshalled headers" c c'; - Alcotest.(check header) "unmarshalled headers" (Some h) (Tar.Header.unmarshal c'); - Alcotest.(check int) "zero padding length" 302 (Tar.Header.compute_zero_padding_length h) + match Tar.Header.marshal c' h with + | Ok () -> + Alcotest.(check cstruct) "marshalled headers" c c'; + Alcotest.(check (result header error)) "unmarshalled headers" (Ok h) (Tar.Header.unmarshal c'); + Alcotest.(check int) "zero padding length" 302 (Tar.Header.compute_zero_padding_length h) + | Error `Msg msg -> + Alcotest.failf "error marshalling: %s" msg let set_difference a b = List.filter (fun a -> not(List.mem a b)) a @@ -90,7 +106,8 @@ let with_tar ?(level:Tar.Header.compatibility option) ?files ?(sector_size = 512 let can_read_tar () = with_tar () @@ fun tar_filename files -> let fd = Unix.openfile tar_filename [ O_RDONLY; O_CLOEXEC ] 0 in - let files' = List.map (fun t -> t.Tar.Header.file_name) (Tar_unix.Archive.list fd) in + let files' = List.map (fun t -> t.Tar.Header.file_name) (list fd) in + flush stdout; Unix.close fd; let missing = set_difference files files' in let missing' = set_difference files' files in @@ -98,7 +115,6 @@ let can_read_tar () = Alcotest.(check (list string)) "missing'" [] missing' let can_write_pax () = - let open Tar_unix in with_file ~prefix:"tar-test" ~suffix:".tar" @@ fun filename -> (* This userid is too large for a regular ustar header *) let user_id = 0x07777777 + 1 in @@ -107,25 +123,28 @@ let can_write_pax () = Fun.protect (fun () -> let hdr = Tar.Header.make ~user_id "test" 0L in - write_block hdr (fun _ -> ()) fd; - write_end fd; + match Tar_unix.HeaderWriter.write hdr fd with + | Ok () -> + Tar_unix.really_write fd Tar.Header.zero_block; + Tar_unix.really_write fd Tar.Header.zero_block; + | Error `Msg msg -> + Alcotest.failf "error writing header %s" msg ) ~finally:(fun () -> Unix.close fd); (* Read it back and verify the header was read *) let fd = Unix.openfile filename [ O_RDONLY; O_CLOEXEC ] 0 in Fun.protect (fun () -> - match Archive.list fd with + match list fd with | [ one ] -> Alcotest.(check int) "user_id" user_id one.Tar.Header.user_id | xs -> Alcotest.failf "Headers = %a" (Fmt.list pp_header) xs ) ~finally:(fun () -> Unix.close fd) let can_list_longlink_tar () = - let open Tar_unix in let fd = Unix.openfile "lib_test/long.tar" [ O_RDONLY; O_CLOEXEC ] 0o0 in Fun.protect (fun () -> - let all = Archive.list fd in + let all = list fd in let filenames = List.map (fun h -> h.Tar.Header.file_name) all in (* List.iteri (fun i x -> Printf.fprintf stderr "%d: %s\n%!" i x) filenames; *) let expected = [ @@ -137,11 +156,10 @@ let can_list_longlink_tar () = ) ~finally:(fun () -> Unix.close fd) let can_list_long_pax_tar () = - let open Tar_unix in let fd = Unix.openfile "lib_test/long-pax.tar" [ O_RDONLY; O_CLOEXEC ] 0x0 in Fun.protect (fun () -> - let all = Archive.list fd in + let all = list fd in let filenames = List.map (fun h -> h.Tar.Header.file_name) all in (* List.iteri (fun i x -> Printf.fprintf stderr "%d: %s\n%!" i x) filenames; *) let expected = [ @@ -167,9 +185,12 @@ let can_list_pax_implicit_dir () = let fd = Unix.openfile "lib_test/pax-shenanigans.tar" [ O_RDONLY; O_CLOEXEC ] 0x0 in Fun.protect ~finally:(fun () -> Unix.close fd) (fun () -> - let (hdr, _global) = Tar_unix.get_next_header ~global:None fd in - Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.link_indicator; - Alcotest.(check string) "filename is patched" "clearly/a/directory/" hdr.file_name) + match Tar_unix.HeaderReader.read ~global:None fd with + | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e + | Error `Eof -> Alcotest.fail "unexpected end of file" + | Ok (hdr, _global) -> + Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.link_indicator; + Alcotest.(check string) "filename is patched" "clearly/a/directory/" hdr.file_name) (* Sample tar generated with commit 1583f71ea33b2836d3fb996ac7dc35d55abe2777: [let buf = @@ -186,9 +207,12 @@ let can_list_longlink_implicit_dir () = let fd = Unix.openfile "lib_test/long-implicit-dir.tar" [ O_RDONLY; O_CLOEXEC ] 0x0 in Fun.protect ~finally:(fun () -> Unix.close fd) (fun () -> - let (hdr, _global) = Tar_unix.get_next_header ~global:None fd in - Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.link_indicator; - Alcotest.(check string) "filename is patched" "some/long/name/for/a/directory/" hdr.file_name) + match Tar_unix.HeaderReader.read ~global:None fd with + | Ok (hdr, _global) -> + Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.link_indicator; + Alcotest.(check string) "filename is patched" "some/long/name/for/a/directory/" hdr.file_name + | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e + | Error `Eof -> Alcotest.fail "unexpected end of file") let starts_with ~prefix s = @@ -201,6 +225,7 @@ let starts_with ~prefix s = in len_s >= len_pre && aux 0 let can_transform_tar () = + (* let level = Tar.Header.Ustar in with_tar ~level () @@ fun tar_in _file_list -> let fd_in = Unix.openfile tar_in [ O_RDONLY; O_CLOEXEC ] 0 in @@ -216,8 +241,9 @@ let can_transform_tar () = Tar_unix.Archive.with_next_file fd_in ~global:None (fun fd_file _global hdr -> Alcotest.(check string) "Filename was transformed" temp_dir (String.sub hdr.file_name 0 (min (String.length hdr.file_name) (String.length temp_dir))); - Tar_unix.Archive.skip fd_file (Int64.to_int hdr.file_size)); + Tar_unix.skip fd_file (Int64.to_int hdr.file_size)); Unix.close fd_in + *) () module Block4096 = struct include Block @@ -254,7 +280,7 @@ module B = struct end module Test(B: BLOCK) = struct - let add_data_to_tar ?(level:Tar.Header.compatibility option) ?files switch () f = + let add_data_to_tar ?(level:Tar.Header.compatibility option) ?files _switch () f = with_tar ?level ?files ~sector_size:B.sector_size () @@ fun tar_filename files -> let size = Unix.(stat tar_filename).st_size in let size = B.sector_size * ((pred size + 4096 + B.sector_size) / B.sector_size) in @@ -269,7 +295,7 @@ module Test(B: BLOCK) = struct let files = "barf" :: files in f tar_filename files - let add_more_data_to_tar ?(level:Tar.Header.compatibility option) ?files switch () f = + let add_more_data_to_tar ?(level:Tar.Header.compatibility option) ?files _switch () f = with_tar ?level ?files ~sector_size:B.sector_size () @@ fun tar_filename files -> let size = Unix.(stat tar_filename).st_size in (* Add 4 KB rounding up to block size *) @@ -289,8 +315,8 @@ module Test(B: BLOCK) = struct let files = "barf" :: "barf2" :: files in f tar_filename files - let write_with_full_archive ?(level:Tar.Header.compatibility option) ?files switch () = - with_tar ?level ?files () @@ fun tar_filename files -> + let write_with_full_archive ?(level:Tar.Header.compatibility option) ?files _switch () = + with_tar ?level ?files () @@ fun tar_filename _files -> B.with_block tar_filename @@ fun b -> let module KV_RW = Tar_mirage.Make_KV_RW(Pclock)(B) in KV_RW.connect b >>= fun t -> @@ -337,7 +363,7 @@ module Test(B: BLOCK) = struct Lwt.return_unit end else Lwt.return_unit - let can_read_through_BLOCK ~files switch () = + let can_read_through_BLOCK ~files _switch () = with_tar ~files ~sector_size:B.sector_size () check_tar let write_test switch () = diff --git a/mirage/tar_mirage.ml b/mirage/tar_mirage.ml index 746768e..9d42035 100644 --- a/mirage/tar_mirage.ml +++ b/mirage/tar_mirage.ml @@ -82,7 +82,7 @@ module Make_KV_RO (BLOCK : Mirage_block.S) = struct mutable offset: int64; info: Mirage_block.info; } - type 'a t = 'a Lwt.t + type 'a io = 'a Lwt.t let really_read in_channel buffer = let len = Cstruct.length buffer in assert(len <= 512); @@ -258,6 +258,8 @@ module Make_KV_RO (BLOCK : Mirage_block.S) = struct let rec loop ~global map = HR.read ~global in_channel >>= function | Error `Eof -> Lwt.return map + | Error `Fatal e -> + Format.kasprintf failwith "Error reading archive: %a" Tar.pp_error e | Ok (tar, global) -> let filename = trim_slash tar.Tar.Header.file_name in let map = @@ -289,7 +291,7 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc include Make_KV_RO(BLOCK) - type write_error = [ `Block of BLOCK.error | `Block_write of BLOCK.write_error | Mirage_kv.write_error | `Entry_already_exists | `Path_segment_is_a_value | `Append_only ] + type write_error = [ `Block of BLOCK.error | `Block_write of BLOCK.write_error | Mirage_kv.write_error | `Entry_already_exists | `Path_segment_is_a_value | `Append_only | `Write_header of string ] let pp_write_error ppf = function | `Block e -> Fmt.pf ppf "read error while writing: %a" BLOCK.pp_error e @@ -368,7 +370,7 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc mutable offset: int64; info: Mirage_block.info; } - type 'a t = 'a Lwt.t + type 'a io = 'a Lwt.t exception Read of BLOCK.error exception Write of BLOCK.write_error let really_write out_channel data = @@ -395,7 +397,9 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc header(s) taking up exactly 512 bytes. With [GNU] level extra blocks may be used for long names. *) Lwt.catch - (fun () -> HW.write ~level:Tar.Header.Ustar hdr hw >|= fun () -> Ok ()) + (fun () -> HW.write ~level:Tar.Header.Ustar hdr hw >|= function + | Ok () -> Ok () + | Error `Msg msg -> Error (`Write_header msg)) (function | Writer.Read e -> Lwt.return (Error (`Block e)) | Writer.Write e -> Lwt.return (Error (`Block_write e)) diff --git a/tar-eio.opam b/tar-eio.opam index 697f742..572845c 100644 --- a/tar-eio.opam +++ b/tar-eio.opam @@ -6,7 +6,14 @@ tar is a library to read and write tar files with an emphasis on streaming. This library uses Eio to provide a portable tar library. """ maintainer: ["Reynir Björnsson " "dave@recoil.org"] -authors: ["Dave Scott" "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo"] +authors: [ + "Dave Scott" + "Thomas Gazagnaire" + "David Allsopp" + "Antonin Décimo" + "Reynir Björnsson" + "Hannes Mehnert" +] license: "ISC" tags: ["org:xapi-project" "org:mirage"] homepage: "https://github.com/mirage/ocaml-tar" diff --git a/tar-mirage.opam b/tar-mirage.opam index 6e884b0..b3f6d8f 100644 --- a/tar-mirage.opam +++ b/tar-mirage.opam @@ -7,7 +7,14 @@ streaming. This library is functorised over external OS dependencies to facilitate embedding within MirageOS. """ maintainer: ["Reynir Björnsson " "dave@recoil.org"] -authors: ["Dave Scott" "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo"] +authors: [ + "Dave Scott" + "Thomas Gazagnaire" + "David Allsopp" + "Antonin Décimo" + "Reynir Björnsson" + "Hannes Mehnert" +] license: "ISC" tags: ["org:xapi-project" "org:mirage"] homepage: "https://github.com/mirage/ocaml-tar" diff --git a/tar-unix.opam b/tar-unix.opam index fbe3750..79710b0 100644 --- a/tar-unix.opam +++ b/tar-unix.opam @@ -6,7 +6,14 @@ tar is a library to read and write tar files with an emphasis on streaming. This library provides a Unix or Windows compatible interface. """ maintainer: ["Reynir Björnsson " "dave@recoil.org"] -authors: ["Dave Scott" "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo"] +authors: [ + "Dave Scott" + "Thomas Gazagnaire" + "David Allsopp" + "Antonin Décimo" + "Reynir Björnsson" + "Hannes Mehnert" +] license: "ISC" tags: ["org:xapi-project" "org:mirage"] homepage: "https://github.com/mirage/ocaml-tar" diff --git a/tar.opam b/tar.opam index 1a09f13..d5ac9b0 100644 --- a/tar.opam +++ b/tar.opam @@ -8,7 +8,14 @@ streaming. This is pure OCaml code, no C bindings. """ maintainer: ["Reynir Björnsson " "dave@recoil.org"] -authors: ["Dave Scott" "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo"] +authors: [ + "Dave Scott" + "Thomas Gazagnaire" + "David Allsopp" + "Antonin Décimo" + "Reynir Björnsson" + "Hannes Mehnert" +] license: "ISC" tags: ["org:xapi-project" "org:mirage"] homepage: "https://github.com/mirage/ocaml-tar" @@ -16,7 +23,6 @@ doc: "https://mirage.github.io/ocaml-tar/" bug-reports: "https://github.com/mirage/ocaml-tar/issues" depends: [ "dune" {>= "2.9"} - "camlp-streams" "ocaml" {>= "4.08.0"} "cstruct" {>= "6.0.0"} "decompress" {>= "1.5.1"} diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 38623e2..d455c2b 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -17,11 +17,13 @@ open Lwt.Infix -module Reader = struct +module Io = struct type in_channel = Lwt_unix.file_descr - type 'a t = 'a Lwt.t + type 'a io = 'a Lwt.t let really_read fd = Lwt_cstruct.(complete (read fd)) let skip (ifd: Lwt_unix.file_descr) (n: int) = + (* Here it would make sense to use [Lwt_unix.lseek] if we can detect if + [ifd] is seekable *) let buffer_size = 32768 in let buffer = Cstruct.create buffer_size in let rec loop (n: int) = @@ -32,40 +34,18 @@ module Reader = struct really_read ifd block >>= fun () -> loop (n - amount) in loop n -end -let really_read = Reader.really_read -module Writer = struct + type out_channel = Lwt_unix.file_descr - type 'a t = 'a Lwt.t let really_write fd = Lwt_cstruct.(complete (write fd)) end -let really_write = Writer.really_write - -let copy_n ifd ofd n = - let block_size = 32768 in - let buffer = Cstruct.create block_size in - let rec loop remaining = - if remaining = 0L then Lwt.return_unit else begin - let this = Int64.(to_int (min (of_int block_size) remaining)) in - let block = Cstruct.sub buffer 0 this in - really_read ifd block >>= fun () -> - really_write ofd block >>= fun () -> - loop (Int64.(sub remaining (of_int this))) - end in - loop n - -module HR = Tar.HeaderReader(Lwt)(Reader) -module HW = Tar.HeaderWriter(Lwt)(Writer) -let get_next_header ?level ~global ic = - HR.read ?level ~global ic - >>= function - | Error `Eof -> Lwt.return None - | Ok hdrs -> Lwt.return (Some hdrs) +include Io +module HeaderReader = Tar.HeaderReader(Lwt)(Io) +module HeaderWriter = Tar.HeaderWriter(Lwt)(Io) (** Return the header needed for a particular file on disk *) let header_of_file ?level (file: string) : Tar.Header.t Lwt.t = - let level = match level with None -> Tar.Header.V7 | Some level -> level in + let level = match level with None -> !Tar.Header.compatibility_level | Some level -> level in Lwt_unix.LargeFile.stat file >>= fun stat -> Lwt_unix.getpwuid stat.Lwt_unix.LargeFile.st_uid >>= fun pwent -> Lwt_unix.getgrgid stat.Lwt_unix.LargeFile.st_gid >>= fun grent -> @@ -82,89 +62,3 @@ let header_of_file ?level (file: string) : Tar.Header.t Lwt.t = let devminor = if level = Ustar then stat.Lwt_unix.LargeFile.st_rdev else 0 in Lwt.return (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ~uname ~gname ~devmajor ~devminor file file_size) - -let write_block ?level ?global (header: Tar.Header.t) (body: Lwt_unix.file_descr -> unit Lwt.t) (fd : Lwt_unix.file_descr) = - HW.write ?level ?global header fd - >>= fun () -> - body fd >>= fun () -> - really_write fd (Tar.Header.zero_padding header) - -let write_end (fd: Lwt_unix.file_descr) = - really_write fd Tar.Header.zero_block >>= fun () -> - really_write fd Tar.Header.zero_block - -(** Utility functions for operating over whole tar archives *) -module Archive = struct - - let with_file name flags perms f = - Lwt_unix.openfile name flags perms >>= fun fd -> - Lwt.finalize (fun () -> f fd) (fun () -> Lwt_unix.close fd) - - (** Read the next header, apply the function 'f' to the fd and the header. The function - should leave the fd positioned immediately after the datablock. Finally the function - skips past the zero padding to the next header *) - let with_next_file (fd: Lwt_unix.file_descr) ~(global: Tar.Header.Extended.t option) - (f: Lwt_unix.file_descr -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a Lwt.t) = - get_next_header ~global fd >>= function - | Some (hdr, global) -> - f fd global hdr >>= fun result -> - Reader.skip fd (Tar.Header.compute_zero_padding_length hdr) >>= fun () -> - Lwt.return (Some result) - | None -> - Lwt.return None - - (** List the contents of a tar *) - let list ?level fd = - let rec loop global acc = get_next_header ?level ~global fd >>= function - | None -> Lwt.return (List.rev acc) - | Some (hdr, global) -> - Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size) >>= fun () -> - Reader.skip fd (Tar.Header.compute_zero_padding_length hdr) >>= fun () -> - loop global (hdr :: acc) in - loop None [] - - (** Extract the contents of a tar to directory 'dest' *) - let extract dest ifd = - let rec loop global () = get_next_header ~global ifd >>= function - | None -> Lwt.return_unit - | Some (hdr, global) -> - let filename = dest hdr.Tar.Header.file_name in - with_file filename [Unix.O_WRONLY; O_CLOEXEC] 0 @@ fun ofd -> - copy_n ifd ofd hdr.Tar.Header.file_size >>= fun () -> - Reader.skip ifd (Tar.Header.compute_zero_padding_length hdr) >>= fun () -> - loop global () in - loop None () - - let transform ?level f ifd ofd = - let rec loop global () = get_next_header ~global ifd >>= function - | None -> Lwt.return_unit - | Some (header', global') -> - let header = f header' in - let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in - write_block ?level ?global:(if global <> global' then global' else None) header body ofd >>= fun () -> - Reader.skip ifd (Tar.Header.compute_zero_padding_length header') >>= fun () -> - loop global' () in - loop None () >>= fun () -> - write_end ofd - - (** Create a tar on file descriptor fd from the filename list - 'files' *) - let create files ofd = - let file filename = - Lwt_unix.stat filename >>= fun stat -> - if stat.Unix.st_kind <> Unix.S_REG then - (* Skipping, not a regular file. *) - Lwt.return_unit - else begin - header_of_file filename >>= fun hdr -> - - write_block hdr (fun ofd -> - with_file filename [O_RDONLY; O_CLOEXEC] 0 @@ fun ifd -> - copy_n ifd ofd hdr.Tar.Header.file_size - ) ofd - end in - Lwt_list.iter_s file files >>= fun () -> - (* Add two empty blocks *) - write_end ofd - -end diff --git a/unix/tar_lwt_unix.mli b/unix/tar_lwt_unix.mli index 6c8417d..f153391 100644 --- a/unix/tar_lwt_unix.mli +++ b/unix/tar_lwt_unix.mli @@ -24,41 +24,12 @@ val really_write: Lwt_unix.file_descr -> Cstruct.t -> unit Lwt.t (** [really_write fd buf] writes the full contents of [buf] to [fd] or fails with {!Stdlib.End_of_file}. *) -(** Returns the next header block or None if two consecutive - zero-filled blocks are discovered. Assumes stream is positioned at the - possible start of a header block. - @raise Stdlib.End_of_file if the stream unexpectedly fails. *) -val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> Lwt_unix.file_descr -> - (Tar.Header.t * Tar.Header.Extended.t option) option Lwt.t +val skip : Lwt_unix.file_descr -> int -> unit Lwt.t +(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you + should use [Lwt_unix.lseek fd n Lwt_unix.SEEK_CUR] instead. *) (** Return the header needed for a particular file on disk. *) val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t Lwt.t -module Archive : sig - (** Utility functions for operating over whole tar archives *) - - (** Read the next header, apply the function 'f' to the fd and the header. The function - should leave the fd positioned immediately after the datablock. Finally the function - skips past the zero padding to the next header. *) - val with_next_file : Lwt_unix.file_descr -> global:Tar.Header.Extended.t option -> - (Lwt_unix.file_descr -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a Lwt.t) -> 'a option Lwt.t - - (** List the contents of a tar to stdout. *) - val list : ?level:Tar.Header.compatibility -> Lwt_unix.file_descr -> Tar.Header.t list Lwt.t - - (** [extract dest] extract the contents of a tar. - Apply [dest] on each source filename to change the destination - filename. It only supports extracting regular files from the - top-level of the archive. *) - val extract : (string -> string) -> Lwt_unix.file_descr -> unit Lwt.t - - (** [transform f in_fd out_fd] applies [f] to the header of each - file in the tar inputted in [in_fd], and writes the resulting - headers to [out_fd] preserving the content and structure of the - archive. *) - val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> Lwt_unix.file_descr -> Lwt_unix.file_descr -> unit Lwt.t - - (** Create a tar on file descriptor fd from a list of filenames. It - only supports regular files. *) - val create : string list -> Lwt_unix.file_descr -> unit Lwt.t -end +module HeaderReader : Tar.HEADERREADER with type in_channel = Lwt_unix.file_descr and type 'a io = 'a Lwt.t +module HeaderWriter : Tar.HEADERWRITER with type out_channel = Lwt_unix.file_descr and type 'a io = 'a Lwt.t diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 1938323..bc364b1 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -15,49 +15,64 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) -[@@@warning "-3"] (* FIXME Tar.HeaderWriter needs to be used here *) +module Direct = struct + type 'a t = 'a + let return x = x + let ( >>= ) m f = f m +end module Driver = struct + type 'a io = 'a Direct.t type in_channel = Unix.file_descr type out_channel = Unix.file_descr - let rec with_restart op fd buf off len = try op fd buf off len with Unix.Unix_error (Unix.EINTR,_,_) -> with_restart op fd buf off len - let rec really_input fd buf off = function - | 0 -> () - | len -> - let m = Unix.read fd buf off len in - if m = 0 then raise End_of_file; - really_input fd buf (off+m) (len-m) - - - let rec really_output fd buf off = function - | 0 -> () - | len -> - let m = Unix.write fd buf off len in - really_output fd buf (off+m) (len-m) - - let output = with_restart really_output - let input = with_restart Unix.read - let really_input = with_restart really_input - let close_out = Unix.close + let really_read fd buf = + (* Change code once this is merged: + https://github.com/mirage/ocaml-cstruct/pull/302 *) + let b = Bytes.create (min 4096 (Cstruct.length buf)) in + let rec loop offset = + let len = min (Bytes.length b) (Cstruct.length buf - offset) in + if len > 0 then + let n = with_restart Unix.read fd b 0 len in + if n = 0 then raise End_of_file; + Cstruct.blit_from_bytes b 0 buf offset n; + loop (offset + n) + in + loop 0 + + let skip fd n = + (* Here it would make sense to use [Lwt_unix.lseek] if we can detect if + [ifd] is seekable *) + let b = Bytes.create (min 4096 n) in + let rem = ref n in + while !rem > 0 do + let len = min (Bytes.length b) !rem in + rem := !rem - with_restart Unix.read fd b 0 len + done + + let really_write fd buf = + (* FIXME: This is not very good :( + also: https://github.com/mirage/ocaml-cstruct/pull/302 *) + let b = Cstruct.to_bytes buf in + let offset = ref 0 in + while !offset < Bytes.length b do + offset := !offset + with_restart Unix.write fd b 0 (Bytes.length b) + done end -module T = Tar.Make(Driver) - -let really_write = T.really_write - -let really_read = T.really_read +module HeaderReader = Tar.HeaderReader(Direct)(Driver) +module HeaderWriter = Tar.HeaderWriter(Direct)(Driver) -let get_next_header = T.get_next_header +include Driver (** Return the header needed for a particular file on disk *) let header_of_file ?level (file: string) : Tar.Header.t = - let level = match level with None -> Tar.Header.V7 | Some level -> level in + let level = match level with None -> !Tar.Header.compatibility_level | Some level -> level in let stat = Unix.LargeFile.lstat file in let file_mode = stat.Unix.LargeFile.st_perm in let user_id = stat.Unix.LargeFile.st_uid in @@ -71,71 +86,3 @@ let header_of_file ?level (file: string) : Tar.Header.t = let devminor = if level = Ustar then stat.Unix.LargeFile.st_rdev else 0 in Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ~uname ~gname ~devmajor ~devminor file stat.Unix.LargeFile.st_size - -let write_block = T.write_block -let write_end = T.write_end - -(** Utility functions for operating over whole tar archives *) -module Archive = struct - include T.Archive - - let with_file name flags perms f = - let fd = Unix.openfile name flags perms in - Fun.protect ~finally:(fun () -> Unix.close fd) (fun () -> f fd) - - (** Extract the contents of a tar to directory 'dest' *) - let extract dest ifd = - let dest hdr = - let filename = dest hdr.Tar.Header.file_name in - Unix.openfile filename [O_WRONLY; O_CLOEXEC] 0 - in - extract_gen dest ifd - - let transform ?level f ifd ofd = - let rec loop global () = - match get_next_header ~global ifd with - | exception Tar.Header.End_of_stream -> () - | (header', global') -> - let header = f header' in - let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in - write_block ?level ?global:(if global <> global' then global' else None) header body ofd; - skip ifd (Tar.Header.compute_zero_padding_length header'); - loop global' () in - loop None (); - write_end ofd - - (** Create a tar on file descriptor fd from the filename list - 'files' *) - let create files ofd = - let files = - let f filename = - let stat = Unix.stat filename in - if stat.Unix.st_kind <> Unix.S_REG - then - (* Skipping, not a regular file. *) - None - else - let hdr = header_of_file filename in - Some (hdr, (fun ofd -> - with_file filename [O_RDONLY; O_CLOEXEC] 0 @@ fun ifd -> - copy_n ifd ofd hdr.Tar.Header.file_size)) - in - List.filter_map f files - in - create_gen (Stream.of_list files) ofd - - (** Multicast 'n' bytes from input fd 'ifd' to output fds 'ofds'. NB if one deadlocks - they all stop.*) - let multicast_n ?(buffer_size=1024*1024) (ifd: Unix.file_descr) (ofds: Unix.file_descr list) (n: int64) = - let buffer = Bytes.make buffer_size '\000' in - let rec loop (n: int64) = - if n <= 0L then () - else - let amount = Int64.to_int (min n (Int64.of_int(Bytes.length buffer))) in - let read = Unix.read ifd buffer 0 amount in - if read = 0 then raise End_of_file; - List.iter (fun ofd -> ignore(Unix.write ofd buffer 0 read)) ofds; - loop (Int64.sub n (Int64.of_int read)) in - loop n - -end diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 44651e8..32ddd81 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -24,63 +24,12 @@ val really_write: Unix.file_descr -> Cstruct.t -> unit (** [really_write fd buf] writes the full contents of [buf] to [fd] or {!Stdlib.End_of_file}. *) -(** Returns the next header block or throws End_of_stream if two consecutive - zero-filled blocks are discovered. Assumes stream is positioned at the - possible start of a header block. - @raise Stdlib.End_of_file if the stream unexpectedly fails. *) -val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> - Unix.file_descr -> Tar.Header.t * Tar.Header.Extended.t option +val skip : Unix.file_descr -> int -> unit +(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you + should use [Unix.lseek fd n Unix.SEEK_CUR] instead. *) (** Return the header needed for a particular file on disk. *) val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t -val write_block: ?level:Tar.Header.compatibility -> ?global:Tar.Header.Extended.t -> - Tar.Header.t -> (Unix.file_descr -> unit) -> Unix.file_descr -> unit - [@@ocaml.deprecated "Deprecated in favor of Tar.HeaderWriter"] - (** Write [hdr], then call [write_body fd] to write the body, - then zero-pads so the stream is positioned for the next block. *) - -val write_end: Unix.file_descr -> unit - [@@ocaml.deprecated "Deprecated in favor of Tar.HeaderWriter"] - (** Write a stream terminator to [fd]. *) - -module Archive : sig - (** Utility functions for operating over whole tar archives. *) - - (** Read the next header, apply the function 'f' to the fd and the header. The function - should leave the fd positioned immediately after the datablock. Finally the function - skips past the zero padding to the next header. *) - val with_next_file : Unix.file_descr -> global:Tar.Header.Extended.t option -> (Unix.file_descr -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a - - (** List the contents of a tar. *) - val list : ?level:Tar.Header.compatibility -> Unix.file_descr -> Tar.Header.t list - - (** [extract dest] extract the contents of a tar. - Apply [dest] on each source filename to change the destination - filename. It only supports extracting regular files from the - top-level of the archive. *) - val extract : (string -> string) -> Unix.file_descr -> unit - - (** [transform f in_fd out_fd] applies [f] to the header of each - file in the tar inputted in [in_fd], and writes the resulting - headers to [out_fd] preserving the content and structure of the - archive. *) - val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> Unix.file_descr -> Unix.file_descr -> unit - - (** Create a tar on file descriptor fd from the filename list - 'files'. It only supports regular files. *) - val create : string list -> Unix.file_descr -> unit - - (** [copy_n ifd odf n] copies exactly [n] bytes from [ifd] to [ofd]. *) - val copy_n : Unix.file_descr -> Unix.file_descr -> int64 -> unit - [@@ocaml.deprecated "Deprecated: use your own helper function"] - - (** [multicast_n ?buffer_size ifd ofds n] copies exactly [n] bytes from [ifd] to all [ofds]. *) - val multicast_n : ?buffer_size:int -> Unix.file_descr -> Unix.file_descr list -> int64 -> unit - [@@ocaml.deprecated "Deprecated: use your own helper function"] - - (** [skip fd n] reads and throws away [n] bytes from [fd]. *) - val skip : Unix.file_descr -> int -> unit - [@@ocaml.deprecated "Deprecated: use your own helper function"] - -end +module HeaderReader : Tar.HEADERREADER with type in_channel = Unix.file_descr and type 'a io = 'a +module HeaderWriter : Tar.HEADERWRITER with type out_channel = Unix.file_descr and type 'a io = 'a