diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml index 553b126..0731026 100644 --- a/eio/tar_eio.ml +++ b/eio/tar_eio.ml @@ -62,7 +62,7 @@ let stat path = (** Return the header needed for a particular file on disk *) let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = - let level = match level with None -> !Tar.Header.compatibility_level | Some level -> level in + let level = Option.value ~default:!Tar.Header.compatibility_level level in let stat = stat filepath in let pwent = Option.map (fun f -> f stat.uid) getpwuid in let grent = Option.map (fun f -> f stat.gid) getgrgid in diff --git a/lib/tar.ml b/lib/tar.ml index 290ab1e..7da7367 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -763,128 +763,6 @@ let decode t data = | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> Error (`Fatal e) -module type ASYNC = sig - type 'a t - val ( >>= ): 'a t -> ('a -> 'b t) -> 'b t - val return: 'a -> 'a t -end - -module type READER = sig - type in_channel - type 'a io - val really_read: in_channel -> bytes -> unit io - val skip: in_channel -> int -> unit io -end - -module type WRITER = sig - type out_channel - type 'a io - val really_write: out_channel -> string -> unit io -end - -module type HEADERREADER = sig - type in_channel - type 'a io - val read : global:Header.Extended.t option -> in_channel -> - (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result io -end - -module type HEADERWRITER = sig - type out_channel - type 'a io - val write : ?level:Header.compatibility -> Header.t -> out_channel -> (unit, [> `Msg of string ]) result io - val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io -end - -module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) = struct - open Async - open Reader - - type in_channel = Reader.in_channel - type 'a io = 'a t - - (* This is not a bind, but more a lift and bind combined. *) - let ( let^* ) x f = - match x with - | Ok x -> f x - | Error _ as e -> return e - - - let read ~global (ifd: Reader.in_channel) : (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result t = - (* We might need to read 2 headers at once if we encounter a Pax header *) - let buffer = Bytes.make Header.length '\000' in - let real_header_buf = Bytes.make Header.length '\000' in - - let next_block global () = - really_read ifd buffer >>= fun () -> - return (Header.unmarshal ?extended:global (Bytes.unsafe_to_string buffer)) - in - - let rec get_hdr ~next_longname ~next_longlink global () : (Header.t * Header.Extended.t option, [> `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result t = - next_block global () >>= function - | Ok x when x.Header.link_indicator = Header.Link.GlobalExtendedHeader -> - let extra_header_buf = Bytes.make (Int64.to_int x.Header.file_size) '\000' in - really_read ifd extra_header_buf >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) >>= fun () -> - (* unmarshal merges the previous global (if any) with the - discovered global (if any) and returns the new global. *) - let^* global = - Result.map_error - (fun e -> `Fatal e) - (Header.Extended.unmarshal ~global (Bytes.unsafe_to_string extra_header_buf)) - in - get_hdr ~next_longname ~next_longlink (Some global) () - | Ok x when x.Header.link_indicator = Header.Link.PerFileExtendedHeader -> - let extra_header_buf = Bytes.make (Int64.to_int x.Header.file_size) '\000' in - really_read ifd extra_header_buf >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) >>= fun () -> - let^* extended = - Result.map_error - (fun e -> `Fatal e) - (Header.Extended.unmarshal ~global (Bytes.unsafe_to_string extra_header_buf)) - in - really_read ifd real_header_buf >>= fun () -> - let^* x = - Result.map_error - (fun _ -> `Fatal `Corrupt_pax_header) - (Header.unmarshal ~extended (Bytes.unsafe_to_string real_header_buf)) - in - let x = fix_link_indicator x in - return (Ok (x, global)) - | Ok ({ Header.link_indicator = Header.Link.LongLink | Header.Link.LongName; _ } as x) when x.Header.file_name = longlink -> - let extra_header_buf = Bytes.create (Int64.to_int x.Header.file_size) in - really_read ifd extra_header_buf >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) >>= fun () -> - let name = String.sub (Bytes.unsafe_to_string extra_header_buf) 0 (Bytes.length extra_header_buf - 1) in - let next_longlink = if x.Header.link_indicator = Header.Link.LongLink then Some name else next_longlink in - let next_longname = if x.Header.link_indicator = Header.Link.LongName then Some name else next_longname in - get_hdr ~next_longname ~next_longlink global () - | Ok x -> - (* XXX: unclear how/if pax headers should interact with gnu extensions *) - let x = match next_longname with - | None -> x - | Some file_name -> { x with file_name } - in - let x = match next_longlink with - | None -> x - | Some link_name -> { x with link_name } - in - let x = fix_link_indicator x in - return (Ok (x, global)) - | Error `Zero_block -> - begin - next_block global () >>= function - | Ok x -> return (Ok (x, global)) - | Error `Zero_block -> return (Error `Eof) - | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> return (Error (`Fatal e)) - end - | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> - return (Error (`Fatal e)) - in - get_hdr ~next_longname:None ~next_longlink:None global () - -end - let encode_long level link_indicator payload = let blank = {Header.file_name = longlink; file_mode = 0; user_id = 0; group_id = 0; mod_time = 0L; file_size = 0L; link_indicator = Header.Link.LongLink; link_name = ""; uname = "root"; gname = "root"; devmajor = 0; devminor = 0; extended = None} in let payload = payload ^ "\000" in @@ -941,94 +819,3 @@ let encode_header ?level header = let encode_global_extended_header global = encode_extended_header `Global global - -module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) = struct - open Async - open Writer - - type out_channel = Writer.out_channel - type 'a io = 'a t - - let write_unextended ?level header fd = - let level = Header.get_level level in - let blank = {Header.file_name = longlink; file_mode = 0; user_id = 0; group_id = 0; mod_time = 0L; file_size = 0L; link_indicator = Header.Link.LongLink; link_name = ""; uname = "root"; gname = "root"; devmajor = 0; devminor = 0; extended = None} in - (if level = Header.GNU then begin - begin - if String.length header.Header.link_name > Header.sizeof_hdr_link_name then begin - let file_size = String.length header.Header.link_name + 1 in - let blank = {blank with Header.file_size = Int64.of_int file_size} in - let buffer = Bytes.make Header.length '\000' in - match - Header.marshal ~level buffer { blank with link_indicator = Header.Link.LongLink } - with - | Error _ as e -> return e - | Ok () -> - really_write fd (Bytes.unsafe_to_string buffer) >>= fun () -> - let payload = header.Header.link_name ^ "\000" in - really_write fd payload >>= fun () -> - really_write fd (Header.zero_padding blank) >>= fun () -> - return (Ok ()) - end else - return (Ok ()) - end >>= function - | Error _ as e -> return e - | Ok () -> - begin - if String.length header.Header.file_name > Header.sizeof_hdr_file_name then begin - let file_size = String.length header.Header.file_name + 1 in - let blank = {blank with Header.file_size = Int64.of_int file_size} in - let buffer = Bytes.make Header.length '\000' in - match - Header.marshal ~level buffer { blank with link_indicator = Header.Link.LongName } - with - | Error _ as e -> return e - | Ok () -> - really_write fd (Bytes.unsafe_to_string buffer) >>= fun () -> - let payload = header.Header.file_name ^ "\000" in - really_write fd payload >>= fun () -> - really_write fd (Header.zero_padding blank) >>= fun () -> - return (Ok ()) - end else - return (Ok ()) - end >>= function - | Error _ as e -> return e - | Ok () -> return (Ok ()) - end else - return (Ok ())) >>= function - | Error _ as e -> return e - | Ok () -> - let buffer = Bytes.make Header.length '\000' in - match Header.marshal ~level buffer header with - | Error _ as e -> return e - | Ok () -> - really_write fd (Bytes.unsafe_to_string buffer) >>= fun () -> - return (Ok ()) - - let write_extended ?level ~link_indicator hdr fd = - let link_indicator_name = match link_indicator with - | Header.Link.PerFileExtendedHeader -> "paxheader" - | Header.Link.GlobalExtendedHeader -> "pax_global_header" - | _ -> assert false - in - let pax_payload = Header.Extended.marshal hdr in - let pax = Header.make ~link_indicator link_indicator_name - (Int64.of_int @@ String.length pax_payload) in - write_unextended ?level pax fd >>= function - | Error _ as e -> return e - | Ok () -> - really_write fd pax_payload >>= fun () -> - really_write fd (Header.zero_padding pax) >>= fun () -> - return (Ok ()) - - let write ?level header fd = - ( match header.Header.extended with - | None -> return (Ok ()) - | Some e -> - write_extended ?level ~link_indicator:Header.Link.PerFileExtendedHeader e fd ) - >>= function - | Error _ as e -> return e - | Ok () -> write_unextended ?level header fd - - let write_global_extended_header global fd = - write_extended ~link_indicator:Header.Link.GlobalExtendedHeader global fd -end diff --git a/lib/tar.mli b/lib/tar.mli index b1c114e..72cbe72 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -139,62 +139,34 @@ module Header : sig val to_sectors: t -> int64 end +(** {1 Decoding and encoding of a whole archive} *) + +(** The type of the decode state. *) type decode_state +(** [decode_state ~global ()] constructs a decode_state. *) val decode_state : ?global:Header.Extended.t -> unit -> decode_state +(** [decode t data] decodes [data] taking the current state [t] into account. + It may result on success in a new state, optionally some action that should + be done ([`Read] or [`Skip]), or a decoded [`Header]. Possibly a new global + PAX header is provided as well. + + If no [`Read] or [`Skip] is returned, the new state should be used with + [decode] with the next [Header.length] sized string, which will lead to + further decoding until [`Eof] (or an error) occurs. *) val decode : decode_state -> string -> (decode_state * [ `Read of int | `Skip of int | `Header of Header.t ] option * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result +(** [encode_header ~level hdr] encodes the header with the provided [level] + (defaults to [V7]) into a list of strings to be written to the disk. + Once a header is written, the payload (padded to multiples of + [Header.length]) should follow. *) val encode_header : ?level:Header.compatibility -> Header.t -> (string list, [> `Msg of string ]) result +(** [encode_global_extended_header hdr] encodes the global extended header as + a list of strings. *) val encode_global_extended_header : Header.Extended.t -> (string list, [> `Msg of string ]) result - - -module type ASYNC = sig - type 'a t - val ( >>= ): 'a t -> ('a -> 'b t) -> 'b t - val return: 'a -> 'a t -end - -module type READER = sig - type in_channel - type 'a io - val really_read: in_channel -> bytes -> unit io - val skip: in_channel -> int -> unit io -end - -module type WRITER = sig - type out_channel - type 'a io - val really_write: out_channel -> string -> unit io -end - -module type HEADERREADER = sig - type in_channel - type 'a io - - (** Returns the next header block or error [`Eof] if two consecutive - zero-filled blocks are discovered. Assumes stream is positioned at the - possible start of a header block. - @param global Holds the current global pax extended header, if - any. Needs to be given to the next call to [read]. *) - val read : global:Header.Extended.t option -> in_channel -> - (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result io -end - -module type HEADERWRITER = sig - type out_channel - type 'a io - val write : ?level:Header.compatibility -> Header.t -> out_channel -> (unit, [> `Msg of string ]) result io - val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io -end - -module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) : - HEADERREADER with type in_channel = Reader.in_channel and type 'a io = 'a Async.t - -module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) : - HEADERWRITER with type out_channel = Writer.out_channel and type 'a io = 'a Async.t diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 8db418a..cc85db4 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -52,7 +52,7 @@ module HeaderWriter = Tar.HeaderWriter(Lwt)(Io) (** Return the header needed for a particular file on disk *) let header_of_file ?level (file: string) : Tar.Header.t Lwt.t = - let level = match level with None -> !Tar.Header.compatibility_level | Some level -> level in + let level = Option.value ~default:!Tar.Header.compatibility_level level in Lwt_unix.LargeFile.stat file >>= fun stat -> Lwt_unix.getpwuid stat.Lwt_unix.LargeFile.st_uid >>= fun pwent -> Lwt_unix.getgrgid stat.Lwt_unix.LargeFile.st_gid >>= fun grent -> diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 730b518..6093379 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -58,7 +58,7 @@ include Driver (** Return the header needed for a particular file on disk *) let header_of_file ?level (file: string) : Tar.Header.t = - let level = match level with None -> !Tar.Header.compatibility_level | Some level -> level in + let level = Option.valud ~default:!Tar.Header.compatibility_level level in let stat = Unix.LargeFile.lstat file in let file_mode = stat.Unix.LargeFile.st_perm in let user_id = stat.Unix.LargeFile.st_uid in