Skip to content

Commit

Permalink
initial compiling tar_unix
Browse files Browse the repository at this point in the history
  • Loading branch information
hannesm committed Feb 3, 2024
1 parent dddcf40 commit d85cb7e
Show file tree
Hide file tree
Showing 4 changed files with 253 additions and 62 deletions.
4 changes: 2 additions & 2 deletions lib/tar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -813,5 +813,5 @@ let encode_header ?level header =
let* rest = encode_unextended_header ?level header in
Ok (extended @ rest)

let encode_global_extended_header global =
encode_extended_header `Global global
let encode_global_extended_header ?level global =
encode_extended_header ?level `Global global
2 changes: 1 addition & 1 deletion lib/tar.mli
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,4 @@ val encode_header : ?level:Header.compatibility ->

(** [encode_global_extended_header hdr] encodes the global extended header as
a list of strings. *)
val encode_global_extended_header : Header.Extended.t -> (string list, [> `Msg of string ]) result
val encode_global_extended_header : ?level:Header.compatibility -> Header.Extended.t -> (string list, [> `Msg of string ]) result
259 changes: 221 additions & 38 deletions unix/tar_unix.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,60 +15,243 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)

module Direct = struct
type 'a t = 'a
let return x = x
let ( >>= ) m f = f m
end

module Driver = struct
type 'a io = 'a Direct.t
type in_channel = Unix.file_descr
type out_channel = Unix.file_descr

let rec with_restart op fd buf off len =
try op fd buf off len with
Unix.Unix_error (Unix.EINTR,_,_) ->
with_restart op fd buf off len

let really_read fd buf =
let len = Bytes.length buf in
let rec loop offset =
if offset < len then
let n = with_restart Unix.read fd buf offset (len - offset) in
if n = 0 then raise End_of_file;
let ( let* ) = Result.bind

let rec safe ~off f a =
try Ok (f a) with
| Unix.Unix_error (Unix.EINTR, _, _) -> safe ~off f a
| Unix.Unix_error (e, f, s) -> Error (`Unix (off, e, f, s))

let safe_close fd =
try Unix.close fd with _ -> ()

let read_complete ~off fd buf len =
let rec loop offset =
if offset < len then
let* n = safe ~off (Unix.read fd buf offset) (len - offset) in
if n = 0 then
Error (`Unexpected_end_of_file off)
else
loop (offset + n)
else
Ok ()
in
loop 0

let seek ~off fd n =
safe ~off (Unix.lseek fd n) Unix.SEEK_CUR

type decode_error = [
| `Fatal of int * [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ]
| `Unix of int * Unix.error * string * string
| `Unexpected_end_of_file of int
| `Msg of int * string
]

let pp_decode_error ppf = function
| `Fatal (off, err) ->
Format.fprintf ppf "Offset %u, %a" off Tar.pp_error err
| `Unix (off, err, fname, arg) ->
Format.fprintf ppf "Offset %u, Unix error %s (function %s, arg %s)" off
(Unix.error_message err) fname arg
| `Unexpected_end_of_file off ->
Format.fprintf ppf "Offset %u unexpected end of file" off
| `Msg (off, msg) ->
Format.fprintf ppf "Offset %u error %s" off msg

let fold f filename init =
let* fd = safe ~off:0 Unix.(openfile filename [ O_RDONLY ]) 0 in
let rec go ~off t fd ?global ?data acc =
let* data = match data with
| None ->
let buf = Bytes.make Tar.Header.length '\000' in
let* () = read_complete ~off fd buf Tar.Header.length in
Ok (Bytes.unsafe_to_string buf)
| Some data -> Ok data
in
loop 0
match Tar.decode t data with
| Ok (t, Some `Header hdr, g) ->
let global = Option.fold ~none:global ~some:(fun g -> Some g) g in
let* acc' =
Result.map_error
(fun (`Msg s) -> `Msg (off, s))
(f fd ?global hdr acc)
in
let* off = seek ~off fd (Tar.Header.compute_zero_padding_length hdr) in
go ~off t fd ?global acc'
| Ok (t, Some `Skip n, g) ->
let global = Option.fold ~none:global ~some:(fun g -> Some g) g in
let* off = seek ~off fd n in
go ~off t fd ?global acc
| Ok (t, Some `Read n, g) ->
let global = Option.fold ~none:global ~some:(fun g -> Some g) g in
let buf = Bytes.make n '\000' in
let* () = read_complete ~off fd buf n in
let data = Bytes.unsafe_to_string buf in
go ~off:(off + n) t fd ?global ~data acc
| Ok (t, None, g) ->
let global = Option.fold ~none:global ~some:(fun g -> Some g) g in
go ~off t fd ?global acc
| Error `Eof -> Ok acc
| Error `Fatal e -> Error (`Fatal (off, e))
in
Fun.protect
~finally:(fun () -> safe_close fd)
(fun () -> go ~off:0 (Tar.decode_state ()) fd init)

let skip fd n =
ignore (Unix.lseek fd n Unix.SEEK_CUR)
let map_to_msg = function
| `Unix (_off, e, f, s) ->
`Msg (Format.sprintf "error %s in function %s %s"
(Unix.error_message e) f s)

let really_write fd buf =
let offset = ref 0 in
while !offset < String.length buf do
offset := !offset + with_restart Unix.write_substring fd buf !offset (String.length buf - !offset)
done
end
let copy ~src_fd ~dst_fd len =
let blen = 65536 in
let buffer = Bytes.make blen '\000' in
let rec read_write ~src_fd ~dst_fd len =
if len = 0 then
Ok ()
else
let l = min blen len in
let* () =
Result.map_error
(function
| `Unix _ as e -> map_to_msg e
| `Unexpected_end_of_file _off ->
`Msg ("Unexpected end of file"))
(read_complete ~off:0 src_fd buffer l)
in
let* _written =
Result.map_error map_to_msg
(safe ~off:0 (Unix.write dst_fd buffer 0) l)
in
read_write ~src_fd ~dst_fd (len - l)
in
read_write ~src_fd ~dst_fd len

module HeaderReader = Tar.HeaderReader(Direct)(Driver)
module HeaderWriter = Tar.HeaderWriter(Direct)(Driver)
let extract ?(filter = fun _ -> true) ~src dst =
let f fd ?global:_ hdr () =
if filter hdr then
match hdr.Tar.Header.link_indicator with
| Tar.Header.Link.Normal ->
let* dst =
Result.map_error map_to_msg
(safe ~off:0 Unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name)
[ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode)
in
Fun.protect ~finally:(fun () -> safe_close dst)
(fun () -> copy ~src_fd:fd ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size))
(* TODO set owner / mode / mtime etc. *)
| _ -> Error (`Msg "not yet handled")
else
let* _off =
Result.map_error (fun (`Unix (_off, e, f, s)) ->
`Msg (Format.sprintf "error %s in function %s %s"
(Unix.error_message e) f s))
(seek ~off:0 fd (Int64.to_int hdr.Tar.Header.file_size))
in
Ok ()
in
fold f src ()
include Driver
(** Return the header needed for a particular file on disk *)
let header_of_file ?level (file: string) : Tar.Header.t =
let header_of_file ?level file =
let level = Tar.Header.compatibility level in
let stat = Unix.LargeFile.lstat file in
let* stat = safe ~off:0 Unix.LargeFile.lstat file in
let file_mode = stat.Unix.LargeFile.st_perm in
let user_id = stat.Unix.LargeFile.st_uid in
let group_id = stat.Unix.LargeFile.st_gid in
let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in
(* TODO evaluate stat.st_kind *)
let link_indicator = Tar.Header.Link.Normal in
let link_name = "" in
let uname = if level = V7 then "" else (Unix.getpwuid stat.Unix.LargeFile.st_uid).Unix.pw_name in
let devmajor = if level = Ustar then stat.Unix.LargeFile.st_dev else 0 in
let gname = if level = V7 then "" else (Unix.getgrgid stat.Unix.LargeFile.st_gid).Unix.gr_name in
let devminor = if level = Ustar then stat.Unix.LargeFile.st_rdev else 0 in
Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name
~uname ~gname ~devmajor ~devminor file stat.Unix.LargeFile.st_size
Ok (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name
~uname ~gname ~devmajor ~devminor file stat.Unix.LargeFile.st_size)
let append_file ?level ?header filename fd =
let* header = match header with
| None -> header_of_file ?level filename
| Some x -> Ok x
in
let* header_strings = Tar.encode_header ?level header in
let* _off =
List.fold_left (fun acc d ->
let* _off = acc in
Result.map_error map_to_msg
(safe ~off:0 (Unix.write_substring fd d 0) (String.length d)))
(Ok 0) header_strings
in
let* src =
Result.map_error (fun (`Unix (_off, e, f, s)) ->
`Msg (Format.sprintf "error %s in function %s %s"
(Unix.error_message e) f s))
(safe ~off:0 Unix.(openfile filename [ O_RDONLY ]) 0)
in
(* TOCTOU [also, header may not be valid for file] *)
Fun.protect ~finally:(fun () -> safe_close src)
(fun () -> copy ~src_fd:src ~dst_fd:fd
(Int64.to_int header.Tar.Header.file_size))
let write_global_extended_header ?level header fd =
let* header_strings = Tar.encode_global_extended_header ?level header in
let* _off =
List.fold_left (fun acc d ->
let* _off = acc in
Result.map_error map_to_msg
(safe ~off:0 (Unix.write_substring fd d 0) (String.length d)))
(Ok 0) header_strings
in
Ok ()
let write_end fd =
let* _written =
Result.map_error map_to_msg
(safe ~off:0
(Unix.write_substring fd (Tar.Header.zero_block ^ Tar.Header.zero_block) 0)
(Tar.Header.length + Tar.Header.length))
in
Ok ()
let create ?level ?global ?(filter = fun _ -> true) ~src dst =
let* dst_fd =
Result.map_error map_to_msg
(safe ~off:0 Unix.(openfile dst [ O_WRONLY ; O_CREAT ])
0o644)
in
Fun.protect ~finally:(fun () -> safe_close dst_fd)
(fun () ->
let* () = match global with
| None -> Ok ()
| Some hdr ->
write_global_extended_header ?level hdr dst_fd
in
let rec copy_files directory =
let* dir = safe ~off:0 Unix.opendir directory in
Fun.protect ~finally:(fun () -> try Unix.closedir dir with _ -> ())
(fun () ->
let rec next () =
try
let* name = safe ~off:0 Unix.readdir dir in
let filename = Filename.concat directory name in
let* header = header_of_file ?level filename in
if filter header then
match header.Tar.Header.link_indicator with
| Normal ->
let* () = append_file ?level ~header filename dst_fd in
next ()
| Directory ->
(* TODO first finish curdir (and close the dir fd), then go deeper *)
let* () = copy_files filename in
next ()
| _ -> Ok () (* NYI *)
else Ok ()
with End_of_file -> Ok ()
in
next ())
in
let* () = copy_files src in
write_end dst_fd)
50 changes: 29 additions & 21 deletions unix/tar_unix.mli
Original file line number Diff line number Diff line change
Expand Up @@ -16,52 +16,60 @@

(** Unix I/O for tar-formatted data. *)

(* TODO provide a type error and a pretty-printer *)

type decode_error = [
| `Fatal of int * [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ]
| `Unix of int * Unix.error * string * string
| `Unexpected_end_of_file of int
| `Msg of int * string
]

val pp_decode_error : Format.formatter -> decode_error -> unit

(** [fold f filename acc] folds over the tar archive. The function [f] is called
for each [hdr : Tar.Header.t]. It should forward the position in the file
descriptor by [hdr.Tar.Header.file_size]. *)
val fold :
((Unix.file_descr * Tar.Header.t * Tar.Header.Extended.t option,
[
| `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ]
| `Unix of Unix.error * string * string
| `End_of_file
]) result -> 'a -> 'a) ->
string -> 'a -> 'a
(Unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t ->
'a -> ('a, [ `Msg of string ]) result) ->
string -> 'a -> ('a, decode_error) result

(** [extract ~filter ~src ~dst] extracts the tar archive [src] into the
(** [extract ~filter ~src dst] extracts the tar archive [src] into the
directory [dst]. If [dst] does not exist, it is created. If [filter] is
provided (defaults to [fun _ -> true]), any file where [filter hdr] returns
[false], is skipped. *)
val extract :
?filter:(Tar.Header.t -> bool) ->
src:string -> dst:string ->
(unit, [ `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ]
| `Unix of Unix.error ]) result
src:string -> string ->
(unit, decode_error) result

(** [create ~level ~filter ~src ~dst] creates a tar archive at [dst]. It uses
[src], a filename or directory name, as input. If [filter] is provided
(** [create ~level ~filter ~src dst] creates a tar archive at [dst]. It uses
[src], a directory name, as input. If [filter] is provided
(defaults to [fun _ -> true]), any file where [filter hdr] returns [false]
is skipped. *)
val create : ?level:Tar.Header.compatibility ->
?global:Tar.Header.Extended.t ->
?filter:(Tar.Header.t -> bool) ->
src:string -> dst:string ->
(unit, [ `Msg of string | `Unix of Unix.error ]) result
src:string -> string ->
(unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result

(** [header_of_file ~level filename] returns the tar header of [filename]. *)
val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t
val header_of_file : ?level:Tar.Header.compatibility -> string ->
(Tar.Header.t, [ `Unix of (int * Unix.error * string * string) ]) result

(** [append_file ~level ~header filename fd] appends the contents of [filename]
to the tar archive [fd]. If [header] is not provided, {header_of_file} is
used for constructing a header. *)
val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t ->
string -> Unix.file_descr ->
(unit, [ `Msg of string | `Unix of Unix.error ]) result
(unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result

(** [write_extended_header ~level hdr fd] writes the extended header [hdr] to
(** [write_global_extended_header ~level hdr fd] writes the extended header [hdr] to
[fd]. *)
val write_extended_header : ?level:Tar.Header.compatibility ->
val write_global_extended_header : ?level:Tar.Header.compatibility ->
Tar.Header.Extended.t -> Unix.file_descr ->
(unit, [ `Msg of string | `Unix of Unix.error ]) result
(unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result

(** [write_end fd] writes the tar end marker to [fd]. *)
val write_end : Unix.file_descr -> (unit, Unix.error) result
val write_end : Unix.file_descr -> (unit, [ `Msg of string ]) result

0 comments on commit d85cb7e

Please sign in to comment.