From 8a77efee125685fedfff2d4ab8173e1bf70e524b Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Fri, 24 May 2024 18:55:00 +0200 Subject: [PATCH] Remove the carton distribution which is available at https://git.robur.coop/robur/carton.git --- CHANGES.carton.md | 76 -- bench/bench_pack.ml | 96 -- bench/benchmark.ml | 61 - bench/dune | 34 - bench/linear_algebra.ml | 128 -- bench/rdtsc.c | 33 - bin/carton/dune | 69 -- bin/carton/get.ml | 188 --- bin/carton/hxd_cmdliner.ml | 346 ------ bin/carton/index_pack.ml | 400 ------ bin/carton/verify_pack.ml | 312 ----- carton-git.opam | 39 - carton-lwt.opam | 41 - carton.opam | 47 - fuzz/dune | 12 - src/carton-git/carton_git.ml | 206 ---- src/carton-git/carton_git.mli | 74 -- src/carton-git/carton_git_unix.ml | 94 -- src/carton-git/carton_git_unix.mli | 41 - src/carton-git/dune | 21 - src/carton-lwt/carton_lwt.ml | 194 --- src/carton-lwt/carton_lwt.mli | 294 ----- src/carton-lwt/dune | 11 - src/carton-lwt/lwt_io.ml | 45 - src/carton/README.md | 81 -- src/carton/carton.ml | 3 - src/carton/dec.ml | 1839 ---------------------------- src/carton/dec.mli | 478 -------- src/carton/dune | 29 - src/carton/enc.ml | 611 --------- src/carton/enc.mli | 105 -- src/carton/h.ml | 620 ---------- src/carton/h.mli | 33 - src/carton/idx.ml | 670 ---------- src/carton/idx.mli | 87 -- src/carton/index.mld | 15 - src/carton/sigs.ml | 81 -- src/carton/sigs.mli | 75 -- src/carton/thin.ml | 381 ------ src/carton/thin.mli | 79 -- src/carton/zh.ml | 266 ---- src/carton/zh.mli | 47 - test/bin/dune | 8 - test/bin/get.t | 65 - test/bin/index.t | 13 - test/bin/verify.t | 24 - test/carton/bomb.idx | Bin 1576 -> 0 bytes test/carton/bomb.pack | Bin 2160 -> 0 bytes test/carton/dune | 33 - test/carton/prelude.ml | 256 ---- test/carton/test.ml | 1133 ----------------- test/carton/test_lwt.ml | 123 -- test/carton/uid.ml | 17 - 53 files changed, 10034 deletions(-) delete mode 100644 CHANGES.carton.md delete mode 100644 bench/bench_pack.ml delete mode 100644 bench/benchmark.ml delete mode 100644 bench/dune delete mode 100644 bench/linear_algebra.ml delete mode 100644 bench/rdtsc.c delete mode 100644 bin/carton/dune delete mode 100644 bin/carton/get.ml delete mode 100644 bin/carton/hxd_cmdliner.ml delete mode 100644 bin/carton/index_pack.ml delete mode 100644 bin/carton/verify_pack.ml delete mode 100644 carton-git.opam delete mode 100644 carton-lwt.opam delete mode 100644 carton.opam delete mode 100644 src/carton-git/carton_git.ml delete mode 100644 src/carton-git/carton_git.mli delete mode 100644 src/carton-git/carton_git_unix.ml delete mode 100644 src/carton-git/carton_git_unix.mli delete mode 100644 src/carton-git/dune delete mode 100644 src/carton-lwt/carton_lwt.ml delete mode 100644 src/carton-lwt/carton_lwt.mli delete mode 100644 src/carton-lwt/dune delete mode 100644 src/carton-lwt/lwt_io.ml delete mode 100644 src/carton/README.md delete mode 100644 src/carton/carton.ml delete mode 100644 src/carton/dec.ml delete mode 100644 src/carton/dec.mli delete mode 100644 src/carton/dune delete mode 100644 src/carton/enc.ml delete mode 100644 src/carton/enc.mli delete mode 100644 src/carton/h.ml delete mode 100644 src/carton/h.mli delete mode 100644 src/carton/idx.ml delete mode 100644 src/carton/idx.mli delete mode 100644 src/carton/index.mld delete mode 100644 src/carton/sigs.ml delete mode 100644 src/carton/sigs.mli delete mode 100644 src/carton/thin.ml delete mode 100644 src/carton/thin.mli delete mode 100644 src/carton/zh.ml delete mode 100644 src/carton/zh.mli delete mode 100644 
test/bin/dune delete mode 100644 test/bin/get.t delete mode 100644 test/bin/index.t delete mode 100644 test/bin/verify.t delete mode 100644 test/carton/bomb.idx delete mode 100644 test/carton/bomb.pack delete mode 100644 test/carton/dune delete mode 100644 test/carton/prelude.ml delete mode 100644 test/carton/test.ml delete mode 100644 test/carton/test_lwt.ml delete mode 100644 test/carton/uid.ml diff --git a/CHANGES.carton.md b/CHANGES.carton.md deleted file mode 100644 index 0528ccd5c..000000000 --- a/CHANGES.carton.md +++ /dev/null @@ -1,76 +0,0 @@ -### 0.7.1 (2024-02-12) Paris - France - -- Add a `Lwt.pause` to insert a cooperative point when we verify a PACK file (@dinosaure, @hannesm, #631) - -### 0.7.0 (2023-09-28) Paris - France - -- Extend the API with the ability to choose the compression level (@dinosaure, #616) -- Extend the API about `*.idx` file and be able to map entries (@dinosaure, #619) - -### 0.6.0 (2022-10-19) Paris - France - -- Add few functions to introspect target when we encode and be able to construct objects with source - (@dinosaure, #595) - -### 0.5.0 (2022-09-29) Paris - France - -- Add missing dependencies on Unix (@dra27, #573) -- Be able to choose the zlib compression level when we generate a PACK file (@dinosaure, #578) -- Fix spurious bug when we encode a patch into a PACK file (@dinosaure, #578) -- Add an accessor to get the hash `ctx` computed by the first-pass of a PACK file (@dinosaure, #584) -- Fix how we record the _weight_ of Git objects into a PACK file (@dinosaure, #591) - -### 0.4.4 (2022-04-11) Paris - France - -- Remove `bigarray-compat` and `mmap` dependencies (@dinosaure, @hannesm, #568) - -### 0.4.3 (2021-03-08) Paris - France - -- Use `Cstruct.length` instead of `Cstruct.len` (@dinosaure, #522) -- Fix big endian support via `decompress` and `checkseum` (@dinosaure, @talex5, @tmcgilchrist, #523) -- Handle huge PACK files (@dinosaure, @TheLortex, #526) - -### 0.4.2 (2021-11-05) Paris - France - -- Fix stream of inflated contents on PACK entry (@dinosaure, @talex5, @emillon, #515, #514) - -### 0.4.1 (2021-22-04) Paris - France - -- Add a `sector_size` argument when we make a PACK decoder - (@dinosaure, #493, #497) - -### 0.4.0 (2021-15-03) Paris - France - -- Handle `trunc` argument when we process a `thin` PACK file - **breaking changes** - An optional argument is added on the record which abstract the file-system. - It should be correctly handled by underlying implementation of the - file-system. It appears that, at top, we need to figure out such option, - specially for Git and `Cstruct_append` to correctly access to memories. - -### 0.3.0 (2021-05-03) Paris - France - -- Provides binaries to manipulate PACK files (@dinosaure, #475) - **breaking changes** - A transitive breaking changes from decompress.1.3.0 when - the compressor expects a `De.Lz77.window` instead of - `De.window` -- Update to decompress.1.3.0 (@dinosaure, #477) - -### 0.2.0 (2021-05-02) Saint-Malo - France - -- Unmonad `mmap` (@dinosaure, #454) - `mmap` is a _syscall_ which does not block. The ability to use it outside - the scheduler monad (like LWT) permits us to _detach_ multiple processes - to analyze a PACK file. - - With this PR, we take the advantage of `Thread` or `Lwt_preemptive` - (or more acccurately, the concurrency) to analyze a large PACK file and - speed-up the `clone`/`fetch` process. - - The distribution comes with a new binary, `carton.verify-pack` which is - `git verify-pack`. 
- -### 0.1.0 (2021-08-01) Paris - France - -- First release of carton diff --git a/bench/bench_pack.ml b/bench/bench_pack.ml deleted file mode 100644 index 4725c72e7..000000000 --- a/bench/bench_pack.ml +++ /dev/null @@ -1,96 +0,0 @@ -module Unix_scheduler = Carton.Make (struct - type 'a t = 'a -end) - -open Unix_scheduler - -let bomb_pack = "../test/carton/bomb.pack" -let bomb_idx = "../test/carton/bomb.idx" - -let scheduler = - { Carton.bind = (fun x f -> f (prj x)); Carton.return = (fun x -> inj x) } - -let map fd ~pos len = - let { Unix.LargeFile.st_size; _ } = Unix.LargeFile.fstat fd in - let len = - if Int64.of_int len <= Int64.sub st_size pos then len - else Int64.(to_int (sub st_size pos)) - in - let res = - Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false [| len |] - in - Bigarray.array1_of_genarray res - -let fd = Unix.openfile bomb_pack Unix.[ O_RDONLY ] 0o644 -let () = at_exit (fun () -> Unix.close fd) - -let index = - let tbl = Hashtbl.create 0x100 in - - let fd = Unix.openfile bomb_idx Unix.[ O_RDONLY ] 0o644 in - let st = Unix.fstat fd in - let payload = map fd ~pos:0L st.Unix.st_size in - Unix.close fd; - - let idx = - Carton.Dec.Idx.make payload ~uid_ln:Digestif.SHA1.digest_size - ~uid_rw:Digestif.SHA1.to_raw_string ~uid_wr:Digestif.SHA1.of_raw_string - in - let f ~uid ~offset ~crc:_ = Hashtbl.add tbl uid offset in - Carton.Dec.Idx.iter ~f idx; - tbl - -let z = De.bigstring_create De.io_buffer_size -let w = De.make_window ~bits:15 -let allocate _ = w - -let pack = - Carton.Dec.make fd ~z ~allocate ~uid_ln:Digestif.SHA1.digest_size - ~uid_rw:Digestif.SHA1.of_raw_string (fun uid -> Hashtbl.find index uid) - -let ( >>= ) = scheduler.Carton.bind -let return = scheduler.Carton.return -let uid_0 = Digestif.SHA1.of_hex "7af99c9e7d4768fa681f4fe4ff61259794cf719b" -let uid_1 = Digestif.SHA1.of_hex "d9513477b01825130c48c4bebed114c4b2d50401" - -let load uid = - let weight = Carton.Dec.weight_of_uid ~map pack ~weight:Carton.Dec.null uid in - let raw = Carton.Dec.make_raw ~weight in - let _ = Carton.Dec.of_uid ~map pack raw uid in - return () - -let fn_map = Benchmark.V (fun () -> ignore (map fd ~pos:10L (1024 * 1024))) -let fn_load_0 = Benchmark.V (fun () -> ignore (prj (load uid_0))) -let fn_load_1 = Benchmark.V (fun () -> ignore (prj (load uid_1))) -let s x = Mtime.Span.of_uint64_ns (Int64.mul (Int64.of_int x) 1_000_000_000L) - -let run fn_load title = - let (Benchmark.V fn) = fn_load in - let _ = fn () in - let samples_map = Benchmark.run (s 8) fn_map in - let samples_load = Benchmark.run (s 8) fn_load in - - match - ( Linear_algebra.ols - (fun m -> m.(1)) - [| (fun m -> m.(0)); (fun _ -> 1.) |] - samples_map, - Linear_algebra.ols - (fun m -> m.(1)) - [| (fun m -> m.(0)); (fun _ -> 1.) |] - samples_load ) - with - | Ok (estimate_map, r_map), Ok (estimate_load, r_load) -> - Fmt.pr "%15.2fns (r²: %f) [map syscall].\n%!" estimate_map.(0) r_map; - Fmt.pr "%15.2fns (r²: %f) [load %s].\n%!" estimate_load.(0) r_load title - | Error (`Msg err), _ | _, Error (`Msg err) -> - Fmt.epr "%s: %s.\n%!" Sys.argv.(0) err - -let () = - match Sys.argv with - | [| _; "0" |] -> run fn_load_0 "commit (0 delta)" - | [| _; "1" |] -> run fn_load_1 "tree (1 delta)" - | _ -> - Fmt.epr "%s [0|1].\n" Sys.argv.(0); - Fmt.epr "0 commit (0 delta).\n%!"; - Fmt.epr "1 tree (1 delta).\n%!" 
diff --git a/bench/benchmark.ml b/bench/benchmark.ml deleted file mode 100644 index ae7d8bc6f..000000000 --- a/bench/benchmark.ml +++ /dev/null @@ -1,61 +0,0 @@ -external tick : unit -> (int64[@unboxed]) = "none" "get_tick" [@@noalloc] -external now : unit -> (int64[@unboxed]) = "none" "get_now" [@@noalloc] - -type t = V : (unit -> 'a) -> t - -let stabilize_garbage_collector () = - let rec go limit last_heap_live_words = - if limit <= 0 then - failwith "Unable to stabilize the number of live words in the heap"; - Gc.compact (); - let stat = Gc.stat () in - if stat.Gc.live_words <> last_heap_live_words then - go (pred limit) stat.Gc.live_words - in - go 10 0 - -let runnable f i = - for _ = 1 to i do - ignore @@ Sys.opaque_identity (f ()) - done -[@@inline] - -let samples = 1000 - -let exceeded_allowed_time allowed_time_span t = - let t' = Mtime.of_uint64_ns (now ()) in - Mtime.Span.compare (Mtime.span t t') allowed_time_span > 0 - -let run quota t = - let idx = ref 0 in - let run = ref 0 in - let (V fn) = t in - - let m = Array.create_float (samples * 2) in - - stabilize_garbage_collector (); - let init_time = Mtime.of_uint64_ns (now ()) in - - while (not (exceeded_allowed_time quota init_time)) && !idx < samples do - let current_run = !run in - let current_idx = !idx in - - let time_0 = now () in - - runnable fn current_run; - - let time_1 = now () in - - m.((current_idx * 2) + 0) <- float_of_int current_run; - m.((current_idx * 2) + 1) <- Int64.to_float (Int64.sub time_1 time_0); - - let next = - (max : int -> int -> int) - (int_of_float (float_of_int current_run *. 1.01)) - (succ current_run) - in - run := next; - incr idx - done; - - Array.init samples (fun i -> [| m.((i * 2) + 0); m.((i * 2) + 1) |]) diff --git a/bench/dune b/bench/dune deleted file mode 100644 index a5033840a..000000000 --- a/bench/dune +++ /dev/null @@ -1,34 +0,0 @@ -(executable - (name bench_pack) - (enabled_if - (or - (= %{architecture} "amd64") - (= %{architecture} "x86_64"))) - (libraries - bigstringaf - mtime - fmt - decompress.de - decompress.zl - digestif.c - carton - unix) - (foreign_stubs - (language c) - (names rdtsc))) - -(rule - (alias runbench) - (package carton) - (enabled_if - (or - (= %{architecture} "amd64") - (= %{architecture} "x86_64"))) - (deps - (:bench bench_pack.exe) - ../test/carton/bomb.idx - ../test/carton/bomb.pack) - (action - (progn - (run %{bench} 0) - (run %{bench} 1)))) diff --git a/bench/linear_algebra.ml b/bench/linear_algebra.ml deleted file mode 100644 index ced83a608..000000000 --- a/bench/linear_algebra.ml +++ /dev/null @@ -1,128 +0,0 @@ -(* Code under Apache License 2.0 - Jane Street Group, LLC *) - -let col_norm a column = - let acc = ref 0. in - for i = 0 to Array.length a - 1 do - let entry = a.(i).(column) in - acc := !acc +. (entry *. entry) - done; - sqrt !acc - -let col_inner_prod t j1 j2 = - let acc = ref 0. in - for i = 0 to Array.length t - 1 do - acc := !acc +. (t.(i).(j1) *. t.(i).(j2)) - done; - !acc - -let qr_in_place a = - let m = Array.length a in - if m = 0 then [||], [||] - else - let n = Array.length a.(0) in - let r = Array.make_matrix n n 0. in - for j = 0 to n - 1 do - let alpha = col_norm a j in - r.(j).(j) <- alpha; - let one_over_alpha = 1. /. alpha in - for i = 0 to m - 1 do - a.(i).(j) <- a.(i).(j) *. one_over_alpha - done; - for j2 = j + 1 to n - 1 do - let c = col_inner_prod a j j2 in - r.(j).(j2) <- c; - for i = 0 to m - 1 do - a.(i).(j2) <- a.(i).(j2) -. (c *. 
a.(i).(j)) - done - done - done; - a, r - -let qr ?(in_place = false) a = - let a = if in_place then a else Array.map Array.copy a in - qr_in_place a - -let mul_mv ?(trans = false) a x = - let rows = Array.length a in - if rows = 0 then [||] - else - let cols = Array.length a.(0) in - let m, n, get = - if trans then - let get i j = a.(j).(i) in - cols, rows, get - else - let get i j = a.(i).(j) in - rows, cols, get - in - if n <> Array.length x then failwith "Dimension mismatch"; - let result = Array.make m 0. in - for i = 0 to m - 1 do - let v, _ = - Array.fold_left - (fun (acc, j) x -> acc +. (get i j *. x), succ j) - (0., 0) x - in - result.(i) <- v - done; - result - -let is_nan v = match classify_float v with FP_nan -> true | _ -> false -let error_msg msg = Error (`Msg msg) - -let triu_solve r b = - let m = Array.length b in - if m <> Array.length r then - error_msg - "triu_solve R b requires R to be square with same number of rows as b" - else if m = 0 then Ok [||] - else if m <> Array.length r.(0) then - error_msg "triu_solve R b requires R to be a square" - else - let sol = Array.copy b in - for i = m - 1 downto 0 do - sol.(i) <- sol.(i) /. r.(i).(i); - for j = 0 to i - 1 do - sol.(j) <- sol.(j) -. (r.(j).(i) *. sol.(i)) - done - done; - if Array.exists is_nan sol then error_msg "triu_solve detected NaN result" - else Ok sol - -let ols ?(in_place = false) a b = - let q, r = qr ~in_place a in - triu_solve r (mul_mv ~trans:true q b) - -let make_lr_inputs responder predictors m = - ( Array.init (Array.length m) (fun i -> - Array.map (fun a -> a m.(i)) predictors), - Array.init (Array.length m) (fun i -> responder m.(i)) ) - -let r_square m responder predictors r = - let predictors_matrix, responder_vector = - make_lr_inputs responder predictors m - in - let sum_responder = Array.fold_left ( +. ) 0. responder_vector in - let mean = sum_responder /. float (Array.length responder_vector) in - let tot_ss = ref 0. in - let res_ss = ref 0. in - let predicted i = - let x = ref 0. in - for j = 0 to Array.length r - 1 do - x := !x +. (predictors_matrix.(i).(j) *. r.(j)) - done; - !x - in - for i = 0 to Array.length responder_vector - 1 do - tot_ss := !tot_ss +. ((responder_vector.(i) -. mean) ** 2.); - res_ss := !res_ss +. ((responder_vector.(i) -. predicted i) ** 2.) - done; - 1. -. (!res_ss /. 
!tot_ss) - -let ols responder predictors m = - let matrix, vector = make_lr_inputs responder predictors m in - match ols ~in_place:true matrix vector with - | Ok estimates -> - let r_square = r_square m responder predictors estimates in - Ok (estimates, r_square) - | Error _ as err -> err diff --git a/bench/rdtsc.c b/bench/rdtsc.c deleted file mode 100644 index e55ae16af..000000000 --- a/bench/rdtsc.c +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include - -#include -#include -#include -#include - -#ifndef __unused -#define __unused(x) x __attribute((unused)) -#endif -#define __unit() value __unused(unit) - -uint64_t -get_now(__unit ()) -{ - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); - - return ((uint64_t) ts.tv_sec - * (uint64_t) 1000000000LL - + (uint64_t) ts.tv_nsec); -} - -uint64_t -get_tick(__unit ()) -{ - unsigned hi, lo; - __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); - - return (((unsigned long long) lo) | (((unsigned long long) hi) << 32)); -} diff --git a/bin/carton/dune b/bin/carton/dune deleted file mode 100644 index 5921c115b..000000000 --- a/bin/carton/dune +++ /dev/null @@ -1,69 +0,0 @@ -(executable - (name verify_pack) - (public_name carton.verify-pack) - (modules verify_pack) - (package carton) - (libraries - cmdliner - bos - rresult - fpath - decompress.de - decompress.zl - bigstringaf - fmt - carton - fiber - unix - digestif.c)) - -(executable - (name index_pack) - (public_name carton.index-pack) - (modules index_pack) - (package carton) - (flags - (:standard -thread)) - (libraries - carton.thin - cmdliner - bos - rresult - fpath - ke - cstruct - decompress.de - decompress.zl - bigstringaf - fmt - carton - fiber - unix - digestif.c)) - -(executable - (name get) - (public_name carton.get) - (modules get) - (package carton) - (libraries - hxd.core - hxd.string - hxd_cmdliner - cmdliner - bos - rresult - fpath - decompress.de - decompress.zl - bigstringaf - fmt - carton - fiber - unix - digestif.c)) - -(library - (name hxd_cmdliner) - (modules hxd_cmdliner) - (libraries fmt rresult hxd.core cmdliner)) diff --git a/bin/carton/get.ml b/bin/carton/get.ml deleted file mode 100644 index a2cb53b7d..000000000 --- a/bin/carton/get.ml +++ /dev/null @@ -1,188 +0,0 @@ -let failwith fmt = Fmt.kstr failwith fmt - -module SHA1 = struct - include Digestif.SHA1 - - let feed ctx ?off ?len bs = feed_bigstring ctx ?off ?len bs - let null = digest_string "" - let length = digest_size - let compare a b = String.compare (to_raw_string a) (to_raw_string b) -end - -let load_idx fpath = - let fd = Unix.openfile (Fpath.to_string fpath) Unix.[ O_RDONLY ] 0o644 in - let ln = (Unix.fstat fd).Unix.st_size in - let mp = - Unix.map_file fd ~pos:0L Bigarray.char Bigarray.c_layout false [| ln |] - in - let mp = Bigarray.array1_of_genarray mp in - Unix.close fd; - Carton.Dec.Idx.make mp ~uid_ln:SHA1.length ~uid_rw:SHA1.to_raw_string - ~uid_wr:SHA1.of_raw_string - -let z = De.bigstring_create De.io_buffer_size -let allocate bits = De.make_window ~bits - -let map fd ~pos len = - let max = (Unix.LargeFile.fstat fd).Unix.LargeFile.st_size in - let len = min (Int64.sub max pos) (Int64.of_int len) in - let len = Int64.to_int len in - let res = - Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false [| len |] - in - Bigarray.array1_of_genarray res - -type cfg = Hex of { hxd : Hxd.cfg; path : bool; info : bool } | Raw - -let show_path path = - let lst = Carton.Dec.path_to_list path in - let arr = Array.of_list lst in - Format.printf "path: "; - for i = 0 to Array.length arr - 1 do - if i 
> 0 then Format.printf " "; - Format.printf "%8Lx\n" arr.(i) - done - -let pp_kind ppf = function - | `A -> Format.pp_print_string ppf "a" - | `B -> Format.pp_print_string ppf "b" - | `C -> Format.pp_print_string ppf "c" - | `D -> Format.pp_print_string ppf "d" - -let show_info v = - Format.printf "depth: %8d\n%!" (Carton.Dec.depth v); - Format.printf "length: %8d\n%!" (Carton.Dec.len v); - Format.printf "kind: %a\n%!" pp_kind (Carton.Dec.kind v) - -let show cfg path v = - match cfg with - | Raw -> - let len = Carton.Dec.len v in - let raw = Bigstringaf.substring (Carton.Dec.raw v) ~off:0 ~len in - print_string raw - | Hex { hxd; path = p; info = i } -> - let len = Carton.Dec.len v in - let raw = Bigstringaf.substring (Carton.Dec.raw v) ~off:0 ~len in - if p then show_path path; - if i then show_info v; - if p || i then Format.printf "\n%!"; - Format.printf "%a%!" (Hxd_string.pp hxd) raw; - Format.printf "%!" - -let get_object_from_uid cfg fpath uid = - let open Rresult in - let open Bos in - OS.File.must_exist (Fpath.set_ext "idx" fpath) >>| load_idx >>= fun idx -> - let find uid = - match Carton.Dec.Idx.find idx uid with - | Some (_crc, offset) -> offset - | None -> failwith "object %a not found" SHA1.pp uid - in - let fd = Unix.openfile (Fpath.to_string fpath) Unix.[ O_RDONLY ] 0o644 in - let pack = - Carton.Dec.make fd ~z ~allocate ~uid_ln:SHA1.length - ~uid_rw:SHA1.of_raw_string find - in - let path = Carton.Dec.path_of_uid ~map pack uid in - let weight = Carton.Dec.weight_of_uid ~map pack ~weight:Carton.Dec.null uid in - let raw = Carton.Dec.make_raw ~weight in - let v = Carton.Dec.of_uid ~map pack raw uid in - show cfg path v; - Ok () - -let never _ = assert false - -let get_object_from_offset cfg fpath offset = - let fd = Unix.openfile (Fpath.to_string fpath) Unix.[ O_RDONLY ] 0o644 in - let pack = - Carton.Dec.make fd ~z ~allocate ~uid_ln:SHA1.length - ~uid_rw:SHA1.of_raw_string never - in - let path = Carton.Dec.path_of_offset ~map pack ~cursor:offset in - let weight = - Carton.Dec.weight_of_offset ~map pack ~weight:Carton.Dec.null offset - in - let raw = Carton.Dec.make_raw ~weight in - let v = Carton.Dec.of_offset ~map pack raw ~cursor:offset in - show cfg path v; - Ok () - -let run hxd path info raw pack uid_or_offset = - match raw, uid_or_offset with - | true, `Uid uid -> get_object_from_uid Raw pack uid - | true, `Offset ofs -> get_object_from_offset Raw pack ofs - | false, `Uid uid -> get_object_from_uid (Hex { hxd; path; info }) pack uid - | false, `Offset ofs -> - get_object_from_offset (Hex { hxd; path; info }) pack ofs - -let run hxd path info raw pack uid_or_offset = - match run hxd path info raw pack uid_or_offset with - | Ok () -> Ok () - | Error (`Msg err) -> Error (Fmt.str "%s." err) - -open Cmdliner - -let existing_file = - let parser str = - match Fpath.of_string str with - | Ok v when Sys.file_exists str -> Ok v - | Ok v -> Rresult.R.error_msgf "%a not found" Fpath.pp v - | Error _ as err -> err - in - Arg.conv (parser, Fpath.pp) - -let uid_or_offset = - let parser str = - match Int64.of_string_opt str, SHA1.of_hex_opt str with - | Some v, _ -> Ok (`Offset v) - | _, Some uid -> Ok (`Uid uid) - | _ -> Rresult.R.error_msgf "Invalid value: %S" str - in - let pp ppf = function - | `Uid v -> SHA1.pp ppf v - | `Offset v -> Format.fprintf ppf "0x%Lx" v - in - Arg.conv (parser, pp) - -let with_path = - let doc = "Show the delta-path." in - Arg.(value & flag & info [ "with-path" ] ~doc) - -let with_info = - let doc = "Show $(i,metadata) of the given object." 
in - Arg.(value & flag & info [ "with-info" ] ~doc) - -let raw = - let doc = - "Show as is the given object without extra-informations or post-treatments." - in - Arg.(value & flag & info [ "r"; "raw" ] ~doc) - -let pack = - Arg.(required & pos 0 (some existing_file) None & info [] ~docv:"") - -let uid_or_offset = - Arg.(required & pos 1 (some uid_or_offset) None & info [] ~docv:"") - -let cmd = - let doc = "Extract an object from a PACK file." in - let man = - [ - `S Manpage.s_description; - `P - "Extract an object from the given PACK file and its SHA-1 or its \ - offset."; - ] - in - let info = Cmd.info "get" ~doc ~man in - Cmd.v info - Term.( - const run - $ Hxd_cmdliner.cmd - $ with_path - $ with_info - $ raw - $ pack - $ uid_or_offset) - -let () = exit @@ Cmd.eval_result cmd diff --git a/bin/carton/hxd_cmdliner.ml b/bin/carton/hxd_cmdliner.ml deleted file mode 100644 index 5e3fb8d61..000000000 --- a/bin/carton/hxd_cmdliner.ml +++ /dev/null @@ -1,346 +0,0 @@ -let run_fmt = function - | Some v -> Hxd.Fmt.set_style_renderer Format.std_formatter v - | None -> () - -let notzen = Array.make 256 `None - -let notzen = - for i = 0 to 31 do - notzen.(i) <- `Style (`Fg, `bit24 (0xaf, 0xd7, 0xff)) - done; - for i = 48 to 57 do - notzen.(i) <- `Style (`Fg, `bit24 (0xaf, 0xdf, 0x77)) - done; - for i = 65 to 90 do - notzen.(i) <- `Style (`Fg, `bit24 (0xff, 0xaf, 0x5f)) - done; - for i = 97 to 122 do - notzen.(i) <- `Style (`Fg, `bit24 (0xff, 0xaf, 0xd7)) - done; - Hxd.colorscheme_of_array notzen - -let pixzen = - [| - 0x000000; - 0x560000; - 0x640000; - 0x750000; - 0x870000; - 0x9b0000; - 0xb00000; - 0xc60000; - 0xdd0000; - 0xf50000; - 0xff0f0f; - 0xff2828; - 0xff4343; - 0xff5e5e; - 0xff7979; - 0xfe9595; - 0x4c1600; - 0x561900; - 0x641e00; - 0x752300; - 0x872800; - 0x9b2e00; - 0xb03400; - 0xc63b00; - 0xdd4200; - 0xf54900; - 0xff570f; - 0xff6928; - 0xff7b43; - 0xff8e5e; - 0xffa179; - 0xfeb595; - 0x4c3900; - 0x564000; - 0x644b00; - 0x755700; - 0x876500; - 0x9b7400; - 0xb08400; - 0xc69400; - 0xdda600; - 0xf5b800; - 0xffc30f; - 0xffc928; - 0xffd043; - 0xffd65e; - 0xffdd79; - 0xfee495; - 0x4c4c00; - 0x565600; - 0x646400; - 0x757500; - 0x878700; - 0x9b9b00; - 0xb0b000; - 0xc6c600; - 0xdddd00; - 0xf5f500; - 0xffff0f; - 0xffff28; - 0xffff43; - 0xffff5e; - 0xffff79; - 0xfffe95; - 0x324c00; - 0x395600; - 0x426400; - 0x4e7500; - 0x5a8700; - 0x679b00; - 0x75b000; - 0x84c600; - 0x93dd00; - 0xa3f500; - 0xafff0f; - 0xb7ff28; - 0xc0ff43; - 0xc9ff5e; - 0xd2ff79; - 0xdbfe95; - 0x1f4c00; - 0x235600; - 0x296400; - 0x307500; - 0x388700; - 0x409b00; - 0x49b000; - 0x52c600; - 0x5cdd00; - 0x66f500; - 0x73ff0f; - 0x82ff28; - 0x91ff43; - 0xa1ff5e; - 0xb1ff79; - 0xc1fe95; - 0x004c00; - 0x005600; - 0x006400; - 0x007500; - 0x008700; - 0x009b00; - 0x00b000; - 0x00c600; - 0x00dd00; - 0x00f500; - 0x0fff0f; - 0x28ff28; - 0x43ff43; - 0x5eff5e; - 0x79ff79; - 0x95fe95; - 0x004c19; - 0x00561c; - 0x006421; - 0x007527; - 0x00872d; - 0x009b33; - 0x00b03a; - 0x00c642; - 0x00dd49; - 0x00f551; - 0x0fff5f; - 0x28ff70; - 0x43ff81; - 0x5eff93; - 0x79ffa6; - 0x95feb8; - 0x004c4c; - 0x005656; - 0x006464; - 0x007575; - 0x008787; - 0x009b9b; - 0x00b0b0; - 0x00c6c6; - 0x00dddd; - 0x00f5f5; - 0x0ffffe; - 0x28fffe; - 0x43fffe; - 0x5efffe; - 0x79ffff; - 0x95fffe; - 0x00394c; - 0x004056; - 0x004b64; - 0x005775; - 0x006587; - 0x00749b; - 0x0084b0; - 0x0094c6; - 0x00a6dd; - 0x00b8f5; - 0x0fc3ff; - 0x28c9ff; - 0x43d0ff; - 0x5ed6ff; - 0x79ddff; - 0x95e4fe; - 0x00264c; - 0x002b56; - 0x003264; - 0x003a75; - 0x004387; - 0x004d9b; - 0x0058b0; - 0x0063c6; - 
0x006edd; - 0x007af5; - 0x0f87ff; - 0x2893ff; - 0x43a1ff; - 0x5eaeff; - 0x79bcff; - 0x95cafe; - 0x00134c; - 0x001556; - 0x001964; - 0x001d75; - 0x002187; - 0x00269b; - 0x002cb0; - 0x0031c6; - 0x0037dd; - 0x003df5; - 0x0f4bff; - 0x285eff; - 0x4372ff; - 0x5e86ff; - 0x799aff; - 0x95b0fe; - 0x19004c; - 0x1c0056; - 0x210064; - 0x270075; - 0x2d0087; - 0x33009b; - 0x3a00b0; - 0x4200c6; - 0x4900dd; - 0x5100f5; - 0x5f0fff; - 0x7028ff; - 0x8143ff; - 0x935eff; - 0xa679ff; - 0xb895fe; - 0x33004c; - 0x390056; - 0x420064; - 0x4e0075; - 0x5a0087; - 0x67009b; - 0x7500b0; - 0x8400c6; - 0x9300dd; - 0xa300f5; - 0xaf0fff; - 0xb728ff; - 0xc043ff; - 0xc95eff; - 0xd279ff; - 0xdb95fe; - 0x4c004c; - 0x560056; - 0x640064; - 0x750075; - 0x870087; - 0x9b009b; - 0xb000b0; - 0xc600c6; - 0xdd00dd; - 0xf500f5; - 0xfe0fff; - 0xfe28ff; - 0xfe43ff; - 0xfe5eff; - 0xfe79ff; - 0xfe95fe; - 0x4c0032; - 0x560039; - 0x640042; - 0x75004e; - 0x87005a; - 0x9b0067; - 0xb00075; - 0xc60084; - 0xdd0093; - 0xf500a3; - 0xff0faf; - 0xff28b7; - 0xff43c0; - 0xff5ec9; - 0xff79d2; - 0xffffff; - |] - -let pixzen = - Hxd.colorscheme_of_array - (Array.map - (fun v -> - `Style (`Bg, `bit24 (v lsr 16, (v lsr 8) land 0xff, v land 0xff))) - pixzen) - -let make () cols groupsize uppercase pixel = - Hxd.xxd ~cols ~groupsize ~uppercase (if pixel then pixzen else notzen) - -open Cmdliner - -let cols = - let parser str = - match int_of_string str with - | n -> - if n < 1 || n > 256 then - Rresult.R.error_msgf "Invalid value (must <= 256 && > 0): %d" n - else Ok n - | exception _ -> Rresult.R.error_msgf "Invalid value: %S" str - in - Arg.conv ~docv:"" (parser, Format.pp_print_int) - -let number = - let parser str = - match int_of_string str with - | n -> - if n < 0 then - Rresult.R.error_msgf "Invalid value (must be positive): %d" n - else Ok n - | exception _ -> Rresult.R.error_msgf "Invalid value: %S" str - in - Arg.conv ~docv:"" (parser, Format.pp_print_int) - -let cols = - let doc = "Format octets per line. Default 16. Max 256." in - Arg.(value & opt cols 16 & info [ "c"; "cols" ] ~doc ~docv:"") - -let groupsize = - let doc = - "Separate the output of every bytes (two hex characters) by a \ - whitespace." - in - Arg.(value & opt number 2 & info [ "g"; "groupsize" ] ~doc ~docv:"") - -let uppercase = - let doc = "Use upper case hex letters. Default is lower case." in - Arg.(value & flag & info [ "u" ] ~doc) - -let pixel = - let doc = "Use background colors instead of foreground colors." in - Arg.(value & flag & info [ "pixel" ] ~doc) - -let style_renderer ?env () = - let enum = [ "auto", None; "always", Some `Ansi; "never", Some `None ] in - let color = Arg.enum enum in - let enum_alts = Arg.doc_alts_enum enum in - let doc = Fmt.str "Colorize the output. $(docv) must be %s." 
enum_alts in - Arg.(value & opt color None & info [ "color" ] ?env ~doc ~docv:"") - -let setup_fmt = - let env = Cmd.Env.info "HXD_COLOR" in - Term.(const run_fmt $ style_renderer ~env ()) - -let cmd = Term.(const make $ setup_fmt $ cols $ groupsize $ uppercase $ pixel) diff --git a/bin/carton/index_pack.ml b/bin/carton/index_pack.ml deleted file mode 100644 index e7892a2f3..000000000 --- a/bin/carton/index_pack.ml +++ /dev/null @@ -1,400 +0,0 @@ -open Rresult - -let ( <.> ) f g x = f (g x) - -let verbosef max ppf fmt = - let counter = ref 0 in - let mutex = Mutex.create () in - fun () -> - Mutex.lock mutex; - Format.fprintf ppf fmt (!counter * 100 / max) !counter max; - incr counter; - Mutex.unlock mutex - -module Scheduler = Carton.Make (Fiber) -open Scheduler - -module SHA1 = struct - include Digestif.SHA1 - - let feed ctx ?off ?len bs = feed_bigstring ctx ?off ?len bs - let null = digest_string "" - let length = digest_size - let compare a b = String.compare (to_raw_string a) (to_raw_string b) -end - -module Verify = Carton.Dec.Verify (SHA1) (Scheduler) (Fiber) -module First_pass = Carton.Dec.Fp (SHA1) -module Encoder = Carton.Dec.Idx.N (SHA1) -open Fiber - -let sched = - let open Scheduler in - { - Carton.bind = (fun x f -> inj (bind (prj x) (fun x -> prj (f x)))); - return = (fun x -> inj (return x)); - } - -let z = De.bigstring_create De.io_buffer_size -let allocate bits = De.make_window ~bits - -let replace hashtbl k v = - match Hashtbl.find_opt hashtbl k with - | Some v' -> if v' < v then Hashtbl.replace hashtbl k v - | None -> Hashtbl.add hashtbl k v - -let never _ = assert false - -let zip a b = - if Array.length a <> Array.length b then invalid_arg "zip: lengths mismatch"; - Array.init (Array.length a) (fun i -> a.(i), b.(i)) - -exception Exists - -let share l0 l1 = - try - List.iter - (fun (v, _) -> if List.exists (Int64.equal v) l1 then raise Exists) - l0; - false - with Exists -> true - -let digest ~kind ?(off = 0) ?len buf = - let len = - match len with Some len -> len | None -> Bigstringaf.length buf - off - in - let ctx = SHA1.empty in - let ctx = - match kind with - | `A -> SHA1.feed_string ctx (Fmt.str "commit %d\000" len) - | `B -> SHA1.feed_string ctx (Fmt.str "tree %d\000" len) - | `C -> SHA1.feed_string ctx (Fmt.str "blob %d\000" len) - | `D -> SHA1.feed_string ctx (Fmt.str "tag %d\000" len) - in - let ctx = SHA1.feed_bigstring ctx ~off ~len buf in - SHA1.get ctx - -let ( >>= ) = sched.bind -let return = sched.return - -let blit_from_bytes src src_off dst dst_off len = - Bigstringaf.blit_from_bytes src ~src_off dst ~dst_off ~len - -let read ?save ic = - let tp = Bytes.create 0x1000 in - let ke = Ke.Rke.create ~capacity:0x1000 Bigarray.char in - let rec go filled inputs = - match Ke.Rke.N.peek ke with - | [] -> - let len = input ic tp 0 0x1000 in - Option.iter (fun save -> Buffer.add_subbytes save tp 0 len) save; - if len > 0 then ( - Ke.Rke.N.push ke ~blit:blit_from_bytes ~length:Bytes.length ~off:0 - ~len tp; - go filled inputs) - else return filled - | src :: _ -> - let src = Cstruct.of_bigarray src in - let len = min (Cstruct.length inputs) (Cstruct.length src) in - Cstruct.blit src 0 inputs 0 len; - Ke.Rke.N.shift_exn ke len; - if len < Cstruct.length inputs then - go (filled + len) (Cstruct.shift inputs len) - else return (filled + len) - in - fun filled inputs -> go filled inputs - -let first_pass ?(verbose = false) ?save ic = - let fl_buffer = Cstruct.create De.io_buffer_size in - let zw = De.make_window ~bits:15 in - let allocate _ = zw in - - let 
read_cstruct = read ?save ic in - let read_bytes () buf ~off ~len = - let rec go rest raw = - if rest <= 0 then ( - Cstruct.blit_to_bytes fl_buffer 0 buf off len; - return (abs rest + len)) - else - read_cstruct 0 raw >>= function - | 0 -> return (len - rest) - | filled -> go (rest - filled) (Cstruct.shift raw filled) - in - go len fl_buffer - in - - First_pass.check_header sched read_bytes () >>= fun (max, _, len) -> - let decoder = First_pass.decoder ~o:z ~allocate `Manual in - let decoder = First_pass.src decoder (Cstruct.to_bigarray fl_buffer) 0 len in - - let children = Hashtbl.create 0x100 in - let where = Hashtbl.create 0x100 in - let weight = Hashtbl.create 0x100 in - let length = Hashtbl.create 0x100 in - let checks = Hashtbl.create 0x100 in - let matrix = Array.make max Verify.unresolved_node in - - let rec go decoder = - match First_pass.decode decoder with - | `Await decoder -> - read_cstruct 0 fl_buffer >>= fun len -> - go (First_pass.src decoder (Cstruct.to_bigarray fl_buffer) 0 len) - | `Peek decoder -> - let keep = First_pass.src_rem decoder in - read_cstruct 0 (Cstruct.shift fl_buffer keep) >>= fun len -> - go - (First_pass.src decoder - (Cstruct.to_bigarray fl_buffer) - 0 (keep + len)) - | `Entry ({ First_pass.kind = Base _; offset; crc; size; _ }, decoder) -> - let n = First_pass.count decoder - 1 in - if verbose then - Fmt.pr "\rIndexing objects: %3d%% (%d/%d)%!" (n * 100 / max) n max; - Hashtbl.add weight offset size; - Hashtbl.add length offset size; - Hashtbl.add checks offset crc; - Hashtbl.add where offset n; - matrix.(n) <- Verify.unresolved_base ~cursor:offset; - go decoder - | `Entry - ( { - First_pass.kind = Ofs { sub = s; source; target }; - offset; - crc; - size; - _; - }, - decoder ) -> - let n = First_pass.count decoder - 1 in - if verbose then - Fmt.pr "\rIndexing objects: %3d%% (%d/%d)%!" (n * 100 / max) n max; - replace weight Int64.(sub offset (Int64.of_int s)) source; - replace weight offset target; - Hashtbl.add length offset size; - Hashtbl.add checks offset crc; - Hashtbl.add where offset n; - (try - let vs = - Hashtbl.find children (`Ofs Int64.(sub offset (of_int s))) - in - Hashtbl.replace children - (`Ofs Int64.(sub offset (of_int s))) - (offset :: vs) - with _ -> - Hashtbl.add children (`Ofs Int64.(sub offset (of_int s))) [ offset ]); - go decoder - | `Entry - ( { First_pass.kind = Ref { ptr; target; source }; offset; crc; size; _ }, - decoder ) -> - let n = First_pass.count decoder - 1 in - if verbose then - Fmt.pr "\rIndexing objects: %3d%% (%d/%d)%!" (n * 100 / max) n max; - replace weight offset (Stdlib.max target source); - Hashtbl.add length offset size; - Hashtbl.add checks offset crc; - Hashtbl.add where offset n; - (try - let vs = Hashtbl.find children (`Ref ptr) in - Hashtbl.replace children (`Ref ptr) (offset :: vs) - with _ -> Hashtbl.add children (`Ref ptr) [ offset ]); - go decoder - | `End hash -> - if verbose then - Fmt.pr "\rIndexing objects: 100%% (%d/%d), done.\n%!" 
max max; - close_in ic; - return (Ok hash) - | `Malformed err -> return (Error (`Msg err)) - in - go decoder >>= function - | Error _ as err -> return err - | Ok hash -> - let weight ~cursor = Hashtbl.find weight cursor in - let oracle = - let where ~cursor = Hashtbl.find where cursor in - let children ~cursor ~uid = - match - ( Hashtbl.find_opt children (`Ofs cursor), - Hashtbl.find_opt children (`Ref uid) ) - with - | Some a, Some b -> List.sort_uniq compare (a @ b) - | Some x, None | None, Some x -> x - | None, None -> [] - in - { Carton.Dec.where; children; digest; weight } - in - return (Ok (hash, oracle, matrix, where, checks)) - -let ( >>? ) x f = x >>= function Ok x -> f x | Error err -> return (Error err) -let ( let* ) x f = x >>= f -let ( let+ ) x f = x >>? f - -let second_pass ?(verbose = false) ~map oracle matrix = - let pack = - Carton.Dec.make () ~allocate ~z ~uid_ln:SHA1.length - ~uid_rw:SHA1.of_raw_string never - in - let max_unresolveds = - Array.fold_left (fun a x -> if Verify.is_base x then a else succ a) 0 matrix - in - let verbose_deltas = - match verbose with - | false -> ignore - | true -> - verbosef max_unresolveds Fmt.stdout - "\rResolving deltas: %3d%% (%d/%d).%!" - in - let* () = - Verify.verify ~threads:(Fiber.get_concurrency ()) pack ~map ~oracle - ~verbose:verbose_deltas ~matrix - |> inj - in - if verbose then - Fmt.pr "\rResolving deltas: 100%% (%d/%d), done.\n%!" max_unresolveds - max_unresolveds; - return () - -type fd = Unix : Unix.file_descr -> fd | Stdin : fd - -let close_fd = function - | Unix fd -> - Unix.close fd; - return () - | Stdin -> return () - -module Set = Set.Make (SHA1) - -let close_in ic = - close_in ic; - return () - -let ignore_ic _ = return () - -let values_from_src = function - | `Stdin -> - let buf = Buffer.create 0x1000 in - let map () ~pos len = - let max = Buffer.length buf in - let pos = Int64.to_int pos in - let len = Stdlib.min len (max - pos) in - Bigstringaf.of_string ~off:pos ~len (Buffer.contents buf) - in - Some buf, stdin, ignore_ic, Stdin, map - | `File fpath -> - let fd = Unix.openfile (Fpath.to_string fpath) Unix.[ O_RDONLY ] 0o644 in - let max = (Unix.LargeFile.fstat fd).Unix.LargeFile.st_size in - let map () ~pos len = - let len = Stdlib.min len (Int64.to_int (Int64.sub max pos)) in - let res = - Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false [| len |] - in - Bigarray.array1_of_genarray res - in - None, open_in (Fpath.to_string fpath), close_in, Unix fd, map - -let split where checks matrix = - let offsets = - Hashtbl.fold (fun k _ a -> k :: a) where [] - |> List.sort Int64.compare - |> Array.of_list - in - let fold (unresolveds, resolveds) (offset, status) = - if Verify.is_resolved status then - let uid = Verify.uid_of_status status in - let crc = Hashtbl.find checks offset in - unresolveds, { Carton.Dec.Idx.crc; offset; uid } :: resolveds - else - let crc = Hashtbl.find checks offset in - (offset, crc) :: unresolveds, resolveds - in - Array.fold_left fold ([], []) (zip offsets matrix) - -let make_index_pack hash src dst resolveds = - let oc, close = - match src, dst with - | `File fpath, None -> - (* XXX(dinosaure): infinite loop on [Fiber] if [open_out] raises an exception. 
*) - open_out Fpath.(to_string (set_ext "idx" fpath)), close_out - | _, Some fpath -> open_out Fpath.(to_string fpath), close_out - | `Stdin, None -> stdout, ignore - in - let encoder = - Encoder.encoder (`Channel oc) ~pack:hash (Array.of_list resolveds) - in - let[@warning "-8"] (`Ok : [ `Ok | `Partial ]) = - Encoder.encode encoder `Await - in - close oc; - Option.iter (fun _ -> Fmt.pr "%a\n%!" SHA1.pp hash) dst; - return (Ok ()) - -let index_pack ?(verbose = false) src dst = - let verbose = match dst with None -> false | Some _ -> verbose in - (* XXX(dinosaure): if the user wants to /stream/ the index file, we must - set verbose to [false] - otherwise, we produce a bad index file/output. *) - let save, ic, close_ic, fd, map = values_from_src src in - let+ hash, oracle, matrix, where, checks = first_pass ~verbose ?save ic in - let* () = close_ic ic in - let* _ = second_pass ~verbose ~map oracle matrix in - let* () = close_fd fd in - match split where checks matrix with - | [], resolveds -> make_index_pack hash src dst resolveds - | _ -> - let err = - R.error_msgf - "Carton is not able to canonicalize a PACK file without the Git \ - layer." - in - return err - -let run verbose src dst = - match (Fiber.run <.> prj) (index_pack ~verbose src dst) with - | Ok () -> Ok () - | Error (`Msg err) -> Error (Fmt.str "%s." err) - -open Cmdliner - -let src = - let parser str = - match Fpath.of_string str with - | Ok v when Sys.file_exists str -> Ok (`File v) - | Ok v -> Rresult.R.error_msgf "%a not found" Fpath.pp v - | Error _ as err -> err - in - let pp ppf = function - | `File v -> Fpath.pp ppf v - | `Stdin -> Fmt.string ppf "" - in - Arg.conv (parser, pp) - -let dst = Arg.conv (Fpath.of_string, Fpath.pp) - -let src = - let doc = "PACK file." in - Arg.(value & pos ~rev:true 0 src `Stdin & info [] ~doc) - -let dst = - let doc = - "Write the generated index into the specified file. Without this option \ - the name of pack index file is constructed from the name of packed \ - archive file by replacing .pack with .idx." - in - Arg.(value & opt (some dst) None & info [ "o" ] ~doc ~docv:"") - -let verbose = - let doc = "Be verbose about what is going on, including progress status." in - Arg.(value & flag & info [ "v" ] ~doc) - -let cmd = - let doc = "Build pack index for an packed archive." 
in - let man = - [ - `S Manpage.s_description; - `P - "Reads a packed archive (.pack) from a specified file (or standard \ - input), and builds a pack index file (.idx) for it."; - ] - in - let info = Cmd.info "index-pack" ~doc ~man in - Cmd.v info Term.(const run $ verbose $ src $ dst) - -let () = exit @@ Cmd.eval_result cmd diff --git a/bin/carton/verify_pack.ml b/bin/carton/verify_pack.ml deleted file mode 100644 index e7156de7d..000000000 --- a/bin/carton/verify_pack.ml +++ /dev/null @@ -1,312 +0,0 @@ -open Rresult - -let ( <.> ) f g x = f (g x) - -module Scheduler = Carton.Make (Fiber) - -module SHA1 = struct - include Digestif.SHA1 - - let feed ctx ?off ?len bs = feed_bigstring ctx ?off ?len bs - let null = digest_string "" - let length = digest_size - let compare a b = String.compare (to_raw_string a) (to_raw_string b) -end - -module Verify = Carton.Dec.Verify (SHA1) (Scheduler) (Fiber) -module First_pass = Carton.Dec.Fp (SHA1) -open Fiber - -let sched = - let open Scheduler in - { - Carton.bind = (fun x f -> inj (bind (prj x) (fun x -> prj (f x)))); - return = (fun x -> inj (return x)); - } - -let z = De.bigstring_create De.io_buffer_size -let allocate bits = De.make_window ~bits - -let replace hashtbl k v = - match Hashtbl.find_opt hashtbl k with - | Some v' -> if v' < v then Hashtbl.replace hashtbl k v - | None -> Hashtbl.add hashtbl k v - -let never _ = assert false - -let digest ~kind ?(off = 0) ?len buf = - let len = - match len with Some len -> len | None -> Bigstringaf.length buf - off - in - let ctx = SHA1.empty in - let ctx = - match kind with - | `A -> SHA1.feed_string ctx (Fmt.str "commit %d\000" len) - | `B -> SHA1.feed_string ctx (Fmt.str "tree %d\000" len) - | `C -> SHA1.feed_string ctx (Fmt.str "blob %d\000" len) - | `D -> SHA1.feed_string ctx (Fmt.str "tag %d\000" len) - in - let ctx = SHA1.feed_bigstring ctx ~off ~len buf in - SHA1.get ctx - -let ( >>= ) = sched.bind -let return = sched.return - -let read fd buf ~off ~len = - let len = input fd buf off len in - return len - -let first_pass fpath = - let ic = open_in (Fpath.to_string fpath) in - let zw = De.make_window ~bits:15 in - let allocate _ = zw in - - First_pass.check_header sched read ic >>= fun (max, _, _) -> - seek_in ic 0; - - let decoder = First_pass.decoder ~o:z ~allocate (`Channel ic) in - let children = Hashtbl.create 0x100 in - let where = Hashtbl.create 0x100 in - let weight = Hashtbl.create 0x100 in - let length = Hashtbl.create 0x100 in - let carbon = Hashtbl.create 0x100 in - let matrix = Array.make max Verify.unresolved_node in - - let rec go decoder = - match First_pass.decode decoder with - | `Await _ | `Peek _ -> assert false - | `Entry ({ First_pass.kind = Base _; offset; size; consumed; _ }, decoder) - -> - let n = First_pass.count decoder - 1 in - Hashtbl.add weight offset size; - Hashtbl.add length offset size; - Hashtbl.add carbon offset consumed; - Hashtbl.add where offset n; - matrix.(n) <- Verify.unresolved_base ~cursor:offset; - go decoder - | `Entry - ( { - First_pass.kind = Ofs { sub = s; source; target }; - offset; - size; - consumed; - _; - }, - decoder ) -> - let n = First_pass.count decoder - 1 in - replace weight Int64.(sub offset (Int64.of_int s)) source; - replace weight offset target; - Hashtbl.add length offset size; - Hashtbl.add carbon offset consumed; - Hashtbl.add where offset n; - (try - let vs = - Hashtbl.find children (`Ofs Int64.(sub offset (of_int s))) - in - Hashtbl.replace children - (`Ofs Int64.(sub offset (of_int s))) - (offset :: vs) - with _ -> - 
Hashtbl.add children (`Ofs Int64.(sub offset (of_int s))) [ offset ]); - go decoder - | `Entry - ( { - First_pass.kind = Ref { ptr; target; source }; - offset; - size; - consumed; - _; - }, - decoder ) -> - let n = First_pass.count decoder - 1 in - replace weight offset (Stdlib.max target source); - Hashtbl.add length offset size; - Hashtbl.add carbon offset consumed; - Hashtbl.add where offset n; - (try - let vs = Hashtbl.find children (`Ref ptr) in - Hashtbl.replace children (`Ref ptr) (offset :: vs) - with _ -> Hashtbl.add children (`Ref ptr) [ offset ]); - go decoder - | `End hash -> - close_in ic; - return (Ok hash) - | `Malformed err -> return (Error (`Msg err)) - in - go decoder >>= function - | Error _ as err -> return err - | Ok hash -> - let where ~cursor = Hashtbl.find where cursor in - let children ~cursor ~uid = - match - ( Hashtbl.find_opt children (`Ofs cursor), - Hashtbl.find_opt children (`Ref uid) ) - with - | Some a, Some b -> List.sort_uniq compare (a @ b) - | Some x, None | None, Some x -> x - | None, None -> [] - in - let weight ~cursor = Hashtbl.find weight cursor in - let oracle = { Carton.Dec.where; children; digest; weight } in - return (Ok (hash, oracle, matrix, length, carbon)) - -let map ~max fd ~pos len = - let len = Stdlib.min len (Int64.to_int (Int64.sub max pos)) in - let res = - Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false [| len |] - in - Bigarray.array1_of_genarray res - -let second_pass fpath (hash, oracle, matrix) = - let open Fiber in - let fd = Unix.openfile (Fpath.to_string fpath) Unix.[ O_RDONLY ] 0o644 in - let max = (Unix.LargeFile.fstat fd).Unix.LargeFile.st_size in - let map fd ~pos len = map ~max fd ~pos len in - let pack = - Carton.Dec.make fd ~allocate ~z ~uid_ln:SHA1.length - ~uid_rw:SHA1.of_raw_string never - in - Verify.verify ~threads:(Fiber.get_concurrency ()) pack ~map ~oracle - ~verbose:ignore ~matrix - >>= fun () -> - match Array.for_all Verify.is_resolved matrix with - | false -> return (R.error_msgf "Thin PACK file") - | true -> return (Ok (hash, matrix)) - -let pp_kind ppf = function - | `A -> Fmt.string ppf "commit" - | `B -> Fmt.string ppf "tree " - | `C -> Fmt.string ppf "blob " - | `D -> Fmt.string ppf "tag " - -let pp_delta ppf status = - match Verify.source_of_status status with - | Some uid -> Fmt.pf ppf " %d %a" (Verify.depth_of_status status) SHA1.pp uid - | None -> () - -let verify_hash ~memory hash = - let max = Bigstringaf.length memory in - let hash' = - SHA1.of_raw_string - (Bigstringaf.substring memory - ~off:(max - (2 * SHA1.length)) - ~len:SHA1.length) - in - SHA1.equal hash hash' - -let verify ~verbose fpath hash length carbon matrix = - let fd = Unix.openfile (Fpath.to_string fpath) Unix.[ O_RDONLY ] 0o644 in - let len = (Unix.fstat fd).Unix.st_size in - let memory = - Unix.map_file fd ~pos:0L Bigarray.char Bigarray.c_layout false [| len |] - in - let memory = Bigarray.array1_of_genarray memory in - Unix.close fd; - let idx = - Carton.Dec.Idx.make memory ~uid_ln:SHA1.length ~uid_rw:SHA1.to_raw_string - ~uid_wr:SHA1.of_raw_string - in - if not (verify_hash ~memory hash) then - return (R.error_msgf "Invalid PACK hash") - else - match verbose with - | false -> - if - Array.for_all - (Carton.Dec.Idx.exists idx <.> Verify.uid_of_status) - matrix - then return (Ok ()) - else return (R.error_msgf "Invalid PACK file") - | true -> ( - let chains = Hashtbl.create 0x10 in - - let f status = - let uid = Verify.uid_of_status status in - let kind = Verify.kind_of_status status in - let offset = 
Verify.offset_of_status status in - let (size : Carton.Dec.weight) = Hashtbl.find length offset in - let size_in_pack = Hashtbl.find carbon offset in - let depth = Verify.depth_of_status status in - (match Hashtbl.find chains depth with - | v -> Hashtbl.replace chains depth (succ v) - | exception _ -> Hashtbl.replace chains depth 1); - match Carton.Dec.Idx.find idx uid with - | Some (_crc, offset') when offset = offset' -> - Fmt.pr "%a %a %d %d %Ld%a\n%!" SHA1.pp uid pp_kind kind - (size :> int) - size_in_pack offset pp_delta status - | _ -> Fmt.failwith "Invalid PACK file" - in - - let pp_chain ppf (depth, n) = - match depth with - | 0 -> Fmt.pf ppf "non delta: %d objects\n%!" n - | _ -> Fmt.pf ppf "chain length = %d: %d objects\n%!" depth n - in - - try - Array.iter f matrix; - let chains = - List.sort_uniq - (fun (a, _) (b, _) -> compare a b) - ((List.of_seq <.> Hashtbl.to_seq) chains) - in - Fmt.pr "%a%!" (Fmt.list ~sep:Fmt.nop pp_chain) chains; - Fmt.pr "%a: ok\n%!" Fpath.pp Fpath.(set_ext "pack" (base fpath)); - return (Ok ()) - with _exn -> return (R.error_msgf "Invalid PACK file")) - -let ( >>? ) x f = - let open Fiber in - x >>= function Ok x -> f x | Error err -> return (Error err) - -let run ~verbose fpath = - let pack = Fpath.set_ext "pack" fpath in - Bos.OS.File.must_exist pack |> Fiber.return >>? fun pack -> - first_pass pack |> Scheduler.prj - >>? fun (hash, oracle, matrix, length, carbon) -> - second_pass pack (hash, oracle, matrix) >>? fun (hash, matrix) -> - verify ~verbose fpath hash length carbon matrix |> Scheduler.prj - -let run verbose fpath = - match Fiber.run (run ~verbose fpath) with - | Ok () -> Ok () - | Error (`Msg err) -> Error (Fmt.str "%s." err) - -open Cmdliner - -let verbose = - let doc = - "After verifying the pack, show list of objects contained in the pack and \ - histogram of delta chain length." - in - Arg.(value & flag & info [ "v"; "verbose" ] ~doc) - -let fpath = - let parser x = - match Fpath.of_string x with - | Ok v when Sys.file_exists x -> Ok v - | Ok v -> R.error_msgf "%a not found" Fpath.pp v - | Error _ as err -> err - in - Arg.conv (parser, Fpath.pp) - -let fpath = - let doc = "The idx files to verify." in - Arg.(required & pos ~rev:true 0 (some fpath) None & info [] ~doc) - -let cmd = - let doc = "Validate packed Git archive files" in - let man = - [ - `S Manpage.s_description; - `P - "Reads given idx file for packed Git archive created with the $(i,git) \ - $(i,pack-objets) command and verifies idx file and the corresponding \ - pack file."; - ] - in - let info = Cmd.info "verify-pack" ~doc ~man in - Cmd.v info Term.(const run $ verbose $ fpath) - -let () = exit @@ Cmd.eval_result cmd diff --git a/carton-git.opam b/carton-git.opam deleted file mode 100644 index f085b58db..000000000 --- a/carton-git.opam +++ /dev/null @@ -1,39 +0,0 @@ -opam-version: "2.0" -name: "carton-git" -synopsis: "Implementation of PACK file in OCaml" -description: """\ -Carton is an implementation of the PACK file -in OCaml. PACK file is used by Git to store Git objects. 
Carton is more -abstracted when it can store any objects.""" -maintainer: "Romain Calascibetta " -authors: "Romain Calascibetta " -license: "MIT" -homepage: "https://github.com/mirage/ocaml-git" -doc: "https://mirage.github.io/ocaml-git/" -bug-reports: "https://github.com/mirage/ocaml-git/issues" -depends: [ - "ocaml" {>= "4.08.0"} - "dune" {>= "2.8.0"} - "carton" {= version} - "carton-lwt" {= version} - "bigstringaf" {>= "0.9.0"} - "lwt" - "fpath" - "fmt" {>= "0.8.9"} - "base-unix" - "decompress" {>= "1.4.3"} - "astring" {>= "0.8.5"} - "alcotest" {>= "1.2.3" & with-test} - "alcotest-lwt" {>= "1.2.3" & with-test} - "cstruct" {>= "6.1.0" & with-test} - "logs" {>= "0.7.0"} - "mirage-flow" {>= "2.0.1" & with-test} - "rresult" {>= "0.6.0" & with-test} - "ke" {>= "0.6" & with-test} -] -conflicts: [ "result" {< "1.5"} ] -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] -dev-repo: "git+https://github.com/mirage/ocaml-git.git" diff --git a/carton-lwt.opam b/carton-lwt.opam deleted file mode 100644 index 515772b68..000000000 --- a/carton-lwt.opam +++ /dev/null @@ -1,41 +0,0 @@ -opam-version: "2.0" -name: "carton-lwt" -synopsis: "Implementation of PACK file in OCaml" -description: """\ -Carton is an implementation of the PACK file -in OCaml. PACK file is used by Git to store Git objects. Carton is more -abstracted when it can store any objects.""" -maintainer: "Romain Calascibetta " -authors: "Romain Calascibetta " -license: "MIT" -homepage: "https://github.com/mirage/ocaml-git" -doc: "https://mirage.github.io/ocaml-git/" -bug-reports: "https://github.com/mirage/ocaml-git/issues" -depends: [ - "ocaml" {>= "4.08.0"} - "dune" {>= "2.8.0"} - "carton" {= version} - "lwt" - "decompress" {>= "1.4.3"} - "optint" {>= "0.0.4"} - "bigstringaf" {>= "0.9.0"} - "alcotest" {>= "1.2.3" & with-test} - "alcotest-lwt" {>= "1.2.3" & with-test} - "cstruct" {>= "6.1.0" & with-test} - "fmt" {>= "0.8.9" & with-test} - "logs" {>= "0.7.0" & with-test} - "mirage-flow" {>= "2.0.1" & with-test} - "rresult" {>= "0.6.0" & with-test} - "ke" {>= "0.6" & with-test} - "base64" {>= "3.4.0" & with-test} - "bos" {>= "0.2.0" & with-test} - "checkseum" {>= "0.3.3" & with-test} - "digestif" {>= "1.1.2" & with-test} - "fpath" {>= "0.7.3" & with-test} -] -conflicts: [ "result" {< "1.5"} ] -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] -dev-repo: "git+https://github.com/mirage/ocaml-git.git" diff --git a/carton.opam b/carton.opam deleted file mode 100644 index e557b972c..000000000 --- a/carton.opam +++ /dev/null @@ -1,47 +0,0 @@ -opam-version: "2.0" -name: "carton" -synopsis: "Implementation of PACKv2 file in OCaml" -description: """\ -Carton is an implementation of the PACKv2 file -in OCaml. PACKv2 file is used by Git to store Git objects. 
-Carton is more abstracted when it can store any objects.""" -maintainer: "Romain Calascibetta " -authors: "Romain Calascibetta " -license: "MIT" -homepage: "https://github.com/mirage/ocaml-git" -doc: "https://mirage.github.io/ocaml-git/" -bug-reports: "https://github.com/mirage/ocaml-git/issues" -depends: [ - "ocaml" {>= "4.08.0"} - "dune" {>= "2.8.0"} - "ke" {>= "0.6"} - "duff" {>= "0.5"} - "decompress" {>= "1.4.3"} - "cstruct" {>= "6.1.0"} - "optint" {>= "0.0.4"} - "bigstringaf" {>= "0.9.0"} - "checkseum" {>= "0.3.3"} - "logs" - "cmdliner" {>= "1.1.0"} - "hxd" {>= "0.3.2"} - "psq" {>= "0.2.0"} - "fmt" {>= "0.8.9"} - "rresult" {>= "0.7.0"} - "fpath" - "base64" {with-test & >= "3.0.0"} - "bos" - "digestif" {>= "1.1.2"} - "base-unix" {with-test} - "base-threads" {with-test} - "alcotest" {with-test} - "crowbar" {with-test & >= "0.2.1"} - "alcotest-lwt" {>= "1.2.3" & with-test} - "lwt" {>= "5.3.0" & with-test} - "mirage-flow" {>= "2.0.1" & with-test} -] -conflicts: [ "result" {< "1.5"} ] -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] -dev-repo: "git+https://github.com/mirage/ocaml-git.git" diff --git a/fuzz/dune b/fuzz/dune index 2c7cbf54e..cb847821a 100644 --- a/fuzz/dune +++ b/fuzz/dune @@ -13,18 +13,6 @@ (modules smart) (libraries fmt crowbar digestif.c git.nss.smart)) -(rule - (alias runtest) - (package carton) - (action - (run ./pack_headers.exe))) - -(rule - (alias runtest) - (package carton) - (action - (run ./binary_search.exe))) - (rule (alias runtest) (package git) diff --git a/src/carton-git/carton_git.ml b/src/carton-git/carton_git.ml deleted file mode 100644 index a5ba76100..000000000 --- a/src/carton-git/carton_git.ml +++ /dev/null @@ -1,206 +0,0 @@ -module type STORE = sig - type 'a rd = < rd : unit ; .. > as 'a - type 'a wr = < wr : unit ; .. > as 'a - - type 'a mode = - | Rd : < rd : unit > mode - | Wr : < wr : unit > mode - | RdWr : < rd : unit ; wr : unit > mode - - type t - type uid - type 'a fd - type error - type +'a fiber - - val pp_error : error Fmt.t - - val create : - ?trunc:bool -> mode:'a mode -> t -> uid -> ('a fd, error) result fiber - - val map : t -> 'm rd fd -> pos:int64 -> int -> Bigstringaf.t - val close : t -> 'm fd -> (unit, error) result fiber - val list : t -> uid list fiber - val length : 'm fd -> int64 fiber -end - -let src = Logs.Src.create "carton-git" ~doc:"logs git's carton event" - -module Log = (val Logs.src_log src : Logs.LOG) - -module type IO = sig - type +'a t - - val catch : (unit -> 'a t) -> (exn -> 'a t) -> 'a t - val bind : 'a t -> ('a -> 'b t) -> 'b t - val return : 'a -> 'a t -end - -type ('fd, 'uid) pack = { - pack : ('fd * int64, 'uid) Carton.Dec.t; - index : 'uid Carton.Dec.Idx.idx; - z : Bigstringaf.t; - w : De.window; -} - -type ('path, 'fd, 'uid) t = { tbl : ('path, ('fd, 'uid) pack) Hashtbl.t } -[@@unbox] - -type 'fd buffers = { - z : Bigstringaf.t; - allocate : int -> De.window; - w : 'fd Carton.Dec.W.t; -} - -module Make - (Scheduler : Carton.SCHEDULER) - (IO : IO with type +'a t = 'a Scheduler.s) - (Store : STORE with type +'a fiber = 'a Scheduler.s) - (Uid : Carton.UID) = -struct - let ( >>= ) = IO.bind - let return = IO.return - let ( >>? ) x f = x >>= function Ok x -> f x | Error _ as err -> return err - let ( >>| ) x f = x >>= fun x -> return (f x) - - let idx (root : Store.t) acc path = - Store.create ~trunc:false ~mode:Store.Rd root path >>? 
fun fd -> - Store.length fd >>= fun length -> - let payload = Store.map root fd ~pos:0L (Int64.to_int length) in - Store.close root fd >>? fun () -> - let idx = - Carton.Dec.Idx.make payload ~uid_ln:Uid.length ~uid_rw:Uid.to_raw_string - ~uid_wr:Uid.of_raw_string - in - return (Ok (idx :: acc)) - - let pack (root : Store.t) acc (index, pack) = - Store.create ~trunc:false ~mode:Store.Rd root pack >>? fun fd -> - Store.length fd >>= fun length -> - let z = Bigstringaf.create De.io_buffer_size in - let w = De.make_window ~bits:15 in - let pack = - Carton.Dec.make (fd, length) ~z - ~allocate:(fun _ -> w) - ~uid_ln:Uid.length ~uid_rw:Uid.of_raw_string - (fun uid -> - match Carton.Dec.Idx.find index uid with - | Some (_, offset) -> offset - | None -> Fmt.invalid_arg "Object %a does not exist" Uid.pp uid) - in - return (Ok ({ pack; index; z; w } :: acc)) - - let fold_left_r ?(err = fun _ -> return ()) f a l = - let rec go a = function - | [] -> return a - | x :: r -> ( - f a x >>= function - | Ok a -> go a r - | Error x -> err x >>= fun () -> go a r) - in - go a l - - let ( <.> ) f g x = f (g x) - - (* XXX(dinosaure): about design, I think that a listing of PACK files should be done - outside the scope of this module (or more generally outside the scope of the Git's core). *) - let make : - Store.t -> - uid_of_major_uid:(Store.uid -> 'uid) -> - idx_major_uid_of_uid:(Store.t -> 'uid -> Store.uid) -> - (Store.uid, < rd : unit > Store.fd, Uid.t) t IO.t = - fun root ~uid_of_major_uid ~idx_major_uid_of_uid -> - Store.list root >>= fun pcks -> - let idxs = List.map (idx_major_uid_of_uid root <.> uid_of_major_uid) pcks in - fold_left_r (idx root) [] idxs >>| List.rev >>= fun idxs -> - fold_left_r (pack root) [] (List.combine idxs pcks) >>| List.rev - >>= fun vs -> - let tbl = Hashtbl.create 10 in - List.iter (fun (k, v) -> Hashtbl.add tbl k v) (List.combine pcks vs); - return { tbl } - - let map root (fd, top) ~pos len = - let max = Int64.sub top pos in - let len = min (Int64.of_int len) max in - let len = Int64.to_int len in - Store.map root fd ~pos len - - let add : - Store.t -> - (Store.uid, < rd : unit > Store.fd, Uid.t) t -> - idx:Store.uid -> - Store.uid -> - (< rd : unit > Store.fd * int64, Store.error) result IO.t = - fun root p ~idx:idx_uid pck -> - idx root [] idx_uid >>? fun idxs -> - let[@warning "-8"] [ idx ] = idxs in - pack root [] (idx, pck) >>? fun vs -> - List.iter (fun (k, v) -> Hashtbl.add p.tbl k v) (List.combine [ pck ] vs); - let[@warning "-8"] [ v ] = vs in - return (Ok (Carton.Dec.fd v.pack)) - - let with_resources root pack uid buffers = - IO.catch - (fun () -> - let map fd ~pos len = map root fd ~pos len in - let pack = Carton.Dec.with_z buffers.z pack in - let pack = Carton.Dec.with_allocate ~allocate:buffers.allocate pack in - let pack = Carton.Dec.with_w buffers.w pack in - let weight = - Carton.Dec.weight_of_uid ~map pack ~weight:Carton.Dec.null uid - in - let raw = Carton.Dec.make_raw ~weight in - let v = Carton.Dec.of_uid ~map pack raw uid in - return v) - (fun exn -> - Printexc.print_backtrace stderr; - raise exn) - - let get : - Store.t -> - resources:('fd -> ('fd buffers -> 'a IO.t) -> 'a IO.t) -> - (Store.uid, < rd : unit > Store.fd, Uid.t) t -> - Uid.t -> - (Carton.Dec.v, [> `Msg of string ]) result IO.t = - fun root ~resources p uid -> - let res = ref None in - Hashtbl.iter - (fun _ ({ index; _ } as x) -> - let v = Carton.Dec.Idx.exists index uid in - Log.debug (fun m -> m "%a exists into the *.idx file? 
%b" Uid.pp uid v); - if v then res := Some x) - p.tbl; - match !res with - | Some { pack; _ } -> - Log.debug (fun m -> m "Start to load the object from the PACK file."); - resources (Carton.Dec.fd pack) (with_resources root pack uid) - >>= fun v -> - Log.debug (fun m -> m "Object %a loaded." Uid.pp uid); - return (Ok v) - | None -> return (Error (`Not_found uid)) - - let list : Store.t -> (Store.uid, 'm Store.fd, Uid.t) t -> Uid.t list = - fun _ p -> - let fold _ { index; _ } a = - let res = ref [] in - Carton.Dec.Idx.iter - ~f:(fun ~uid ~offset:_ ~crc:_ -> res := uid :: !res) - index; - List.rev_append !res a - in - Hashtbl.fold fold p.tbl [] - - let exists : Store.t -> (Store.uid, 'm Store.fd, Uid.t) t -> Uid.t -> bool = - fun _ p uid -> - let res = ref false in - Hashtbl.iter - (fun _ { index; _ } -> - if Carton.Dec.Idx.exists index uid then res := true) - p.tbl; - !res - - let fds : (Store.uid, 'm Store.fd, Uid.t) t -> ('m Store.fd * int64) list = - fun { tbl } -> - let fold _ { pack; _ } a = Carton.Dec.fd pack :: a in - Hashtbl.fold fold tbl [] -end diff --git a/src/carton-git/carton_git.mli b/src/carton-git/carton_git.mli deleted file mode 100644 index 487e34d0e..000000000 --- a/src/carton-git/carton_git.mli +++ /dev/null @@ -1,74 +0,0 @@ -module type STORE = sig - type 'a rd = < rd : unit ; .. > as 'a - type 'a wr = < wr : unit ; .. > as 'a - - type 'a mode = - | Rd : < rd : unit > mode - | Wr : < wr : unit > mode - | RdWr : < rd : unit ; wr : unit > mode - - type t - type uid - type 'a fd - type error - type +'a fiber - - val pp_error : error Fmt.t - - val create : - ?trunc:bool -> mode:'a mode -> t -> uid -> ('a fd, error) result fiber - - val map : t -> 'm rd fd -> pos:int64 -> int -> Bigstringaf.t - val close : t -> 'm fd -> (unit, error) result fiber - val list : t -> uid list fiber - val length : 'm fd -> int64 fiber -end - -module type IO = sig - type +'a t - - val catch : (unit -> 'a t) -> (exn -> 'a t) -> 'a t - val bind : 'a t -> ('a -> 'b t) -> 'b t - val return : 'a -> 'a t -end - -type ('path, 'fd, 'uid) t - -type 'fd buffers = { - z : Bigstringaf.t; - allocate : int -> De.window; - w : 'fd Carton.Dec.W.t; -} - -module Make - (Scheduler : Carton.SCHEDULER) - (IO : IO with type +'a t = 'a Scheduler.s) - (Store : STORE with type +'a fiber = 'a Scheduler.s) - (Uid : Carton.UID) : sig - val make : - Store.t -> - uid_of_major_uid:(Store.uid -> 'uid) -> - idx_major_uid_of_uid:(Store.t -> 'uid -> Store.uid) -> - (Store.uid, < rd : unit > Store.fd, Uid.t) t IO.t - - val add : - Store.t -> - (Store.uid, < rd : unit > Store.fd, Uid.t) t -> - idx:Store.uid -> - Store.uid -> - (< rd : unit > Store.fd * int64, Store.error) result IO.t - - val get : - Store.t -> - resources: - (< rd : unit > Store.fd * int64 -> - ((< rd : unit > Store.fd * int64) buffers -> Carton.Dec.v IO.t) -> - Carton.Dec.v IO.t) -> - (Store.uid, < rd : unit > Store.fd, Uid.t) t -> - Uid.t -> - (Carton.Dec.v, [> `Msg of string | `Not_found of Uid.t ]) result IO.t - - val exists : Store.t -> (Store.uid, 'm Store.fd, Uid.t) t -> Uid.t -> bool - val list : Store.t -> (Store.uid, 'm Store.fd, Uid.t) t -> Uid.t list - val fds : (Store.uid, 'm Store.fd, Uid.t) t -> ('m Store.fd * int64) list -end diff --git a/src/carton-git/carton_git_unix.ml b/src/carton-git/carton_git_unix.ml deleted file mode 100644 index d4ee5f403..000000000 --- a/src/carton-git/carton_git_unix.ml +++ /dev/null @@ -1,94 +0,0 @@ -open Lwt.Infix - -module Store = struct - type 'a rd = < rd : unit ; .. > as 'a - type 'a wr = < wr : unit ; .. 
> as 'a - - type 'a mode = - | Rd : < rd : unit > mode - | Wr : < wr : unit > mode - | RdWr : < rd : unit ; wr : unit > mode - - type t = Fpath.t - type uid = Fpath.t - type 'a fd = Lwt_unix.file_descr - type error = [ `Not_found of uid ] - type +'a fiber = 'a Lwt.t - - let pp_error : error Fmt.t = - fun ppf -> function - | `Not_found uid -> Fmt.pf ppf "%a not found" Fpath.pp uid - - let create : - type a. - ?trunc:bool -> mode:a mode -> t -> uid -> (a fd, error) result fiber = - fun ?(trunc = true) ~mode root path -> - let flags, perm = - match mode with - | Rd -> Unix.[ O_RDONLY ], 0o400 - | Wr -> Unix.[ O_WRONLY; O_CREAT; O_APPEND ], 0o600 - | RdWr -> Unix.[ O_RDWR; O_CREAT; O_APPEND ], 0o600 - in - let flags = if trunc then Unix.O_TRUNC :: flags else flags in - let path = Fpath.(root // path) in - let process () = - Lwt_unix.openfile (Fpath.to_string path) flags perm >>= fun fd -> - Lwt.return_ok fd - in - let error = function - | Unix.Unix_error (Unix.EACCES, _, _) -> - Lwt.return_error (`Not_found path) - | exn -> Lwt.fail exn - in - Lwt.catch process error - - let map : t -> 'm rd fd -> pos:int64 -> int -> Bigstringaf.t = - fun _ fd ~pos len -> - let fd = Lwt_unix.unix_file_descr fd in - let payload = - Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false [| len |] - in - Bigarray.array1_of_genarray payload - - let close _ fd = Lwt_unix.close fd >>= fun () -> Lwt.return_ok () - - let length fd = - Lwt_unix.LargeFile.fstat fd >>= fun st -> - Lwt.return st.Unix.LargeFile.st_size - - let list root = - Lwt_unix.opendir (Fpath.to_string root) >>= fun dh -> - let rec go acc = - Lwt.catch - (fun () -> - Lwt_unix.readdir dh >>= function - | "." | ".." -> go acc - | entry -> ( - match Fpath.of_string entry with - | Ok x -> if Fpath.has_ext "pack" x then go (x :: acc) else go acc - | Error (`Msg _) -> (* ignore *) go acc)) - (function End_of_file -> Lwt.return acc | exn -> Lwt.fail exn) - in - go [] -end - -module Make (Uid : sig - include Carton.UID - - val of_hex : string -> t - val to_hex : t -> string -end) = -struct - include Carton_git.Make (Carton_lwt.Scheduler) (Lwt) (Store) (Uid) - - let idx_major_uid_of_uid root uid = - Fpath.(root / Fmt.str "pack-%s.idx" (Uid.to_hex uid)) - - let uid_of_major_uid path = - let str = Fpath.basename (Fpath.rem_ext path) in - match Astring.String.cut ~sep:"pack-" str with - | Some ("", uid) -> Uid.of_hex uid - | _ -> Fmt.failwith "Invalid path of major file: %a" Fpath.pp path - - let make store = make ~uid_of_major_uid ~idx_major_uid_of_uid store -end diff --git a/src/carton-git/carton_git_unix.mli b/src/carton-git/carton_git_unix.mli deleted file mode 100644 index cceaff47f..000000000 --- a/src/carton-git/carton_git_unix.mli +++ /dev/null @@ -1,41 +0,0 @@ -module Store : - Carton_git.STORE - with type t = Fpath.t - and type uid = Fpath.t - and type 'a fd = Lwt_unix.file_descr - and type error = [ `Not_found of Fpath.t ] - and type +'a fiber = 'a Lwt.t - -open Carton_git - -module Make (Uid : sig - include Carton.UID - - val of_hex : string -> t - val to_hex : t -> string -end) : sig - val make : Store.t -> (Fpath.t, Lwt_unix.file_descr, Uid.t) t Lwt.t - - val add : - Store.t -> - (Fpath.t, Lwt_unix.file_descr, Uid.t) t -> - idx:Fpath.t -> - Fpath.t -> - (Lwt_unix.file_descr * int64, Store.error) result Lwt.t - - val get : - Store.t -> - resources: - (Lwt_unix.file_descr * int64 -> - ((Lwt_unix.file_descr * int64) buffers -> Carton.Dec.v Lwt.t) -> - Carton.Dec.v Lwt.t) -> - (Fpath.t, Lwt_unix.file_descr, Uid.t) t -> - Uid.t -> - (Carton.Dec.v, 
[> `Msg of string | `Not_found of Uid.t ]) result Lwt.t - - val list : Store.t -> (Fpath.t, Lwt_unix.file_descr, Uid.t) t -> Uid.t list - - val fds : - (Fpath.t, Lwt_unix.file_descr, Uid.t) t -> - (Lwt_unix.file_descr * int64) list -end diff --git a/src/carton-git/dune b/src/carton-git/dune deleted file mode 100644 index ab7b28881..000000000 --- a/src/carton-git/dune +++ /dev/null @@ -1,21 +0,0 @@ -(library - (name carton_git) - (public_name carton-git) - (modules carton_git) - (libraries logs decompress.zl lwt decompress.de bigstringaf fmt carton)) - -(library - (name carton_git_unix) - (public_name carton-git.unix) - (modules carton_git_unix) - (libraries - astring - bigstringaf - fmt - lwt - fpath - carton - carton-lwt - carton-git - unix - lwt.unix)) diff --git a/src/carton-lwt/carton_lwt.ml b/src/carton-lwt/carton_lwt.ml deleted file mode 100644 index 6361a52be..000000000 --- a/src/carton-lwt/carton_lwt.ml +++ /dev/null @@ -1,194 +0,0 @@ -open Lwt_io - -type lwt = Lwt_io.lwt - -external inj : 'a Lwt.t -> ('a, lwt) Carton.io = "%identity" -external prj : ('a, lwt) Carton.io -> 'a Lwt.t = "%identity" - -let lwt_bind x f = - let open Lwt.Infix in - inj (prj x >>= fun x -> prj (f x)) -[@@inline] - -let lwt_return x = inj (Lwt.return x) [@@inline] -let lwt = { Carton.bind = lwt_bind; Carton.return = lwt_return } - -module Scheduler = Lwt_scheduler - -module Dec = struct - module W = struct - type 'fd t = 'fd Carton.Dec.W.t - - and slice = Carton.Dec.W.slice = { - offset : int64; - length : int; - payload : Bigstringaf.t; - } - - and 'fd map = 'fd Carton.Dec.W.map - - let make fd = Carton.Dec.W.make fd - end - - type weight = Carton.Dec.weight - type 'fd read = 'fd -> bytes -> off:int -> len:int -> int Lwt.t - - module Idx = Carton.Dec.Idx - - module Fp (Uid : Carton.UID) = struct - include Carton.Dec.Fp (Uid) - - let check_header read fd = - let read fd buf ~off ~len = inj (read fd buf ~off ~len) in - prj (check_header lwt read fd) - end - - type ('fd, 'uid) t = ('fd, 'uid) Carton.Dec.t - - let with_z buf t = Carton.Dec.with_z buf t - let with_w lru t = Carton.Dec.with_w lru t - let with_allocate ~allocate t = Carton.Dec.with_allocate ~allocate t - let fd t = Carton.Dec.fd t - - type raw = Carton.Dec.raw - - let make_raw ~weight = Carton.Dec.make_raw ~weight - - type v = Carton.Dec.v - - let v ~kind ?depth buf = Carton.Dec.v ~kind ?depth buf - let kind v = Carton.Dec.kind v - let raw v = Carton.Dec.raw v - let len v = Carton.Dec.len v - let depth v = Carton.Dec.depth v - - let make fd ~z ~allocate ~uid_ln ~uid_rw where = - Carton.Dec.make fd ~z ~allocate ~uid_ln ~uid_rw where - - (* XXX(dinosaure): [?visited] disappeared but it's only - * about internal use. 
*) - - let weight_of_offset ~map t ~weight cursor = - Carton.Dec.weight_of_offset ~map t ~weight cursor - - let weight_of_uid ~map t ~weight uid = - Carton.Dec.weight_of_uid ~map t ~weight uid - - let of_offset ~map t raw ~cursor = Carton.Dec.of_offset ~map t raw ~cursor - let of_uid ~map t raw uid = Carton.Dec.of_uid ~map t raw uid - - type path = Carton.Dec.path - - let path_to_list path = Carton.Dec.path_to_list path - let kind_of_path path = Carton.Dec.kind_of_path path - let path_of_offset ~map t ~cursor = Carton.Dec.path_of_offset ~map t ~cursor - let path_of_uid ~map t uid = Carton.Dec.path_of_uid ~map t uid - - let of_offset_with_path ~map t ~path raw ~cursor = - Carton.Dec.of_offset_with_path ~map t ~path raw ~cursor - - type 'uid digest = 'uid Carton.Dec.digest - - let uid_of_offset ~map ~digest t raw ~cursor = - Carton.Dec.uid_of_offset ~map ~digest t raw ~cursor - - let uid_of_offset_with_source ~map ~digest t ~kind raw ~depth ~cursor = - Carton.Dec.uid_of_offset_with_source ~map ~digest t ~kind raw ~depth ~cursor - - type 'uid oracle = 'uid Carton.Dec.oracle - - module Verify (Uid : Carton.UID) = struct - include Carton.Dec.Verify (Uid) (Lwt_scheduler) (Lwt_io) - - let verify ~threads ~map ~oracle ~verbose t ~matrix = - verify ~threads ~map ~oracle ~verbose t ~matrix - end - - module Ip (Uid : Carton.UID) = Carton.Dec.Ip (Lwt_scheduler) (Lwt_io) (Uid) -end - -module Enc = struct - type 'uid entry = 'uid Carton.Enc.entry - type 'uid delta = 'uid Carton.Enc.delta = From of 'uid | Zero - - let make_entry ~kind ~length ?preferred ?delta uid = - Carton.Enc.make_entry ~kind ~length ?preferred ?delta uid - - let length entry = Carton.Enc.length entry - - type 'uid q = 'uid Carton.Enc.q - type 'uid p = 'uid Carton.Enc.p - type 'uid patch = 'uid Carton.Enc.patch - type 'uid load = 'uid -> Dec.v Lwt.t - type 'uid find = 'uid -> int option Lwt.t - - type 'uid uid = 'uid Carton.Enc.uid = { - uid_ln : int; - uid_rw : 'uid -> string; - } - - let target_to_source target = Carton.Enc.target_to_source target - let target_uid target = Carton.Enc.target_uid target - - let entry_to_target ~load entry = - let load uid = inj (load uid) in - prj (Carton.Enc.entry_to_target lwt ~load entry) - - let apply ~load ~uid_ln ~source ~target = - let load uid = inj (load uid) in - prj (Carton.Enc.apply lwt ~load ~uid_ln ~source ~target) - - module type VERBOSE = Carton.Enc.VERBOSE with type 'a fiber = 'a Lwt.t - module type UID = Carton.Enc.UID - - module Delta (Uid : UID) (Verbose : VERBOSE) = struct - include Carton.Enc.Delta (Lwt_scheduler) (Lwt_io) (Uid) (Verbose) - - let delta ~threads ~weight ~uid_ln matrix = - let threads = List.map (fun load uid -> inj (load uid)) threads in - delta ~threads ~weight ~uid_ln matrix - end - - module N = struct - include Carton.Enc.N - - let encoder ~b ~load target = - let load uid = inj (load uid) in - prj (encoder lwt ~b ~load target) - end - - type b = Carton.Enc.b = { - i : Bigstringaf.t; - q : De.Queue.t; - w : De.Lz77.window; - o : Bigstringaf.t; - } - - let header_of_pack ~length buf off len = - Carton.Enc.header_of_pack ~length buf off len - - let encode_target ?level ~b ~find ~load ~uid target ~cursor = - let load uid = inj (load uid) in - let find uid = inj (find uid) in - prj (Carton.Enc.encode_target lwt ?level ~b ~find ~load ~uid target ~cursor) -end - -module Thin = struct - type 'uid light_load = 'uid -> (Carton.kind * int) Lwt.t - type 'uid heavy_load = 'uid -> Carton.Dec.v Lwt.t - type optint = Optint.t - - module Make (Uid : Carton.UID) = struct - 
include Thin.Make (Lwt_scheduler) (Lwt_io) (Uid) - - let with_pause f x = - let open Lwt.Infix in - f x >>= fun r -> - Lwt.pause () >|= fun () -> r - - let canonicalize ~light_load ~heavy_load ~src ~dst fs n requireds weight = - let light_load uid = inj (with_pause light_load uid) in - let heavy_load uid = inj (with_pause heavy_load uid) in - canonicalize ~light_load ~heavy_load ~src ~dst fs n requireds weight - end -end diff --git a/src/carton-lwt/carton_lwt.mli b/src/carton-lwt/carton_lwt.mli deleted file mode 100644 index 2c93cbaf8..000000000 --- a/src/carton-lwt/carton_lwt.mli +++ /dev/null @@ -1,294 +0,0 @@ -type lwt - -val lwt : lwt Carton.scheduler -external inj : 'a Lwt.t -> ('a, lwt) Carton.io = "%identity" -external prj : ('a, lwt) Carton.io -> 'a Lwt.t = "%identity" - -module Scheduler : Carton.SCHEDULER with type +'a s = 'a Lwt.t and type t = lwt - -module Dec : sig - module W : sig - type 'fd t - - and slice = Carton.Dec.W.slice = { - offset : int64; - length : int; - payload : Bigstringaf.t; - } - - and 'fd map = 'fd -> pos:int64 -> int -> Bigstringaf.t - - val make : 'fd -> 'fd t - end - - type weight = Carton.Dec.weight - type 'fd read = 'fd -> bytes -> off:int -> len:int -> int Lwt.t - - module Idx = Carton.Dec.Idx - - module Fp (Uid : Carton.UID) : sig - type optint = Optint.t - - type kind = - | Base of [ `A | `B | `C | `D ] - | Ofs of { sub : int; source : weight; target : weight } - | Ref of { ptr : Uid.t; source : weight; target : weight } - - type entry = { - offset : int64; - kind : kind; - size : weight; - consumed : int; - crc : optint; - } - - val check_header : 'fd read -> 'fd -> (int * string * int) Lwt.t - - type decoder - type src = [ `Channel of in_channel | `String of string | `Manual ] - - type decode = - [ `Await of decoder - | `Peek of decoder - | `Entry of entry * decoder - | `End of Uid.t - | `Malformed of string ] - - type header = Consumed of Bigstringaf.t | None - - val decoder : - o:Bigstringaf.t -> allocate:(int -> De.window) -> src -> decoder - - val decode : decoder -> decode - val number : decoder -> int - val version : decoder -> int - val count : decoder -> int - val src_rem : decoder -> int - val src : decoder -> Bigstringaf.t -> int -> int -> decoder - end - - type ('fd, 'uid) t = ('fd, 'uid) Carton.Dec.t - - val with_z : Bigstringaf.t -> ('fd, 'uid) t -> ('fd, 'uid) t - val with_w : 'fd W.t -> ('fd, 'uid) t -> ('fd, 'uid) t - - val with_allocate : - allocate:(int -> De.window) -> ('fd, 'uid) t -> ('fd, 'uid) t - - val fd : ('fd, 'uid) t -> 'fd - - type raw = Carton.Dec.raw - - val make_raw : weight:weight -> raw - - type v = Carton.Dec.v - - val v : kind:Carton.kind -> ?depth:int -> Bigstringaf.t -> v - val kind : v -> Carton.kind - val raw : v -> Bigstringaf.t - val len : v -> int - val depth : v -> int - - val make : - 'fd -> - z:Zl.bigstring -> - allocate:(int -> Zl.window) -> - uid_ln:int -> - uid_rw:(string -> 'uid) -> - ('uid -> int64) -> - ('fd, 'uid) t - - val weight_of_offset : - map:'fd W.map -> ('fd, 'uid) t -> weight:weight -> int64 -> weight - - val weight_of_uid : - map:'fd W.map -> ('fd, 'uid) t -> weight:weight -> 'uid -> weight - - val of_offset : map:'fd W.map -> ('fd, 'uid) t -> raw -> cursor:int64 -> v - val of_uid : map:'fd W.map -> ('fd, 'uid) t -> raw -> 'uid -> v - - type path = Carton.Dec.path - - val path_to_list : path -> int64 list - val kind_of_path : path -> [ `A | `B | `C | `D ] - val path_of_offset : map:'fd W.map -> ('fd, 'uid) t -> cursor:int64 -> path - val path_of_uid : map:'fd W.map -> ('fd, 'uid) t -> 
'uid -> path - - val of_offset_with_path : - map:'fd W.map -> ('fd, 'uid) t -> path:path -> raw -> cursor:int64 -> v - - type 'uid digest = 'uid Carton.Dec.digest - - val uid_of_offset : - map:'fd W.map -> - digest:'uid digest -> - ('fd, 'uid) t -> - raw -> - cursor:int64 -> - Carton.kind * 'uid - - val uid_of_offset_with_source : - map:'fd W.map -> - digest:'uid digest -> - ('fd, 'uid) t -> - kind:Carton.kind -> - raw -> - depth:int -> - cursor:int64 -> - 'uid - - type 'uid oracle = 'uid Carton.Dec.oracle - - module Verify (Uid : Carton.UID) : sig - type status - - val pp : Format.formatter -> status -> unit - val is_resolved : status -> bool - val uid_of_status : status -> Uid.t - val kind_of_status : status -> Carton.kind - val depth_of_status : status -> int - val source_of_status : status -> Uid.t option - val offset_of_status : status -> int64 - val unresolved_base : cursor:int64 -> status - val unresolved_node : status - - val verify : - threads:int -> - map:'fd W.map -> - oracle:Uid.t oracle -> - verbose:(unit -> unit) -> - ('fd, Uid.t) t -> - matrix:status array -> - unit Lwt.t - end - - module Ip (Uid : Carton.UID) : sig - val iter : - threads:'a list -> - f:('a -> uid:Uid.t -> offset:int64 -> crc:Idx.optint -> unit Lwt.t) -> - Uid.t Idx.idx -> - unit Lwt.t - end -end - -module Enc : sig - type 'uid entry = 'uid Carton.Enc.entry - type 'uid delta = 'uid Carton.Enc.delta = From of 'uid | Zero - - val make_entry : - kind:Carton.kind -> - length:int -> - ?preferred:bool -> - ?delta:'uid delta -> - 'uid -> - 'uid entry - - val length : 'uid entry -> int - - type 'uid q = 'uid Carton.Enc.q - type 'uid p = 'uid Carton.Enc.p - type 'uid patch = 'uid Carton.Enc.patch - type 'uid load = 'uid -> Dec.v Lwt.t - type 'uid find = 'uid -> int option Lwt.t - - type 'uid uid = 'uid Carton.Enc.uid = { - uid_ln : int; - uid_rw : 'uid -> string; - } - - val target_to_source : 'uid q -> 'uid p - val target_uid : 'uid q -> 'uid - val entry_to_target : load:'uid load -> 'uid entry -> 'uid q Lwt.t - - val apply : - load:'uid load -> uid_ln:int -> source:'uid p -> target:'uid q -> unit Lwt.t - - module type VERBOSE = Carton.Enc.VERBOSE with type 'a fiber = 'a Lwt.t - module type UID = Carton.Enc.UID - - module Delta (Uid : UID) (Verbose : VERBOSE) : sig - val delta : - threads:Uid.t load list -> - weight:int -> - uid_ln:int -> - Uid.t entry array -> - Uid.t q array Lwt.t - end - - module N : sig - type encoder = Carton.Enc.N.encoder - - type b = Carton.Enc.N.b = { - i : Bigstringaf.t; - q : De.Queue.t; - w : De.Lz77.window; - } - - val encoder : b:b -> load:'uid load -> 'uid q -> encoder Lwt.t - val encode : o:Bigstringaf.t -> encoder -> [ `Flush of encoder * int | `End ] - val dst : encoder -> Bigstringaf.t -> int -> int -> encoder - end - - type b = Carton.Enc.b = { - i : Bigstringaf.t; - q : De.Queue.t; - w : De.Lz77.window; - o : Bigstringaf.t; - } - - val header_of_pack : length:int -> Bigstringaf.t -> int -> int -> unit - - val encode_target : - ?level:int -> - b:b -> - find:'uid find -> - load:'uid load -> - uid:'uid uid -> - 'uid q -> - cursor:int -> - (int * N.encoder) Lwt.t -end - -module Thin : sig - type 'uid light_load = 'uid -> (Carton.kind * int) Lwt.t - type 'uid heavy_load = 'uid -> Carton.Dec.v Lwt.t - type optint = Optint.t - - module Make (Uid : Carton.UID) : sig - type ('t, 'path, 'fd, 'error) fs = { - create : ?trunc:bool -> 't -> 'path -> ('fd, 'error) result Lwt.t; - append : 't -> 'fd -> string -> unit Lwt.t; - map : 't -> 'fd -> pos:int64 -> int -> Bigstringaf.t; - close : 't -> 
'fd -> (unit, 'error) result Lwt.t; - } - - val verify : - ?threads:int -> - digest:Uid.t Carton.Dec.digest -> - 't -> - 'path -> - ('t, 'path, 'fd, ([> `Msg of string ] as 'error)) fs -> - (unit -> (string * int * int) option Lwt.t) -> - ( int - * Uid.t list - * (int64 * optint) list - * Uid.t Dec.Idx.entry list - * int64 - * Uid.t, - 'error ) - result - Lwt.t - - val canonicalize : - light_load:Uid.t light_load -> - heavy_load:Uid.t heavy_load -> - src:'path -> - dst:'path -> - 't -> - ('t, 'path, 'fd, ([> `Msg of string ] as 'error)) fs -> - int -> - Uid.t list -> - int64 -> - (int64 * int64 * Uid.t * Uid.t Dec.Idx.entry list, 'error) result Lwt.t - end -end diff --git a/src/carton-lwt/dune b/src/carton-lwt/dune deleted file mode 100644 index c0b74e898..000000000 --- a/src/carton-lwt/dune +++ /dev/null @@ -1,11 +0,0 @@ -(library - (name carton_lwt) - (public_name carton-lwt) - (libraries - decompress.zl - decompress.de - optint - bigstringaf - carton - carton.thin - lwt)) diff --git a/src/carton-lwt/lwt_io.ml b/src/carton-lwt/lwt_io.ml deleted file mode 100644 index 38f9c27bd..000000000 --- a/src/carton-lwt/lwt_io.ml +++ /dev/null @@ -1,45 +0,0 @@ -module Lwt_scheduler = Carton.Make (struct - type +'a t = 'a Lwt.t -end) - -type lwt = Lwt_scheduler.t - -let prj x = Lwt_scheduler.prj x [@@inline] -let inj x = Lwt_scheduler.inj x [@@inline] - -module Mutex = struct - type 'a fiber = 'a Lwt.t - type t = Lwt_mutex.t - - let create () = Lwt_mutex.create () - let lock t = Lwt_mutex.lock t - let unlock t = Lwt_mutex.unlock t -end - -module Condition = struct - type 'a fiber = 'a Lwt.t - type mutex = Mutex.t - type t = unit Lwt_condition.t - - let create () = Lwt_condition.create () - let wait t mutex = Lwt_condition.wait ~mutex t - let signal t = Lwt_condition.signal t () - let broadcast t = Lwt_condition.broadcast t () -end - -type 'a t = 'a Lwt.t - -let bind x f = Lwt.bind x f -let return x = Lwt.return x -let parallel_map ~f lst = Lwt_list.map_p f lst -let parallel_iter ~f lst = Lwt_list.iter_p f lst - -(* XXX(dinosaure): provide the opportunity to use - * [Lwt_preemptive.detach]? *) -let detach f = - let th, wk = Lwt.wait () in - Lwt.async (fun () -> - let open Lwt.Infix in - let res = f () in - Lwt.pause () >|= fun () -> Lwt.wakeup_later wk res); - th diff --git a/src/carton/README.md b/src/carton/README.md deleted file mode 100644 index ad62e1941..000000000 --- a/src/carton/README.md +++ /dev/null @@ -1,81 +0,0 @@ -# Carton, a library to manipulate PACK files - -A PACK file is an archive of several objects. The goal of it is to store many -objects in a light weight way. 
PACK files are used by Git and the distribution -provides some tools to introspect them: - -## carton.get - -On your Git repository: - -```sh -$ ls .git/objects/pack -pack-066646d20f899ba23e2839340a04c9e0aa87e2e7.idx -pack-066646d20f899ba23e2839340a04c9e0aa87e2e7.pack -$ carton.get .git/objects/pack/*.pack 12 -00000000: 7472 6565 2064 3138 6130 3161 3837 6262 tree d18a01a87bb -00000010: 3033 3431 6662 6230 6564 6234 6633 3532 0341fbb0edb4f352 -00000020: 3233 6564 6633 3365 6163 3761 630a 7061 23edf33eac7ac.pa -00000030: 7265 6e74 2038 3437 3739 3433 6564 6465 rent 8477943edde -00000040: 3861 6563 3233 3237 3035 3235 3864 3938 8aec232705258d98 -00000050: 3963 3938 3766 6164 6562 3132 390a 6175 9c987fadeb129.au -00000060: 7468 6f72 2064 696e 6f73 6175 7265 203c thor dinosaure < -00000070: 726f 6d61 696e 2e63 616c 6173 6369 6265 romain.calascibe -00000080: 7474 6140 676d 6169 6c2e 636f 6d3e 2031 tta@gmail.com> 1 -00000090: 3631 3431 3636 3333 3020 2b30 3130 300a 614166330 +0100. -000000a0: 636f 6d6d 6974 7465 7220 6469 6e6f 7361 committer dinosa -000000b0: 7572 6520 3c72 6f6d 6169 6e2e 6361 6c61 ure 1614876875 + -000000e0: 3031 3030 0a0a 4164 6420 736f 6d65 2062 0100..Add some b -000000f0: 696e 6172 6965 7320 746f 2073 686f 7720 inaries to show -00000100: 686f 7720 746f 2075 7365 2063 6172 746f how to use carto -00000110: 6e0a n. -``` - -You extracted a Git object at the offset `12` into the only PACK file of your -Git repository. You can have several informations such as: -- the depth of the object -- the path to re-construct it -- the length -- the kind - -Several informations are not _like Git_ but they are similar. Indeed, carton -does not know about Git objects and it does not have clues to understand them -such as a _commit_. - -In our case, the kind: -- `a` is a commit -- `b` is a tree -- `c` is a blob -- `d` is a tag - -The path and the depth is how many objects we need to reconstruct to finally -reconstruct the requested objects. Indeed, the PACK file compresses objects -together. By this way, similar objects are highly compressed. - -## carton.index-pack - -On your Git repository, you can look at the `*.idx` file. This file is an -_index_ of the PACK file and it allows a fast access on it. When the user -requests an object with its hash, we do a lookup on the index file. - -Carton is able to (re-)generate the index file: -```sh -$ carton.index-pack .git/objects/pack/*.pack -o pack.idx -066646d20f899ba23e2839340a04c9e0aa87e2e7 -$ diff pack.idx .git/objects/pack/*.idx -``` - -Depending on your PACK file (if it's huge or not), the process can take a time. -The program generates a index file `pack.idx` which is strictly the same as -the index of your Git repository. 
- -## carton.verify-pack - -Of course, from a given index file, we can check an entire PACK file: -```sh -$ carton.verify-pack .git/objects/pack/*.idx -``` - -This program produces the same output as `git verify-pack` diff --git a/src/carton/carton.ml b/src/carton/carton.ml deleted file mode 100644 index 22f6b06e2..000000000 --- a/src/carton/carton.ml +++ /dev/null @@ -1,3 +0,0 @@ -include Sigs -module Dec = Dec -module Enc = Enc diff --git a/src/carton/dec.ml b/src/carton/dec.ml deleted file mode 100644 index 2863e7054..000000000 --- a/src/carton/dec.ml +++ /dev/null @@ -1,1839 +0,0 @@ -open Sigs - -let input_bigstring ic buf off len = - let tmp = Bytes.create len in - let len = input ic tmp 0 len in - Bigstringaf.blit_from_bytes tmp ~src_off:0 buf ~dst_off:off ~len; - len - -module Idx = Idx - -type ('fd, 's) read = 'fd -> bytes -> off:int -> len:int -> (int, 's) io - -module Fp (Uid : UID) = struct - type src = [ `Channel of in_channel | `String of string | `Manual ] - type optint = Optint.t - - type nonrec kind = - | Base of kind - | Ofs of { sub : int; source : int; target : int } - | Ref of { ptr : Uid.t; source : int; target : int } - - type decoder = { - src : src; - i : Bigstringaf.t; - i_pos : int; - i_len : int; - n : int; - (* number of objects *) - c : int; - (* counter of objects *) - v : int; - (* version of PACK file *) - r : int64; - (* how many bytes consumed *) - s : s; - o : Bigstringaf.t; - t_tmp : Bigstringaf.t; - t_len : int; - t_need : int; - t_peek : int; - ctx : Uid.ctx; - z : Zl.Inf.decoder; - k : decoder -> decode; - } - - and s = Header | Entry | Inflate of entry | Hash - - and decode = - [ `Await of decoder - | `Peek of decoder - | `Entry of entry * decoder - | `End of Uid.t - | `Malformed of string ] - - and entry = { - offset : int64; - kind : kind; - size : int; - consumed : int; - crc : optint; - } - - let with_source source entry = - match entry.kind with - | Ofs { sub; target; _ } -> - { entry with kind = Ofs { sub; source; target } } - | Ref { ptr; target; _ } -> - { entry with kind = Ref { ptr; source; target } } - | _ -> entry - - let source entry = - match entry.kind with - | Ofs { source; _ } | Ref { source; _ } -> source - | _ -> assert false - - let target entry = - match entry.kind with - | Ofs { target; _ } | Ref { target; _ } -> target - | _ -> assert false - - let with_target target entry = - match entry.kind with - | Ofs { sub; source; _ } -> - { entry with kind = Ofs { sub; source; target } } - | Ref { ptr; source; _ } -> - { entry with kind = Ref { ptr; source; target } } - | _ -> entry - - let i_rem d = d.i_len - d.i_pos + 1 - let number { n; _ } = n - let version { v; _ } = v - let count { c; _ } = c - let is_inflate = function Inflate _ -> true | _ -> false - let src_rem = i_rem - let eoi d = { d with i = Bigstringaf.empty; i_pos = 0; i_len = min_int } - let malformedf fmt = Fmt.kstr (fun err -> `Malformed err) fmt - let ctx { ctx; _ } = ctx - - let src d s j l = - if j < 0 || l < 0 || j + l > Bigstringaf.length s then - Fmt.invalid_arg "Source out of bounds"; - if l == 0 then eoi d - else - let z = if is_inflate d.s then Zl.Inf.src d.z s j l else d.z in - { d with i = s; i_pos = j; i_len = j + l - 1; z } - - let refill k d = - match d.src with - | `String _ -> k (eoi d) - | `Channel ic -> - let res = input_bigstring ic d.i 0 (Bigstringaf.length d.i) in - k (src d d.i 0 res) - | `Manual -> `Await { d with k } - - let rec peek k d = - match d.src with - | `String _ -> malformedf "Unexpected end of input" - | `Channel ic -> - let rem = 
i_rem d in - - if rem < d.t_peek then ( - Bigstringaf.blit d.i ~src_off:d.i_pos d.i ~dst_off:0 ~len:rem; - (* compress *) - let res = input_bigstring ic d.i rem (Bigstringaf.length d.i - rem) in - peek k (src d d.i 0 (rem + res))) - else k d - | `Manual -> - let rem = i_rem d in - - if rem < d.t_peek then ( - Bigstringaf.blit d.i ~src_off:d.i_pos d.i ~dst_off:0 ~len:rem; - (* compress *) - `Peek { d with k = peek k; i_pos = 0; i_len = rem - 1 }) - else k d - - let t_need d n = { d with t_need = n } - let t_peek d n = { d with t_peek = n } - - let rec t_fill k d = - let blit d len = - Bigstringaf.blit d.i ~src_off:d.i_pos d.t_tmp ~dst_off:d.t_len ~len; - { - d with - i_pos = d.i_pos + len; - r = Int64.add d.r (Int64.of_int len); - t_len = d.t_len + len; - } - in - let rem = i_rem d in - if rem < 0 then malformedf "Unexpected end of input" - else - let need = d.t_need - d.t_len in - - (* XXX(dinosaure): in the [`Manual] case, [i_pos = 1] and [blit] will fail where - offset with an empty buffer raises an exception. We protect it by [rem = 0] and - directly ask to refill inputs. *) - if rem = 0 then refill (t_fill k) d - else if rem < need then - let d = blit d rem in - refill (t_fill k) d - else - let d = blit d need in - k { d with t_need = 0 } - - let variable_length buf off top = - let p = ref off in - let i = ref 0 in - let len = ref 0 in - - while - let cmd = Char.code (Bigstringaf.get buf !p) in - incr p; - len := !len lor ((cmd land 0x7f) lsl !i); - i := !i + 7; - cmd land 0x80 != 0 && !p <= top - do - () - done; - !p - off, !len - - external get_int32 : bytes -> int -> int32 = "%caml_bytes_get32" - external swap32 : int32 -> int32 = "%bswap_int32" - - let get_int32_be = - if Sys.big_endian then fun buf off -> get_int32 buf off - else fun buf off -> swap32 (get_int32 buf off) - - let check_header : - type fd s. s scheduler -> (fd, s) read -> fd -> (int * string * int, s) io - = - fun { bind; return } read fd -> - let ( >>= ) = bind in - let tmp = Bytes.create 12 in - read fd tmp ~off:0 ~len:12 >>= fun len -> - if len < 12 then Fmt.invalid_arg "Invalid PACK file"; - let h = get_int32_be tmp 0 in - let v = get_int32_be tmp 4 in - let n = get_int32_be tmp 8 in - if h <> 0x5041434bl then - Fmt.invalid_arg "Invalid PACK file (header: %lx <> %lx)" h 0x5041434bl; - if v <> 0x2l then Fmt.invalid_arg "Invalid version of PACK file"; - return (Int32.to_int n, Bytes.unsafe_to_string tmp, len) - - let rec decode d = - match d.s with - | Header -> - let refill_12 k d = - if i_rem d >= 12 then - k d.i d.i_pos { d with i_pos = d.i_pos + 12; r = Int64.add d.r 12L } - else t_fill (k d.t_tmp 0) (t_need d 12) - in - let k buf off d = - let _ = Bigstringaf.get_int32_be buf off in - let v = Bigstringaf.get_int32_be buf (off + 4) |> Int32.to_int in - let n = Bigstringaf.get_int32_be buf (off + 8) |> Int32.to_int in - if v <> 2 then Fmt.invalid_arg "Carton handles only PACKv2"; - if d.c == n then - decode - { - d with - v; - n; - s = Hash; - k = decode; - ctx = Uid.feed d.ctx buf ~off ~len:12; - } - else - decode - { - d with - v; - n; - s = Entry; - k = decode; - ctx = Uid.feed d.ctx buf ~off ~len:12; - } - in - refill_12 k d - | Entry -> - (* TODO(dinosaure): we need something more robust than [15] where when it's not - enough to have the ofs-header and the zlib-header, [decompress] returns - an error - because we fill at the beginning the input buffer with [0] (then, - we reach end-of-input). 
*) - let peek_15 k d = peek k (t_peek d 15) in - let peek_uid k d = peek k (t_peek d (Uid.length + (* zlib *) 2)) in - - let k_ref_header crc offset size d = - let anchor = d.i_pos in - let uid = Bigstringaf.substring d.i ~off:d.i_pos ~len:Uid.length in - let uid = Uid.of_raw_string uid in - let d = { d with i_pos = d.i_pos + Uid.length } in - - let z = Zl.Inf.reset d.z in - let z = Zl.Inf.src z d.i d.i_pos (i_rem d) in - let crc = - Checkseum.Crc32.digest_bigstring d.i anchor (d.i_pos - anchor) crc - in - let e = - { - offset; - kind = Ref { ptr = uid; source = -1; target = -1 }; - size; - consumed = 0; - crc; - } - in - - decode - { - d with - r = Int64.add d.r (Int64.of_int Uid.length); - c = succ d.c; - z; - s = Inflate e; - k = decode; - ctx = Uid.feed d.ctx d.i ~off:anchor ~len:(d.i_pos - anchor); - } - in - - let k_ofs_header crc offset size d = - let p = ref d.i_pos in - let c = ref (Char.code (Bigstringaf.get d.i !p)) in - incr p; - let base_offset = ref (!c land 127) in - - while !c land 128 != 0 do - incr base_offset; - c := Char.code (Bigstringaf.get d.i !p); - incr p; - base_offset := (!base_offset lsl 7) + (!c land 127) - done; - - let z = Zl.Inf.reset d.z in - let z = Zl.Inf.src z d.i !p (i_rem { d with i_pos = !p }) in - let crc = - Checkseum.Crc32.digest_bigstring d.i d.i_pos (!p - d.i_pos) crc - in - let e = - { - offset; - kind = Ofs { sub = !base_offset; source = -1; target = -1 }; - size; - consumed = 0; - crc; - } - in - - decode - { - d with - i_pos = !p; - r = Int64.add d.r (Int64.of_int (!p - d.i_pos)); - c = succ d.c; - z; - s = Inflate e; - k = decode; - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len:(!p - d.i_pos); - } - in - - let k_header d = - let p = ref d.i_pos in - let c = ref (Char.code (Bigstringaf.get d.i !p)) in - incr p; - let kind = (!c asr 4) land 7 in - let size = ref (!c land 15) in - let shft = ref 4 in - - while !c land 0x80 != 0 do - c := Char.code (Bigstringaf.get d.i !p); - incr p; - size := !size + ((!c land 0x7f) lsl !shft); - shft := !shft + 7 - done; - - match kind with - | 0b000 | 0b101 -> malformedf "Invalid type" - | (0b001 | 0b010 | 0b011 | 0b100) as kind -> - let z = Zl.Inf.reset d.z in - let z = Zl.Inf.src z d.i !p (i_rem { d with i_pos = !p }) in - let k = - match kind with - | 0b001 -> `A - | 0b010 -> `B - | 0b011 -> `C - | 0b100 -> `D - | _ -> assert false - in - let crc = - Checkseum.Crc32.digest_bigstring d.i d.i_pos (!p - d.i_pos) - Checkseum.Crc32.default - in - let e = - { offset = d.r; kind = Base k; size = !size; consumed = 0; crc } - in - - decode - { - d with - i_pos = !p; - r = Int64.add d.r (Int64.of_int (!p - d.i_pos)); - c = succ d.c; - z; - s = Inflate e; - k = decode; - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len:(!p - d.i_pos); - } - | 0b110 -> - let offset = d.r in - let crc = - Checkseum.Crc32.digest_bigstring d.i d.i_pos (!p - d.i_pos) - Checkseum.Crc32.default - in - - peek_15 - (k_ofs_header crc offset !size) - { - d with - i_pos = !p; - r = Int64.add d.r (Int64.of_int (!p - d.i_pos)); - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len:(!p - d.i_pos); - } - | 0b111 -> - let offset = d.r in - let crc = - Checkseum.Crc32.digest_bigstring d.i d.i_pos (!p - d.i_pos) - Checkseum.Crc32.default - in - - peek_uid - (k_ref_header crc offset !size) - { - d with - i_pos = !p; - r = Int64.add d.r (Int64.of_int (!p - d.i_pos)); - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len:(!p - d.i_pos); - } - | _ -> assert false - in - peek_15 k_header d - | Inflate ({ kind = Base _; crc; _ } as entry) -> - let rec go z = - match Zl.Inf.decode z 
with - | `Await z -> - let len = i_rem d - Zl.Inf.src_rem z in - let crc = Checkseum.Crc32.digest_bigstring d.i d.i_pos len crc in - refill decode - { - d with - z; - i_pos = d.i_pos + len; - r = Int64.add d.r (Int64.of_int len); - s = Inflate { entry with crc }; - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len; - } - | `Flush z -> go (Zl.Inf.flush z) - | `Malformed err -> `Malformed (Fmt.str "inflate: %s" err) - | `End z -> - let len = i_rem d - Zl.Inf.src_rem z in - let crc = Checkseum.Crc32.digest_bigstring d.i d.i_pos len crc in - let z = Zl.Inf.reset z in - let decoder = - { - d with - i_pos = d.i_pos + len; - r = Int64.add d.r (Int64.of_int len); - z; - s = (if d.c == d.n then Hash else Entry); - k = decode; - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len; - } - in - let entry = - { - entry with - consumed = Int64.to_int (Int64.sub decoder.r entry.offset); - crc; - } - in - `Entry (entry, decoder) - in - go d.z - | Inflate ({ kind = Ofs _ | Ref _; crc; _ } as entry) -> - let source = ref (source entry) in - let target = ref (target entry) in - let first = ref (!source = -1 && !target = -1) in - - let rec go z = - match Zl.Inf.decode z with - | `Await z -> - let len = i_rem d - Zl.Inf.src_rem z in - let crc = Checkseum.Crc32.digest_bigstring d.i d.i_pos len crc in - let entry = with_source !source entry in - let entry = with_target !target entry in - refill decode - { - d with - z; - i_pos = d.i_pos + len; - r = Int64.add d.r (Int64.of_int len); - s = Inflate { entry with crc }; - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len; - } - | `Flush z -> - if !first then ( - let len = Bigstringaf.length d.o - Zl.Inf.dst_rem z in - let x, src_len = variable_length d.o 0 len in - let _, dst_len = variable_length d.o x len in - source := src_len; - target := dst_len; - first := false); - - go (Zl.Inf.flush z) - | `Malformed err -> `Malformed (Fmt.str "inflate: %s" err) - | `End z -> - if !first then ( - let len = Bigstringaf.length d.o - Zl.Inf.dst_rem z in - let x, src_len = variable_length d.o 0 len in - let _, dst_len = variable_length d.o x len in - source := src_len; - target := dst_len; - first := false); - - let len = i_rem d - Zl.Inf.src_rem z in - let crc = Checkseum.Crc32.digest_bigstring d.i d.i_pos len crc in - let z = Zl.Inf.reset z in - let decoder = - { - d with - i_pos = d.i_pos + len; - r = Int64.add d.r (Int64.of_int len); - z; - s = (if d.c == d.n then Hash else Entry); - k = decode; - ctx = Uid.feed d.ctx d.i ~off:d.i_pos ~len; - } - in - let entry = - { - entry with - crc; - consumed = Int64.to_int (Int64.sub decoder.r entry.offset); - } - in - let entry = with_source !source entry in - let entry = with_target !target entry in - `Entry (entry, decoder) - in - go d.z - | Hash -> - let refill_uid k d = - if i_rem d >= Uid.length then - k d.i d.i_pos - { - d with - i_pos = d.i_pos + Uid.length; - r = Int64.add d.r (Int64.of_int Uid.length); - } - else t_fill (k d.t_tmp 0) (t_need d Uid.length) - in - let k buf off d = - let expect = - Uid.of_raw_string (Bigstringaf.substring buf ~off ~len:Uid.length) - in - let have = Uid.get d.ctx in - - if Uid.equal expect have then `End have - else malformedf "Unexpected hash: %a <> %a" Uid.pp expect Uid.pp have - in - refill_uid k d - - type header = Consumed of Bigstringaf.t | None - - let decoder ~o ~allocate src = - let i, i_pos, i_len = - match src with - | `Manual -> Bigstringaf.empty, 1, 0 - | `String x -> - ( Bigstringaf.of_string x ~off:0 ~len:(String.length x), - 0, - String.length x - 1 ) - | `Channel _ -> Bigstringaf.create 
Zl.io_buffer_size, 1, 0 - in - { - src; - i; - i_pos; - i_len; - n = 0; - c = 0; - v = 0; - r = 0L; - o; - s = Header; - t_tmp = Bigstringaf.create Uid.length; - t_len = 0; - t_need = 0; - t_peek = 0; - ctx = Uid.empty; - z = Zl.Inf.decoder `Manual ~o ~allocate; - k = decode; - } - - let decode d = d.k d -end - -module W = struct - type 'fd t = { - mutable cur : int; - w : slice Weak.t; - m : int; - fd : 'fd; - sector : int64; - } - - and slice = { offset : int64; length : int; payload : Bigstringaf.t } - and 'fd map = 'fd -> pos:int64 -> int -> Bigstringaf.t - - let make ?(sector = 4096L) fd = - { cur = 0; w = Weak.create (0xffff + 1); m = 0xffff; fd; sector } - - let reset { w; _ } = Weak.fill w 0 (Weak.length w) None - let sector { sector; _ } = sector - - (* XXX(dinosaure): memoization. *) - - let heavy_load : type fd. map:fd map -> fd t -> int64 -> slice option = - fun ~map t w -> - let pos = Int64.(div w t.sector) in - let pos = Int64.(mul pos t.sector) in - - let payload = map t.fd ~pos (Int64.to_int t.sector) in - let slice = - Some { offset = pos; length = Bigstringaf.length payload; payload } - in - Weak.set t.w (t.cur land 0xffff) slice; - t.cur <- t.cur + 1; - slice - - let load : type fd. map:fd map -> fd t -> int64 -> slice option = - fun ~map t w -> - let exception Found in - let slice = ref None in - try - for i = 0 to Weak.length t.w - 1 do - match Weak.get t.w i with - | Some ({ offset; length; _ } as s) -> - if - w >= offset - && (w < Int64.(add offset (of_int length))) - && (length - Int64.(to_int (sub w offset))) >= 20 - (* XXX(dinosaure): when we want to load a new window, we need to see - if we have, at least, 20 bytes between the given offset and the - end of the window. Otherwise, we can return a window with 0 bytes - available according the given offset. *) - then ( - slice := Some s; - raise_notrace Found) - | None -> () - done; - heavy_load ~map t w - with Found -> !slice -end - -type raw = { raw0 : Bigstringaf.t; raw1 : Bigstringaf.t; flip : bool } -type v = { kind : kind; raw : raw; len : int; depth : int } - -type ('fd, 'uid) t = { - ws : 'fd W.t; - fd : 'uid -> int64; - uid_ln : int; - uid_rw : string -> 'uid; - tmp : Bigstringaf.t; - allocate : int -> Zl.window; -} - -let with_z tmp t = { t with tmp } -let with_w ws t = { t with ws } -let with_allocate ~allocate t = { t with allocate } -let fd { ws = { W.fd; _ }; _ } = fd - -let make : - type fd uid. - fd -> - ?sector:int64 -> - z:Bigstringaf.t -> - allocate:(int -> Zl.window) -> - uid_ln:int -> - uid_rw:(string -> uid) -> - (uid -> int64) -> - (fd, uid) t = - fun fd ?sector ~z ~allocate ~uid_ln ~uid_rw where -> - { ws = W.make ?sector fd; fd = where; uid_ln; uid_rw; tmp = z; allocate } - -type weight = int - -let weight_of_int_exn x = - if x < 0 then Fmt.invalid_arg "weight_of_int_exn" else x - -let null = 0 - -let weight_of_delta : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - weight:weight -> - cursor:int64 -> - W.slice -> - weight = - fun ~map t ~weight ~cursor slice -> - let decoder = Zh.M.decoder ~o:t.tmp ~allocate:t.allocate `Manual in - let rec go cursor decoder = - match Zh.M.decode decoder with - | `End _ -> - assert false - (* XXX(dinosaure): [`End] never appears before [`Header]. *) - | `Malformed err -> failwith err - | `Header (src_len, dst_len, _) -> max weight (max src_len dst_len) - | `Await decoder -> ( - match W.load ~map t.ws cursor with - | None -> - let decoder = Zh.M.src decoder De.bigstring_empty 0 0 in - (* XXX(dinosaure): End of stream, [Zh] should return [`Malformed] then. 
*) - (go [@tailcall]) cursor decoder - | Some slice -> - let off = Int64.(to_int (sub cursor slice.W.offset)) in - let len = slice.W.length - off in - let decoder = Zh.M.src decoder slice.W.payload off len in - (go [@tailcall]) - Int64.(add slice.W.offset (of_int slice.W.length)) - decoder) - in - let off = Int64.(to_int (sub cursor slice.W.offset)) in - let len = slice.W.length - off in - let decoder = Zh.M.src decoder slice.W.payload off len in - go Int64.(add slice.W.offset (of_int slice.W.length)) decoder - -let header_of_ref_delta ~map t cursor slice = - let slice = ref slice in - let i_pos = ref Int64.(to_int (sub cursor !slice.W.offset)) in - let i_rem = !slice.W.length - !i_pos in - - let consume = - if i_rem >= t.uid_ln then fun () -> incr i_pos - else - match - W.load ~map t.ws Int64.(add !slice.W.offset (of_int !slice.W.length)) - with - | None -> assert false - | Some next_slice -> - let consume () = - incr i_pos; - if !i_pos == !slice.W.length then ( - assert (!slice != next_slice); - (i_pos := - Int64.( - to_int - (sub - (add !slice.W.offset (of_int !slice.W.length)) - next_slice.W.offset))); - slice := next_slice) - in - consume - in - let uid = - if i_rem >= t.uid_ln then ( - let uid = - Bigstringaf.substring !slice.W.payload ~off:!i_pos ~len:t.uid_ln - in - let uid = t.uid_rw uid in - for _ = 0 to t.uid_ln - 1 do - consume () - done; - uid) - else - let uid = Bytes.create t.uid_ln in - for i = 0 to t.uid_ln - 1 do - Bytes.unsafe_set uid i (Bigstringaf.get !slice.W.payload !i_pos); - consume () - done; - t.uid_rw (Bytes.unsafe_to_string uid) - in - - uid, !i_pos, !slice - -let header_of_ofs_delta ~map t cursor slice = - let slice = ref slice in - let i_pos = ref Int64.(to_int (sub cursor !slice.W.offset)) in - let i_rem = !slice.W.length - !i_pos in - - let consume = - if i_rem >= 10 then fun () -> incr i_pos - else - match - W.load ~map t.ws Int64.(add !slice.W.offset (of_int !slice.W.length)) - with - | None -> assert false - | Some next_slice -> - let consume () = - incr i_pos; - if !i_pos == !slice.W.length then ( - assert (!slice != next_slice); - (i_pos := - Int64.( - to_int - (sub - (add !slice.W.offset (of_int !slice.W.length)) - next_slice.W.offset))); - slice := next_slice) - in - consume - in - let c = ref (Char.code (Bigstringaf.get !slice.W.payload !i_pos)) in - consume (); - let base_offset = ref (!c land 127) in - - while !c land 128 != 0 do - incr base_offset; - c := Char.code (Bigstringaf.get !slice.W.payload !i_pos); - consume (); - base_offset := (!base_offset lsl 7) + (!c land 127) - done; - - !base_offset, !i_pos, !slice - -let header_of_entry ~map t cursor slice0 = - let slice = ref slice0 in - let i_pos = ref Int64.(to_int (sub cursor !slice.W.offset)) in - let i_rem = !slice.W.length - !i_pos in - - let consume = - if i_rem >= 10 then fun () -> incr i_pos - else - match - W.load ~map t.ws Int64.(add !slice.W.offset (of_int !slice.W.length)) - with - | None -> assert false - | Some next_slice -> - let consume () = - incr i_pos; - if !i_pos == !slice.W.length then ( - assert (!slice != next_slice); - (i_pos := - Int64.( - to_int - (sub - (add !slice.W.offset (of_int !slice.W.length)) - next_slice.W.offset))); - slice := next_slice) - in - consume - in - try - let c = ref (Char.code (Bigstringaf.get !slice.W.payload !i_pos)) in - consume (); - let kind = (!c asr 4) land 7 in - let size = ref (!c land 15) in - let shft = ref 4 in - - while !c land 0x80 != 0 do - c := Char.code (Bigstringaf.get !slice.W.payload !i_pos); - consume (); - size := !size + 
((!c land 0x7f) lsl !shft); - shft := !shft + 7 - done; - - kind, !size, !i_pos, !slice - with Invalid_argument _index_out_of_bounds -> - let i_pos = Int64.(to_int (sub cursor slice0.W.offset)) in - 0, 0, i_pos, slice0 - -(* TODO(dinosaure): use [ewah] instead a list to check [visited]. *) - -exception Cycle - -let rec weight_of_ref_delta : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - weight:weight -> - ?visited:int64 list -> - cursor:int64 -> - W.slice -> - weight = - fun ~map t ~weight ?(visited = []) ~cursor slice -> - let uid, pos, slice = header_of_ref_delta ~map t cursor slice in - let len = Bigstringaf.length slice.W.payload - pos in - let pos, slice = - match len with - | 0 -> ( - match - W.load ~map t.ws Int64.(add slice.W.offset (of_int slice.W.length)) - with - | Some slice -> 0, slice - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_ref_delta])" - Int64.(add slice.W.offset (of_int slice.W.length))) - | _ -> pos, slice - in - let weight = - weight_of_delta ~map t ~weight - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - (weight_of_uid [@tailcall]) ~map t ~weight ~visited uid - -and weight_of_ofs_delta : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - weight:weight -> - ?visited:int64 list -> - anchor:int64 -> - cursor:int64 -> - W.slice -> - weight = - fun ~map t ~weight ?(visited = []) ~anchor ~cursor slice -> - let base_offset, pos, slice = header_of_ofs_delta ~map t cursor slice in - let len = Bigstringaf.length slice.W.payload - pos in - let pos, slice = - match len with - | 0 -> ( - match - W.load ~map t.ws Int64.(add slice.W.offset (of_int slice.W.length)) - with - | Some slice -> 0, slice - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_ofs_delta])" - Int64.(add slice.W.offset (of_int slice.W.length))) - | _ -> pos, slice - in - let weight = - weight_of_delta ~map t ~weight - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - (weight_of_offset [@tailcall]) ~map t ~weight ~visited - Int64.(sub anchor (of_int base_offset)) - -and weight_of_uid : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - weight:weight -> - ?visited:int64 list -> - uid -> - weight = - fun ~map t ~weight ?(visited = []) uid -> - let cursor = t.fd uid in - (weight_of_offset [@tailcall]) ~map t ~weight ~visited cursor - -and weight_of_offset : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - weight:weight -> - ?visited:int64 list -> - int64 -> - weight = - fun ~map t ~weight ?(visited = []) cursor -> - if List.exists (Int64.equal cursor) visited then raise Cycle; - let visited = cursor :: visited in - - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> ( - let kind, size, pos, slice = header_of_entry ~map t cursor slice in - match kind with - | 0b000 | 0b101 -> failwith "bad type" - | 0b001 | 0b010 | 0b011 | 0b100 -> max size weight - | 0b110 -> - (weight_of_ofs_delta [@tailcall]) ~map t ~weight:(max size weight) - ~visited ~anchor:cursor - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b111 -> - (weight_of_ref_delta [@tailcall]) ~map t ~weight:(max size weight) - ~visited - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | _ -> assert false) - -let length_of_offset : type fd uid. 
map:fd W.map -> (fd, uid) t -> int64 -> int - = - fun ~map t cursor -> - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> - let _, size, _, _ = header_of_entry ~map t cursor slice in - size - -let v ~kind ?(depth = 1) raw = - let len = Bigstringaf.length raw in - { - kind; - raw = { raw0 = raw; raw1 = Bigstringaf.empty; flip = true }; - len; - depth; - } - -let kind { kind; _ } = kind - -let make_raw ~weight = - let raw = Bigstringaf.create (weight * 2) in - { - raw0 = Bigstringaf.sub raw ~off:0 ~len:weight; - raw1 = Bigstringaf.sub raw ~off:weight ~len:weight; - flip = false; - } - -let weight_of_raw { raw0; _ } = Bigstringaf.length raw0 -let get_payload { raw0; raw1; flip } = if flip then raw0 else raw1 -let get_source { raw0; raw1; flip } = if flip then raw1 else raw0 -let flip t = { t with flip = not t.flip } -let raw { raw; _ } = get_payload raw -let len { len; _ } = len -let depth { depth; _ } = depth - -let copy ?(flip = false) ?weight v = - let weight = - match weight with - | Some weight -> weight - | None -> Bigstringaf.length v.raw.raw0 - in - let raw = Bigstringaf.create (weight * 2) in - Bigstringaf.unsafe_blit v.raw.raw0 ~src_off:0 raw ~dst_off:0 - ~len:(Bigstringaf.length v.raw.raw0); - Bigstringaf.unsafe_blit v.raw.raw1 ~src_off:0 raw ~dst_off:weight - ~len:(Bigstringaf.length v.raw.raw1); - { - kind = v.kind; - raw = - { - raw0 = Bigstringaf.sub raw ~off:0 ~len:weight; - raw1 = Bigstringaf.sub raw ~off:weight ~len:weight; - flip = (if not flip then v.raw.flip else not v.raw.flip); - }; - len = v.len; - depth = v.depth; - } - -let uncompress : - type fd uid. - map:fd W.map -> (fd, uid) t -> kind -> raw -> cursor:int64 -> W.slice -> v = - fun ~map t kind raw ~cursor slice -> - let o = get_payload raw in - let decoder = Zl.Inf.decoder `Manual ~o ~allocate:t.allocate in - let anchor = cursor in - - let rec go l p cursor decoder = - match Zl.Inf.decode decoder with - | `Malformed err -> Fmt.failwith "object <%08Lx>: %s" anchor err - | `End decoder -> - let len = Bigstringaf.length o - Zl.Inf.dst_rem decoder in - assert (p || ((not p) && len = 0)); - (* XXX(dinosaure): we gave a [o] buffer which is enough to store - inflated data. At the end, [decoder] should not return more than one - [`Flush]. A special case is when we inflate nothing: [`Flush] never - appears and we reach [`End] directly, so [!p (still) = false and len (must) = 0]. *) - { kind; raw; len = l; depth = 1 } - | `Flush decoder -> - let l = Bigstringaf.length o - Zl.Inf.dst_rem decoder in - assert (not p); - let p = true in - let decoder = Zl.Inf.flush decoder in - (go [@tailcall]) l p cursor decoder - | `Await decoder -> ( - match W.load ~map t.ws cursor with - | Some slice -> - let off = Int64.(to_int (sub cursor slice.W.offset)) in - let len = slice.W.length - off in - let decoder = Zl.Inf.src decoder slice.W.payload off len in - (go [@tailcall]) l p - Int64.(add slice.W.offset (of_int slice.W.length)) - decoder - | None -> - let decoder = Zl.Inf.src decoder Bigstringaf.empty 0 0 in - (go [@tailcall]) l p cursor decoder) - in - let off = Int64.(to_int (sub cursor slice.W.offset)) in - let len = slice.W.length - off in - let decoder = Zl.Inf.src decoder slice.W.payload off len in - go 0 false Int64.(add slice.W.offset (of_int slice.W.length)) decoder - -let of_delta : - type fd uid. 
- map:fd W.map -> - (fd, uid) t -> - kind -> - raw -> - depth:int -> - cursor:int64 -> - W.slice -> - v = - fun ~map t kind raw ~depth ~cursor slice -> - let decoder = Zh.M.decoder ~o:t.tmp ~allocate:t.allocate `Manual in - - let rec go cursor raw decoder = - match Zh.M.decode decoder with - | `End decoder -> - let len = Zh.M.dst_len decoder in - { kind; raw; len; depth } - | `Malformed err -> failwith err - | `Header (_src_len, dst_len, decoder) -> - let source = get_source raw in - let payload = get_payload raw in - - let decoder = Zh.M.source decoder source in - let decoder = Zh.M.dst decoder payload 0 dst_len in - (go [@tailcall]) cursor raw decoder - | `Await decoder -> ( - match W.load ~map t.ws cursor with - | None -> - let decoder = Zh.M.src decoder Bigstringaf.empty 0 0 in - (go [@tailcall]) cursor raw decoder - | Some slice -> - let off = Int64.(to_int (sub cursor slice.W.offset)) in - let len = slice.W.length - off in - let decoder = Zh.M.src decoder slice.W.payload off len in - (go [@tailcall]) - Int64.(add slice.W.offset (of_int slice.W.length)) - raw decoder) - in - let off = Int64.(to_int (sub cursor slice.W.offset)) in - let len = slice.W.length - off in - let decoder = Zh.M.src decoder slice.W.payload off len in - go Int64.(add slice.W.offset (of_int slice.W.length)) raw decoder - -let rec of_ofs_delta : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - raw -> - anchor:int64 -> - cursor:int64 -> - W.slice -> - v = - fun ~map t raw ~anchor ~cursor slice -> - let base_offset, pos, slice = header_of_ofs_delta ~map t cursor slice in - let len = Bigstringaf.length slice.W.payload - pos in - let pos, slice = - match len with - | 0 -> ( - match - W.load ~map t.ws Int64.(add slice.W.offset (of_int slice.W.length)) - with - | Some slice -> 0, slice - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [of_ofs_delta])" - Int64.(add slice.W.offset (of_int slice.W.length))) - | _ -> pos, slice - in - let v = - of_offset ~map t (flip raw) ~cursor:Int64.(sub anchor (of_int base_offset)) - in - of_delta ~map t v.kind raw ~depth:(succ v.depth) - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - -and of_ref_delta : - type fd uid. - map:fd W.map -> (fd, uid) t -> raw -> cursor:int64 -> W.slice -> v = - fun ~map t raw ~cursor slice -> - let uid, pos, slice = header_of_ref_delta ~map t cursor slice in - let len = Bigstringaf.length slice.W.payload - pos in - let pos, slice = - match len with - | 0 -> ( - match - W.load ~map t.ws Int64.(add slice.W.offset (of_int slice.W.length)) - with - | Some slice -> 0, slice - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [of_ref_delta])" - Int64.(add slice.W.offset (of_int slice.W.length))) - | _ -> pos, slice - in - let v = of_uid ~map t (flip raw) uid in - of_delta ~map t v.kind raw ~depth:(succ v.depth) - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - -and of_uid : type fd uid. map:fd W.map -> (fd, uid) t -> raw -> uid -> v = - fun ~map t raw uid -> - let cursor = t.fd uid in - of_offset ~map t raw ~cursor - -and of_offset : - type fd uid. 
map:fd W.map -> (fd, uid) t -> raw -> cursor:int64 -> v = - fun ~map t raw ~cursor -> - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> ( - let kind, _, pos, slice = header_of_entry ~map t cursor slice in - match kind with - | 0b000 | 0b101 -> failwith "bad type" - | 0b001 -> - uncompress ~map t `A raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b010 -> - uncompress ~map t `B raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b011 -> - uncompress ~map t `C raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b100 -> - uncompress ~map t `D raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b110 -> - of_ofs_delta ~map t raw ~anchor:cursor - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b111 -> - of_ref_delta ~map t raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | _ -> assert false) - -type path = { path : int64 array; depth : int; kind : [ `A | `B | `C | `D ] } - -let path_to_list { path; depth; _ } = Array.sub path 0 depth |> Array.to_list - -let kind_of_int = function - | 0b001 -> `A - | 0b010 -> `B - | 0b011 -> `C - | 0b100 -> `D - | _ -> assert false - -let rec fill_path_from_ofs_delta : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - depth:int -> - int64 array -> - anchor:int64 -> - cursor:int64 -> - W.slice -> - int * [ `A | `B | `C | `D ] = - fun ~map t ~depth path ~anchor ~cursor slice -> - let base_offset, _, _ = header_of_ofs_delta ~map t cursor slice in - (fill_path_from_offset [@tailcall]) ~map t ~depth:(succ depth) path - ~cursor:Int64.(sub anchor (of_int base_offset)) - -and fill_path_from_ref_delta : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - depth:int -> - int64 array -> - cursor:int64 -> - W.slice -> - int * [ `A | `B | `C | `D ] = - fun ~map t ~depth path ~cursor slice -> - let uid, _, _ = header_of_ref_delta ~map t cursor slice in - (fill_path_from_uid [@tailcall]) ~map t ~depth path uid - -and fill_path_from_uid : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - depth:int -> - int64 array -> - uid -> - int * [ `A | `B | `C | `D ] = - fun ~map t ~depth path uid -> - let cursor = t.fd uid in - path.(depth - 1) <- cursor; - (fill_path_from_offset [@tailcall]) ~map t ~depth:(succ depth) path ~cursor - -and fill_path_from_offset : - type fd uid. - map:fd W.map -> - (fd, uid) t -> - depth:int -> - int64 array -> - cursor:int64 -> - int * [ `A | `B | `C | `D ] = - fun ~map t ~depth path ~cursor -> - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> ( - path.(depth - 1) <- cursor; - let kind, _, pos, slice = header_of_entry ~map t cursor slice in - match kind with - | 0b000 | 0b101 -> failwith "bad type" - | (0b001 | 0b010 | 0b011 | 0b100) as v -> depth, kind_of_int v - | 0b110 -> - (fill_path_from_ofs_delta [@tailcall]) ~map t ~depth path - ~anchor:cursor - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b111 -> - (fill_path_from_ref_delta [@tailcall]) ~map t ~depth path - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | _ -> assert false) - -let path_of_offset : - type fd uid. map:fd W.map -> (fd, uid) t -> cursor:int64 -> path = - fun ~map t ~cursor -> - let path = Array.make _max_depth 0L in - let depth, kind = fill_path_from_offset ~map t ~depth:1 path ~cursor in - { depth; path; kind } - -let path_of_uid : type fd uid. 
map:fd W.map -> (fd, uid) t -> uid -> path = - fun ~map t uid -> - let cursor = t.fd uid in - path_of_offset ~map t ~cursor - -let of_offset_with_source : - type fd uid. - map:fd W.map -> (fd, uid) t -> kind -> raw -> depth:int -> cursor:int64 -> v - = - fun ~map t kind raw ~depth ~cursor -> - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> ( - let hdr, _, pos, slice = header_of_entry ~map t cursor slice in - match hdr with - | 0b000 | 0b101 -> failwith "bad type" - | 0b001 -> - assert (kind = `A); - uncompress ~map t `A raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b010 -> - assert (kind = `B); - uncompress ~map t `B raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b011 -> - assert (kind = `C); - uncompress ~map t `C raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b100 -> - assert (kind = `D); - uncompress ~map t `D raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b110 -> - let cursor = Int64.add slice.W.offset (Int64.of_int pos) in - let _, pos, slice = header_of_ofs_delta ~map t cursor slice in - of_delta ~map t kind raw ~depth - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | 0b111 -> - let cursor = Int64.add slice.W.offset (Int64.of_int pos) in - let _, pos, slice = header_of_ref_delta ~map t cursor slice in - of_delta ~map t kind raw ~depth - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - | _ -> assert false) - -let base_of_offset : - type fd uid. map:fd W.map -> (fd, uid) t -> raw -> cursor:int64 -> v = - fun ~map t raw ~cursor -> - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> - let hdr, _, pos, slice = header_of_entry ~map t cursor slice in - let kind = - match hdr with - | 0b001 -> `A - | 0b010 -> `B - | 0b011 -> `C - | 0b100 -> `D - | _ -> failwith "Invalid object" - in - uncompress ~map t kind raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - -let base_of_path { depth; path; _ } = path.(depth - 1) -let kind_of_path { kind; _ } = kind - -let of_offset_with_path : - type fd uid. - map:fd W.map -> (fd, uid) t -> path:path -> raw -> cursor:int64 -> v = - fun ~map t ~path raw ~cursor -> - assert (cursor = path.path.(0)); - let base = base_of_offset ~map t raw ~cursor:(base_of_path path) in - let rec go depth raw = - let v = - of_offset_with_source ~map t base.kind raw ~depth - ~cursor:path.path.(depth - 1) - in - if depth == 1 then v else (go [@tailcall]) (pred depth) (flip raw) - in - if path.depth > 1 then go (path.depth - 1) (flip raw) else base - -let of_offset_with_source : - type fd uid. map:fd W.map -> (fd, uid) t -> v -> cursor:int64 -> v = - fun ~map t { kind; raw; depth; _ } ~cursor -> - of_offset_with_source ~map t kind raw ~depth ~cursor - -type 'uid digest = kind:kind -> ?off:int -> ?len:int -> Bigstringaf.t -> 'uid - -let uid_of_offset : - type fd uid. 
- map:fd W.map -> - digest:uid digest -> - (fd, uid) t -> - raw -> - cursor:int64 -> - kind * uid = - fun ~map ~digest t raw ~cursor -> - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> - let hdr, _, pos, slice = header_of_entry ~map t cursor slice in - let kind = - match hdr with - | 0b001 -> `A - | 0b010 -> `B - | 0b011 -> `C - | 0b100 -> `D - | _ -> failwith "Invalid object" - in - let v = - uncompress ~map t kind raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - kind, digest ~kind ~len:v.len (get_payload raw) - -let uid_of_offset_with_source : - type fd uid. - map:fd W.map -> - digest:uid digest -> - (fd, uid) t -> - kind:kind -> - raw -> - depth:int -> - cursor:int64 -> - uid = - fun ~map ~digest t ~kind raw ~depth ~cursor -> - match W.load ~map t.ws cursor with - | None -> - Fmt.failwith "Reach end of pack (ask: %Ld, [weight_of_offset])" cursor - | Some slice -> ( - let hdr, _, pos, slice = header_of_entry ~map t cursor slice in - match hdr with - | 0b000 | 0b101 -> failwith "bad type" - | 0b001 -> - assert (kind = `A); - assert (depth = 1); - let v = - uncompress ~map t `A raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - digest ~kind ~len:v.len (get_payload raw) - | 0b010 -> - assert (kind = `B); - assert (depth = 1); - let v = - uncompress ~map t `B raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - digest ~kind ~len:v.len (get_payload raw) - | 0b011 -> - assert (kind = `C); - assert (depth = 1); - let v = - uncompress ~map t `C raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - digest ~kind ~len:v.len (get_payload raw) - | 0b100 -> - assert (kind = `D); - assert (depth = 1); - let v = - uncompress ~map t `D raw - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - digest ~kind ~len:v.len (get_payload raw) - | 0b110 -> - let _, pos, slice = - header_of_ofs_delta ~map t - Int64.(add slice.W.offset (of_int pos)) - slice - in - let v = - of_delta ~map t kind raw ~depth - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - digest ~kind ~len:v.len (get_payload raw) - | 0b111 -> - let _, pos, slice = - header_of_ref_delta ~map t - Int64.(add slice.W.offset (of_int pos)) - slice - in - let ({ raw; _ } as v) = - of_delta ~map t kind raw ~depth - ~cursor:Int64.(add slice.W.offset (of_int pos)) - slice - in - digest ~kind ~len:v.len (get_payload raw) - | _ -> assert false) - -type 'uid node = Node of int64 * 'uid * 'uid node list | Leaf of int64 * 'uid -and 'uid tree = Base of kind * int64 * 'uid * 'uid node list - -type 'uid children = cursor:int64 -> uid:'uid -> int64 list -type where = cursor:int64 -> int - -type 'uid oracle = { - digest : 'uid digest; - children : 'uid children; - where : where; - weight : cursor:int64 -> int; -} - -(* TODO: hide it with [weight]. 
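   An informal reading of the [oracle] fields, inferred from their use in
   [Verify] below (a description, not a specification): [digest] computes the
   user-defined identifier of an object from its kind and inflated payload;
   [children ~cursor ~uid] returns the offsets of the objects delta-ified
   against the object located at [cursor] (or identified by [uid]);
   [where ~cursor] maps an offset to its index into the [status] matrix filled
   by [Verify]; [weight ~cursor] returns how many bytes are needed to extract
   the object at [cursor].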
*) - -module Verify - (Uid : UID) - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) = -struct - let s = - let open Scheduler in - { - bind = (fun x f -> inj (IO.bind (prj x) (fun x -> prj (f x)))); - return = (fun x -> inj (IO.return x)); - } - - let ( >>= ) = IO.bind - - type status = - | Unresolved_base of int64 - | Unresolved_node - | Resolved_base of int64 * Uid.t * kind - | Resolved_node of int64 * Uid.t * kind * int * Uid.t - - let pp ppf = function - | Unresolved_base offset -> Fmt.pf ppf "(unresolved base %Ld)" offset - | Unresolved_node -> Fmt.pf ppf "unresolved node" - | Resolved_base (offset, uid, _) -> - Fmt.pf ppf "(resolved base <%a> %Ld)" Uid.pp uid offset - | Resolved_node (offset, uid, _, _, _) -> - Fmt.pf ppf "(resolved node <%a> %Ld)" Uid.pp uid offset - - let uid_of_status = function - | Resolved_node (_, uid, _, _, _) | Resolved_base (_, uid, _) -> uid - | Unresolved_node -> Fmt.invalid_arg "Current status is not resolved" - | Unresolved_base offset -> - Fmt.invalid_arg "Current status is not resolved (offset: %Ld)" offset - - let offset_of_status = function - | Resolved_node (offset, _, _, _, _) - | Resolved_base (offset, _, _) - | Unresolved_base offset -> - offset - | Unresolved_node -> Fmt.invalid_arg "Current status is not resolved" - - let kind_of_status = function - | Resolved_base (_, _, kind) | Resolved_node (_, _, kind, _, _) -> kind - | _ -> Fmt.invalid_arg "Current status is not resolved" - - let depth_of_status = function - | Resolved_base _ | Unresolved_base _ -> 0 - | Resolved_node (_, _, _, depth, _) -> depth - | Unresolved_node -> Fmt.invalid_arg "Current status is not resolved" - - let source_of_status = function - | Resolved_base _ | Unresolved_base _ -> None - | Resolved_node (_, _, _, _, source) -> Some source - | Unresolved_node -> Fmt.invalid_arg "Current status is not resolved" - - let rec nodes_of_offsets : - type fd. 
- map:fd W.map -> - oracle:Uid.t oracle -> - verbose:(unit -> unit) -> - (fd, Uid.t) t -> - kind:kind -> - raw -> - depth:int -> - cursors:int64 list -> - Uid.t node list = - fun ~map ~oracle ~verbose t ~kind raw ~depth ~cursors -> - match cursors with - | [] -> [] - | [ cursor ] -> ( - let uid = - uid_of_offset_with_source ~map ~digest:oracle.digest t ~kind raw - ~depth ~cursor - in - verbose (); - match oracle.children ~cursor ~uid with - | [] -> [ Leaf (cursor, uid) ] - | cursors -> - let nodes = - nodes_of_offsets ~map ~oracle ~verbose t ~kind (flip raw) - ~depth:(succ depth) ~cursors - in - [ Node (cursor, uid, nodes) ]) - | cursors -> - let source = get_source raw in - let source = - Bigstringaf.copy ~off:0 ~len:(Bigstringaf.length source) source - in - (* allocation *) - let res = Array.make (List.length cursors) (Leaf (-1L, Uid.null)) in - - List.iteri - (fun i cursor -> - let uid = - uid_of_offset_with_source ~map ~digest:oracle.digest t ~kind raw - ~depth ~cursor - in - verbose (); - match oracle.children ~cursor ~uid with - | [] -> res.(i) <- Leaf (cursor, uid) - | cursors -> - let nodes = - nodes_of_offsets ~map ~oracle ~verbose t ~kind (flip raw) - ~depth:(succ depth) ~cursors - in - Bigstringaf.blit source ~src_off:0 (get_source raw) ~dst_off:0 - ~len:(Bigstringaf.length source); - res.(i) <- Node (cursor, uid, nodes)) - cursors; - Array.to_list res - - let weight_of_tree : cursor:int64 -> ?uid:Uid.t -> Uid.t oracle -> int = - fun ~cursor ?uid oracle -> - let rec go cursor uid w0 = - let w1 = oracle.weight ~cursor in - let uid = Stdlib.Option.value uid ~default:Uid.null in - match oracle.children ~cursor ~uid with - | [] -> (max : int -> int -> int) w0 w1 - | cursors -> - let w1 = ref w1 in - List.iter (fun cursor -> w1 := go cursor None !w1) cursors; - (max : int -> int -> int) w0 !w1 - in - go cursor uid 0 - - (* XXX(dinosaure): we can do something which is tail-rec, TODO! *) - - let resolver : - type fd. - map:fd W.map -> - oracle:Uid.t oracle -> - verbose:(unit -> unit) -> - (fd, Uid.t) t -> - cursor:int64 -> - Uid.t tree = - fun ~map ~oracle ~verbose t ~cursor -> - let weight = weight_of_tree ~cursor oracle in - let raw = make_raw ~weight in - (* allocation *) - let kind, uid = uid_of_offset ~map ~digest:oracle.digest t raw ~cursor in - match oracle.children ~cursor ~uid with - | [] -> Base (kind, cursor, uid, []) - | cursors -> - let weight' = weight_of_tree ~cursor ~uid oracle in - let raw = - if weight' > weight then ( - let raw' = make_raw ~weight:weight' in - Bigstringaf.blit (get_payload raw) ~src_off:0 (get_payload raw') - ~dst_off:0 ~len:weight; - raw') - else raw - in - let nodes = - nodes_of_offsets ~map ~oracle ~verbose t ~kind (flip raw) ~depth:1 - ~cursors - in - Base (kind, cursor, uid, nodes) - - let update : - type fd. 
- map:fd W.map -> - oracle:Uid.t oracle -> - verbose:(unit -> unit) -> - (fd, Uid.t) t -> - cursor:int64 -> - matrix:status array -> - unit = - fun ~map ~oracle ~verbose t ~cursor ~matrix -> - let (Base (kind, cursor, uid, children)) = - resolver ~map ~oracle ~verbose t ~cursor - in - matrix.(oracle.where ~cursor) <- Resolved_base (cursor, uid, kind); - let rec go depth source = function - | Leaf (cursor, uid) -> - matrix.(oracle.where ~cursor) <- - Resolved_node (cursor, uid, kind, depth, source) - | Node (cursor, uid, children) -> - matrix.(oracle.where ~cursor) <- - Resolved_node (cursor, uid, kind, depth, source); - List.iter (go (succ depth) uid) children - in - List.iter (go 1 uid) children - - type m = { mutable v : int; m : IO.Mutex.t } - - let is_not_unresolved_base = function Unresolved_base _ -> false | _ -> true - - let is_resolved = function - | Unresolved_base _ | Unresolved_node -> false - | Resolved_base _ | Resolved_node _ -> true - - let is_base = function - | Unresolved_base _ | Resolved_base _ -> true - | _ -> false - - let unresolved_base ~cursor = Unresolved_base cursor - let unresolved_node = Unresolved_node - - let dispatcher : - type fd. - i:int -> - map:fd W.map -> - oracle:Uid.t oracle -> - verbose:(unit -> unit) -> - (fd, Uid.t) t -> - matrix:status array -> - mutex:m -> - unit IO.t = - fun ~i:_ ~map ~oracle ~verbose t ~matrix ~mutex -> - let rec go () = - IO.Mutex.lock mutex.m >>= fun () -> - while - mutex.v < Array.length matrix && is_not_unresolved_base matrix.(mutex.v) - do - mutex.v <- mutex.v + 1 - done; - if mutex.v >= Array.length matrix then ( - IO.Mutex.unlock mutex.m; - IO.return ()) - else - let root = mutex.v in - mutex.v <- mutex.v + 1; - IO.Mutex.unlock mutex.m; - match matrix.(root) with - | Unresolved_base cursor -> - IO.detach (fun () -> update ~map ~oracle ~verbose t ~cursor ~matrix) - >>= fun () -> (go [@tailcall]) () - | _ -> assert false - in - go () - - let verify : - type fd. 
- threads:int -> - map:fd W.map -> - oracle:Uid.t oracle -> - verbose:(unit -> unit) -> - (fd, Uid.t) t -> - matrix:status array -> - unit IO.t = - fun ~threads ~map ~oracle ~verbose t0 ~matrix -> - let mutex = { v = 0; m = IO.Mutex.create () } in - - IO.parallel_iter - ~f:(fun (i, t) -> dispatcher ~i ~map ~oracle ~verbose t ~matrix ~mutex) - (List.init threads (fun th -> - let z = - Bigstringaf.copy t0.tmp ~off:0 ~len:(Bigstringaf.length t0.tmp) - in - ( th, - { - t0 with - ws = W.make ~sector:t0.ws.sector t0.ws.W.fd; - tmp = z; - allocate = t0.allocate; - } ))) -end - -module Ip - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) - (Uid : UID) = -struct - type optint = Idx.optint - - let ( >>= ) = IO.bind - let return = IO.return - - module K = struct - type t = Uid.t - - let compare = Uid.compare - end - - module V = struct - type t = int64 * optint - - let compare (a, _) (b, _) = compare a b - end - - module Q = Psq.Make (K) (V) - - let consumer ~f ~q ~finish ~signal ~mutex = - let rec go () = - IO.Mutex.lock mutex >>= fun () -> - let rec wait () = - if Q.is_empty q.contents && not !finish then - IO.Condition.wait signal mutex >>= wait - else return () - in - wait () >>= fun () -> - match Q.pop q.contents with - | Some ((uid, (offset, crc)), q') -> - q := q'; - IO.Mutex.unlock mutex; - f ~uid ~offset ~crc >>= go - | None -> - assert !finish; - IO.Mutex.unlock mutex; - return () - in - go () - - let producer ~idx ~q ~finish ~signal ~mutex = - let p = ref 0 in - - let rec go () = - IO.Mutex.lock mutex >>= fun () -> - let v = !p in - - if v >= Idx.max idx then ( - finish := true; - IO.Condition.broadcast signal; - IO.Mutex.unlock mutex; - return ()) - else ( - incr p; - let uid = Idx.get_uid idx v - and offset = Idx.get_offset idx v - and crc = Idx.get_crc idx v in - - q := Q.add uid (offset, crc) !q; - - IO.Condition.signal signal; - IO.Mutex.unlock mutex; - go ()) - in - go () - - type 'a rdwr = Producer | Consumer of 'a - - (* XXX(dinosaure): priority queue is needed to avoid fragmentation of [mmap] - and explosion of virtual memory. *) - - let iter ~threads ~f idx = - let mutex = IO.Mutex.create () in - let signal = IO.Condition.create () in - let finish = ref false in - let q = ref Q.empty in - - IO.parallel_iter - ~f:(function - | Producer -> producer ~idx ~q ~finish ~signal ~mutex - | Consumer t -> consumer ~f:(f t) ~q ~finish ~signal ~mutex) - (Producer :: List.map (fun x -> Consumer x) threads) -end diff --git a/src/carton/dec.mli b/src/carton/dec.mli deleted file mode 100644 index 77b54379c..000000000 --- a/src/carton/dec.mli +++ /dev/null @@ -1,478 +0,0 @@ -(** Decoder of a PACK file. - - Along this module, the type [('a, 's) io] with a ['s scheduler] is needed - for some operations (which use a {i syscall}). To be able to use them, the - use must create a new type ['s] which represents the scheduler. To do that - with LWT for example: - - {[ - module Lwt_scheduler = Make (Lwt) - - let scheduler = - let open Lwt.Infix in - let open Lwt_scheduler in - { - bind = (fun x f -> inj (x >>= fun x -> prj (f x))); - return = (fun x -> inj x); - } - ]} - - The produced module has 2 functions [inj] and [prj] to pass from or to an - LWT value. 
The user can use these functions like: - - {[ - let fiber = - let ( >>= ) = scheduler.bind in - let return = scheduler.return in - - weight_of_offset scheduler ~map t ~weight:null 0L >>= fun weight -> - let raw = make_raw ~weight in - of_offset scheduler ~map t raw ~cursor:0L in - prj fiber ;; - - : (Carton.v, [> error ]) Lwt.t = - ]} *) - -open Sigs - -(** {1 {i Memoization} of [map].} - - Instead to systematically call [map] to load {i memory-page}, we {i memoize} - the call by a simple internal table of {i weak pointers}. *) -module W : sig - type 'fd t - and slice = { offset : int64; length : int; payload : Bigstringaf.t } - and 'fd map = 'fd -> pos:int64 -> int -> Bigstringaf.t - - val reset : 'fd t -> unit - val make : ?sector:int64 -> 'fd -> 'fd t - val sector : 'fd t -> int64 - val load : map:'fd map -> 'fd t -> int64 -> slice option -end - -type weight = private int -(** Type of [weight]. [weight] is {b not} {i length} of object but bytes needed - to extract it. *) - -val null : weight -(** {i zero} weight. *) - -val weight_of_int_exn : int -> weight -(** [weight_of_int_exn n] is the weight of [n]. *) - -type ('fd, 's) read = 'fd -> bytes -> off:int -> len:int -> (int, 's) io -(** Type of read {i syscall}. *) - -module Idx = Idx - -(** {1 First-pass of a PACK file.} - - From a {i stream}, it is possible to infer information needed then to apply - a second analyse to extract all objects of a the given PACK file. From Git, - this {i pass} appears when the client [clone]/[fetch] and the program counts - how many objects the PACK file has. - - {[ - $ git clone ... - remote: Enumerating objects: 105, done. - remote: Counting objects: 100% (105/105), done. - remote: Compressing objects: 100% (81/81), done. - remote: Total 305 (delta 41), reused 75 (delta 23), pack-reused 200 - Receiving objects: 100% (305/305), 104.46 KiB | 0 bytes/s, done. # first pass - ]} *) -module Fp (Uid : UID) : sig - type optint = Optint.t - - type kind = - | Base of [ `A | `B | `C | `D ] - | Ofs of { sub : int; source : weight; target : weight } - | Ref of { ptr : Uid.t; source : weight; target : weight } - (** Type of PACK objects. *) - - type entry = { - offset : int64; (** Absolute offset into the given PACK file. *) - kind : kind; (** Kind of the object. *) - size : weight; (** Length of the inflated object. *) - consumed : int; - (** Length of the deflated object (as it is into the PACK file). *) - crc : optint; - (** Check-sum of the entry (header plus the deflated object). *) - } - (** Type of a PACK entry. *) - - val check_header : - 's scheduler -> ('fd, 's) read -> 'fd -> (int * string * int, 's) io - - type decoder - (** The type for decoders. *) - - type src = [ `Channel of in_channel | `String of string | `Manual ] - (** The type for input sources. With a [`Manual] source the client must - provide input with {!src}. *) - - type decode = - [ `Await of decoder - | `Peek of decoder - | `Entry of entry * decoder - | `End of Uid.t - | `Malformed of string ] - - type header = Consumed of Bigstringaf.t | None - - val decoder : o:Bigstringaf.t -> allocate:(int -> De.window) -> src -> decoder - val decode : decoder -> decode - val number : decoder -> int - val version : decoder -> int - val count : decoder -> int - val ctx : decoder -> Uid.ctx - val src_rem : decoder -> int - val src : decoder -> Bigstringaf.t -> int -> int -> decoder -end - -type ('fd, 'uid) t -(** Type of state used to access to any objects into a [Carton] file. 
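    For illustration, here is a minimal sketch (not part of this interface) of
    how such a state can be built. It assumes a Unix file descriptor, 20-byte
    SHA-1 identifiers kept as raw strings, the allocators provided by
    [decompress], and an [index] function supplied by the caller (usually
    backed by an IDX file, see {!Idx}):

    {[
      (* [map] loads a memory page of the PACK file, clamped to its size. *)
      let map fd ~pos len =
        let stat = Unix.LargeFile.fstat fd in
        let len =
          min len Int64.(to_int (sub stat.Unix.LargeFile.st_size pos)) in
        Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false [| len |]
        |> Bigarray.array1_of_genarray

      let make_unix ~index fd =
        make fd
          ~z:(De.bigstring_create De.io_buffer_size)
          ~allocate:(fun bits -> De.make_window ~bits)
          ~uid_ln:20
          ~uid_rw:(fun x -> x) (* identifiers kept as raw strings *)
          index
    ]}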
*) - -(**/*) - -val header_of_entry : - map:'fd W.map -> - ('fd, 'uid) t -> - int64 -> - W.slice -> - int * int * int * W.slice - -(**/*) - -val with_z : Bigstringaf.t -> ('fd, 'uid) t -> ('fd, 'uid) t -(** [with_z new t] replaces the used temporary buffer by [t] by [new]. Indeed, - when the user wants to extract an object, the internal temporary buffer is - used to store the inflated object. By this way, a parallel/concurrent - computation of 2 extractions with the same [t] is unsafe. - - So, this function allows the user to create a {i new} [t] with a new - dedicated temporary buffer (physically different from the old one) to be - able to start a parallel/concurrent process. *) - -val with_w : 'fd W.t -> ('fd, 'uid) t -> ('fd, 'uid) t -(** [with_w w t] replaces the used table {!W.t} by [w]. As {!with_z}, the - purpose of this function is to be able to {i parallelize} multiple {!t}. *) - -val with_allocate : - allocate:(int -> De.window) -> ('fd, 'uid) t -> ('fd, 'uid) t -(** [with_allocate allocate t] replaces the function to allocate the window - needed to inflate objects by [allocate]. As {!with_z}, the purpose of this - function is to be able to {i parallelize} multiple [t]. *) - -val fd : ('fd, 'uid) t -> 'fd -(** [fd t] returns the underlying used [fd] resource to map memory parts of it. - On [Unix], even if a mapped memory part can live if [fd] is the close, the - resource should be open as long as the user extracts objects. *) - -type raw -(** Type of a [Carton] object as is into a [Carton] file. *) - -val make_raw : weight:weight -> raw -(** [make_raw ~weight] allocates a raw. *) - -val weight_of_raw : raw -> weight - -type v -(** Type of values. *) - -val v : kind:kind -> ?depth:int -> Bigstringaf.t -> v -(** [v ~kind ?depth raw] is a value [raw] typed by [kind]. [?depth] is an - optional value to know at which depth the object exists into the PACK file - it came from (default to [1]). *) - -val kind : v -> kind -(** [kind v] is the type of the object [v]. *) - -val raw : v -> Bigstringaf.t -(** [raw v] is the contents of the object [v]. - - {b Note.} The {!Bigstringaf.t} can be larger (and contain extra contents) - than [len v] (see {!len}). The user should {!Bigstringaf.sub} it with the - real length of the object. *) - -val len : v -> int -(** [len v] is the length of the object [v]. *) - -val depth : v -> int -(** [depth v] is the depth of the object into the PACK file it came from. *) - -val copy : ?flip:bool -> ?weight:weight -> v -> v -(** [copy v] creates a fresh new object which is equal to the given [v]. *) - -val make : - 'fd -> - ?sector:int64 -> - z:Zl.bigstring -> - allocate:(int -> Zl.window) -> - uid_ln:int -> - uid_rw:(string -> 'uid) -> - ('uid -> int64) -> - ('fd, 'uid) t -(** [make fd ~z ~allocate ~uid_ln ~uid_rw where] returns a state associated to - [fd] which is the user-defined representation of a [Carton] file. Some - informations are needed: - - - [z] is an underlying buffer used to {i inflate} an object. - - [allocate] is an {i allocator} of underlying {i window} used to - {i inflate} an object. - - [uid_ln] is the length of {i raw} representation of user-defined {i uid}. - - [uid_rw] is the {i cast-function} from a string to user-defined {i uid}. - - [where] is the function to associate an {i uid} to an {i offset} into the - associated [Carton] file. - - Each argument depends on what the user wants. For example, if [t] is used by - {!Verify.verify}, [allocate] {b must} be thread-safe according to {!IO}. - [where] is not used by {!Verify.verify}. 
[uid_ln] and [uid_rw] depends on - the [Carton] file associated by [fd]. Each functions available below - describes precisely what they do on [t]. *) - -(** {3 Weight of object.} - - Before to extract an object, we must know resources needed to extract it. - [weight_of_offset]/[weight_of_uid] do an simple analyse and return the - larger length needed to store the requested object such as: - - {[ - weight_of_offset unix ~map t ~weight:null 0L >>= fun weight -> - assert ((null :> int) <= (weight :> int)) ; - Fmt.epr "Object at %08Lx needs %d byte(s).\n%!" 0L (weight :> int) ; - let resource = make_raw ~weight in - ... - ]} - - An object can need an other object (see [OBJ_OFS_DELTA] and - [OBJ_REF_DELTA]). In this case, the resource needed must be larger/enough to - store both objects. So the analyse is recursive over the {i delta-chain}. - - {b Note.} If the given PACK file represented by [t] is bad, [Cycle] is - raised. It means that an object A refers to an object B which refers to our - last object A. - - {b Note.} This process is not {i tail-rec} and discover at each step if it - needs to continue the {i delta-chain} or not. *) - -exception Cycle - -val weight_of_offset : - map:'fd W.map -> - ('fd, 'uid) t -> - weight:weight -> - ?visited:int64 list -> - int64 -> - weight -(** [weight_of_offset sched ~map t ~weight offset] returns the [weight] of the - given object available at [offset] into [t]. This function assumes: - - {[ - weight_of_offset sched ~map t ~weight:a offset >>= fun b -> - assert ((a :> int) <= (b :> int)) - ]} - - {b Note.} This function can try to partially inflate objects. So, this - function can use internal buffers and it is not {i thread-safe}. - - {b Note.} This function can try to {i look-up} an other object if it - extracts an [OBJ_REF_DELTA] object. However, if we suppose that we process a - PACKv2, an [OBJ_REF_DELTA] {i usually} points to an external object (see - {i thin}-pack). *) - -val weight_of_uid : - map:'fd W.map -> - ('fd, 'uid) t -> - weight:weight -> - ?visited:int64 list -> - 'uid -> - weight -(** [weight_of_offset sched ~map t ~weight uid] returns the [weight] of the - given object identified by [uid] into [t]. This function assumes the same - assumption as {!weight_of_offset}. - - {b Note.} As {!weight_of_offset}, this function can inflate objects and use - internal buffers and it is not {i thread-safe}. - - {b Note.} Despite {!weight_of_offset}, this function {b look-up} the object - from the given reference. *) - -val length_of_offset : map:'fd W.map -> ('fd, 'uid) t -> int64 -> int - -(** {3 Value of object.} *) - -val of_offset : map:'fd W.map -> ('fd, 'uid) t -> raw -> cursor:int64 -> v -(** [of_offset sched ~map raw ~cursor] is the object at the offset [cursor] into - [t]. The function is not {i tail-recursive}. It discovers at each step if - the object depends on another one (see [OBJ_REF_DELTA] or [OBJ_OFS_DELTA]). - - {b Note.} This function does not allocate larges resources (or, at least, - only the given [allocate] function to {!t} is able to allocate a large - resource). [raw] (which should be created with the associated {!weight} - given by {!weight_of_offset}) is enough to extract the object. *) - -val of_uid : map:'fd W.map -> ('fd, 'uid) t -> raw -> 'uid -> v -(** As {!of_offset}, [of_uid sched ~map raw uid] is the object identified by - [uid] into [t]. *) - -(** {3 Path of object.} - - Due to the fact that {!of_offset}/{!of_uid} are not {i tail-rec}, an other - solution exists to extract an object from the PACK file. 
However, this - solution requires a {i meta-data} {!path} to be able to extract an object. - - A {!path} is the {i delta-chain} of the object. It assumes that a - {i delta-chain} can not be larger than [60] (see Git assumptions). From it, - the way to construct an object is well-know and the step to discover if an - object depends on an other one is deleted - and we ensure that the - reconstruction is bound over our {!path}. - - This solution fits well when we want to {i memoize} the extraction. *) - -type path -(** The type of paths. *) - -val path_to_list : path -> int64 list -(** [path_to_list path] returns the {i delta-chain} of the given [path]. *) - -val kind_of_path : path -> [ `A | `B | `C | `D ] -(** [kind_of_path path] returns the kind of the object associated to the given - [path]. An assumption exists about PACK format, a {i delta-chain} refers to - several objects which must have the same type/kind. *) - -val path_of_offset : map:'fd W.map -> ('fd, 'uid) t -> cursor:int64 -> path -(** [path_of_offset sched ~map t ~cursor] is that {!path} of the given object - available at [cursor]. - - {b Note.} This function can try to partially inflate objects. So, this - function can use internal buffers and it is not {i thread-safe}. - - {b Note.} This function can try to {i look-up} an other object if it - extracts an [OBJ_REF_DELTA] object. However, if we suppose that we process a - PACKv2, an [OBJ_REF_DELTA] {i usually} points to an external object (see - {i thin}-pack). *) - -val path_of_uid : map:'fd W.map -> ('fd, 'uid) t -> 'uid -> path -(** [path_of_uid sched ~map t uid] is the {!path} of the given object identified - by [uid] into [t]. - - {b Note.} As {!weight_of_offset}, this function can inflate objects and use - internal buffers and it is not {i thread-safe}. - - {b Note.} Despite {!weight_of_offset}, this function {b look-up} the object - from the given reference. *) - -val of_offset_with_path : - map:'fd W.map -> ('fd, 'uid) t -> path:path -> raw -> cursor:int64 -> v -(** [of_offset_with_path sched ~map t ~path raw ~cursor] is the object available - at [cursor] into [t]. This function is {i tail-recursive} and bound to the - given [path]. *) - -val of_offset_with_source : - map:'fd W.map -> ('fd, 'uid) t -> v -> cursor:int64 -> v -(** [of_offset_with_source ~map t ~path source ~cursor] is the object available - at [cursor] into [t]. This function is {i tail-recursive} and use the given - [source] if the requested object is a patch. *) - -(** {3 Uid of object.} - - Unique identifier of objects is a user-defined type which is not described - by the format of the PACK file. By this fact, the way to {i digest} an - object is at the user's discretion. For example, Git {i prepends} the value - by an header such as: - - {[ - let digest v = - let kind = match kind v with - | `A -> "commit" - | `B -> "tree" - | `C -> "blob" - | `D -> "tag" in - let hdr = Fmt.str "%s %d\000" kind (len v) int - let ctx = Digest.empty in - feed_string ctx hdr ; - feed_bigstring ctx (Bigstringaf.sub (raw v) 0 (len v)) ; - finalize ctx - ]} - - Of course, the user can decide how to digest a value (see {!digest}). - However, 2 objects with the same contents but different types should have - different unique identifier. 
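    As a concrete (and purely illustrative) variant of the sketch above,
    assuming [digestif]'s SHA-1 implementation and the Git convention for
    kinds, a {!digest} function can be written as:

    {[
      let digest ~kind ?(off = 0) ?len buf =
        let len =
          match len with
          | Some len -> len
          | None -> Bigstringaf.length buf - off in
        let kind = match kind with
          | `A -> "commit" | `B -> "tree" | `C -> "blob" | `D -> "tag" in
        (* Git prepends the payload with "<kind> <length>\000". *)
        let hdr = Fmt.str "%s %d\000" kind len in
        let ctx = Digestif.SHA1.empty in
        let ctx = Digestif.SHA1.feed_string ctx hdr in
        let ctx = Digestif.SHA1.feed_bigstring ctx ~off ~len buf in
        Digestif.SHA1.get ctx
    ]}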
*) - -type 'uid digest = kind:kind -> ?off:int -> ?len:int -> Bigstringaf.t -> 'uid - -val uid_of_offset : - map:'fd W.map -> - digest:'uid digest -> - ('fd, 'uid) t -> - raw -> - cursor:int64 -> - kind * 'uid - -val uid_of_offset_with_source : - map:'fd W.map -> - digest:'uid digest -> - ('fd, 'uid) t -> - kind:kind -> - raw -> - depth:int -> - cursor:int64 -> - 'uid - -type 'uid children = cursor:int64 -> uid:'uid -> int64 list -type where = cursor:int64 -> int - -type 'uid oracle = { - digest : 'uid digest; - children : 'uid children; - where : where; - weight : cursor:int64 -> weight; -} - -(** {3 Verify.} - - When the user get a PACK file, he must generate an IDX file (see {!Idx}) - from it - to be able to look-up objects from their [uid]. [Verify] is a - process which try to create an OCaml representation of the IDX file. This - process requires some information (see {!oracle}) which can be collected by - a first analyse (see {!Fp}). Then, the process wants to take the opportunity - to {i parallelize} extraction (depending on the {!IO} implementation). *) - -module Verify - (Uid : UID) - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) : sig - val s : Scheduler.t scheduler - - type status - - val pp : Format.formatter -> status -> unit - val is_resolved : status -> bool - val is_base : status -> bool - val uid_of_status : status -> Uid.t - val kind_of_status : status -> kind - val depth_of_status : status -> int - val source_of_status : status -> Uid.t option - val offset_of_status : status -> int64 - val unresolved_base : cursor:int64 -> status - val unresolved_node : status - - val verify : - threads:int -> - map:'fd W.map -> - oracle:Uid.t oracle -> - verbose:(unit -> unit) -> - ('fd, Uid.t) t -> - matrix:status array -> - unit IO.t -end - -module Ip - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) - (Uid : UID) : sig - val iter : - threads:'a list -> - f:('a -> uid:Uid.t -> offset:int64 -> crc:Idx.optint -> unit IO.t) -> - Uid.t Idx.idx -> - unit IO.t -end diff --git a/src/carton/dune b/src/carton/dune deleted file mode 100644 index 0a37ca462..000000000 --- a/src/carton/dune +++ /dev/null @@ -1,29 +0,0 @@ -(library - (name carton) - (modules carton dec enc h idx sigs zh) - (public_name carton) - (libraries - ke - duff - optint - checkseum - decompress.de - decompress.zl - bigstringaf - psq - fmt)) - -(library - (name thin) - (modules thin) - (public_name carton.thin) - (libraries - optint - checkseum - decompress.de - decompress.zl - bigstringaf - logs - carton - cstruct - ke)) diff --git a/src/carton/enc.ml b/src/carton/enc.ml deleted file mode 100644 index a6612611b..000000000 --- a/src/carton/enc.ml +++ /dev/null @@ -1,611 +0,0 @@ -open Sigs - -type 'uid entry = { - uid : 'uid; - kind : kind; - length : int; - preferred : bool; - delta : 'uid delta; -} - -and 'uid delta = From of 'uid | Zero - -let make_entry ~kind ~length ?(preferred = false) ?(delta = Zero) uid = - { uid; kind; length; preferred; delta } - -let length { length; _ } = length - -module Utils = struct - let length_of_variable_length n = - let rec go r = function 0 -> r | n -> go (succ r) (n lsr 7) in - go 1 (n lsr 7) - - let cmd off len = - let cmd = ref 0 in - - if off land 0x000000ff <> 0 then cmd := !cmd lor 0x01; - if off land 0x0000ff00 <> 0 then cmd := !cmd lor 0x02; - if off land 0x00ff0000 <> 0 then cmd := !cmd lor 0x04; - if off land 0x7f000000 <> 0 then cmd := !cmd lor 0x08; - - if len land 0x0000ff <> 0 then cmd := !cmd lor 0x10; - if len land 0x00ff00 <> 0 
then cmd := !cmd lor 0x20; - if len land 0xff0000 <> 0 then cmd := !cmd lor 0x40; - - !cmd - [@@inline] - - let length_of_copy_code ~off ~len = - let required = - let a = [| 0; 1; 1; 2; 1; 2; 2; 3; 1; 2; 2; 3; 2; 3; 3; 4 |] in - fun x -> a.(x land 0xf) + a.(x lsr 4) - in - let cmd = cmd off len in - required cmd - - let length ~source ~target hunks = - length_of_variable_length source - + length_of_variable_length target - + List.fold_left - (fun acc -> function - | Duff.Insert (_, len) -> 1 + len + acc - | Duff.Copy (off, len) -> 1 + length_of_copy_code ~off ~len + acc) - 0 hunks -end - -module W = struct - type 'a t = 'a Weak.t - - let create () = Weak.create 1 - - let create_with v = - let t = Weak.create 1 in - Weak.set t 0 (Some v); - t - - let set t v = Weak.set t 0 (Some v) - let get t = Weak.get t 0 -end - -type 'uid p = { - index : Duff.index W.t; - entry : 'uid entry; - depth : int; - v : Dec.v W.t; -} - -type 'uid patch = { - hunks : Duff.hunk list; - depth : int; - source : 'uid; - source_length : int; -} - -type 'uid q = { - mutable patch : 'uid patch option; - entry : 'uid entry; - v : Dec.v W.t; -} - -let target_uid { entry; _ } = entry.uid -let target_length { entry; _ } = entry.length -let target_patch { patch; _ } = patch -let source_of_patch { source; _ } = source - -let pp_patch target_length pp_uid ppf patch = - Fmt.pf ppf - "{ @[hunks= %d;@ depth= %d;@ source= %a;@ source_length= %d;@] }" - (Utils.length ~source:patch.source_length ~target:target_length patch.hunks) - patch.depth pp_uid patch.source patch.source_length - -[@@@warning "-32"] (* XXX(dinosaure): pretty-printers. *) - -let pp_kind ppf = function - | `A -> Fmt.string ppf "a" - | `B -> Fmt.string ppf "b" - | `C -> Fmt.string ppf "c" - | `D -> Fmt.string ppf "d" - -let pp_delta pp_uid ppf = function - | Zero -> Fmt.string ppf "" - | From uid -> Fmt.pf ppf "@[<1>(From %a)@]" pp_uid uid - -let pp_entry pp_uid ppf entry = - Fmt.pf ppf - "{ @[uid= %a;@ kind= %a;@ length= %d;@ preferred= %b;@ delta= \ - @[%a@];@] }" - pp_uid entry.uid pp_kind entry.kind entry.length entry.preferred - (pp_delta pp_uid) entry.delta - -let pp_q pp_uid ppf q = - Fmt.pf ppf "{ @[patch= @[%a@]; entry= @[%a@]; v= %s@] }" - Fmt.(Dump.option (pp_patch q.entry.length pp_uid)) - q.patch (pp_entry pp_uid) q.entry - (if Weak.check q.v 0 then "#raw" else "NULL") - -[@@@warning "+32"] - -type ('uid, 's) load = 'uid -> (Dec.v, 's) io - -let depth_of_source : 'uid p -> int = fun { depth; _ } -> depth - -let depth_of_target : 'uid q -> int = - fun { patch; _ } -> match patch with None -> 1 | Some { depth; _ } -> depth - -let target_to_source : 'uid q -> 'uid p = - fun target -> - { - index = W.create (); - entry = target.entry; - depth = depth_of_target target; - v = target.v (* XXX(dinosaure): dragoon here! *); - } - -let entry_to_target : - type s. 
s scheduler -> load:('uid, s) load -> 'uid entry -> ('uid q, s) io = - fun { bind; return } ~load entry -> - let ( >>= ) = bind in - - load entry.uid >>= fun v -> - (match entry.delta with - | From uid -> - load uid >>= fun s -> - let source = Bigstringaf.sub ~off:0 ~len:(Dec.len s) (Dec.raw s) in - let target = Bigstringaf.sub ~off:0 ~len:(Dec.len v) (Dec.raw v) in - let index = - Duff.make (Bigstringaf.sub ~off:0 ~len:(Dec.len s) (Dec.raw s)) - in - let hunks = Duff.delta index ~source ~target in - return - (Some - { - hunks; - depth = Dec.depth v; - source = uid; - source_length = Dec.len s; - }) - | Zero -> return None) - >>= fun patch -> return { patch; entry; v = W.create_with v } - -let length_of_delta ~source ~target hunks = Utils.length ~source ~target hunks - -exception Break -exception Next - -(* XXX(dinosaure): [apply] tries to generate a patch between [source] and [target]. - If the resulted patch is good enough, we set [target.patch] to it. [apply] can raise - two exceptions: - - - [Break] where it is not able to generate a patch (different kinds) - - [Next] when it reaches the depth limit or resulted patch is not good enough - - NOTE: [load] must create a new [Bigstringaf.t]! No cache are expected at this - layer where we already handle it with [W.t] (weak reference). *) -let apply : - type s uid. - s scheduler -> - load:(uid, s) load -> - uid_ln:int -> - source:uid p -> - target:uid q -> - (unit, s) io = - fun { bind; return } ~load ~uid_ln ~source ~target -> - let ( >>= ) = bind in - - (* Don't bother doing diffs between different types. *) - if source.entry.kind <> target.entry.kind then raise_notrace Break; - - (* Let's not bust the allowed depth. *) - if depth_of_source source >= _max_depth then raise_notrace Next; - - (* Now some size filtering heuristics. *) - let max_length, ref_depth = - match target.patch with - | Some { hunks; source_length; depth; _ } -> - ( length_of_delta ~source:source_length ~target:target.entry.length hunks, - depth ) - | None -> (target.entry.length / 2) - uid_ln, 1 - in - - let max_length = - max_length - * (_max_depth - depth_of_source source) - / (_max_depth - ref_depth + 1) - in - - if max_length == 0 then raise_notrace Next; - - let diff = - if source.entry.length < target.entry.length then - target.entry.length - source.entry.length - else 0 - in - - if diff >= max_length then raise_notrace Next; - if target.entry.length < source.entry.length / 32 then raise_notrace Next; - - (* Load data if not already done. *) - let load_if weak uid = - match W.get weak with - | Some v -> return v - | None -> - load uid >>= fun v -> - W.set weak v; - return v - in - (* Load index if not already done (TODO: check it!). 
*) - let index_if weak v = - match W.get weak with - | Some index -> index - | None -> - let index = - Duff.make (Bigstringaf.sub ~off:0 ~len:(Dec.len v) (Dec.raw v)) - in - W.set weak index; - index - in - - load_if source.v source.entry.uid >>= fun source_v -> - load_if target.v target.entry.uid >>= fun target_v -> - index_if source.index source_v |> fun source_index -> - let target_r = - Bigstringaf.sub ~off:0 ~len:(Dec.len target_v) (Dec.raw target_v) - in - let source_r = - Bigstringaf.sub ~off:0 ~len:(Dec.len source_v) (Dec.raw source_v) - in - let hunks = Duff.delta source_index ~source:source_r ~target:target_r in - - target.patch <- - Some - { - hunks; - source = source.entry.uid; - source_length = source.entry.length; - depth = source.depth + 1; - }; - return () - -module type VERBOSE = sig - type 'a fiber - - val succ : unit -> unit fiber - val print : unit -> unit fiber -end - -module type UID = sig - type t - - val hash : t -> int - val equal : t -> t -> bool -end - -module Delta - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) - (Uid : UID) - (Verbose : VERBOSE with type 'a fiber = 'a IO.t) = -struct - let ( >>= ) = IO.bind - let return = IO.return - - let s = - let open Scheduler in - { - bind = (fun x f -> inj (IO.bind (prj x) (fun x -> prj (f x)))); - return = (fun x -> inj (IO.return x)); - } - - let delta : - load:(Uid.t, Scheduler.t) load -> - weight:int -> - uid_ln:int -> - Uid.t q array -> - unit IO.t = - fun ~load ~weight ~uid_ln targets -> - let window = Array.make weight None in - - let find_delta idx target = - let best : int ref = ref (-1) in - let try_delta j source = - let other_idx = idx + j in - let other_idx = - if other_idx >= weight then other_idx - weight else other_idx - in - try - apply s ~load ~uid_ln ~source ~target |> Scheduler.prj >>= fun () -> - best := other_idx; - return () - with - | Next -> return () - | Break as exn -> raise_notrace exn - in - let rec go j = - if j < 0 then return () - else - match window.(j) with - | Some (m : Uid.t p) -> - if m.entry.uid <> target.entry.uid then - try try_delta j m >>= fun () -> (go [@tailcall]) (pred j) - with Break -> return () - else return () - | None -> return () - (* TODO: check it! *) - in - go (Array.length window - 1) >>= fun () -> - (if !best >= 0 then Verbose.succ () else return ()) >>= fun () -> - return !best - in - (* XXX(dinosaure): [git] does something a bit complex between the iteration - over [targets] and the [window]. [n] is the current [target] where we will try - to apply a patch and [idx] seems a lower-bound of the LRU-cache [window]. *) - let rec iter n idx = - if n < Array.length targets then ( - find_delta idx targets.(n) >>= fun best -> - (* [git] does this update __before__ to try to find a patch. However, it seems fine - to do that after when an object can not be patched with itself. *) - window.(idx) <- Some (target_to_source targets.(n)); - Verbose.print () >>= fun () -> - (* [git] wants to deflate and cache the delta data. Should we do the same? TODO *) - if - depth_of_target targets.(n) > 1 - && depth_of_target targets.(n) < _max_depth - then ( - (* XXX(dinosaure): a slightly assumption, if [target] has a patch, - [!best] (into [go]) was properly set to a valid source. Of course, that - means that given [targets] contains non-delta-ified objects. *) - let swap = window.(best) in - - (* Move the best delta base up in the window, after the currently deltified object, to - keep it longer. It will be the first base object to be attempted next. 
*) - let v = ref best in - for _ = (weight + idx - best) mod weight to 0 do - window.(!v) <- window.((!v + 1) mod weight); - v := (!v + 1) mod weight - done; - - window.(!v) <- swap); - - if depth_of_target targets.(n) < _max_depth then - (iter [@tailcall]) (succ n) (if idx + 1 >= weight then 0 else idx + 1) - else (iter [@tailcall]) (succ n) idx) - else return () - in - iter 0 0 - - type m = { mutable v : int; m : IO.Mutex.t } - - let dispatcher : - load:(Uid.t, Scheduler.t) load -> - mutex:m -> - entries:Uid.t entry array -> - targets:Uid.t q option array -> - unit IO.t = - fun ~load ~mutex ~entries ~targets -> - let rec go () = - IO.Mutex.lock mutex.m >>= fun () -> - let v = mutex.v in - mutex.v <- mutex.v + 1; - if v >= Array.length entries then ( - IO.Mutex.unlock mutex.m; - IO.return ()) - else ( - IO.Mutex.unlock mutex.m; - entry_to_target s ~load entries.(v) |> Scheduler.prj >>= fun target -> - targets.(v) <- Some target; - go ()) - in - go () - - let get = function Some x -> x | None -> assert false - - let delta ~threads ~weight ~uid_ln entries = - let mutex = { v = 0; m = IO.Mutex.create () } in - let targets = Array.make (Array.length entries) None in - IO.parallel_iter - ~f:(fun load -> dispatcher ~load ~mutex ~entries ~targets) - threads - >>= fun () -> - let targets = Array.map get targets in - delta ~load:(List.hd threads) ~weight ~uid_ln targets >>= fun () -> - return targets -end - -module N : sig - type encoder - type b = { i : Bigstringaf.t; q : De.Queue.t; w : De.Lz77.window } - - val encoder : - 's scheduler -> - ?level:int -> - b:b -> - load:('uid, 's) load -> - 'uid q -> - (encoder, 's) io - - val encode : o:Bigstringaf.t -> encoder -> [ `Flush of encoder * int | `End ] - val dst : encoder -> Bigstringaf.t -> int -> int -> encoder -end = struct - type b = { i : Bigstringaf.t; q : De.Queue.t; w : De.Lz77.window } - type encoder = H of Zh.N.encoder | Z of Zl.Def.encoder - - let rec encode_zlib ~o encoder = - match Zl.Def.encode encoder with - | `Await encoder -> - encode_zlib ~o (Zl.Def.src encoder Bigstringaf.empty 0 0) - | `Flush encoder -> - let len = Bigstringaf.length o - Zl.Def.dst_rem encoder in - `Flush (encoder, len) - | `End encoder -> - let len = Bigstringaf.length o - Zl.Def.dst_rem encoder in - if len > 0 then `Flush (encoder, len) else `End - - let encode_hunk ~o encoder = - match Zh.N.encode encoder with - | `Flush encoder -> - let len = Bigstringaf.length o - Zh.N.dst_rem encoder in - `Flush (encoder, len) - | `End -> `End - - let encode ~o = function - | Z encoder -> ( - match encode_zlib ~o encoder with - | `Flush (encoder, len) -> `Flush (Z encoder, len) - | `End -> `End) - | H encoder -> ( - match encode_hunk ~o encoder with - | `Flush (encoder, len) -> `Flush (H encoder, len) - | `End -> `End) - - let dst encoder s j l = - match encoder with - | Z encoder -> - let encoder = Zl.Def.dst encoder s j l in - Z encoder - | H encoder -> - let encoder = Zh.N.dst encoder s j l in - H encoder - - let encoder : - type s. 
- s scheduler -> - ?level:int -> - b:b -> - load:('uid, s) load -> - 'uid q -> - (encoder, s) io = - fun { bind; return } ?(level = 4) ~b ~load target -> - let ( >>= ) = bind in - - let load_if weak uid = - match W.get weak with - | Some v -> return v - | None -> - load uid >>= fun v -> - W.set weak v; - return v - in - - match target.patch with - | Some { hunks; source_length; _ } -> - load_if target.v target.entry.uid >>= fun v -> - let raw = Bigstringaf.sub ~off:0 ~len:(Dec.len v) (Dec.raw v) in - let encoder = - Zh.N.encoder ~level ~i:b.i ~q:b.q ~w:b.w ~source:source_length raw - `Manual hunks - in - return (H encoder) - | None -> - load_if target.v target.entry.uid >>= fun v -> - let encoder = Zl.Def.encoder `Manual `Manual ~q:b.q ~w:b.w ~level in - let encoder = Zl.Def.src encoder (Dec.raw v) 0 (Dec.len v) in - - return (Z encoder) -end - -type ('uid, 's) find = 'uid -> (int option, 's) io - -type b = { - i : Bigstringaf.t; - q : De.Queue.t; - w : De.Lz77.window; - o : Bigstringaf.t; -} - -let encode_header ~o kind length = - if length < 0 then invalid_arg "encode_header: length must be positive"; - let c = ref ((kind lsl 4) lor (length land 15)) in - let l = ref (length asr 4) in - let p = ref 0 in - let n = ref 1 in - - while !l != 0 do - Bigstringaf.set o !p (Char.chr (!c lor 0x80 land 0xff)); - incr p; - c := !l land 0x7f; - l := !l asr 7; - incr n - done; - - Bigstringaf.set o !p (Char.unsafe_chr !c); - !n - -type 'uid uid = { uid_ln : int; uid_rw : 'uid -> string } - -let kind_to_int = function - | `A -> 0b001 - | `B -> 0b010 - | `C -> 0b011 - | `D -> 0b100 - -let header_of_pack ~length buf off len = - if off < 0 || len < 0 || off + len > Bigstringaf.length buf || len < 4 + 4 + 4 - then Fmt.invalid_arg "header_of_pack"; - Bigstringaf.set_int32_be buf (off + 0) 0x5041434bl; - Bigstringaf.set_int32_be buf (off + 4) 0x2l; - Bigstringaf.set_int32_be buf (off + 8) (Int32.of_int length) - -let encode_target : - type s. 
- s scheduler -> - ?level:int -> - b:b -> - find:('uid, s) find -> - load:('uid, s) load -> - uid:'uid uid -> - 'uid q -> - cursor:int -> - (int * N.encoder, s) io = - fun ({ bind; return } as s) ?level ~b ~find ~load ~uid target ~cursor -> - let ( >>= ) = bind in - - match target.patch with - | None -> - let off = - encode_header ~o:b.o (kind_to_int target.entry.kind) target.entry.length - in - N.encoder s ?level ~b:{ i = b.i; q = b.q; w = b.w } ~load target - >>= fun encoder -> - return (off, N.dst encoder b.o off (Bigstringaf.length b.o - off)) - | Some { source; source_length; hunks; _ } -> ( - find source >>= function - | Some offset -> - let off = - encode_header ~o:b.o 0b110 - (Utils.length ~source:source_length ~target:target.entry.length - hunks) - in - let buf = Bytes.create 10 in - - let p = ref (10 - 1) in - let n = ref (cursor - offset) in - - Bytes.set buf !p (Char.unsafe_chr (!n land 127)); - while !n asr 7 <> 0 do - n := !n asr 7; - decr p; - Bytes.set buf !p (Char.unsafe_chr (128 lor ((!n - 1) land 127))); - decr n - done; - - Bigstringaf.blit_from_bytes buf ~src_off:!p b.o ~dst_off:off - ~len:(10 - !p); - N.encoder s ~b:{ i = b.i; q = b.q; w = b.w } ~load target - >>= fun encoder -> - let off = off + (10 - !p) in - let len = Bigstringaf.length b.o - off in - return (off, N.dst encoder b.o off len) - | None -> - let off = - encode_header ~o:b.o 0b111 - (Utils.length ~source:source_length ~target:target.entry.length - hunks) - in - let raw = uid.uid_rw source in - Bigstringaf.blit_from_string raw ~src_off:0 b.o ~dst_off:off - ~len:uid.uid_ln; - N.encoder s ~b:{ i = b.i; q = b.q; w = b.w } ~load target - >>= fun encoder -> - let off = off + uid.uid_ln in - let len = Bigstringaf.length b.o - off in - return (off, N.dst encoder b.o off len)) diff --git a/src/carton/enc.mli b/src/carton/enc.mli deleted file mode 100644 index f4c844a72..000000000 --- a/src/carton/enc.mli +++ /dev/null @@ -1,105 +0,0 @@ -open Sigs - -type 'uid entry -type 'uid delta = From of 'uid | Zero - -val make_entry : - kind:kind -> - length:int -> - ?preferred:bool -> - ?delta:'uid delta -> - 'uid -> - 'uid entry - -val length : 'uid entry -> int - -type 'uid q -and 'uid p -and 'uid patch - -type ('uid, 's) load = 'uid -> (Dec.v, 's) io -type ('uid, 's) find = 'uid -> (int option, 's) io -type 'uid uid = { uid_ln : int; uid_rw : 'uid -> string } - -val target_to_source : 'uid q -> 'uid p -val target_uid : 'uid q -> 'uid -val target_length : 'uid q -> int -val target_patch : 'uid q -> 'uid patch option -val source_of_patch : 'uid patch -> 'uid - -val entry_to_target : - 's scheduler -> load:('uid, 's) load -> 'uid entry -> ('uid q, 's) io - -val apply : - 's scheduler -> - load:('uid, 's) load -> - uid_ln:int -> - source:'uid p -> - target:'uid q -> - (unit, 's) io - -module type VERBOSE = sig - type 'a fiber - - val succ : unit -> unit fiber - val print : unit -> unit fiber -end - -module type UID = sig - type t - - val hash : t -> int - val equal : t -> t -> bool -end - -module Delta - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) - (Uid : UID) - (Verbose : VERBOSE with type 'a fiber = 'a IO.t) : sig - val s : Scheduler.t scheduler - - val delta : - threads:(Uid.t, Scheduler.t) load list -> - weight:int -> - uid_ln:int -> - Uid.t entry array -> - Uid.t q array IO.t -end - -module N : sig - type encoder - type b = { i : Bigstringaf.t; q : De.Queue.t; w : De.Lz77.window } - - val encoder : - 's scheduler -> - ?level:int -> - b:b -> - load:('uid, 's) load -> - 'uid q -> - (encoder, 's) 
io - - val encode : o:Bigstringaf.t -> encoder -> [ `Flush of encoder * int | `End ] - val dst : encoder -> Bigstringaf.t -> int -> int -> encoder -end - -type b = { - i : Bigstringaf.t; - q : De.Queue.t; - w : De.Lz77.window; - o : Bigstringaf.t; -} - -val encode_header : o:Bigstringaf.t -> int -> int -> int -val header_of_pack : length:int -> Bigstringaf.t -> int -> int -> unit - -val encode_target : - 's scheduler -> - ?level:int -> - b:b -> - find:('uid, 's) find -> - load:('uid, 's) load -> - uid:'uid uid -> - 'uid q -> - cursor:int -> - (int * N.encoder, 's) io diff --git a/src/carton/h.ml b/src/carton/h.ml deleted file mode 100644 index b4d1cb8b9..000000000 --- a/src/carton/h.ml +++ /dev/null @@ -1,620 +0,0 @@ -type bigstring = - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t - -let bigstring_empty = Bigarray.Array1.create Bigarray.char Bigarray.c_layout 0 -let bigstring_length x = Bigarray.Array1.dim x [@@inline] - -let bigstring_create l = - Bigarray.Array1.create Bigarray.char Bigarray.c_layout l - -external unsafe_get_uint8 : bigstring -> int -> int = "%caml_ba_ref_1" -external unsafe_get_char : bigstring -> int -> char = "%caml_ba_ref_1" -external unsafe_get_uint32 : bigstring -> int -> int32 = "%caml_bigstring_get32" - -external unsafe_set_uint32 : bigstring -> int -> int32 -> unit - = "%caml_bigstring_set32" - -external unsafe_set_uint8 : bigstring -> int -> int -> unit = "%caml_ba_set_1" -external unsafe_set_char : bigstring -> int -> char -> unit = "%caml_ba_set_1" - -let unsafe_blit_from_string src src_off dst dst_off len = - for i = 0 to len - 1 do - unsafe_set_char dst (dst_off + i) src.[src_off + i] - done - -let invalid_bounds off len = - Fmt.invalid_arg "Out of bounds (off: %d, len: %d)" off len - -let invalid_encode () = Fmt.invalid_arg "expected `Await encode" - -let bigstring_to_string v = - let len = bigstring_length v in - let res = Bytes.create len in - for i = 0 to len - 1 do - Bytes.set res i (unsafe_get_char v i) - done; - Bytes.unsafe_to_string res - -let output_bigstring oc buf off len = - (* XXX(dinosaure): stupidly slow! 
*) - let v = Bigarray.Array1.sub buf off len in - let v = bigstring_to_string v in - output_string oc v - -external bytes_unsafe_get_uint32 : bytes -> int -> int32 = "%caml_bytes_get32" - -let bytes_unsafe_get_uint8 : bytes -> int -> int = - fun buf off -> Char.code (Bytes.get buf off) - -let input_bigstring ic buf off len = - let tmp = Bytes.create len in - let res = input ic tmp 0 len in - - let len0 = res land 3 in - let len1 = res asr 2 in - - for i = 0 to len1 - 1 do - let i = i * 4 in - let v = bytes_unsafe_get_uint32 tmp i in - unsafe_set_uint32 buf (off + i) v - done; - - for i = 0 to len0 - 1 do - let i = (len1 * 4) + i in - let v = bytes_unsafe_get_uint8 tmp i in - unsafe_set_uint8 buf (off + i) v - done; - res - -let slow_blit src src_off dst dst_off len = - for i = 0 to len - 1 do - let v = unsafe_get_uint8 src (src_off + i) in - unsafe_set_uint8 dst (dst_off + i) v - done - -let unsafe_blit src src_off dst dst_off len = - if src_off - dst_off < 4 || len < 4 then slow_blit src src_off dst dst_off len - else - let len0 = len land 3 in - let len1 = len asr 2 in - - for i = 0 to len1 - 1 do - let i = i * 4 in - let v = unsafe_get_uint32 src (src_off + i) in - unsafe_set_uint32 dst (dst_off + i) v - done; - - for i = 0 to len0 - 1 do - let i = (len1 * 4) + i in - let v = unsafe_get_uint8 src (src_off + i) in - unsafe_set_uint8 dst (dst_off + i) v - done - -external string_unsafe_get_uint32 : string -> int -> int32 - = "%caml_string_get32" - -let string_unsafe_get_uint8 : string -> int -> int = - fun buf off -> Char.code buf.[off] - -let bigstring_of_string v = - let len = String.length v in - let res = bigstring_create len in - let len0 = len land 3 in - let len1 = len asr 2 in - - for i = 0 to len1 - 1 do - let i = i * 4 in - let v = string_unsafe_get_uint32 v i in - unsafe_set_uint32 res i v - done; - - for i = 0 to len0 - 1 do - let i = (len1 * 4) + i in - let v = string_unsafe_get_uint8 v i in - unsafe_set_uint8 res i v - done; - res - -let io_buffer_size = 65536 - -external ( < ) : 'a -> 'a -> bool = "%lessthan" -external ( <= ) : 'a -> 'a -> bool = "%lessequal" -external ( > ) : 'a -> 'a -> bool = "%greaterthan" - -let ( > ) (x : int) y = x > y [@@inline] -let ( < ) (x : int) y = x < y [@@inline] -let ( <= ) (x : int) y = x <= y [@@inline] - -module M = struct - type src = [ `Channel of in_channel | `Manual | `String of string ] - type decode = [ `Await | `Header of int * int | `End | `Malformed of string ] - - type decoder = { - mutable source : bigstring; - src : src; - mutable dst : bigstring; - mutable i : bigstring; - mutable i_pos : int; - mutable i_len : int; - mutable t_len : int; - mutable t_need : int; - t_tmp : bigstring; - mutable o_pos : int; - mutable src_len : int; - mutable dst_len : int; - mutable s : state; - mutable k : decoder -> ret; - } - - and ret = Await | Stop | End | Malformed of string - and state = Header | Postprocess | Cmd | Cp of int | It of int - - let variable_length buf off top = - let p = ref off in - let i = ref 0 in - let len = ref 0 in - - while - let cmd = unsafe_get_uint8 buf !p in - incr p; - len := !len lor ((cmd land 0x7f) lsl !i); - i := !i + 7; - cmd land 0x80 != 0 && !p <= top - do - () - done; - !p - off, !len - [@@inline] - - let eoi d = - d.i <- bigstring_empty; - d.i_pos <- 0; - d.i_len <- min_int - - let i_rem d = d.i_len - d.i_pos + 1 [@@inline] - let src_rem = i_rem - let dst_rem d = bigstring_length d.dst - d.o_pos - let src_len { src_len; _ } = src_len - let dst_len { dst_len; _ } = dst_len - let malformedf fmt = Fmt.kstr 
(fun s -> Malformed s) fmt - - let t_need d n = - d.t_len <- 0; - d.t_need <- n - - let src d s j l = - if j < 0 || l < 0 || j + l > bigstring_length s then invalid_bounds j l; - if l == 0 then eoi d - else ( - d.i <- s; - d.i_pos <- j; - d.i_len <- j + l - 1) - - let dst d s j l = - match d.s with - | Postprocess -> - if j < 0 || l < 0 || j + l > bigstring_length s then invalid_bounds j l - else if l < d.dst_len then Fmt.invalid_arg "Invalid destination" - else ( - d.dst <- s; - d.o_pos <- j; - if bigstring_length d.source >= d.src_len then d.s <- Cmd) - | _ -> Fmt.invalid_arg "Invalid call of dst" - - let pp_state ppf = function - | Header -> Fmt.string ppf "Header" - | Postprocess -> Fmt.string ppf "Postprocess" - | Cmd -> Fmt.string ppf "Cmd" - | Cp _ -> Fmt.string ppf "Cd" - | It _ -> Fmt.string ppf "It" - - let source d src = - match d.s with - | Postprocess -> - if bigstring_length src < d.src_len then - Fmt.invalid_arg "Invalid source" - else d.source <- src - | _ -> Fmt.invalid_arg "Invalid call of source (state: %a)" pp_state d.s - - (* get new input in [d.i] and [k]ontinue. *) - let refill k d = - match d.src with - | `String _ -> - eoi d; - k d - | `Channel ic -> - let res = input_bigstring ic d.i 0 (bigstring_length d.i) in - src d d.i 0 res; - k d - | `Manual -> - d.k <- k; - Await - - let rec t_fill k d = - let blit d len = - unsafe_blit d.i d.i_pos d.t_tmp d.t_len len; - d.i_pos <- d.i_pos + len; - d.t_len <- d.t_len + len - in - let rem = i_rem d in - if rem < 0 then k d - else - let need = d.t_need - d.t_len in - if rem < need then ( - blit d rem; - refill (t_fill k) d) - else ( - blit d need; - k d) - - let required = - let a = [| 0; 1; 1; 2; 1; 2; 2; 3; 1; 2; 2; 3; 2; 3; 3; 4 |] in - fun x -> a.(x land 0xf) + a.(x lsr 4) - - let enough d = - match d.s with - | Cp cmd -> i_rem d >= required (cmd land 0x7f) - | It len -> i_rem d >= len - | _ -> assert false - - (* XXX(dinosaure): [enough] is called only after a [d.s <- (It _ | Cp _)]. *) - - let need d = - match d.s with - | Cp cmd -> required (cmd land 0x7f) - | It len -> len - | _ -> assert false - - (* XXX(dinosaure): [flambda] is able to optimize [let rec a .. and b .. and c ..] - instead [match .. with A -> .. | B -> .. | C -> ..]. 
*) - - let rec cp d = - let[@warning "-8"] (Cp command) = d.s in - let p = ref (if d.t_len > 0 then 0 else d.i_pos) in - let i = if d.t_len > 0 then d.t_tmp else d.i in - let cp_off = ref 0 in - let cp_len = ref 0 in - - if command land 0x01 != 0 then ( - let v = unsafe_get_uint8 i !p in - cp_off := v; - incr p); - if command land 0x02 != 0 then ( - let v = unsafe_get_uint8 i !p in - cp_off := !cp_off lor (v lsl 8); - incr p); - if command land 0x04 != 0 then ( - let v = unsafe_get_uint8 i !p in - cp_off := !cp_off lor (v lsl 16); - incr p); - if command land 0x08 != 0 then ( - let v = unsafe_get_uint8 i !p in - cp_off := !cp_off lor (v lsl 24); - incr p); - if command land 0x10 != 0 then ( - let v = unsafe_get_uint8 i !p in - cp_len := v; - incr p); - if command land 0x20 != 0 then ( - let v = unsafe_get_uint8 i !p in - cp_len := !cp_len lor (v lsl 8); - incr p); - if command land 0x40 != 0 then ( - let v = unsafe_get_uint8 i !p in - cp_len := !cp_len lor (v lsl 16); - incr p); - if !cp_len == 0 then cp_len := 0x10000; - - unsafe_blit d.source !cp_off d.dst d.o_pos !cp_len; - if d.t_len > 0 then d.t_len <- 0 else d.i_pos <- !p; - d.o_pos <- d.o_pos + !cp_len; - d.s <- Cmd; - d.k <- decode_k; - decode_k d - - and it d = - let[@warning "-8"] (It len) = d.s in - - if d.t_len > 0 then ( - unsafe_blit d.t_tmp 0 d.dst d.o_pos len; - d.t_len <- 0; - d.o_pos <- d.o_pos + len; - d.s <- Cmd; - d.k <- decode_k; - decode_k d) - else ( - unsafe_blit d.i d.i_pos d.dst d.o_pos len; - d.i_pos <- d.i_pos + len; - d.o_pos <- d.o_pos + len; - d.s <- Cmd; - d.k <- decode_k; - decode_k d) - - and cmd d = - let c = unsafe_get_uint8 d.i d.i_pos in - - if c == 0 then malformedf "Invalid delta code (%02x)" c - else ( - d.s <- (if c land 0x80 != 0 then Cp c else It c); - d.i_pos <- d.i_pos + 1; - - if enough d then if c land 0x80 != 0 then cp d else it d - else ( - t_need d (need d); - t_fill (if c land 0x80 != 0 then cp else it) d)) - - and decode_k d = - let rem = i_rem d in - - if rem <= 0 then if rem < 0 then End else refill decode_k d - else - match d.s with - | Header -> - if rem < 2 then Fmt.invalid_arg "Not enough space"; - (* TODO: [malformedf]? *) - let x, src_len = variable_length d.i d.i_pos d.i_len in - let y, dst_len = variable_length d.i (d.i_pos + x) d.i_len in - - (* XXX(dinosaure): ok, this code can only work if the first given buffer - is large enough to store header. In the case of [carton], output buffer - of [zlib]/input buffer of [h] is [io_buffer_size]. 
*) - d.i_pos <- d.i_pos + x + y; - d.src_len <- src_len; - d.dst_len <- dst_len; - - d.s <- Postprocess; - Stop - | Postprocess -> Stop - | Cmd -> cmd d - | Cp cmd -> - if required (cmd land 0x7f) <= rem then cp d - else ( - t_need d (need d); - t_fill cp d) - | It len -> - if len <= rem then it d - else ( - t_need d (need d); - t_fill it d) - - let decode d = - match d.k d with - | Await -> `Await - | Stop -> `Header (d.src_len, d.dst_len) - | End -> `End - | Malformed err -> `Malformed err - - let decoder ?(source = bigstring_empty) src = - let i, i_pos, i_len = - match src with - | `Manual -> bigstring_empty, 1, 0 - | `String x -> bigstring_of_string x, 0, String.length x - 1 - | `Channel _ -> bigstring_create io_buffer_size, 1, 0 - in - { - src; - source; - dst = bigstring_empty; - i; - i_pos; - i_len; - t_len = 0; - t_need = 0; - t_tmp = bigstring_create 128; - o_pos = 0; - src_len = 0; - dst_len = 0; - s = Header; - k = decode_k; - } -end - -module N = struct - type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] - type encode = [ `Await | `Copy of int * int | `Insert of string | `End ] - - type encoder = { - dst : dst; - src_len : int; - dst_len : int; - mutable o : bigstring; - mutable o_pos : int; - mutable o_max : int; - t : bigstring; - (* XXX(dinosaure): [bytes]? *) - mutable t_pos : int; - mutable t_max : int; - mutable s : s; - mutable k : encoder -> encode -> [ `Ok | `Partial ]; - } - - and s = Header | Contents - - let o_rem e = e.o_max - e.o_pos + 1 [@@inline] - - let dst e s j l = - if j < 0 || l < 0 || j + l > bigstring_length s then invalid_bounds j l; - e.o <- s; - e.o_pos <- j; - e.o_max <- j + l - 1 - - let dst_rem encoder = o_rem encoder - - let partial k e = function - | `Await -> k e - | `Copy _ | `Insert _ | `End -> invalid_encode () - - let flush k e = - match e.dst with - | `Manual -> - e.k <- partial k; - `Partial - | `Channel oc -> - output_bigstring oc e.o 0 e.o_pos; - e.o_pos <- 0; - k e - | `Buffer b -> - (* XXX(dinosaure): optimize it! 
*) - for i = 0 to e.o_pos - 1 do - Buffer.add_char b (unsafe_get_char e.o i) - done; - e.o_pos <- 0; - k e - - let cmd off len = - let cmd = ref 0 in - - if off land 0x000000ff <> 0 then cmd := !cmd lor 0x01; - if off land 0x0000ff00 <> 0 then cmd := !cmd lor 0x02; - if off land 0x00ff0000 <> 0 then cmd := !cmd lor 0x04; - if off land 0x7f000000 <> 0 then cmd := !cmd lor 0x08; - - if len land 0x0000ff <> 0 then cmd := !cmd lor 0x10; - if len land 0x00ff00 <> 0 then cmd := !cmd lor 0x20; - if len land 0xff0000 <> 0 then cmd := !cmd lor 0x40; - - !cmd - [@@inline] - - let t_range e max = - e.t_pos <- 0; - e.t_max <- max - - let rec t_flush k e = - let blit e l = - unsafe_blit e.t e.t_pos e.o e.o_pos l; - e.o_pos <- e.o_pos + l; - e.t_pos <- e.t_pos + l - in - let rem = o_rem e in - let len = e.t_max - e.t_pos + 1 in - - if rem < len then ( - blit e rem; - flush (t_flush k) e) - else ( - blit e len; - k e) - - let rec encode_contents e v = - let k e = - e.k <- encode_contents; - `Ok - in - match v with - | `Await -> k e - | `Copy (off, len) -> - let rem = o_rem e in - let cmd = cmd off len in - let required = 1 + M.required cmd in - - let s, j, k = - if rem < required then ( - t_range e (required - 1); - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + required; - e.o, j, k - in - - unsafe_set_uint8 s j (cmd lor 0x80); - let pos = ref (j + 1) in - let off = ref off in - while !off <> 0 do - if !off land 0xff != 0 then ( - unsafe_set_uint8 s !pos !off; - incr pos); - off := !off asr 8 - done; - let len = ref len in - while !len <> 0 do - if !len land 0xff != 0 then ( - unsafe_set_uint8 s !pos !len; - incr pos); - len := !len asr 8 - done; - k e - | `Insert p -> - let rem = o_rem e in - let required = 1 + String.length p in - - let s, j, k = - if rem < required then ( - t_range e (required - 1); - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + required; - e.o, j, k - in - - unsafe_set_uint8 s j (String.length p); - unsafe_blit_from_string p 0 s (j + 1) (String.length p); - k e - | `End -> flush k e - - let store_variable_length buf off length = - let l = ref length in - let off = ref off in - while !l >= 0x80 do - unsafe_set_uint8 buf !off (!l lor 0x80 land 0xff); - incr off; - l := !l asr 7 - done; - unsafe_set_uint8 buf !off !l - - let needed length = - let l = ref length in - let o = ref 0 in - while !l >= 0x80 do - incr o; - l := !l asr 7 - done; - incr o; - !o - [@@inline] - - let encode_header e v = - let k e = - e.k <- encode_contents (* XXX(dinosaure): short-cut [encode]. 
*); - e.s <- Contents; - e.k e v - in - let ndd = needed e.src_len + needed e.dst_len in - let rem = o_rem e in - (* assert (ndd <= 10) ; *) - if rem >= ndd then ( - store_variable_length e.o e.o_pos e.src_len; - store_variable_length e.o (e.o_pos + needed e.src_len) e.dst_len; - e.o_pos <- e.o_pos + ndd; - k e) - else ( - t_range e ndd; - store_variable_length e.t 0 e.src_len; - store_variable_length e.t (needed e.src_len) e.dst_len; - t_flush k e) - - let encode e v = e.k e v - - let encoder dst ~src_len ~dst_len = - let o, o_pos, o_max = - match dst with - | `Manual -> bigstring_empty, 1, 0 - | `Buffer _ | `Channel _ -> - bigstring_create io_buffer_size, 0, io_buffer_size - 1 - in - { - dst; - src_len; - dst_len; - o; - o_pos; - o_max; - t = bigstring_create 128; - t_pos = 1; - t_max = 0; - s = Header; - k = encode_header; - } -end diff --git a/src/carton/h.mli b/src/carton/h.mli deleted file mode 100644 index 29b714cec..000000000 --- a/src/carton/h.mli +++ /dev/null @@ -1,33 +0,0 @@ -type bigstring = - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t - -val io_buffer_size : int -val bigstring_create : int -> bigstring -val bigstring_empty : bigstring - -module M : sig - type src = [ `Channel of in_channel | `Manual | `String of string ] - type decode = [ `Await | `Header of int * int | `End | `Malformed of string ] - type decoder - - val src : decoder -> bigstring -> int -> int -> unit - val dst : decoder -> bigstring -> int -> int -> unit - val source : decoder -> bigstring -> unit - val src_rem : decoder -> int - val dst_rem : decoder -> int - val src_len : decoder -> int - val dst_len : decoder -> int - val decode : decoder -> decode - val decoder : ?source:bigstring -> src -> decoder -end - -module N : sig - type dst = [ `Channel of out_channel | `Manual | `Buffer of Buffer.t ] - type encode = [ `Await | `Copy of int * int | `Insert of string | `End ] - type encoder - - val dst : encoder -> bigstring -> int -> int -> unit - val dst_rem : encoder -> int - val encoder : dst -> src_len:int -> dst_len:int -> encoder - val encode : encoder -> encode -> [ `Ok | `Partial ] -end diff --git a/src/carton/idx.ml b/src/carton/idx.ml deleted file mode 100644 index ffff2d47a..000000000 --- a/src/carton/idx.ml +++ /dev/null @@ -1,670 +0,0 @@ -[@@@warning "-32"] - -type bigstring = - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t - -external get_int32 : bigstring -> int -> int32 = "%caml_bigstring_get32" -external get_int64 : bigstring -> int -> int64 = "%caml_bigstring_get64" -external get_int16 : bigstring -> int -> int = "%caml_bigstring_get16" -external swap32 : int32 -> int32 = "%bswap_int32" -external swap64 : int64 -> int64 = "%bswap_int64" -external swap16 : int -> int = "%bswap16" - -let get_int16_be = - if Sys.big_endian then fun buf off -> get_int16 buf off - else fun buf off -> swap16 (get_int16 buf off) - -let get_int64_be = - if Sys.big_endian then fun buf off -> get_int64 buf off - else fun buf off -> swap64 (get_int64 buf off) - -external string_get_int16 : string -> int -> int = "%caml_string_get16" -external string_get_int32 : string -> int -> int32 = "%caml_string_get32" -external string_get_int64 : string -> int -> int64 = "%caml_string_get64" - -let string_get_int16_be = - if Sys.big_endian then fun buf off -> string_get_int16 buf off - else fun buf off -> swap16 (string_get_int16 buf off) - -let string_get_int32_be = - if Sys.big_endian then fun buf off -> string_get_int32 buf off - else fun buf off -> swap32 (string_get_int32 
buf off) - -let string_get_int64_be = - if Sys.big_endian then fun buf off -> string_get_int64 buf off - else fun buf off -> swap64 (string_get_int64 buf off) - -let string_get_int8 s i = Char.code s.[i] - -let get_int32_be = - if Sys.big_endian then fun buf off -> get_int32 buf off - else fun buf off -> swap32 (get_int32 buf off) - -type 'uid idx = { - mp : bigstring; - n : int; - uid_ln : int; - uid_rw : 'uid -> string; - uid_wr : string -> 'uid; -} - -and sub = { off : int; len : int } -and optint = Optint.t - -let make : - bigstring -> - uid_ln:int -> - uid_rw:('uid -> string) -> - uid_wr:(string -> 'uid) -> - 'uid idx = - fun mp ~uid_ln ~uid_rw ~uid_wr -> - let i = get_int32_be mp 0 in - let v = get_int32_be mp 4 in - let n = get_int32_be mp (8 + (255 * 4)) in - - if i <> 0xff744f63l then - Fmt.invalid_arg "Invalid IDX file (header: %lx <> %lx)" i 0xff744f63l; - if v <> 0x2l then Fmt.invalid_arg "Invalid version of IDX file"; - { mp; n = Int32.to_int n; uid_ln; uid_rw; uid_wr } - -let compare_bigstring idx a hash = - let ps = ref 0 in - let c1 = ref 0 in - let c2 = ref 0 in - - let exception Equal in - try - while - c1 := get_int16_be idx.mp (a.off + !ps); - c2 := string_get_int16_be hash !ps; - !c1 == !c2 - do - ps := !ps + 2; - if !ps == idx.uid_ln then raise_notrace Equal - done; - - let res0 = (!c1 land 0xff) - (!c2 land 0xff) in - let res1 = (!c1 asr 8) - (!c2 asr 8) in - if res1 == 0 then res0 else res1 - with Equal -> 0 - -let ( <-> ) a b = Int32.sub a b -let fanout_offset = 8 -let hashes_offset = 8 + (256 * 4) - -let bsearch idx hash = - let n = string_get_int8 hash 0 in - let a = - if n = 0 then 0l else get_int32_be idx.mp (fanout_offset + (4 * (n - 1))) - in - let b = get_int32_be idx.mp (fanout_offset + (4 * n)) in - - let abs_off = hashes_offset + (Int32.to_int a * idx.uid_ln) in - let len = Int32.to_int (b <-> a) * idx.uid_ln in - - let rec go sub_off sub_len = - let len = sub_len / (2 * idx.uid_ln) * idx.uid_ln in - (* XXX(dinosaure): prevent a wrong comparison with something outside the - hashes table. *) - if sub_off + len = hashes_offset + (idx.uid_ln * idx.n) then - raise_notrace Not_found; - let cmp = compare_bigstring idx { off = sub_off + len; len } hash in - - if cmp == 0 then { off = sub_off + len; len } - else if sub_len <= idx.uid_ln then raise_notrace Not_found - else if cmp > 0 then (go [@tailcall]) sub_off len - else (go [@tailcall]) (sub_off + len) (sub_len - len) - in - let { off; _ } = go abs_off len in - Int32.to_int a + ((off - abs_off) / idx.uid_ln) - -(* XXX(dinosaure): FIXME! It does not work at some points. *) -let isearch idx hash = - let n = string_get_int8 hash 0 in - let a = - if n = 0 then 0l else get_int32_be idx.mp (fanout_offset + (4 * (n - 1))) - in - let b = get_int32_be idx.mp (fanout_offset + (4 * n)) in - - let abs_off = hashes_offset + (Int32.to_int a * idx.uid_ln) in - let len = Int32.to_int (b <-> a <-> 1l) * idx.uid_ln in - - let hashf = Int64.to_float (string_get_int64_be hash 0) in - let uid_lnf = float_of_int idx.uid_ln in - - let rec go low high = - if low > high then raise_notrace Not_found; - if low == high || low + idx.uid_ln == high then - let cmp = compare_bigstring idx { off = low; len = idx.uid_ln } hash in - if cmp == 0 then { off = low; len = idx.uid_ln } - else raise_notrace Not_found - else - let lef = Int64.to_float (get_int64_be idx.mp low) in - let hef = Int64.to_float (get_int64_be idx.mp high) in - let lowf = float_of_int low in - let highf = float_of_int high in - - let interpolation = - floor ((highf -. 
lowf) *. (hashf -. lef) /. (hef -. lef)) - in - let off = lowf +. interpolation -. mod_float interpolation uid_lnf in - let off = int_of_float off in - let cmp = compare_bigstring idx { off; len = idx.uid_ln } hash in - - if cmp == 0 then { off; len = idx.uid_ln } - else if cmp > 0 then (go [@tailcall]) low (off - idx.uid_ln) - else (go [@tailcall]) (off + idx.uid_ln) high - in - if len < 0 then raise_notrace Not_found; - - let { off; _ } = go abs_off (abs_off + len) in - Int32.to_int a + ((off - abs_off) / idx.uid_ln) - -let find idx hash = - let hash = idx.uid_rw hash in - match bsearch idx hash with - | n -> - let crcs_offset = 8 + (256 * 4) + (idx.n * idx.uid_ln) in - let values_offset = 8 + (256 * 4) + (idx.n * idx.uid_ln) + (idx.n * 4) in - - let crc = get_int32_be idx.mp (crcs_offset + (n * 4)) in - let off = get_int32_be idx.mp (values_offset + (n * 4)) in - - if Int32.logand off 0x80000000l <> 0l then - let off = Int32.to_int off land 0x7fffffff in - let off = - get_int64_be idx.mp (values_offset + (idx.n * 4) + (off * 8)) - in - Some (Optint.of_int32 crc, off) - else Some (Optint.of_int32 crc, Int64.of_int32 off) - | exception Not_found -> None - -let exists idx uid = - let uid = idx.uid_rw uid in - match bsearch idx uid with _ -> true | exception Not_found -> false - -let get_uid idx n = - let res = Bytes.create idx.uid_ln in - Bigstringaf.blit_to_bytes idx.mp - ~src_off:(hashes_offset + (n * idx.uid_ln)) - res ~dst_off:0 ~len:idx.uid_ln; - idx.uid_wr (Bytes.unsafe_to_string res) - -let get_offset idx n = - let values_offset = 8 + (256 * 4) + (idx.n * idx.uid_ln) + (idx.n * 4) in - Int64.of_int32 (get_int32_be idx.mp (values_offset + (n * 4))) - -let get_crc idx n = - let crcs_offset = 8 + (256 * 4) + (idx.n * idx.uid_ln) in - Optint.of_int32 (get_int32_be idx.mp (crcs_offset + (n * 4))) - -let max { n; _ } = n - -let iter ~f idx = - let rec go n = - if n == idx.n then () - else - let uid = get_uid idx n in - let offset = get_offset idx n in - let crc = get_crc idx n in - f ~uid ~offset ~crc; - go (succ n) - in - go 0 - -let map ~f idx = - let rec go acc n = - if n == idx.n then List.rev acc - else - let uid = get_uid idx n in - let offset = get_offset idx n in - let crc = get_crc idx n in - go (f ~uid ~offset ~crc :: acc) (succ n) - in - go [] 0 - -module type UID = sig - type t - type ctx - - val empty : ctx - val feed : ctx -> ?off:int -> ?len:int -> bigstring -> ctx - val get : ctx -> t - val compare : t -> t -> int - val length : int - val to_raw_string : t -> string - val pp : t Fmt.t -end - -type 'uid entry = { crc : optint; offset : int64; uid : 'uid } - -module N (Uid : UID) : sig - type encoder - type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] - - val encoder : dst -> pack:Uid.t -> Uid.t entry array -> encoder - val encode : encoder -> [ `Await ] -> [ `Partial | `Ok ] - val dst_rem : encoder -> int - val dst : encoder -> Bigstringaf.t -> int -> int -> unit -end = struct - type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] - - type encoder = { - dst : dst; - mutable o : bigstring; - mutable o_off : int; - mutable o_pos : int; - mutable o_max : int; - t : bigstring; - q : int64 Queue.t; - mutable t_pos : int; - mutable t_max : int; - mutable n : int; - fanout : int array; - index : Uid.t entry array; - pack : Uid.t; - mutable ctx : Uid.ctx; - mutable k : encoder -> [ `Await ] -> [ `Partial | `Ok ]; - } - - let dst e s j l = - if j < 0 || l < 0 || j + l > Bigstringaf.length s then - Fmt.invalid_arg "Out of bounds (off: %d, len: 
%d)" j l; - e.o <- s; - e.o_off <- j; - e.o_pos <- j; - e.o_max <- j + l - 1 - - let partial k e = function `Await -> k e - - let flush_with_ctx k e = - match e.dst with - | `Manual -> - let ctx = Uid.feed e.ctx ~off:e.o_off ~len:e.o_pos e.o in - e.ctx <- ctx; - e.k <- partial k; - `Partial - | `Channel oc -> - let raw = Bigstringaf.substring e.o ~off:0 ~len:e.o_pos in - let ctx = Uid.feed e.ctx ~off:e.o_off ~len:e.o_pos e.o in - output_string oc raw; - e.o_pos <- 0; - e.ctx <- ctx; - k e - | `Buffer b -> - let raw = Bigstringaf.substring e.o ~off:0 ~len:e.o_pos in - let ctx = Uid.feed e.ctx ~off:e.o_off ~len:e.o_pos e.o in - Buffer.add_string b raw; - e.o_pos <- 0; - e.ctx <- ctx; - k e - - let flush_without_ctx k e = - match e.dst with - | `Manual -> - e.k <- partial k; - `Partial - | `Channel oc -> - let raw = Bigstringaf.substring e.o ~off:e.o_off ~len:e.o_pos in - output_string oc raw; - e.o_pos <- 0; - k e - | `Buffer b -> - let raw = Bigstringaf.substring e.o ~off:e.o_off ~len:e.o_pos in - Buffer.add_string b raw; - e.o_pos <- 0; - k e - - let o_rem e = e.o_max - e.o_pos + 1 - - let t_range e m = - e.t_pos <- 0; - e.t_max <- m - - let rec t_flush ?(with_ctx = true) k e = - let blit e l = - Bigstringaf.blit e.t ~src_off:e.t_pos e.o ~dst_off:e.o_pos ~len:l; - e.o_pos <- e.o_pos + l; - e.t_pos <- e.t_pos + l - in - let rem = o_rem e in - let len = e.t_max - e.t_pos + 1 in - let flush = if with_ctx then flush_with_ctx else flush_without_ctx in - if rem < len then ( - blit e rem; - flush (t_flush k) e) - else ( - blit e len; - k e) - - let ok e = - e.k <- (fun _ `Await -> `Ok); - `Ok - - let encode_trail e `Await = - let k2 e = flush_without_ctx ok e in - let k1 e = - let rem = o_rem e in - let s, j, k = - if rem < Uid.length then ( - t_range e (Uid.length - 1); - e.t, 0, t_flush ~with_ctx:false k2) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + Uid.length; - e.o, j, k2 - in - let uid = Uid.get e.ctx in - let uid = Uid.to_raw_string uid in - Bigstringaf.blit_from_string uid ~src_off:0 s ~dst_off:j ~len:Uid.length; - k e - in - let k0 e = flush_with_ctx k1 e in - let rem = o_rem e in - let s, j, k = - if rem < Uid.length then ( - t_range e (Uid.length - 1); - e.t, 0, t_flush k0) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + Uid.length; - e.o, j, k0 - in - let uid = Uid.to_raw_string e.pack in - Bigstringaf.blit_from_string uid ~src_off:0 s ~dst_off:j ~len:Uid.length; - k e - - let rec encode_big_offset e `Await = - let offset = Queue.pop e.q in - Fmt.epr ">>> ENCODE BIG OFFSET: %Lx\n%!" 
offset; - let k e = - if Queue.is_empty e.q then encode_trail e `Await - else encode_big_offset e `Await - in - - let rem = o_rem e in - - let s, j, k = - if rem < 8 then ( - t_range e 7; - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + 8; - e.o, j, k - in - - Bigstringaf.set_int64_be s j offset; - k e - - let rec encode_offset e `Await = - let k e = - if e.n + 1 == Array.length e.index then ( - e.n <- 0; - if Queue.is_empty e.q then encode_trail e `Await - else encode_big_offset e `Await) - else ( - e.n <- succ e.n; - encode_offset e `Await) - in - let rem = o_rem e in - - let s, j, k = - if rem < 4 then ( - t_range e 3; - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + 4; - e.o, j, k - in - let { offset; _ } = e.index.(e.n) in - if Int64.shift_right_logical offset 31 <> 0L then ( - let n = Queue.length e.q in - Queue.push offset e.q; - Bigstringaf.set_int32_be s j Int32.(logor 0x80000000l (of_int n)); - k e) - else ( - Bigstringaf.set_int32_be s j (Int64.to_int32 offset); - k e) - - let rec encode_crc e `Await = - let k e = - if e.n + 1 == Array.length e.index then ( - e.n <- 0; - encode_offset e `Await) - else ( - e.n <- succ e.n; - encode_crc e `Await) - in - let rem = o_rem e in - - let s, j, k = - if rem < 4 then ( - t_range e 3; - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + 4; - e.o, j, k - in - let { crc; _ } = e.index.(e.n) in - Bigstringaf.set_int32_be s j (Optint.to_int32 crc); - k e - - let rec encode_hash e `Await = - let k e = - if e.n + 1 == Array.length e.index then ( - e.n <- 0; - encode_crc e `Await) - else ( - e.n <- succ e.n; - encode_hash e `Await) - in - let rem = o_rem e in - - let s, j, k = - if rem < Uid.length then ( - t_range e (Uid.length - 1); - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + Uid.length; - e.o, j, k - in - let { uid; _ } = e.index.(e.n) in - - Bigstringaf.blit_from_string (Uid.to_raw_string uid) ~src_off:0 s ~dst_off:j - ~len:Uid.length; - k e - - let rec encode_fanout e `Await = - let k e = - if e.n + 1 == 256 then ( - e.n <- 0; - if Array.length e.index > 0 then encode_hash e `Await - else encode_trail e `Await) - else ( - e.n <- succ e.n; - encode_fanout e `Await) - in - let rem = o_rem e in - - let s, j, k = - if rem < 4 then ( - t_range e 3; - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + 4; - e.o, j, k - in - let x = - let acc = ref 0 in - for i = 0 to e.n do - acc := !acc + e.fanout.(i) - done; - !acc - in - Bigstringaf.set_int32_be s j (Int32.of_int x); - k e - - let encode_header e `Await = - let k e = - e.n <- 0; - encode_fanout e `Await - in - let rem = o_rem e in - let s, j, k = - if rem < 8 then ( - t_range e 8; - e.t, 0, t_flush k) - else - let j = e.o_pos in - e.o_pos <- e.o_pos + 8; - e.o, j, k - in - Bigstringaf.set_int32_be s j 0xff744f63l; - Bigstringaf.set_int32_be s (j + 4) 0x2l; - k e - - let io_buffer_size = 65536 - - let encoder dst ~pack index = - Array.sort (fun { uid = a; _ } { uid = b; _ } -> Uid.compare a b) index; - let fanout = Array.make 256 0 in - Array.iter - (fun { uid; _ } -> - let n = Char.code (Uid.to_raw_string uid).[0] in - fanout.(n) <- fanout.(n) + 1) - index; - let o, o_pos, o_max = - match dst with - | `Manual -> Bigstringaf.empty, 1, 0 - | `Buffer _ | `Channel _ -> - Bigstringaf.create io_buffer_size, 0, io_buffer_size - 1 - in - { - dst; - o; - o_off = 0; - o_pos; - o_max; - t = Bigstringaf.create Uid.length; - q = Queue.create (); - t_pos = 1; - t_max = 0; - n = 0; - fanout; - index; 
- pack; - ctx = Uid.empty; - k = encode_header; - } - - let dst_rem = o_rem - let encode e = e.k e -end - -type file = File - -module Ephemeron = Ephemeron.K1.Make (struct - type t = file - - let equal = ( = ) - let hash = Hashtbl.hash -end) - -module Device = struct - type t = Bigstringaf.t ref Ephemeron.t - type uid = file - - let device () = Ephemeron.create 1 - - let create device = - let file = File in - Ephemeron.add device file (ref Bigstringaf.empty); - file - [@@inline never] - - let project device file = !(Ephemeron.find device file) -end - -module M (IO : sig - type +'a t - - val bind : 'a t -> ('a -> 'b t) -> 'b t - val return : 'a -> 'a t -end) (Uid : sig - include UID - - val of_raw_string : string -> t - val null : t -end) = -struct - open IO - - let ( >>= ) x f = bind x f - - type fd = { - mutable buffer : Bigstringaf.t; - mutable capacity : int; - mutable length : int; - uid : Device.uid; - } - - let enlarge fd more = - let _old_length = fd.length in - let old_capacity = fd.capacity in - let new_capacity = ref old_capacity in - while old_capacity + more > !new_capacity do - new_capacity := 2 * !new_capacity - done; - if !new_capacity > Sys.max_string_length then - if old_capacity + more <= Sys.max_string_length then - new_capacity := Sys.max_string_length - else failwith "Too big buffer"; - let new_buffer = Bigstringaf.create !new_capacity in - Bigstringaf.blit fd.buffer ~src_off:0 new_buffer ~dst_off:0 ~len:fd.length; - fd.buffer <- new_buffer; - fd.capacity <- !new_capacity; - (* XXX(dinosaure): these asserts wants to rely on some assumptions - even if we use [enlarge] into a preemptive thread as [Stdlib.Buffer]. - However, with [lwt], it should be fine to use it and avoid these - assertions. *) - (* assert (fd.position + more <= fd.capacity) ; *) - (* assert (old_length + more <= fd.capacity) ; *) - () - - type t = Device.t - type uid = Device.uid - type error = | - - let pp_error : error Fmt.t = fun _ppf -> function _ -> . 
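  (* A minimal usage sketch of the in-memory device defined in this file,
     assuming the identity monad for [IO] and a hypothetical [Uid] module
     (for instance built on Digestif.SHA1) satisfying the signature expected
     by [M]; none of these names are part of the original source:

       module Identity = struct
         type +'a t = 'a
         let bind x f = f x
         let return x = x
       end

       module Store = M (Identity) (Uid)

       let device = Device.device ()
       let file = Device.create device

       let () =
         match Store.create device file with
         | Error _ -> assert false (* [error] is an empty type *)
         | Ok fd ->
           Store.append device fd "idx payload";
           (match Store.close device fd with Ok () -> () | Error _ -> ());
           (* everything appended so far is recovered with: *)
           ignore (Device.project device file : Bigstringaf.t) *)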
- - let create device uid = - assert (Ephemeron.mem device uid); - IO.return - (Ok - { - uid; - buffer = Bigstringaf.create 0x1000; - capacity = 0x1000; - length = 0; - }) - - let append _ fd str = - let len = String.length str in - let new_length = fd.length + len in - if new_length > fd.capacity then enlarge fd len; - Bigstringaf.blit_from_string str ~src_off:0 fd.buffer ~dst_off:fd.length - ~len; - fd.length <- new_length; - IO.return () - - let close device fd = - let result = Bigstringaf.sub fd.buffer ~off:0 ~len:fd.length in - let v = Ephemeron.find device fd.uid in - v := result; - IO.return (Ok ()) -end diff --git a/src/carton/idx.mli b/src/carton/idx.mli deleted file mode 100644 index f8c2e91ab..000000000 --- a/src/carton/idx.mli +++ /dev/null @@ -1,87 +0,0 @@ -type bigstring = - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t - -type 'uid idx -type optint = Optint.t - -val make : - bigstring -> - uid_ln:int -> - uid_rw:('uid -> string) -> - uid_wr:(string -> 'uid) -> - 'uid idx - -val find : 'uid idx -> 'uid -> (optint * int64) option - -val iter : - f:(uid:'uid -> offset:int64 -> crc:optint -> unit) -> 'uid idx -> unit - -val map : - f:(uid:'uid -> offset:int64 -> crc:optint -> 'a) -> 'uid idx -> 'a list - -val exists : 'uid idx -> 'uid -> bool -val max : 'uid idx -> int -val get_uid : 'uid idx -> int -> 'uid -val get_offset : 'uid idx -> int -> int64 -val get_crc : 'uid idx -> int -> optint - -module type UID = sig - type t - type ctx - - val empty : ctx - val feed : ctx -> ?off:int -> ?len:int -> bigstring -> ctx - val get : ctx -> t - val compare : t -> t -> int - val length : int - val to_raw_string : t -> string - val pp : t Fmt.t -end - -type 'uid entry = { crc : optint; offset : int64; uid : 'uid } - -module N (Uid : UID) : sig - type encoder - type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] - - val encoder : dst -> pack:Uid.t -> Uid.t entry array -> encoder - val encode : encoder -> [ `Await ] -> [ `Partial | `Ok ] - val dst_rem : encoder -> int - val dst : encoder -> Bigstringaf.t -> int -> int -> unit -end - -(** Memory safe IDX decoder. - - The goal of this library is to provide a way to decode the IDX file as a - stream. *) - -module Device : sig - type t - type uid - - val device : unit -> t - val create : t -> uid - val project : t -> uid -> Bigstringaf.t -end - -module M (IO : sig - type +'a t - - val bind : 'a t -> ('a -> 'b t) -> 'b t - val return : 'a -> 'a t -end) (Uid : sig - include UID - - val of_raw_string : string -> t - val null : t -end) : sig - type t = Device.t - type uid = Device.uid - type fd - type error - - val pp_error : error Fmt.t - val create : t -> uid -> (fd, error) result IO.t - val append : t -> fd -> string -> unit IO.t - val close : t -> fd -> (unit, error) result IO.t -end diff --git a/src/carton/index.mld b/src/carton/index.mld deleted file mode 100644 index 0e8247497..000000000 --- a/src/carton/index.mld +++ /dev/null @@ -1,15 +0,0 @@ -{1 Carton, a small library to handle and generate PACK files.} - -A PACK file is a specific format used by Git internally to efficiently store Git -objects. They can be found into [.git/objects/pack]. Such format is used to -transmit from a peer to one other Git objects and be synchronized each others -(eg. [git pull]/[git push]). - -[Carton] wants to provide an not-so-easy way to produce and handle such format -and be able to implement a Git back-end for Irmin, a Git push command and a Git -fetch/pull command. 
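For instance, via its [Dec.Idx] sub-module, looking up the offset of an object
inside a memory-mapped [*.idx] file boils down to a few calls. A minimal
sketch, assuming [mapped_idx_file] holds the mapped contents of the file,
[uid] is the object identifier to resolve and [Digestif.SHA1] is the chosen
hash implementation:

{[
  let idx =
    Carton.Dec.Idx.make mapped_idx_file
      ~uid_ln:Digestif.SHA1.digest_size
      ~uid_rw:Digestif.SHA1.to_raw_string
      ~uid_wr:Digestif.SHA1.of_raw_string

  let offset_of uid =
    match Carton.Dec.Idx.find idx uid with
    | Some (_crc, offset) -> Some offset
    | None -> None
]}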
- -This document describes the API and how to use it - and it describes how to take -the advantage of this format for another purpose. In fact, [Carton] was made to -not be specific to Git and to be free from any {i syscalls} as usual for a -MirageOS project. \ No newline at end of file diff --git a/src/carton/sigs.ml b/src/carton/sigs.ml deleted file mode 100644 index ef42eb552..000000000 --- a/src/carton/sigs.ml +++ /dev/null @@ -1,81 +0,0 @@ -module type FUNCTOR = sig - type +'a t -end - -type (+'a, 's) io - -type 's scheduler = { - bind : 'a 'b. ('a, 's) io -> ('a -> ('b, 's) io) -> ('b, 's) io; - return : 'a. 'a -> ('a, 's) io; -} - -module type SCHEDULER = sig - type +'a s - type t - - external inj : 'a s -> ('a, t) io = "%identity" - external prj : ('a, t) io -> 'a s = "%identity" -end - -module type MUTEX = sig - type +'a fiber - type t - - val create : unit -> t - val lock : t -> unit fiber - val unlock : t -> unit -end - -module type CONDITION = sig - type +'a fiber - type mutex - type t - - val create : unit -> t - val wait : t -> mutex -> unit fiber - val signal : t -> unit - val broadcast : t -> unit -end - -module type IO = sig - type +'a t - - module Mutex : MUTEX with type 'a fiber = 'a t - - module Condition : - CONDITION with type 'a fiber = 'a t and type mutex = Mutex.t - - val bind : 'a t -> ('a -> 'b t) -> 'b t - val return : 'a -> 'a t - val detach : (unit -> 'a) -> 'a t - val parallel_map : f:('a -> 'b t) -> 'a list -> 'b list t - val parallel_iter : f:('a -> unit t) -> 'a list -> unit t -end - -module Make (T : FUNCTOR) : SCHEDULER with type 'a s = 'a T.t = struct - type 'a s = 'a T.t - type t - - external inj : 'a -> 'b = "%identity" - external prj : 'a -> 'b = "%identity" -end - -module type UID = sig - type t - type ctx - - val empty : ctx - val get : ctx -> t - val feed : ctx -> ?off:int -> ?len:int -> Bigstringaf.t -> ctx - val equal : t -> t -> bool - val compare : t -> t -> int - val length : int - val of_raw_string : string -> t - val to_raw_string : t -> string - val pp : t Fmt.t - val null : t -end - -type kind = [ `A | `B | `C | `D ] - -let _max_depth = 60 diff --git a/src/carton/sigs.mli b/src/carton/sigs.mli deleted file mode 100644 index 7b3ad6938..000000000 --- a/src/carton/sigs.mli +++ /dev/null @@ -1,75 +0,0 @@ -module type FUNCTOR = sig - type +'a t -end - -type (+'a, 's) io - -type 's scheduler = { - bind : 'a 'b. ('a, 's) io -> ('a -> ('b, 's) io) -> ('b, 's) io; - return : 'a. 
'a -> ('a, 's) io; -} - -module type SCHEDULER = sig - type +'a s - type t - - external inj : 'a s -> ('a, t) io = "%identity" - external prj : ('a, t) io -> 'a s = "%identity" -end - -module type MUTEX = sig - type +'a fiber - type t - - val create : unit -> t - val lock : t -> unit fiber - val unlock : t -> unit -end - -module type CONDITION = sig - type +'a fiber - type mutex - type t - - val create : unit -> t - val wait : t -> mutex -> unit fiber - val signal : t -> unit - val broadcast : t -> unit -end - -module type IO = sig - type +'a t - - module Mutex : MUTEX with type 'a fiber = 'a t - - module Condition : - CONDITION with type 'a fiber = 'a t and type mutex = Mutex.t - - val bind : 'a t -> ('a -> 'b t) -> 'b t - val return : 'a -> 'a t - val detach : (unit -> 'a) -> 'a t - val parallel_map : f:('a -> 'b t) -> 'a list -> 'b list t - val parallel_iter : f:('a -> unit t) -> 'a list -> unit t -end - -module Make (T : FUNCTOR) : SCHEDULER with type 'a s = 'a T.t - -module type UID = sig - type t - type ctx - - val empty : ctx - val get : ctx -> t - val feed : ctx -> ?off:int -> ?len:int -> Bigstringaf.t -> ctx - val equal : t -> t -> bool - val compare : t -> t -> int - val length : int - val of_raw_string : string -> t - val to_raw_string : t -> string - val pp : t Fmt.t - val null : t -end - -type kind = [ `A | `B | `C | `D ] - -val _max_depth : int diff --git a/src/carton/thin.ml b/src/carton/thin.ml deleted file mode 100644 index 95612f86a..000000000 --- a/src/carton/thin.ml +++ /dev/null @@ -1,381 +0,0 @@ -open Carton - -type ('uid, 's) light_load = 'uid -> (kind * int, 's) io -type ('uid, 's) heavy_load = 'uid -> (Dec.v, 's) io -type optint = Optint.t - -let blit_from_string src src_off dst dst_off len = - Bigstringaf.blit_from_string src ~src_off dst ~dst_off ~len -[@@inline] - -let src = Logs.Src.create "thin" - -module Log = (val Logs.src_log src : Logs.LOG) - -exception Exists - -module Make - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) - (Uid : UID) = -struct - let ( >>= ) x f = IO.bind x f - let return x = IO.return x - let ( >>? 
) x f = x >>= function Ok x -> f x | Error _ as err -> return err - - let sched = - let open Scheduler in - { - Carton.bind = (fun x f -> inj (prj x >>= fun x -> prj (f x))); - Carton.return = (fun x -> inj (return x)); - } - - let read stream = - let ke = Ke.Rke.create ~capacity:0x1000 Bigarray.char in - - let rec go filled inputs = - match Ke.Rke.N.peek ke with - | [] -> ( - stream () >>= function - | Some (src, off, len) -> - Ke.Rke.N.push ke ~blit:blit_from_string ~length:String.length ~off - ~len src; - go filled inputs - | None -> return filled) - | src :: _ -> - let src = Cstruct.of_bigarray src in - let len = min (Cstruct.length inputs) (Cstruct.length src) in - Cstruct.blit src 0 inputs 0 len; - Ke.Rke.N.shift_exn ke len; - if len < Cstruct.length inputs then - go (filled + len) (Cstruct.shift inputs len) - else return (filled + len) - in - fun filled inputs -> go filled inputs - - module Verify = Carton.Dec.Verify (Uid) (Scheduler) (IO) - module Fp = Carton.Dec.Fp (Uid) - - let first_pass ~zl_buffer ~digest stream = - let fl_buffer = Cstruct.create De.io_buffer_size in - let zl_window = De.make_window ~bits:15 in - - let allocate _ = zl_window in - - let read_cstruct = read stream in - let read_bytes () buf ~off ~len = - let rec go rest raw = - if rest <= 0 then ( - Cstruct.blit_to_bytes fl_buffer 0 buf off len; - return (abs rest + len)) - else - read_cstruct 0 raw >>= function - | 0 -> - (* TODO(dinosaure): end of flow, add a test. *) - return (len - rest) - | filled -> go (rest - filled) (Cstruct.shift raw filled) - in - go len fl_buffer - in - let read_bytes () buf ~off ~len = - Scheduler.inj (read_bytes () buf ~off ~len) - in - - Fp.check_header sched read_bytes () |> Scheduler.prj - >>= fun (max, _, len) -> - let decoder = Fp.decoder ~o:zl_buffer ~allocate `Manual in - let decoder = Fp.src decoder (Cstruct.to_bigarray fl_buffer) 0 len in - - let children = Hashtbl.create 0x100 in - let where = Hashtbl.create 0x100 in - let weight = Hashtbl.create 0x100 in - let checks = Hashtbl.create 0x100 in - let matrix = Array.make max Verify.unresolved_node in - - let replace hashtbl k v = - match Hashtbl.find_opt hashtbl k with - | Some v' -> if v' < v then Hashtbl.replace hashtbl k v - | None -> Hashtbl.add hashtbl k v - in - - let rec go decoder = - match Fp.decode decoder with - | `Await decoder -> - read_cstruct 0 fl_buffer >>= fun len -> - Log.debug (fun m -> - m "Refill the first-pass state with %d byte(s)." len); - go (Fp.src decoder (Cstruct.to_bigarray fl_buffer) 0 len) - | `Peek decoder -> - (* XXX(dinosaure): [Fp] does the compression. *) - let keep = Fp.src_rem decoder in - read_cstruct 0 (Cstruct.shift fl_buffer keep) >>= fun len -> - go (Fp.src decoder (Cstruct.to_bigarray fl_buffer) 0 (keep + len)) - | `Entry ({ Fp.kind = Base _; offset; size; crc; _ }, decoder) -> - let n = Fp.count decoder - 1 in - Log.debug (fun m -> m "[+] base object (%d) (%Ld)." n offset); - replace weight offset size; - Hashtbl.add where offset n; - Hashtbl.add checks offset crc; - matrix.(n) <- Verify.unresolved_base ~cursor:offset; - go decoder - | `Entry - ( { Fp.kind = Ofs { sub = s; source; target }; offset; crc; _ }, - decoder ) -> - let n = Fp.count decoder - 1 in - Log.debug (fun m -> m "[+] ofs object (%d) (%Ld)." 
n offset); - replace weight Int64.(sub offset (Int64.of_int s)) source; - replace weight offset target; - Hashtbl.add where offset n; - Hashtbl.add checks offset crc; - - (try - let vs = - Hashtbl.find children (`Ofs Int64.(sub offset (of_int s))) - in - Hashtbl.replace children - (`Ofs Int64.(sub offset (of_int s))) - (offset :: vs) - with Not_found -> - Hashtbl.add children - (`Ofs Int64.(sub offset (of_int s))) - [ offset ]); - go decoder - | `Entry - ({ Fp.kind = Ref { ptr; target; source }; offset; crc; _ }, decoder) - -> - let n = Fp.count decoder - 1 in - Log.debug (fun m -> - m "[+] ref object (%d) (%Ld) (weight: %d)." n offset - (Stdlib.max target source :> int)); - replace weight offset (Stdlib.max target source); - Hashtbl.add where offset n; - Hashtbl.add checks offset crc; - - (try - let vs = Hashtbl.find children (`Ref ptr) in - Hashtbl.replace children (`Ref ptr) (offset :: vs) - with Not_found -> Hashtbl.add children (`Ref ptr) [ offset ]); - go decoder - | `End uid -> return (Ok uid) - | `Malformed err -> - Log.err (fun m -> m "Got an error: %s." err); - return (Error (`Msg err)) - in - go decoder >>? fun uid -> - Log.debug (fun m -> m "First pass on incoming PACK file is done."); - return - (Ok - ( { - Carton.Dec.where = (fun ~cursor -> Hashtbl.find where cursor); - children = - (fun ~cursor ~uid -> - match - ( Hashtbl.find_opt children (`Ofs cursor), - Hashtbl.find_opt children (`Ref uid) ) - with - | Some a, Some b -> List.sort_uniq compare (a @ b) - | Some x, None | None, Some x -> x - | None, None -> []); - digest; - weight = (fun ~cursor -> Hashtbl.find weight cursor); - }, - matrix, - where, - checks, - children, - uid )) - - type ('t, 'path, 'fd, 'error) fs = { - create : ?trunc:bool -> 't -> 'path -> ('fd, 'error) result IO.t; - append : 't -> 'fd -> string -> unit IO.t; - map : 't -> 'fd -> pos:int64 -> int -> Bigstringaf.t; - close : 't -> 'fd -> (unit, 'error) result IO.t; - } - - module Set = Set.Make (Uid) - - let zip a b = - if Array.length a <> Array.length b then invalid_arg "zip: lengths mismatch"; - Array.init (Array.length a) (fun i -> a.(i), b.(i)) - - let share l0 l1 = - try - List.iter - (fun (v, _) -> if List.exists (Int64.equal v) l1 then raise Exists) - l0; - false - with Exists -> true - - let verify ?(threads = 4) ~digest t path { create; append; map; close } stream - = - let zl_buffer = De.bigstring_create De.io_buffer_size in - let allocate bits = De.make_window ~bits in - let weight = ref 0L in - create ~trunc:true t path >>? fun fd -> - let stream () = - stream () >>= function - | Some (buf, off, len) as res -> - append t fd (String.sub buf off len) >>= fun () -> - weight := Int64.add !weight (Int64.of_int len); - return res - | none -> return none - in - Log.debug (fun m -> m "Start to analyse the PACK file."); - first_pass ~zl_buffer ~digest stream - >>? 
fun (oracle, matrix, where, checks, children, uid) -> - let weight = !weight in - let pack = - Carton.Dec.make fd ~allocate ~z:zl_buffer ~uid_ln:Uid.length - ~uid_rw:Uid.of_raw_string (fun _ -> assert false) - in - let map fd ~pos len = - let len = min len Int64.(to_int (sub weight pos)) in - map t fd ~pos len - in - Log.debug (fun m -> m "Start to verify incoming PACK file (second pass)."); - Verify.verify ~threads pack ~map ~oracle ~matrix ~verbose:ignore - >>= fun () -> - Log.debug (fun m -> m "Second pass on incoming PACK file is done."); - let offsets = - Hashtbl.fold (fun k _ a -> k :: a) where [] - |> List.sort Int64.compare - |> Array.of_list - in - let unresolveds, resolveds = - let fold (unresolveds, resolveds) (offset, status) = - if Verify.is_resolved status then - let uid = Verify.uid_of_status status in - let crc = Hashtbl.find checks offset in - unresolveds, { Carton.Dec.Idx.crc; offset; uid } :: resolveds - else - let crc = Hashtbl.find checks offset in - (offset, crc) :: unresolveds, resolveds - in - Array.fold_left fold ([], []) (zip offsets matrix) - in - let requireds = - Hashtbl.fold - (fun k vs a -> - match k with - | `Ofs _ -> a - | `Ref uid -> if share unresolveds vs then Set.add uid a else a) - children Set.empty - in - close t fd >>? fun () -> - Log.debug (fun m -> - m "PACK file verified (%d resolved object(s), %d unresolved object(s))" - (List.length resolveds) (List.length unresolveds)); - return - (Ok - ( Hashtbl.length where, - Set.elements requireds, - unresolveds, - resolveds, - weight, - uid )) - - let find _ = assert false - - let vuid = - { Carton.Enc.uid_ln = Uid.length; Carton.Enc.uid_rw = Uid.to_raw_string } - - type nonrec light_load = (Uid.t, Scheduler.t) light_load - type nonrec heavy_load = (Uid.t, Scheduler.t) heavy_load - - (* XXX(dinosaure): [fs = { create; append; ... }] has a argument about - * [trunc] to know if we want to write (and delete old contents) or simply - * read - and, in that case, keep contents. - * - * This argument was added to fix a problem about [Cstruct_append] which - * needs to know if we want to erase old contents or keep it and read it. - * However, on top of that, something else can help if we want to read or - * write. Capabilities exist at another level ([Rd], [Wr] and [RdWr]) and we - * should fallback them at this level. By this way, we can delete [trunc] and - * ensure that we write only new contents ([O_CREATE | O_TRUNC | O_APPEND]) - * or read contents ([O_RDONLY]). - * - * Capabilities can not be applied at this level - at least, we can not - * constraint the ['fd] to be read-only or write-only - because we don't the - * high kind polymorphism for free. - * - * I'm not sure about a good solution on the API level and capabilities on - * this level. So we keep [trunc] for the moment but we should find a better - * solution, at least, on the API level, to decomplixify it. *) - - let canonicalize ~light_load ~heavy_load ~src ~dst t - { create; append; close; map; _ } n uids weight = - let b = - { - Carton.Enc.o = Bigstringaf.create De.io_buffer_size; - Carton.Enc.i = Bigstringaf.create De.io_buffer_size; - Carton.Enc.q = De.Queue.create 0x10000; - Carton.Enc.w = De.Lz77.make_window ~bits:15; - } - in - let ctx = ref Uid.empty in - let cursor = ref 0L in - let light_load uid = Scheduler.prj (light_load uid) in - create ~trunc:true t dst >>? 
fun fd -> - let header = Bigstringaf.create 12 in - Carton.Enc.header_of_pack ~length:(n + List.length uids) header 0 12; - let hdr = Bigstringaf.to_string header in - append t fd hdr >>= fun () -> - ctx := Uid.feed !ctx header; - cursor := Int64.add !cursor 12L; - let encode_base uid = - light_load uid >>= fun (kind, length) -> - let entry = Carton.Enc.make_entry ~kind ~length uid in - let anchor = !cursor in - let crc = ref Checkseum.Crc32.default in - Carton.Enc.entry_to_target sched ~load:heavy_load entry |> Scheduler.prj - >>= fun target -> - Carton.Enc.encode_target sched ~b ~find ~load:heavy_load ~uid:vuid target - ~cursor:(Int64.to_int anchor) - |> Scheduler.prj - >>= fun (len, encoder) -> - let rec go encoder = - match Carton.Enc.N.encode ~o:b.o encoder with - | `Flush (encoder, len) -> - append t fd (Bigstringaf.substring b.o ~off:0 ~len) >>= fun () -> - ctx := Uid.feed !ctx ~off:0 ~len b.o; - crc := Checkseum.Crc32.digest_bigstring b.o 0 len !crc; - cursor := Int64.add !cursor (Int64.of_int len); - let encoder = - Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) - in - go encoder - | `End -> return { Carton.Dec.Idx.crc = !crc; offset = anchor; uid } - in - append t fd (Bigstringaf.substring b.o ~off:0 ~len) >>= fun () -> - ctx := Uid.feed !ctx ~off:0 ~len b.o; - crc := Checkseum.Crc32.digest_bigstring b.o 0 len !crc; - cursor := Int64.add !cursor (Int64.of_int len); - let encoder = Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) in - go encoder - in - let rec go acc = function - | [] -> return (List.rev acc) - | uid :: uids -> encode_base uid >>= fun entry -> go (entry :: acc) uids - in - go [] uids >>= fun entries -> - let shift = Int64.sub !cursor 12L in - let top = Int64.sub weight (Int64.of_int Uid.length) in - let rec go src pos = - let max = Int64.sub top pos in - let len = min max (Int64.mul 1024L 1024L) in - let len = Int64.to_int len in - let raw = map t src ~pos len in - append t fd (Bigstringaf.to_string raw) >>= fun () -> - ctx := Uid.feed !ctx raw; - cursor := Int64.add !cursor (Int64.of_int len); - if Int64.add pos (Int64.of_int len) < top then - go src (Int64.add pos (Int64.of_int len)) - else - let uid = Uid.get !ctx in - append t fd (Uid.to_raw_string uid) >>= fun () -> - return (Ok (Int64.(add !cursor (of_int Uid.length)), uid)) - in - create ~trunc:false t src >>? fun src -> - go src 12L >>? fun (weight, uid) -> - close t fd >>? fun () -> return (Ok (shift, weight, uid, entries)) -end diff --git a/src/carton/thin.mli b/src/carton/thin.mli deleted file mode 100644 index 8863cd938..000000000 --- a/src/carton/thin.mli +++ /dev/null @@ -1,79 +0,0 @@ -open Carton - -type ('uid, 's) light_load = 'uid -> (kind * int, 's) io -type ('uid, 's) heavy_load = 'uid -> (Dec.v, 's) io -type optint = Optint.t - -module Make - (Scheduler : SCHEDULER) - (IO : IO with type 'a t = 'a Scheduler.s) - (Uid : UID) : sig - type ('t, 'path, 'fd, 'error) fs = { - create : ?trunc:bool -> 't -> 'path -> ('fd, 'error) result IO.t; - append : 't -> 'fd -> string -> unit IO.t; - map : 't -> 'fd -> pos:int64 -> int -> Bigstringaf.t; - close : 't -> 'fd -> (unit, 'error) result IO.t; - } - (** A record to manipulate a {i file-system}. - - [create] is like {!Unix.openfile}. It can open a pre-existing ['path] or - create a new one. It returns a {i representation} of it which can be - manipulated. [append] is like {!Unix.write}. It appends the given - {i payload} into the opened [fd]. It shifts the offset of the opened file - by the length of the given {i payload}. 
[map] is like {!Unix.map_file}. It - loads a part of the given ['fd] into a {!Bigstringaf.t} payload. It always - called with valid [pos] and [len] arguments. *) - - val verify : - ?threads:int -> - digest:Uid.t Carton.Dec.digest -> - 't -> - 'path -> - ('t, 'path, 'fd, ([> `Msg of string ] as 'error)) fs -> - (unit -> (string * int * int) option IO.t) -> - ( int - * Uid.t list - * (int64 * optint) list - * Uid.t Dec.Idx.entry list - * int64 - * Uid.t, - 'error ) - result - IO.t - (** [verify ~digest filename fs stream] does the first pass to analyze a PACK - file. While it analyzes the PACK file, it saves it into [filename] with - the [fs]'s [append] {i syscall}. Then, it returns how many objects has the - stream, the list of required external objects and the size of the stream. - - If the list is empty, the given stream (saved into [filename]) is a - {i canonic} PACK file. It does not require any external objects to extract - any of its objects. - - Otherwise, you probably should call {!canonicalize} to regenerate the PACK - file. *) - - type nonrec light_load = (Uid.t, Scheduler.t) light_load - type nonrec heavy_load = (Uid.t, Scheduler.t) heavy_load - - val canonicalize : - light_load:light_load -> - heavy_load:heavy_load -> - src:'path -> - dst:'path -> - 't -> - ('t, 'path, 'fd, ([> `Msg of string ] as 'error)) fs -> - int -> - Uid.t list -> - int64 -> - (int64 * int64 * Uid.t * Uid.t Dec.Idx.entry list, 'error) result IO.t - (** [canonicalize ~light_load ~heavy_load ~transmit filename fs n requireds - weight] generates a new PACK file with required objects [requireds]. It - puts on the front these objects available with [light_load] and - [heavy_load]. - - Then, it transmits all others objects of the old PACK file to the new one - with [transmit]. It must know how many objects has the old PACK file and - size of it. - - It returns the size of the new PACK file generated located to [filename]. 
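
    A minimal sketch of the [fs] record these functions expect, assuming a
    blocking Unix-backed [IO] (so [IO.return] is the scheduler's return) and
    eliding short writes and error handling:

    {[
      let unix_fs : (unit, string, Unix.file_descr, [ `Msg of string ]) fs =
        {
          create =
            (fun ?(trunc = true) () path ->
              let flags = Unix.[ O_RDWR; O_CREAT ] in
              let flags = if trunc then Unix.O_TRUNC :: flags else flags in
              IO.return (Ok (Unix.openfile path flags 0o644)));
          append =
            (fun () fd str ->
              let _ = Unix.write_substring fd str 0 (String.length str) in
              IO.return ());
          map =
            (fun () fd ~pos len ->
              Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false
                [| len |]
              |> Bigarray.array1_of_genarray);
          close =
            (fun () fd ->
              Unix.close fd;
              IO.return (Ok ()));
        }
    ]}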
*) -end diff --git a/src/carton/zh.ml b/src/carton/zh.ml deleted file mode 100644 index 79f3374c7..000000000 --- a/src/carton/zh.ml +++ /dev/null @@ -1,266 +0,0 @@ -let input_bigstring ic buf off len = - let tmp = Bytes.create len in - let res = input ic tmp 0 len in - Bigstringaf.blit_from_bytes tmp ~src_off:0 buf ~dst_off:off ~len:res; - res - -let output_bigstring oc buf off len = - let res = Bigstringaf.substring buf ~off ~len in - output_string oc res - -module N : sig - type encoder - type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] - type ret = [ `Flush of encoder | `End ] - - val dst_rem : encoder -> int - val dst : encoder -> Zl.bigstring -> int -> int -> encoder - val encode : encoder -> ret - - val encoder : - ?level:int -> - i:Zl.bigstring -> - q:De.Queue.t -> - w:De.Lz77.window -> - source:int -> - H.bigstring -> - dst -> - Duff.hunk list -> - encoder -end = struct - type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] - - type encoder = { - dst : dst; - src : Bigstringaf.t; - o : H.bigstring; - o_pos : int; - o_max : int; - h : H.N.encoder; - z : Zl.Def.encoder; - t : Zl.bigstring; - d : [ `Copy of int * int | `Insert of string | `End | `Await ] list; - } - - type ret = [ `Flush of encoder | `End ] - - let flush k e = - match e.dst with - | `Manual -> `Flush e - | `Channel oc -> - output_bigstring oc e.o 0 e.o_pos; - k { e with o_pos = 0 } - | `Buffer b -> - for i = 0 to e.o_pos - 1 do - Buffer.add_char b (Bigstringaf.get e.o i) - done; - k { e with o_pos = 0 } - - let rec encode_z e = - match Zl.Def.encode e.z with - | `End z -> - let len = Bigstringaf.length e.o - Zl.Def.dst_rem z in - let z = Zl.Def.dst z De.bigstring_empty 0 0 in - - if len > 0 then flush encode_z { e with z; o_pos = len } else `End - | `Flush z -> - let len = Bigstringaf.length e.o - Zl.Def.dst_rem z in - flush encode_z { e with z; o_pos = len } - | `Await z -> ( - match e.d with - | [] -> - let z = Zl.Def.src z De.bigstring_empty 0 0 in - encode_z { e with z } - | d -> - H.N.dst e.h e.t 0 (De.bigstring_length e.t); - encode_h { e with z } d) - - and encode_h e d = - let v, d = match d with v :: d -> v, d | [] -> `End, [] in - match H.N.encode e.h v, d with - | `Ok, [] -> - let len = Bigstringaf.length e.t - H.N.dst_rem e.h in - let z = Zl.Def.src e.z e.t 0 len in - - encode_z { e with d; z } - | `Ok, d -> encode_h { e with d } d - | `Partial, d -> - let len = Bigstringaf.length e.t - H.N.dst_rem e.h in - let z = Zl.Def.src e.z e.t 0 len in - - encode_z { e with d = `Await :: d; z } - - let encode e = encode_z e - - let encoder ?(level = 4) ~i ~q ~w ~source src dst hunks = - let o, o_pos, o_max = - match dst with - | `Manual -> De.bigstring_empty, 1, 0 - | `Buffer _ | `Channel _ -> - De.bigstring_create H.io_buffer_size, 0, H.io_buffer_size - 1 - in - let z = Zl.Def.encoder `Manual `Manual ~q ~w ~level in - let z = Zl.Def.dst z De.bigstring_empty 0 0 in - { - dst; - src; - o; - o_pos; - o_max; - t = i; - d = - List.map - (function - | Duff.Copy (off, len) -> `Copy (off, len) - | Duff.Insert (off, len) -> - `Insert (Bigstringaf.substring src ~off ~len)) - hunks; - z; - h = H.N.encoder `Manual ~dst_len:(Bigstringaf.length src) ~src_len:source; - } - - let dst_rem e = e.o_max - e.o_pos + 1 - - let dst e s j l = - let z = Zl.Def.dst e.z s j l in - { e with z; o = s; o_pos = j; o_max = j + l - 1 } -end - -module M : sig - type decoder - type src = [ `Channel of in_channel | `String of string | `Manual ] - - type decode = - [ `Await of decoder - | `Header of int * int * 
decoder - | `End of decoder - | `Malformed of string ] - - val src_len : decoder -> int - val dst_len : decoder -> int - val src_rem : decoder -> int - val dst_rem : decoder -> int - val src : decoder -> Zl.bigstring -> int -> int -> decoder - val dst : decoder -> H.bigstring -> int -> int -> decoder - val source : decoder -> H.bigstring -> decoder - val decode : decoder -> decode - - val decoder : - ?source:H.bigstring -> - o:Zl.bigstring -> - allocate:(int -> Zl.window) -> - src -> - decoder -end = struct - type src = [ `Channel of in_channel | `String of string | `Manual ] - - type decoder = { - src : src; - dst : H.bigstring; - dst_len : int; - src_len : int; - i : Zl.bigstring; - i_pos : int; - i_len : int; - o : Zl.bigstring; - z : Zl.Inf.decoder; - h : H.M.decoder; - k : decoder -> decode; - } - - and decode = - [ `Await of decoder - | `Header of int * int * decoder - | `End of decoder - | `Malformed of string ] - - let refill k d = - match d.src with - | `String _ -> - let z = Zl.Inf.src d.z De.bigstring_empty 0 0 in - k { d with z } - | `Channel ic -> - let res = input_bigstring ic d.i 0 (De.bigstring_length d.i) in - let z = Zl.Inf.src d.z d.i 0 res in - k { d with z } - | `Manual -> `Await { d with k } - - let rec decode d = - match H.M.decode d.h with - | `Header (src_len, dst_len) -> - `Header (src_len, dst_len, { d with src_len; dst_len; k = decode }) - | `End -> `End { d with k = decode } - | `Malformed err -> `Malformed err - | `Await -> inflate { d with z = Zl.Inf.flush d.z } - - and inflate d = - match Zl.Inf.decode d.z with - | `Await z -> - let dst_len = De.bigstring_length d.o - Zl.Inf.dst_rem z in - H.M.src d.h d.o 0 dst_len; - refill inflate { d with z } - | `End z -> - let dst_len = De.bigstring_length d.o - Zl.Inf.dst_rem z in - H.M.src d.h d.o 0 dst_len; - decode { d with z } - | `Flush z -> - let dst_len = De.bigstring_length d.o - Zl.Inf.dst_rem z in - H.M.src d.h d.o 0 dst_len; - decode { d with z } - | `Malformed err -> `Malformed err - - let src d s j l = - let z = Zl.Inf.src d.z s j l in - { d with z } - - let dst d s j l = - H.M.dst d.h s j l; - d - - let source d src = - H.M.source d.h src; - d - - let dst_len d = - let dst_len = H.M.dst_len d.h in - assert (d.dst_len = dst_len); - dst_len - - let src_len d = - let src_len = H.M.src_len d.h in - assert (d.src_len = src_len); - src_len - - let dst_rem d = H.M.dst_rem d.h - let src_rem d = Zl.Inf.src_rem d.z - - let decoder ?source ~o ~allocate src = - let decoder_z = Zl.Inf.decoder `Manual ~o ~allocate in - let decoder_h = H.M.decoder `Manual ?source in - - let i, i_pos, i_len = - match src with - | `Manual -> De.bigstring_empty, 1, 0 - | `String x -> - ( Bigstringaf.of_string x ~off:0 ~len:(String.length x), - 0, - String.length x - 1 ) - | `Channel _ -> Bigstringaf.create De.io_buffer_size, 1, 0 - in - - { - src; - dst = De.bigstring_empty; - dst_len = 0; - src_len = 0; - i; - i_pos; - i_len; - o; - z = decoder_z; - h = decoder_h; - k = decode; - } - - let decode d = d.k d -end diff --git a/src/carton/zh.mli b/src/carton/zh.mli deleted file mode 100644 index d867c5f45..000000000 --- a/src/carton/zh.mli +++ /dev/null @@ -1,47 +0,0 @@ -module N : sig - type encoder - type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] - type ret = [ `Flush of encoder | `End ] - - val dst_rem : encoder -> int - val dst : encoder -> Zl.bigstring -> int -> int -> encoder - val encode : encoder -> ret - - val encoder : - ?level:int -> - i:Zl.bigstring -> - q:De.Queue.t -> - w:De.Lz77.window -> - source:int -> 
- H.bigstring -> - dst -> - Duff.hunk list -> - encoder -end - -module M : sig - type decoder - type src = [ `Channel of in_channel | `String of string | `Manual ] - - type decode = - [ `Await of decoder - | `Header of int * int * decoder - | `End of decoder - | `Malformed of string ] - - val src_len : decoder -> int - val dst_len : decoder -> int - val src_rem : decoder -> int - val dst_rem : decoder -> int - val src : decoder -> Zl.bigstring -> int -> int -> decoder - val dst : decoder -> H.bigstring -> int -> int -> decoder - val source : decoder -> H.bigstring -> decoder - val decode : decoder -> decode - - val decoder : - ?source:H.bigstring -> - o:Zl.bigstring -> - allocate:(int -> Zl.window) -> - src -> - decoder -end diff --git a/test/bin/dune b/test/bin/dune deleted file mode 100644 index edc3e7f26..000000000 --- a/test/bin/dune +++ /dev/null @@ -1,8 +0,0 @@ -(cram - (package carton) - (deps - ../carton/bomb.pack - ../carton/bomb.idx - %{bin:carton.get} - %{bin:carton.verify-pack} - %{bin:carton.index-pack})) diff --git a/test/bin/get.t b/test/bin/get.t deleted file mode 100644 index dd9d3a905..000000000 --- a/test/bin/get.t +++ /dev/null @@ -1,65 +0,0 @@ -Extraction of Git objects - $ carton.get ../carton/bomb.pack --raw 12 - tree c1971b07ce6888558e2178a121804774c4201b17 - parent 18ed56cbc5012117e24a603e7c072cf65d36d469 - author Kate Murphy 1507821911 -0400 - committer GitHub 1507821911 -0400 - - Update README.md - $ carton.get ../carton/bomb.pack --raw 18ed56cbc5012117e24a603e7c072cf65d36d469 - tree d9513477b01825130c48c4bebed114c4b2d50401 - parent 45546f17e5801791d4bc5968b91253a2f4b0db72 - author Kate Murphy 1507821648 -0400 - committer GitHub 1507821648 -0400 - - Create Readme.md - $ carton.get ../carton/bomb.pack c1971b07ce6888558e2178a121804774c4201b17 - 00000000: 3130 3036 3434 2052 4541 444d 452e 6d64 100644 README.md - 00000010: 00ad 839b aae5 fc20 7ac0 db15 34ba 4819 ....... z...4.H. - 00000020: cbb4 a34b b934 3030 3030 2064 3000 8d10 ...K.40000 d0... - 00000030: 6ebc 17b2 de80 acef d454 825d 394b 9bc4 n........T.]9K.. - 00000040: 7fe6 3430 3030 3020 6431 008d 106e bc17 ..40000 d1...n.. - 00000050: b2de 80ac efd4 5482 5d39 4b9b c47f e634 ......T.]9K....4 - 00000060: 3030 3030 2064 3200 8d10 6ebc 17b2 de80 0000 d2...n..... - 00000070: acef d454 825d 394b 9bc4 7fe6 3430 3030 ...T.]9K....4000 - 00000080: 3020 6433 008d 106e bc17 b2de 80ac efd4 0 d3...n........ - 00000090: 5482 5d39 4b9b c47f e634 3030 3030 2064 T.]9K....40000 d - 000000a0: 3400 8d10 6ebc 17b2 de80 acef d454 825d 4...n........T.] - 000000b0: 394b 9bc4 7fe6 3430 3030 3020 6435 008d 9K....40000 d5.. - 000000c0: 106e bc17 b2de 80ac efd4 5482 5d39 4b9b .n........T.]9K. - 000000d0: c47f e634 3030 3030 2064 3600 8d10 6ebc ...40000 d6...n. - 000000e0: 17b2 de80 acef d454 825d 394b 9bc4 7fe6 .......T.]9K.... - 000000f0: 3430 3030 3020 6437 008d 106e bc17 b2de 40000 d7...n.... - 00000100: 80ac efd4 5482 5d39 4b9b c47f e634 3030 ....T.]9K....400 - 00000110: 3030 2064 3800 8d10 6ebc 17b2 de80 acef 00 d8...n....... - 00000120: d454 825d 394b 9bc4 7fe6 3430 3030 3020 .T.]9K....40000 - 00000130: 6439 008d 106e bc17 b2de 80ac efd4 5482 d9...n........T. - 00000140: 5d39 4b9b c47f e6 ]9K.... - $ carton.get --with-path --with-info ../carton/bomb.pack c1971b07ce6888558e2178a121804774c4201b17 - path: 279 - 234 - depth: 2 - length: 327 - kind: b - - 00000000: 3130 3036 3434 2052 4541 444d 452e 6d64 100644 README.md - 00000010: 00ad 839b aae5 fc20 7ac0 db15 34ba 4819 ....... z...4.H. 
- 00000020: cbb4 a34b b934 3030 3030 2064 3000 8d10 ...K.40000 d0... - 00000030: 6ebc 17b2 de80 acef d454 825d 394b 9bc4 n........T.]9K.. - 00000040: 7fe6 3430 3030 3020 6431 008d 106e bc17 ..40000 d1...n.. - 00000050: b2de 80ac efd4 5482 5d39 4b9b c47f e634 ......T.]9K....4 - 00000060: 3030 3030 2064 3200 8d10 6ebc 17b2 de80 0000 d2...n..... - 00000070: acef d454 825d 394b 9bc4 7fe6 3430 3030 ...T.]9K....4000 - 00000080: 3020 6433 008d 106e bc17 b2de 80ac efd4 0 d3...n........ - 00000090: 5482 5d39 4b9b c47f e634 3030 3030 2064 T.]9K....40000 d - 000000a0: 3400 8d10 6ebc 17b2 de80 acef d454 825d 4...n........T.] - 000000b0: 394b 9bc4 7fe6 3430 3030 3020 6435 008d 9K....40000 d5.. - 000000c0: 106e bc17 b2de 80ac efd4 5482 5d39 4b9b .n........T.]9K. - 000000d0: c47f e634 3030 3030 2064 3600 8d10 6ebc ...40000 d6...n. - 000000e0: 17b2 de80 acef d454 825d 394b 9bc4 7fe6 .......T.]9K.... - 000000f0: 3430 3030 3020 6437 008d 106e bc17 b2de 40000 d7...n.... - 00000100: 80ac efd4 5482 5d39 4b9b c47f e634 3030 ....T.]9K....400 - 00000110: 3030 2064 3800 8d10 6ebc 17b2 de80 acef 00 d8...n....... - 00000120: d454 825d 394b 9bc4 7fe6 3430 3030 3020 .T.]9K....40000 - 00000130: 6439 008d 106e bc17 b2de 80ac efd4 5482 d9...n........T. - 00000140: 5d39 4b9b c47f e6 ]9K.... diff --git a/test/bin/index.t b/test/bin/index.t deleted file mode 100644 index e402e79a0..000000000 --- a/test/bin/index.t +++ /dev/null @@ -1,13 +0,0 @@ -Generate index file - $ carton.index-pack -o bomb.idx ../carton/bomb.pack - d1c2ce2fc6dfaaa18d0ea1b564334d738b0e2339 - $ diff bomb.idx ../carton/bomb.idx - $ carton.index-pack < ../carton/bomb.pack > bomb.idx - $ diff bomb.idx ../carton/bomb.idx - $ carton.index-pack -o bomb.idx < ../carton/bomb.pack - d1c2ce2fc6dfaaa18d0ea1b564334d738b0e2339 - $ diff bomb.idx ../carton/bomb.idx - $ carton.index-pack -v -o bomb.idx ../carton/bomb.pack - Indexing objects: 0% (0/18) Indexing objects: 5% (1/18) Indexing objects: 11% (2/18) Indexing objects: 16% (3/18) Indexing objects: 22% (4/18) Indexing objects: 27% (5/18) Indexing objects: 33% (6/18) Indexing objects: 38% (7/18) Indexing objects: 44% (8/18) Indexing objects: 50% (9/18) Indexing objects: 55% (10/18) Indexing objects: 61% (11/18) Indexing objects: 66% (12/18) Indexing objects: 72% (13/18) Indexing objects: 77% (14/18) Indexing objects: 83% (15/18) Indexing objects: 88% (16/18) Indexing objects: 94% (17/18) Indexing objects: 100% (18/18), done. - Resolving deltas: 0% (0/3). Resolving deltas: 33% (1/3). Resolving deltas: 66% (2/3). Resolving deltas: 100% (3/3), done. 
- d1c2ce2fc6dfaaa18d0ea1b564334d738b0e2339 diff --git a/test/bin/verify.t b/test/bin/verify.t deleted file mode 100644 index 67f5280db..000000000 --- a/test/bin/verify.t +++ /dev/null @@ -1,24 +0,0 @@ -Verify pack file - $ carton.verify-pack ../carton/bomb.idx - $ carton.verify-pack -v ../carton/bomb.idx - 7af99c9e7d4768fa681f4fe4ff61259794cf719b commit 218 182 12 - 18ed56cbc5012117e24a603e7c072cf65d36d469 commit 218 177 194 - 45546f17e5801791d4bc5968b91253a2f4b0db72 commit 170 124 371 - 8d106ebc17b2de80acefd454825d394b9bc47fe6 tree 290 69 495 - 106d3b1c00034193bbe91194eb8a90fc45006377 tree 290 69 564 - c1971b07ce6888558e2178a121804774c4201b17 tree 45 59 633 1 106d3b1c00034193bbe91194eb8a90fc45006377 - dacaac6d3b2cf39ec8078dfb0bd3ce691e92557f blob 1867 786 692 - ad839baae5fc207ac0db1534ba4819cbb4a34bb9 blob 18 30 1478 1 dacaac6d3b2cf39ec8078dfb0bd3ce691e92557f - d9513477b01825130c48c4bebed114c4b2d50401 tree 45 58 1508 1 106d3b1c00034193bbe91194eb8a90fc45006377 - 5849ef743180c60e7936f2afc01f03df80918390 tree 290 69 1566 - 8f4c1eaecf06f745e3a4a0f19ef84d5b530447ec tree 290 70 1635 - e4adc8b0200fba1004f88b43dad80bb4ad8b9af0 tree 290 69 1705 - 8b35e3b9b79bb5f041402ad84c4477ef3b6fb536 tree 290 69 1774 - 0e50fdbbefb059864c397d7eaf054c7af58a23cf tree 290 69 1843 - 3e50041e82b225ca9e9b2641548b0c1b81eb971b tree 290 69 1912 - dceaeb0c4965760edaf030c7fe1eb100e7920d61 tree 290 69 1981 - 6961ae061a9b89b91162c00d55425b39a19c9f90 tree 300 72 2050 - 5faa3895522087022ba6fc9e64b02653bd7c4283 blob 9 18 2122 - non delta: 15 objects - chain length = 1: 3 objects - bomb.pack: ok diff --git a/test/carton/bomb.idx b/test/carton/bomb.idx deleted file mode 100644 index 7aaac5daf00f6a4de8f1ba5d6d93745d3db03c17..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1576 zcmexg;-AdGz`z8=fe$kSu!iCv^a>QT`pgKx1 zC(sN^F&EGb5axzr9w0psm=|a-H8CI1JX&IYp!ot&EC{6WVIiPA-7sIk-`(#wM7H@@ z*4C|O^{M*WrF>o>*II^w*>Upjmx5DXcTM=?%8*V@NlTbmQyk(yk%-;!hjICtleTnXywSc+r;)w~Hb?- z2~|Jm%&T?J_?02=|Kxw7>hvk+3ukwmKHj-~_SO%M4q7*ST*}{D=WjLZ70BBozUf}W zn)g>inqn=zXCJA5*6$;??mXLf*T+j1e4O{gH#(Tb{mt6u*{h!ZQK&j_ThwHihvexk zi@kRqoG#6NE~6u~PqAX5VuO3h5d~@Sn}H_f8zfYPc|4Bn+jmjq$fm0-jJHm$$+gz` zJnsa1?{DtQ=Q8Cch1TDB^_s^swT$o92ZQ7P&hDi-E<99I$xs z0%A}(49t&g?||a(fi$p;XFUc~UkRk20O>72J$r%TD=!{8r+@7Js)fCL3%8~i`xbZe cDO+YlHwotX?VR-GyZGznS36q-9^5Mf03LAroB#j- diff --git a/test/carton/bomb.pack b/test/carton/bomb.pack deleted file mode 100644 index 24a4771808e5692d47553d59b4edadfae0d3f576..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2160 zcmai#dpOg39LHx^5gEk^<$hb`G8=MRN5=7UNzok3{g!081exiLB84)j! 
zX{L|FI89$bELKW8DTT;*O=A+NYJPsH5PXVj-iC}smn60)#bDk$x5;B_*kl^))h7Bi z7)ut%Ld6Ccc8Z~Ye2z`h8`oFb>XqO(;YSX;Oy@LX1I*6*=IwTxEC9XgCTE@-!+|y4> znQ{1Rc0vmktrQ87uP=}`@wl))9qa2Ol6uMJK&g#SwahAeJN2N*tM{1`Qe$B-~Q48;{UAJdTi60tjjoicA`*}K+Vsv`%n1SP2)XbeamlKrd=xuK?)hBD9FEz;Ffl$T5cxQA>gf zSm9X`Zt;LlneL4^MgQtu(P=njnSu{9iN0zZT2uWZA-`FIKQHTvNsRMb6TSzA_dS#% zP!Vtt`1Vmj@Z1?e96FQPgS4IJ3Rn^;)>~?sUx31iTx5@C z5ZRG?oJJQ$0d@=us8)cavjnyxPzAI>+#e6YEU-FUPtOpERC2PiKs#9JT=xgnBo$T9 z>?p-{j7lS)+Q{}dl-e~vRuS-Om&7w83Djate+bnUI<_VqHDi^euRHFFbuF|y3S;_5 zl`(A?(Ski3cx374Ucc`s3{tN<`n39!$0-jL=BTj)($vV2*`~qVX;nHTKbR~Q>x?W= zScP=@1mah$%$3BN@ZM~4(#@ou>y-E7<9N%;ni!hX;vxe8+jnW)>kq%&@5T5#HCGR7(UIgY|R@ZVYCZ=C?{PE*|IN33eJ?I^ZCw8 z)|Y~{-Yx{Z`Pz0~x1g&@d-!aZk;R>Eh|t~f-mv)h_gU>b#j@A6(v*+k?a~jD%6N|| z>j-z6Bs>(kRp*-4I_I-&&kkpuh_u0KJ#RFEYS)*!FQDb(Z&ZTMKMeOu(7Y-}bmshi zkX5-f0xN%)J_k+R&d^!?Zu+vP&+^8jhSx7cA6O2bcmKyZ`6MmOg_lT~7A{=itoRuB zm`#M?E*0Gsy&p0;KRT(yY)BYb&EkqBM)y=H&q#G&qvCQpO{-US=f{&KLjQQ>nx@|x zW`<@3A);HWzElmpIUY{BTeCi~(0*f3zNi9+N^cH&vg~B`sNenG$0%W^)jNO)OZ6bk ztt>N7MMJBuwCqgRy1<0!wK#}W+)ZooPiCz>A(x9+4U9b1BWnF^@fq z=#t@S6dXfjAdJVM)>d*s5tr*St_^Wk8Y&)sbm5}Q3%NQs>%(6ov;TCSO`flGlbIvR zelzl}?Zvt)9##wqL2oRs7y0~6q*oZ`5KrwW#Ii?X+)Pul-)+S2#$(f@UyeM{cCYOn zcLHnb=JgxQ+#V>(`;od)rjXNa%$bY&{K_~cXO+L?Mqgl3QYtYh4#9+B&0ONpeSgb| z)8~^itG|Bx)LpS?kI*0cf$7Oe5X_}T=V9eO3f$fsw&@@~J=+1>Y5oN5qf#_#J)VFA z88tu@mcV2^0@bZMk_x=xbaV`Kqm}#K4D}3Sbd7FL@~RA)c_y^EV(;bMK@#7+{5_qs zEpD3ua^p!J*uV66#pbBFw{1+6XZ^%fi(=bQ=jqR(_DU4;-pp43xv5-ar)E&PBUg+@ zH%0-6h?D|Xb%EsS*hl*zn!%2^3lrn&9jm3Ie4hAc?OH(v;J7aQNOpAzzlW%@V!!*=J8#UeS|jvu$UQ*-5K(J8hkr0#=2 z+HSTp$3s(bYjunoH}-<_dZQh`MFh=-(&&;Xz>i4L*el>(zb12$EX|-qN3PUgY9I^i z$o(b|CP=2a+@ZQ+cF-k4Y<@N;glO*ZPDmvv)scIUMu($-BScF1mfGN6NvCofIy8gQ zFjAmCx;uTJ3E8yPbB$15@MUb>1|9uHaJv{{cq7A}3)hhsQU{g{I#J#1(z~o=w+T@)vkUtk7 z{^f<-WWGV~so-0`^N!XsWIeGFE%h>l0PVULU{I^wwOL~VwV!Q==>twXut6ZnV}g-G YT`zTe$E!;+50^ChBOD_BIIL>?FKd1NXaE2J diff --git a/test/carton/dune b/test/carton/dune deleted file mode 100644 index 2e49b6cfc..000000000 --- a/test/carton/dune +++ /dev/null @@ -1,33 +0,0 @@ -(executable - (name test) - (flags - (:standard -thread)) - (libraries - git_version - lwt - lwt.unix - rresult - fpath - decompress.de - decompress.zl - optint - fmt - bigstringaf - base64 - bos - checkseum.c - digestif.c - unix - threads - carton - carton-lwt - alcotest)) - -(rule - (alias runtest) - (package carton-lwt) - (deps - (:test test.exe) - bomb.pack) - (action - (run %{test} --color=always))) diff --git a/test/carton/prelude.ml b/test/carton/prelude.ml deleted file mode 100644 index 75878376f..000000000 --- a/test/carton/prelude.ml +++ /dev/null @@ -1,256 +0,0 @@ -type fd = { fd : Unix.file_descr; mx : int64 } - -let unix_map : fd Carton.Dec.W.map = - fun fd ~pos len -> - let payload = - let len = min Int64.(to_int (sub fd.mx pos)) len in - Unix.map_file fd.fd ~pos Bigarray.char Bigarray.c_layout false [| len |] - in - Bigarray.array1_of_genarray payload - -module IO = struct - type 'a t = ('a -> unit) -> unit - - let return x k = k x - let bind t f k = t (fun x -> f x k) - let ( >>= ) x f = bind x f - - module Ivar = struct - type 'a state = Full of 'a | Empty of ('a -> unit) Queue.t - type 'a t = { mutable state : 'a state } - - let create () = { state = Empty (Queue.create ()) } - - let fill t x = - match t.state with - | Full _ -> failwith "Ivar.fill" - | Empty q -> - t.state <- Full x; - Queue.iter (fun f -> f x) q - - let read t k = - match t.state with Full x -> k x | Empty q -> Queue.push k q - end - - module Future = struct - let wait = Ivar.read - end - - let fork f k 
= - let ivar = Ivar.create () in - f () (fun x -> Ivar.fill ivar x); - k ivar - - type tpool = { - seq : seq; - work_mutex : Mutex.t; - work_cond : Condition.t; - working_cond : Condition.t; - mutable working_cnt : int; - mutable thread_cnt : int; - mutable stop : bool; - } - - and seq = { mutable prev : seq; mutable next : seq } - - let make_seq () = - let rec seq = { prev = seq; next = seq } in - seq - - type prgn = Prgn : ('a -> unit) * 'a -> prgn - - type node = { - mutable node_prev : seq; - mutable node_next : seq; - prgn : prgn; - mutable active : bool; - } - - external node_of_seq : seq -> node = "%identity" - external seq_of_node : node -> seq = "%identity" - - let is_empty tm = tm.seq.next == tm.seq - - let wait tm = - Mutex.lock tm.work_mutex; - let rec loop () = - if - ((not tm.stop) && (tm.working_cnt <> 0 || not (is_empty tm))) - || (tm.stop && tm.thread_cnt <> 0) - then ( - Condition.wait tm.working_cond tm.work_mutex; - loop ()) - in - loop (); - Mutex.unlock tm.work_mutex - - let remove node = - if node.active then ( - node.active <- false; - let seq = seq_of_node node in - seq.prev.next <- seq.next; - seq.next.prev <- seq.prev) - - let pop tm = - if is_empty tm then None - else - let res = node_of_seq tm.seq.next in - remove res; - Some res.prgn - - let worker tm = - let rec loop () = - Mutex.lock tm.work_mutex; - while is_empty tm && not tm.stop do - Condition.wait tm.work_cond tm.work_mutex - done; - if not tm.stop then ( - let res = pop tm in - tm.working_cnt <- tm.working_cnt + 1; - Mutex.unlock tm.work_mutex; - (match res with Some (Prgn (f, a)) -> f a | None -> ()); - Mutex.lock tm.work_mutex; - tm.working_cnt <- tm.working_cnt - 1; - if (not tm.stop) && tm.working_cnt = 0 && is_empty tm then - Condition.signal tm.working_cond; - Mutex.unlock tm.work_mutex; - loop ()) - in - loop (); - tm.thread_cnt <- tm.thread_cnt - 1; - Condition.signal tm.working_cond; - Mutex.unlock tm.work_mutex - - let concurrency = ref 4 - - let make () = - let tm = - { - working_cnt = 0; - thread_cnt = !concurrency; - stop = false; - work_mutex = Mutex.create (); - work_cond = Condition.create (); - working_cond = Condition.create (); - seq = make_seq (); - } - in - for _ = 0 to !concurrency - 1 do - let _ = Thread.create worker tm in - () - done; - tm - - let drop tm = - let rec loop () = match pop tm with Some _ -> loop () | None -> () in - loop () - - let reset tm = - Mutex.lock tm.work_mutex; - drop tm; - tm.stop <- true; - Condition.broadcast tm.work_cond; - Mutex.unlock tm.work_mutex; - wait tm; - tm.stop <- false; - tm.thread_cnt <- !concurrency; - for _ = 0 to !concurrency - 1 do - let _ = Thread.create worker tm in - () - done - - let add tm prgn = - Mutex.lock tm.work_mutex; - let node = - { node_prev = tm.seq.prev; node_next = tm.seq; prgn; active = true } - in - tm.seq.prev.next <- seq_of_node node; - tm.seq.prev <- seq_of_node node; - Condition.signal tm.work_cond; - Mutex.unlock tm.work_mutex - - let tm = make () - - let run fiber = - let result = ref None in - fiber (fun x -> result := Some x); - wait tm; - match !result with - | Some x -> - reset tm; - x - | None -> failwith "IO.run" - - module Mutex = struct - type 'a fiber = 'a t - - let create () = Mutex.create () - - let lock : Mutex.t -> unit t = - fun t -> - fork (fun () -> - Mutex.lock t; - return ()) - >>= fun future -> Future.wait future - - let unlock t = Mutex.unlock t - - type t = Mutex.t - end - - module Condition = struct - type 'a fiber = 'a t - type mutex = Mutex.t - type t = Condition.t - - let create () = 
Condition.create () - - let wait mutex t = - fork (fun () -> - Condition.wait mutex t; - return ()) - >>= fun future -> Future.wait future - - let signal t = Condition.signal t - let broadcast t = Condition.broadcast t - end - - let rec parallel_iter ~f = function - | [] -> return () - | x :: r -> - fork (fun () -> f x) >>= fun future -> - parallel_iter ~f r >>= fun () -> Future.wait future - - let create_process tm prgn = add tm (Prgn (prgn, ())) - - let detach prgn = - let ivar = Ivar.create () in - create_process tm (fun () -> - let res = prgn () in - Ivar.fill ivar res); - Ivar.read ivar - - let rec parallel_map ~f = function - | [] -> return [] - | x :: r -> - fork (fun () -> f x) >>= fun future -> - parallel_map ~f r >>= fun r -> - Future.wait future >>= fun x -> return (x :: r) -end - -module Us = Carton.Make (struct - type 'a t = 'a IO.t -end) - -let unix = - let open IO in - let open Us in - { - Carton.bind = (fun x f -> inj (bind (prj x) (fun x -> prj (f x)))); - Carton.return = (fun x -> inj (return x)); - } - -let unix_read : (in_channel, Us.t) Carton.Dec.read = - fun fd buf ~off ~len -> - let n = input fd buf off len in - Us.inj (IO.return n) diff --git a/test/carton/test.ml b/test/carton/test.ml deleted file mode 100644 index 76c4145b2..000000000 --- a/test/carton/test.ml +++ /dev/null @@ -1,1133 +0,0 @@ -let () = Printexc.record_backtrace true - -let weights = - Alcotest.test_case "weight" `Quick @@ fun () -> - Alcotest.(check int) "0" (Carton.Dec.null :> int) 0; - Alcotest.(check int) "1" (Carton.Dec.weight_of_int_exn 1 :> int) 1; - Alcotest.(check int) "2" (Carton.Dec.weight_of_int_exn 2 :> int) 2; - Alcotest.(check int) "3" (Carton.Dec.weight_of_int_exn 3 :> int) 3; - let weight_of_int_exn = Invalid_argument "weight_of_int_exn" in - Alcotest.check_raises "-1" weight_of_int_exn (fun () -> - ignore @@ Carton.Dec.weight_of_int_exn (-1)); - Alcotest.check_raises "-2" weight_of_int_exn (fun () -> - ignore @@ Carton.Dec.weight_of_int_exn (-2)); - Alcotest.check_raises "-3" weight_of_int_exn (fun () -> - ignore @@ Carton.Dec.weight_of_int_exn (-3)) - -let randomize payload = - for i = 0 to Bigstringaf.length payload - 1 do - Bigstringaf.set payload i (Char.chr (Random.bits () land 0xff)) - done - -let seed = "OYfrfVoWdfZgHS18ubCo4ChABW+SstWbVXUya2moM2Y=" -let seed = Base64.decode_exn seed - -let seed = - let res = Array.make (String.length seed / 2) 0 in - for i = 0 to (String.length seed / 2) - 1 do - res.(i) <- (Char.code seed.[i * 2] lsl 8) lor Char.code seed.[(i * 2) + 1] - done; - res - -let () = Random.full_init seed - -open Prelude - -let failf fmt = Alcotest.failf fmt - -let bigstringaf = - Alcotest.testable - Fmt.(using Bigstringaf.to_string string) - (fun a b -> - String.equal (Bigstringaf.to_string a) (Bigstringaf.to_string b)) - -let physical_equal = - Alcotest.testable (fun ppf _ -> Fmt.string ppf "#ptr") (fun a b -> a == b) - -let loads = - Alcotest.test_case "load" `Quick @@ fun () -> - let chunk = 1024 * 1024 in - let payload = Bigstringaf.create (chunk * 2) in - randomize payload; - let do_mmap = ref false in - let map payload ~pos len = - if pos < 0L then failf "mmap: index out of bounds"; - if pos > Int64.of_int (Bigstringaf.length payload) then - failf "mmap: index out of bounds"; - let max = Int64.sub (Int64.of_int (Bigstringaf.length payload)) pos in - let len = min max (Int64.of_int len) in - let len = Int64.to_int len in - do_mmap := true; - Bigstringaf.sub payload ~off:(Int64.to_int pos) ~len - in - let w = Carton.Dec.W.make ~sector:(Int64.of_int chunk) 
payload in - let slice0 = Carton.Dec.W.load ~map w 0L in - Alcotest.(check bool) "first load" (Option.is_some slice0) true; - let slice0 = Option.get slice0 in - Alcotest.(check bool) "first load" !do_mmap true; - Alcotest.(check int64) "first load: offset" slice0.Carton.Dec.W.offset 0L; - Alcotest.(check int) - "first load: length" slice0.Carton.Dec.W.length - (min (Bigstringaf.length payload) chunk); - Alcotest.(check bigstringaf) - "first load: contents" - (Bigstringaf.sub payload ~off:0 - ~len:(min (Bigstringaf.length payload) chunk)) - slice0.Carton.Dec.W.payload; - do_mmap := false; - (* reset *) - let slice1 = Carton.Dec.W.load ~map w 0L in - Alcotest.(check bool) "second load" (Option.is_some slice1) true; - let slice1 = Option.get slice1 in - Alcotest.(check bool) "second load" !do_mmap false; - Alcotest.(check int64) "second load: offset" slice1.Carton.Dec.W.offset 0L; - Alcotest.(check int) - "second load: length" slice1.Carton.Dec.W.length - (min (Bigstringaf.length payload) chunk); - Alcotest.(check bigstringaf) - "second load: contents" - (Bigstringaf.sub payload ~off:0 - ~len:(min (Bigstringaf.length payload) chunk)) - slice1.Carton.Dec.W.payload; - Alcotest.(check physical_equal) "no allocation" slice0 slice1; - let slice2 = Carton.Dec.W.load ~map w 100L in - Alcotest.(check bool) "third load" (Option.is_some slice2) true; - let slice2 = Option.get slice2 in - Alcotest.(check bool) "third load" !do_mmap false; - Alcotest.(check physical_equal) "no allocation" slice0 slice2; - do_mmap := false; - (* reset *) - let slice3 = Carton.Dec.W.load ~map w (Int64.of_int chunk) in - Alcotest.(check bool) "four load" (Option.is_some slice3) true; - let slice3 = Option.get slice3 in - Alcotest.(check bool) "four load" !do_mmap true; - Alcotest.(check int64) - "four load: offset" slice3.Carton.Dec.W.offset (Int64.of_int chunk); - Alcotest.(check int) - "four load: length" slice3.Carton.Dec.W.length - (min (Bigstringaf.length payload) chunk); - Alcotest.(check bigstringaf) - "four load: contents" - (Bigstringaf.sub payload ~off:chunk - ~len:(min (Bigstringaf.length payload) chunk)) - slice3.Carton.Dec.W.payload - -let pp_kind ppf = function - | `A -> Fmt.string ppf "a" - | `B -> Fmt.string ppf "b" - | `C -> Fmt.string ppf "c" - | `D -> Fmt.string ppf "d" - -let equal_kind a b = - match a, b with `A, `A | `B, `B | `C, `C | `D, `D -> true | _ -> false - -let kind = Alcotest.testable pp_kind equal_kind -let optint = Alcotest.testable Optint.pp Optint.equal -let sha1 = Alcotest.testable Digestif.SHA1.pp Digestif.SHA1.equal -let s = Alcotest.testable (fun ppf x -> Fmt.pf ppf "%S" x) String.equal -let z = Bigstringaf.create De.io_buffer_size -let allocate bits = De.make_window ~bits -let o = Bigstringaf.create De.io_buffer_size - -let empty_pack, uid_empty_pack = - let () = - let cmd = Bos.Cmd.(v "git" % "pack-objects" % "-q" % "--stdout") in - let out = - let open Rresult in - Bos.OS.Dir.current () >>= fun current -> - let dst = Fpath.(current / "pack-null") in - ( Bos.OS.Dir.with_tmp "git-%s" @@ fun path -> - Bos.OS.Dir.with_current path @@ fun () -> - Bos.OS.Cmd.run_status Bos.Cmd.(v "git" % "init") >>= fun _ -> - Bos.OS.Cmd.run - Bos.Cmd.(v "git" % "config" % "init.defaultBranch" % "master") - >>| fun () -> - let out = Bos.OS.Cmd.(run_io cmd in_null) in - Bos.OS.Cmd.out_file dst out ) - () - in - match Rresult.R.(join (join (join out))) with - | Ok ((), (_, `Exited 0)) -> () - | _ -> Alcotest.fail "Error while executing 'git pack-objects'" - in - let ic = open_in_bin "pack-null" in - let ln 
= in_channel_length ic in - let rs = Bytes.create ln in - really_input ic rs 0 ln; - close_in ic; - ( Bytes.unsafe_to_string rs, - Digestif.SHA1.of_raw_string (Bytes.sub_string rs (Bytes.length rs - 20) 20) - ) - -let test_empty_pack () = - Alcotest.test_case "empty pack" `Quick @@ fun () -> - let contents_expected = empty_pack in - let buf = Bigstringaf.create 12 in - let ctx = Digestif.SHA1.empty in - Carton.Enc.header_of_pack ~length:0 buf 0 12; - let ctx = Digestif.SHA1.feed_bigstring ctx buf ~off:0 ~len:12 in - let sha = Digestif.SHA1.get ctx in - Alcotest.(check sha1) "hash" sha uid_empty_pack; - let res = Bigstringaf.to_string buf ^ Digestif.SHA1.to_raw_string sha in - Alcotest.(check s) "contents" res contents_expected - -module Fp = Carton.Dec.Fp (Uid) - -type fake_file_descriptor = { - mutable pos : int; - mutable lst : Bigstringaf.t list; -} - -let fd_and_read_of_bigstring_list lst = - let fd = { pos = 0; lst } in - let read fd buf ~off ~len = - match fd.lst with - | [] -> Us.inj (IO.return 0) - | x :: r -> - let len = min len (Bigstringaf.length x - fd.pos) in - Bigstringaf.blit_to_bytes x ~src_off:fd.pos buf ~dst_off:off ~len; - fd.pos <- fd.pos + len; - if fd.pos = Bigstringaf.length x then fd.lst <- r; - Us.inj (IO.return len) - in - fd, read - -let ( <.> ) f g x = f (g x) - -let valid_empty_pack () = - Alcotest.test_case "valid empty pack" `Quick @@ fun () -> - let fd, read = - fd_and_read_of_bigstring_list - [ - Bigstringaf.of_string ~off:0 ~len:(String.length empty_pack) empty_pack; - ] - in - let max, buf, _ = (IO.run <.> Us.prj) (Fp.check_header unix read fd) in - let tmp0 = Bytes.create De.io_buffer_size in - let tmp1 = Bigstringaf.create De.io_buffer_size in - - let decoder = Fp.decoder ~o ~allocate `Manual in - let decoder = - Fp.src decoder - (Bigstringaf.of_string buf ~off:0 ~len:(String.length buf)) - 0 (String.length buf) - in - - Alcotest.(check int) "number" max 0; - - let rec go decoder = - let open IO in - match Fp.decode decoder with - | `End uid -> - Alcotest.(check sha1) "hash" uid uid_empty_pack; - return () - | `Entry _ -> Alcotest.fail "Unexpected entry" - | `Malformed err -> Alcotest.fail err - | `Await decoder -> - read fd tmp0 ~off:0 ~len:(Bytes.length tmp0) |> Us.prj >>= fun len -> - Bigstringaf.blit_from_bytes tmp0 ~src_off:0 tmp1 ~dst_off:0 ~len; - let decoder = Fp.src decoder tmp1 0 len in - go decoder - | `Peek _ -> Alcotest.fail "Unexpected `Peek" - in - - IO.run (go decoder) - -module Verify = Carton.Dec.Verify (Uid) (Us) (IO) - -let digest_like_git ~kind ?(off = 0) ?len buf = - let len = - match len with Some len -> len | None -> Bigstringaf.length buf - off - in - let ctx = Digestif.SHA1.empty in - - let ctx = - match kind with - | `A -> Digestif.SHA1.feed_string ctx (Fmt.str "commit %d\000" len) - | `B -> Digestif.SHA1.feed_string ctx (Fmt.str "tree %d\000" len) - | `C -> Digestif.SHA1.feed_string ctx (Fmt.str "blob %d\000" len) - | `D -> Digestif.SHA1.feed_string ctx (Fmt.str "tag %d\000" len) - in - let ctx = Digestif.SHA1.feed_bigstring ctx ~off ~len buf in - Digestif.SHA1.get ctx - -let verify_empty_pack () = - Alcotest.test_case "verify empty pack" `Quick @@ fun () -> - let t = - Carton.Dec.make () ~sector:512L ~z ~allocate ~uid_ln:Uid.length - ~uid_rw:Uid.of_raw_string (fun _ -> Alcotest.fail "Invalid call to IDX") - in - let map () ~pos length = - let len = min length (Int64.to_int pos - String.length empty_pack) in - Bigstringaf.of_string empty_pack ~off:(Int64.to_int pos) ~len - in - let oracle = - { - Carton.Dec.digest = 
digest_like_git; - children = (fun ~cursor:_ ~uid:_ -> []); - where = (fun ~cursor:_ -> Alcotest.fail "Invalid call to [where]"); - weight = (fun ~cursor:_ -> Alcotest.fail "Invalid call to [weight]"); - } - in - IO.run (Verify.verify ~threads:1 ~map ~oracle ~verbose:ignore t ~matrix:[||]) - -module Idx = Carton.Dec.Idx.N (Uid) - -let empty_index, uid_empty_index = - let res = - let open Rresult in - Bos.OS.Dir.current () >>= fun current -> - let dst = Fpath.(current / "index-null.idx") in - ( Bos.OS.Dir.with_tmp "git-%s" @@ fun path -> - Bos.OS.Dir.with_current path @@ fun () -> - Bos.OS.Cmd.run_status Bos.Cmd.(v "git" % "init") >>= fun _ -> - Bos.OS.Cmd.run - Bos.Cmd.(v "git" % "config" % "init.defaultBranch" % "master") - >>= fun () -> - let cmd = Bos.Cmd.(v "git" % "pack-objects" % "-q" % "--stdout") in - let out = Bos.OS.Cmd.(run_io cmd in_null) in - Bos.OS.Cmd.out_run_in out >>= fun in_cmd -> - let cmd = - Bos.Cmd.( - v "git" % "index-pack" % "--stdin" % "-o" % Fpath.to_string dst) - in - Bos.OS.Cmd.run_in cmd in_cmd ) - () - in - let () = - match Rresult.R.(join (join res)) with - | Ok () -> () - | Error (`Msg err) -> Alcotest.fail err - in - let ic = open_in_bin "index-null.idx" in - let ln = in_channel_length ic in - let rs = Bytes.create ln in - really_input ic rs 0 ln; - close_in ic; - ( Bytes.unsafe_to_string rs, - Digestif.SHA1.of_raw_string (Bytes.sub_string rs (Bytes.length rs - 20) 20) - ) - -let index_of_empty_pack () = - Alcotest.test_case "index of empty pack" `Quick @@ fun () -> - let p = ref 0 and c = ref 0 in - let encoder = Idx.encoder `Manual ~pack:uid_empty_pack [||] in - Idx.dst encoder o 0 (Bigstringaf.length o); - - let rec go () = - match Idx.encode encoder `Await with - | `Partial -> - Alcotest.(check bool) "`Partial" (!c < 3) true; - incr c; - let pos = Bigstringaf.length o - !p - Idx.dst_rem encoder in - Idx.dst encoder o pos (Bigstringaf.length o - pos); - p := !p + pos; - go () - | `Ok -> - let raw = Bigstringaf.substring o ~off:0 ~len:!p in - Alcotest.(check s) "index" raw empty_index - in - go (); - let uid = Bigstringaf.substring o ~off:(!p - Uid.length) ~len:Uid.length in - let uid = Uid.of_raw_string uid in - Alcotest.(check sha1) "hash" uid uid_empty_index - -let check_empty_index () = - Alcotest.test_case "check empty index" `Quick @@ fun () -> - let map = - Bigstringaf.of_string empty_index ~off:0 ~len:(String.length empty_index) - in - let idx = - Carton.Dec.Idx.make map ~uid_ln:Uid.length ~uid_rw:Uid.to_raw_string - ~uid_wr:Uid.of_raw_string - in - Alcotest.(check int) "number of entries" (Carton.Dec.Idx.max idx) 0 - -let index_of_one_entry () = - Alcotest.test_case "index of one entry" `Quick @@ fun () -> - let p = ref 0 and c = ref 0 in - let encoder = - Idx.encoder `Manual ~pack:(Uid.of_hex "") - [| - { - Carton.Dec.Idx.crc = Checkseum.Crc32.default; - offset = 0L; - uid = Uid.null; - }; - |] - in - Idx.dst encoder o 0 (Bigstringaf.length o); - - let rec go () = - match Idx.encode encoder `Await with - | `Partial -> - Alcotest.(check bool) "`Partial" (!c < 3) true; - incr c; - let pos = Bigstringaf.length o - !p - Idx.dst_rem encoder in - Idx.dst encoder o pos (Bigstringaf.length o - pos); - p := !p + pos; - go () - | `Ok -> Bigstringaf.sub o ~off:0 ~len:!p - in - let idx = - Carton.Dec.Idx.make (go ()) ~uid_ln:Uid.length ~uid_rw:Uid.to_raw_string - ~uid_wr:Uid.of_raw_string - in - Alcotest.(check int) "number of entries" (Carton.Dec.Idx.max idx) 1; - Alcotest.(check (option (pair optint int64))) - "entry" - (Carton.Dec.Idx.find idx Uid.null) 
- (Some (Checkseum.Crc32.default, 0L)) - -let file = - let compare a b = - let ic_a = open_in_bin a in - let ic_b = open_in_bin b in - let ln_a = in_channel_length ic_a and ln_b = in_channel_length ic_b in - if ln_a <> ln_b then ( - close_in ic_a; - close_in ic_b; - false) - else - let bf_a = Bytes.create 0x1000 and bf_b = Bytes.create 0x1000 in - let rec go () = - match input ic_a bf_a 0 0x1000, input ic_b bf_b 0 0x1000 with - | 0, 0 -> true - | rs_a, rs_b when rs_a = rs_b -> - if not (Bytes.sub_string bf_a 0 rs_a = Bytes.sub_string bf_b 0 rs_b) - then false - else go () - | _ -> false - | exception End_of_file -> true - in - let rs = go () in - close_in ic_a; - close_in ic_b; - rs - in - Alcotest.testable Fmt.string compare - -let zip a b = - if Array.length a <> Array.length b then Fmt.invalid_arg "Array.zip"; - Array.init (Array.length a) (fun i -> a.(i), b.(i)) - -type fd_with_length = { fd : Unix.file_descr; mx : int64 } - -let map { fd; mx } ~pos len = - let len = min Int64.(sub mx pos) (Int64.of_int len) in - let mp = - Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false - [| Int64.to_int len |] - in - Bigarray.array1_of_genarray mp - -let verify_bomb_pack () = - Alcotest.test_case "verify & generate index of bomb pack" `Quick @@ fun () -> - let o = Bigstringaf.create 0x1000 in - let allocate bits = De.make_window ~bits in - let decoder = Fp.decoder ~o ~allocate `Manual in - let tmp0 = Bytes.create 0x1000 in - let tmp1 = Bigstringaf.create 0x1000 in - - let ic = open_in_bin "bomb.pack" in - let hash_expected = - let len = in_channel_length ic in - seek_in ic (len - 20); - let res = really_input_string ic 20 in - let res = Digestif.SHA1.of_raw_string res in - seek_in ic 0; - res - in - - let max, buf, _ = - (IO.run <.> Us.prj) - (Fp.check_header unix - (fun ic buf ~off ~len -> (Us.inj <.> IO.return) (input ic buf off len)) - ic) - in - let decoder = - Fp.src decoder (Bigstringaf.of_string buf ~off:0 ~len:12) 0 12 - in - - let weight = Hashtbl.create max in - let checks = Hashtbl.create max in - let children = Hashtbl.create max in - let where = Hashtbl.create max in - let matrix = Array.make max Verify.unresolved_node in - - let rec go decoder = - match Fp.decode decoder with - | `Await decoder -> - let len = input ic tmp0 0 0x1000 in - Bigstringaf.blit_from_bytes tmp0 ~src_off:0 tmp1 ~dst_off:0 ~len; - let decoder = Fp.src decoder tmp1 0 len in - go decoder - | `Peek decoder -> - let keep = Fp.src_rem decoder in - let len = input ic tmp0 0 (0x1000 - keep) in - Bigstringaf.blit_from_bytes tmp0 ~src_off:0 tmp1 ~dst_off:keep ~len; - let decoder = Fp.src decoder tmp1 0 (keep + len) in - go decoder - | `Entry ({ Fp.kind = Base _; offset; size; crc; _ }, decoder) -> - let n = Fp.count decoder - 1 in - Hashtbl.add checks offset crc; - Hashtbl.add weight offset size; - Hashtbl.add where offset n; - matrix.(n) <- Verify.unresolved_base ~cursor:offset; - go decoder - | `Entry - ({ Fp.kind = Ofs { sub = s; source; target }; offset; crc; _ }, decoder) - -> - let n = Fp.count decoder - 1 in - let base = Int64.(sub offset (of_int s)) in - - Hashtbl.add checks offset crc; - Hashtbl.add weight base source; - Hashtbl.add weight offset target; - Hashtbl.add where offset n; - - (try - let v = Hashtbl.find children (`Ofs base) in - Hashtbl.add children (`Ofs base) (offset :: v) - with Not_found -> Hashtbl.add children (`Ofs base) [ offset ]); - go decoder - | `Entry _ -> (* OBJ_REF *) Alcotest.fail "Unexpected OBJ_REF" - | `Malformed err -> Alcotest.fail err - | `End uid -> Alcotest.(check 
sha1) "hash" uid hash_expected - in - - go decoder; - close_in ic; - let fd = Unix.openfile "bomb.pack" Unix.[ O_RDONLY ] 0o644 in - let mx = - let st = Unix.LargeFile.fstat fd in - st.Unix.LargeFile.st_size - in - - let oracle = - { - Carton.Dec.where = (fun ~cursor -> Hashtbl.find where cursor); - children = - (fun ~cursor ~uid -> - match - ( Hashtbl.find_opt children (`Ofs cursor), - Hashtbl.find_opt children (`Ref uid) ) - with - | Some a, Some b -> List.sort_uniq compare (a @ b) - | Some x, None | None, Some x -> x - | None, None -> []); - digest = digest_like_git; - weight = (fun ~cursor -> Hashtbl.find weight cursor); - } - in - - let z = Bigstringaf.create 0x1000 in - let t = - Carton.Dec.make { fd; mx } ~z ~allocate ~uid_ln:Uid.length - ~uid_rw:Uid.of_raw_string (fun _ -> Alcotest.fail "Invalid call to IDX") - in - IO.run (Verify.verify ~threads:1 ~map ~oracle ~verbose:ignore t ~matrix); - Unix.close fd; - - let offsets = - Hashtbl.fold (fun k _ a -> k :: a) where [] - |> List.sort Stdlib.compare - |> Array.of_list - in - let matrix = zip offsets matrix in - let entries = - Array.map - (fun (offset, s) -> - let uid = Verify.uid_of_status s in - let crc = Hashtbl.find checks offset in - { Carton.Dec.Idx.crc; offset; uid }) - matrix - in - let oc = open_out_bin "bomb-test.idx" in - let encoder = Idx.encoder (`Channel oc) ~pack:hash_expected entries in - let go () = - match Idx.encode encoder `Await with `Partial -> assert false | `Ok -> () - in - go (); - close_out oc; - - let () = - let cmd = - Bos.Cmd.(v "git" % "index-pack" % "-o" % "git-bomb.idx" % "bomb.pack") - in - match Bos.OS.Cmd.run cmd with - | Ok () -> () - | Error (`Msg err) -> Alcotest.fail err - in - Alcotest.(check file) "index" "git-bomb.idx" "bomb-test.idx" - -let first_entry_of_bomb_pack () = - Alcotest.test_case "first entry of bomb pack" `Quick @@ fun () -> - let fd = Unix.openfile "bomb.pack" Unix.[ O_RDONLY ] 0o644 in - let mx = - let st = Unix.LargeFile.fstat fd in - st.Unix.LargeFile.st_size - in - let pack = - Carton.Dec.make { fd; mx } ~z ~allocate ~uid_ln:Uid.length - ~uid_rw:Uid.of_raw_string (fun _ -> Alcotest.fail "Invalid call to IDX") - in - let fiber () = - let weight = - Carton.Dec.weight_of_offset ~map pack ~weight:Carton.Dec.null 12L - in - let raw = Carton.Dec.make_raw ~weight in - Carton.Dec.of_offset ~map pack raw ~cursor:12L - in - let v = fiber () in - Alcotest.(check kind) "kind" (Carton.Dec.kind v) `A; - Alcotest.(check int) "length" (Carton.Dec.len v) 218; - Alcotest.(check int) "depth" (Carton.Dec.depth v) 1 - -let bomb_matrix = ref [||] -let bomb_index = Hashtbl.create 0x10 - -(* XXX(dinosaure): to avoid systematic unpack of bomb.pack, [unpack_bomb_pack] - sets [bomb_matrix] and fills [bomb_index]. Any use of them should be after - [unpack_bomb_pack]. 
*) - -module Verbose = struct - type 'a fiber = 'a IO.t - - let succ () = IO.return () - let print () = IO.return () - let flush () = IO.return () -end - -let unpack_bomb_pack () = - Alcotest.test_case "unpack bomb pack" `Quick @@ fun () -> - let fd = Unix.openfile "bomb.pack" Unix.[ O_RDONLY ] 0o644 in - let mx = - let st = Unix.LargeFile.fstat fd in - st.Unix.LargeFile.st_size - in - let pack = - Carton.Dec.make { fd; mx } ~z ~allocate ~uid_ln:Uid.length - ~uid_rw:Uid.of_raw_string (fun _ -> Alcotest.fail "Invalid call to IDX") - in - - let first_pass () = - let ic = open_in_bin "bomb.pack" in - let max, _, _ = (IO.run <.> Us.prj) (Fp.check_header unix unix_read ic) in - seek_in ic 0; - let decoder = Fp.decoder ~o:z ~allocate (`Channel ic) in - let matrix = Array.make max Verify.unresolved_node in - - let where = Hashtbl.create 0x10 in - let children = Hashtbl.create 0x10 in - let weight = Hashtbl.create 0x10 in - - let rec go decoder = - match Fp.decode decoder with - | `Await _ | `Peek _ -> assert false - | `Entry ({ Fp.kind = Base _; offset; size; _ }, decoder) -> - let n = Fp.count decoder - 1 in - Hashtbl.add weight offset size; - Hashtbl.add where offset n; - matrix.(n) <- Verify.unresolved_base ~cursor:offset; - go decoder - | `Entry - ({ Fp.kind = Ofs { sub = s; source; target; _ }; offset; _ }, decoder) - -> - let n = Fp.count decoder - 1 in - Hashtbl.add weight Int64.(sub offset (Int64.of_int s)) source; - Hashtbl.add weight offset target; - Hashtbl.add where offset n; - (try - let v = - Hashtbl.find children (`Ofs Int64.(sub offset (of_int s))) - in - Hashtbl.add children - (`Ofs Int64.(sub offset (of_int s))) - (offset :: v) - with _exn -> - Hashtbl.add children - (`Ofs Int64.(sub offset (of_int s))) - [ offset ]); - go decoder - | `Entry _ -> assert false - | `End _ -> - close_in ic; - ( { - Carton.Dec.digest = digest_like_git; - children = - (fun ~cursor ~uid -> - match - ( Hashtbl.find_opt children (`Ofs cursor), - Hashtbl.find_opt children (`Ref uid) ) - with - | Some a, Some b -> List.sort_uniq compare (a @ b) - | Some x, None | None, Some x -> x - | None, None -> []); - where = (fun ~cursor -> Hashtbl.find where cursor); - weight = (fun ~cursor -> Hashtbl.find weight cursor); - }, - matrix ) - | `Malformed err -> Alcotest.fail err - in - go decoder - in - - let oracle, matrix = first_pass () in - IO.run (Verify.verify ~threads:1 ~map ~oracle ~verbose:ignore pack ~matrix); - Alcotest.(check pass) "verify" () (); - let unpack status = - let cursor = Verify.offset_of_status status in - let weight = - Carton.Dec.weight_of_offset ~map pack ~weight:Carton.Dec.null cursor - in - let raw = Carton.Dec.make_raw ~weight in - let _ = Carton.Dec.of_offset ~map pack raw ~cursor in - () - in - Array.iter - (fun s -> - let _ = unpack s in - Alcotest.(check pass) (Uid.to_hex (Verify.uid_of_status s)) () ()) - matrix; - Alcotest.(check pass) "unpack" () (); - bomb_matrix := matrix; - Array.iter - (fun s -> - Hashtbl.add bomb_index (Verify.uid_of_status s) - (Verify.offset_of_status s)) - matrix - -let fake_pack_bomb_pack () = - Alcotest.test_case "fake pack bomb pack" `Quick @@ fun () -> () - -let pack_bomb_pack () = - Alcotest.test_case "pack bomb pack" `Quick @@ fun () -> - let fd = Unix.openfile "bomb.pack" Unix.[ O_RDONLY ] 0o644 in - let mx = - let st = Unix.LargeFile.fstat fd in - st.Unix.LargeFile.st_size - in - let pack = - Carton.Dec.make { fd; mx } ~z ~allocate ~uid_ln:Uid.length - ~uid_rw:Uid.of_raw_string (fun _ -> Alcotest.fail "Invalid call to IDX") - in - - let load uid = 
- match Hashtbl.find bomb_index uid with - | cursor -> - let weight = - Carton.Dec.weight_of_offset ~map pack ~weight:Carton.Dec.null cursor - in - let raw = Carton.Dec.make_raw ~weight in - let offset = Carton.Dec.of_offset ~map pack raw ~cursor in - (Us.inj <.> IO.return) offset - | exception Not_found -> Alcotest.failf "Invalid UID %a" Uid.pp uid - in - let entries = - Array.map - (fun s -> - let uid = Verify.uid_of_status s in - let cursor = Hashtbl.find bomb_index uid in - let length = - let weight = - Carton.Dec.weight_of_offset ~map pack ~weight:Carton.Dec.null cursor - in - let raw = Carton.Dec.make_raw ~weight in - let v = Carton.Dec.of_offset ~map pack raw ~cursor in - Carton.Dec.len v - in - Carton.Enc.make_entry ~kind:(Verify.kind_of_status s) ~length - (Verify.uid_of_status s)) - !bomb_matrix - in - let module D = Carton.Enc.Delta (Us) (IO) (Uid) (Verbose) in - let offsets = Hashtbl.create 0x10 in - let find uid = - match Hashtbl.find offsets uid with - | v -> (Us.inj <.> IO.return) (Some v) - | exception Not_found -> (Us.inj <.> IO.return) None - in - - let uid = - { Carton.Enc.uid_ln = Uid.length; Carton.Enc.uid_rw = Uid.to_raw_string } - in - - let b = - { - Carton.Enc.o = Bigstringaf.create De.io_buffer_size; - Carton.Enc.i = Bigstringaf.create De.io_buffer_size; - Carton.Enc.q = De.Queue.create 0x10000; - Carton.Enc.w = De.Lz77.make_window ~bits:15; - } - in - - let output_bigstring ctx oc buf ~off ~len = - let ctx = Uid.feed ctx buf ~off ~len in - let s = Bigstringaf.substring buf ~off ~len in - output_string oc s; - (Us.inj <.> IO.return) ctx - in - - let oc = open_out_bin "new.pack" in - - let cursor = ref 12 in - let iter ctx target = - let return = unix.Carton.return in - let ( >>= ) = unix.Carton.bind in - - Hashtbl.add offsets (Carton.Enc.target_uid target) !cursor; - Carton.Enc.encode_target unix ~b ~find ~load ~uid target ~cursor:!cursor - >>= fun (len, encoder) -> - let rec go ctx encoder = - match Carton.Enc.N.encode ~o:b.o encoder with - | `Flush (encoder, len) -> - output_bigstring ctx oc b.o ~off:0 ~len >>= fun ctx -> - cursor := !cursor + len; - let encoder = - Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) - in - go ctx encoder - | `End -> return ctx - in - output_bigstring ctx oc b.o ~off:0 ~len >>= fun ctx -> - cursor := !cursor + len; - let encoder = Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) in - go ctx encoder - in - - let fiber ctx = - let open IO in - let header = Bigstringaf.create 12 in - D.delta ~threads:[ load ] ~weight:10 ~uid_ln:Uid.length entries - >>= fun targets -> - Carton.Enc.header_of_pack ~length:(Array.length targets) header 0 12; - output_bigstring ctx oc header ~off:0 ~len:12 |> Us.prj >>= fun ctx -> - let rec go ctx idx arr = - if idx < Array.length arr then - iter ctx arr.(idx) |> Us.prj >>= fun ctx -> go ctx (succ idx) arr - else return ctx - in - go ctx 0 targets - in - let ctx = IO.run (fiber Uid.empty) in - let hash = Uid.get ctx in - output_string oc (Uid.to_raw_string hash); - close_out oc; - - Alcotest.(check pass) "new.pack" () (); - let res = - let cmd = Bos.Cmd.(v "git" % "index-pack" % "new.pack") in - let out = Bos.OS.Cmd.(run_out cmd) in - match Bos.OS.Cmd.out_lines out with - | Ok ([ hash ], (_, `Exited 0)) -> Uid.of_hex hash - | _ -> Alcotest.fail "Error while executing 'git index-pack'" - in - let uid = Alcotest.testable Uid.pp Uid.equal in - Alcotest.(check uid) "hash" res hash - -let cycle () = - Alcotest.test_case "cycle" `Quick @@ fun () -> - let a = Bigstringaf.create 0x100 in - let b = 
Bigstringaf.create 0x100 in - randomize a; - randomize b; - let ea = - Carton.Enc.make_entry ~kind:`A ~length:(Bigstringaf.length a) - ~delta:(Carton.Enc.From `B) `A - in - let eb = - Carton.Enc.make_entry ~kind:`A ~length:(Bigstringaf.length b) - ~delta:(Carton.Enc.From `A) `B - in - - let load = function - | `A -> (Us.inj <.> IO.return) (Carton.Dec.v ~kind:`A a) - | `B -> (Us.inj <.> IO.return) (Carton.Dec.v ~kind:`A b) - in - let ta = (IO.run <.> Us.prj) (Carton.Enc.entry_to_target unix ~load ea) in - let tb = (IO.run <.> Us.prj) (Carton.Enc.entry_to_target unix ~load eb) in - - let offsets = Hashtbl.create 0x10 in - let find uid = - match Hashtbl.find offsets uid with - | v -> (Us.inj <.> IO.return) (Some v) - | exception Not_found -> (Us.inj <.> IO.return) None - in - - let uid = - { - Carton.Enc.uid_ln = 1; - Carton.Enc.uid_rw = (function `A -> "a" | `B -> "b"); - } - in - - let b = - { - Carton.Enc.o = Bigstringaf.create De.io_buffer_size; - Carton.Enc.i = Bigstringaf.create De.io_buffer_size; - Carton.Enc.q = De.Queue.create 0x10000; - Carton.Enc.w = De.Lz77.make_window ~bits:15; - } - in - - let ctx = ref Uid.empty in - let output_bigstring oc buf ~off ~len = - ctx := Uid.feed !ctx buf ~off ~len; - let s = Bigstringaf.substring buf ~off ~len in - output_string oc s - in - - let oc = open_out_bin "cycle.pack" in - let targets = [| ta; tb |] in - - let cursor = ref 12 in - let iter target = - let return = unix.Carton.return in - let ( >>= ) = unix.Carton.bind in - - Hashtbl.add offsets (Carton.Enc.target_uid target) !cursor; - Carton.Enc.encode_target unix ~b ~find ~load ~uid target ~cursor:!cursor - >>= fun (len, encoder) -> - let rec go encoder = - match Carton.Enc.N.encode ~o:b.o encoder with - | `Flush (encoder, len) -> - output_bigstring oc b.o ~off:0 ~len; - cursor := !cursor + len; - let encoder = - Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) - in - go encoder - | `End -> () - in - output_bigstring oc b.o ~off:0 ~len; - cursor := !cursor + len; - let encoder = Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) in - go encoder; - return () - in - - let header = Bigstringaf.create 12 in - Carton.Enc.header_of_pack ~length:(Array.length targets) header 0 12; - output_bigstring oc header ~off:0 ~len:12; - let fiber = - let open IO in - let rec go idx arr = - if idx < Array.length arr then - iter arr.(idx) |> Us.prj >>= fun () -> go (succ idx) arr - else return () - in - go 0 targets - in - IO.run fiber; - let hash = Uid.get !ctx in - output_string oc (Uid.to_raw_string hash); - close_out oc; - - Alcotest.(check pass) "cycle.pack" () (); - - let fd = Unix.openfile "cycle.pack" Unix.[ O_RDONLY ] 0o644 in - let mx = - let st = Unix.LargeFile.fstat fd in - st.Unix.LargeFile.st_size - in - let pack = - Carton.Dec.make { fd; mx } ~z ~allocate ~uid_ln:1 - ~uid_rw:(function - | "a" -> `A | "b" -> `B | v -> Fmt.invalid_arg "invalid uid: %S" v) - (fun uid -> Int64.of_int (Hashtbl.find offsets uid)) - in - - (* XXX(dinosaure): must fail! *) - try - let _ = - let cursor = Int64.of_int (Hashtbl.find offsets `B) in - let weight = - Carton.Dec.weight_of_offset ~map pack ~weight:Carton.Dec.null cursor - in - let raw = Carton.Dec.make_raw ~weight in - Carton.Dec.of_offset ~map pack raw ~cursor - in - Alcotest.failf "We did not discovered our cycle." 
- with Carton.Dec.Cycle -> Alcotest.(check pass) "cycle" () () - -module Index_stream_decoder = Carton.Dec.Idx.M (IO) (Uid) - -let decode_index_stream () = - Alcotest.test_case "decode index stream" `Quick @@ fun () -> - let ic = open_in_bin "git-bomb.idx" in - let device = Carton.Dec.Idx.Device.device () in - let uid = Carton.Dec.Idx.Device.create device in - let tp = Bytes.create 0x1000 in - let ( >>? ) x f = - let open IO in - x >>= function Ok x -> f x | Error _ as err -> return err - in - let fiber = - Index_stream_decoder.create device uid >>? fun fd -> - let rec go () = - let len = 1 + Random.int (Bytes.length tp - 1) in - let len = input ic tp 0 len in - if len = 0 then ( - Gc.minor (); - Gc.full_major () - (* XXX(dinosaure): we must ensure that underlying value kept by [device] - is available as long as we keep [uid]. - TODO(dinosaure): we should add test to ensure that [device] did not - keep our value if we don't keep [uid]. *); - close_in ic; - Index_stream_decoder.close device fd) - else - let open IO in - Index_stream_decoder.append device fd (Bytes.sub_string tp 0 len) - >>= fun () -> go () - in - Gc.minor (); - Gc.full_major (); - go () - in - match IO.run fiber with - | Ok () -> - Alcotest.(check pass) "index decoder" () (); - let ic = Unix.openfile "git-bomb.idx" Unix.[ O_RDONLY ] 0o644 in - let ln = (Unix.fstat ic).Unix.st_size in - let mp = - Unix.map_file ic ~pos:0L Bigarray.char Bigarray.c_layout false [| ln |] - in - Unix.close ic; - - let payload = Carton.Dec.Idx.Device.project device uid in - let index0 = - Carton.Dec.Idx.make - (Bigarray.array1_of_genarray mp) - ~uid_ln:Uid.length ~uid_rw:Uid.to_raw_string ~uid_wr:Uid.of_raw_string - in - let index1 = - Carton.Dec.Idx.make payload ~uid_ln:Uid.length ~uid_rw:Uid.to_raw_string - ~uid_wr:Uid.of_raw_string - in - Carton.Dec.Idx.iter - ~f:(fun ~uid ~offset ~crc:_ -> - match Carton.Dec.Idx.find index0 uid with - | Some (_, offset') -> - Alcotest.(check int64) (Fmt.str "%a" Uid.pp uid) offset offset' - | None -> Alcotest.failf "%a not found" Uid.pp uid) - index1 - | Error err -> - Alcotest.failf "Error while decoding IDX file: %a" - Index_stream_decoder.pp_error err - -let empty_stream () = - Alcotest.test_case "empty stream" `Quick @@ fun () -> - let decoder = Fp.decoder ~o ~allocate `Manual in - let decoder = - match Fp.decode decoder with - | `Await decoder -> Fp.src decoder Bigstringaf.empty 0 0 - | _ -> Alcotest.fail "Unexpected result of [decode]" - in - match Fp.decode decoder with - | `Malformed _ -> Alcotest.(check pass) "no infinite loop" () () - | _ -> Alcotest.fail "Unexpected result of [decode]" - -let huge_pack () = - Alcotest.test_case "big offset" `Quick @@ fun () -> - let encoder = - Idx.encoder `Manual ~pack:(Uid.of_hex "") - [| - { - Carton.Dec.Idx.crc = Checkseum.Crc32.default; - offset = 0L; - uid = Uid.digest "foo"; - }; - { - Carton.Dec.Idx.crc = Checkseum.Crc32.default; - offset = Int64.(add (of_int32 Int32.max_int) 1L); - uid = Uid.digest "bar"; - }; - |] - in - Idx.dst encoder o 0 (Bigstringaf.length o); - let cur = ref 0 in - let rec go () = - match Idx.encode encoder `Await with - | `Partial -> - let pos = Bigstringaf.length o - !cur - Idx.dst_rem encoder in - Idx.dst encoder o pos (Bigstringaf.length o - pos); - cur := !cur + pos; - go () - | `Ok -> Bigstringaf.sub o ~off:0 ~len:!cur - in - let idx = - Carton.Dec.Idx.make (go ()) ~uid_ln:Uid.length ~uid_rw:Uid.to_raw_string - ~uid_wr:Uid.of_raw_string - in - Alcotest.(check (option (pair optint int64))) - "foo, offset" - (Carton.Dec.Idx.find 
idx (Uid.digest "foo")) - (Some (Checkseum.Crc32.default, 0L)); - Alcotest.(check (option (pair optint int64))) - "bar, big offset" - (Carton.Dec.Idx.find idx (Uid.digest "bar")) - (Some (Checkseum.Crc32.default, Int64.(add (of_int32 Int32.max_int) 1L))) - -let v1_9_0 = - { - Git_version.major = 1; - minor = 9; - patch = Some "0"; - revision = None; - release_candidate = None; - } - -let git_version = - match - Bos.( - OS.Cmd.run_out Cmd.(v "git" % "--version") |> OS.Cmd.out_string ~trim:true) - with - | Error (`Msg err) -> failwith err - | Ok (str, _) -> ( - match Git_version.parse str with - | Some version -> version - | None -> Fmt.failwith "Impossible to parse the Git version: %s" str) - -let tmp = "tmp" - -let () = - let fiber = - let open Bos in - let open Rresult in - OS.Dir.current () >>= fun current -> - OS.Dir.create Fpath.(current / tmp) >>= fun _ -> R.ok Fpath.(current / tmp) - in - let tmp = Rresult.R.failwith_error_msg fiber in - Bos.OS.Dir.set_default_tmp tmp; - - Alcotest.run "carton" - [ - "weights", [ weights ]; - "loads", [ loads ]; - ( "decoder", - [ - valid_empty_pack (); - verify_empty_pack (); - check_empty_index (); - verify_bomb_pack (); - first_entry_of_bomb_pack (); - unpack_bomb_pack (); - decode_index_stream (); - empty_stream (); - huge_pack (); - ] ); - ( "encoder", - [ - test_empty_pack (); - index_of_empty_pack (); - index_of_one_entry (); - (* XXX(dinosaure): it seems that a bug exists in Git (not ocaml-git) - on git-index-pack until 1.9.0. *) - (if Git_version.compare v1_9_0 git_version <= 0 then pack_bomb_pack () - else fake_pack_bomb_pack ()); - cycle (); - ] ); - "lwt", [ Test_lwt.test_map_yield ]; - ] diff --git a/test/carton/test_lwt.ml b/test/carton/test_lwt.ml deleted file mode 100644 index 7bbd01536..000000000 --- a/test/carton/test_lwt.ml +++ /dev/null @@ -1,123 +0,0 @@ -open Lwt.Infix - -let map _ fd ~pos len = - let fd = Lwt_unix.unix_file_descr fd in - let payload = - Unix.map_file fd ~pos Bigarray.char Bigarray.c_layout false [| len |] - in - Bigarray.array1_of_genarray payload - -let yield_map root fd ~pos len = map root fd ~pos len - -let create ?(trunc = true) root path = - let path = Fpath.(root // path) in - let flags = - match trunc with - | false -> Unix.[ O_RDWR; O_APPEND; O_CREAT ] - | true -> Unix.[ O_RDWR; O_APPEND; O_CREAT; O_TRUNC ] - in - - let rec process () = - Lwt_unix.openfile (Fpath.to_string path) flags 0o644 >>= fun fd -> - Lwt.return_ok fd - and error = function - | Unix.Unix_error (Unix.ENOENT, _, _) | Unix.Unix_error (Unix.EACCES, _, _) - -> - Lwt.return_error (`Msg (Fmt.str "Impossible to open %a." 
Fpath.pp path)) - | Unix.Unix_error (Unix.EINTR, _, _) -> Lwt.catch process error - | exn -> Lwt.fail exn - in - Lwt.catch process error - -let append _ fd str = - let rec go off len = - let process () = - Lwt_unix.write_string fd str off len >>= fun len' -> - if len = len' then Lwt.return () else go (off + len') (len - len') - in - let error = function - | Unix.Unix_error (Unix.EINTR, _, _) -> go off len - | exn -> Lwt.fail exn - in - Lwt.catch process error - in - go 0 (String.length str) - -let close _ fd = - let rec process () = Lwt_unix.close fd >>= fun () -> Lwt.return_ok () - and error = function - | Unix.Unix_error (Unix.EINTR, _, _) -> Lwt.catch process error - | exn -> Lwt.fail exn - in - Lwt.catch process error - -module Thin = Carton_lwt.Thin.Make (Uid) - -let access = { Thin.create; Thin.append; Thin.map = yield_map; Thin.close } - -let safely_open path = - let rec process () = - Lwt_unix.openfile (Fpath.to_string path) Unix.[ O_RDONLY ] 0o400 - and error = function - | Unix.Unix_error (Unix.EINTR, _, _) -> Lwt.catch process error - | exn -> Lwt.fail exn - in - Lwt.catch process error - -let safely_close fd = - let rec process () = Lwt_unix.close fd - and error = function - | Unix.Unix_error (Unix.EINTR, _, _) -> Lwt.catch process error - | exn -> Lwt.fail exn - in - Lwt.catch process error - -let safely_read fd tmp off len = - let rec process () = Lwt_unix.read fd tmp off len - and error = function - | Unix.Unix_error (Unix.EINTR, _, _) -> Lwt.catch process error - | exn -> Lwt.fail exn - in - Lwt.catch process error - -let stream_of_file path = - let stream fd () = - let tmp = Bytes.create De.io_buffer_size in - safely_read fd tmp 0 (Bytes.length tmp) >>= function - | 0 -> safely_close fd >>= fun () -> Lwt.return_none - | len -> - let res = Bytes.sub_string tmp 0 len in - Lwt.return_some (res, 0, len) - in - safely_open path >|= stream - -let digest ~kind ?(off = 0) ?len buf = - let len = - match len with Some len -> len | None -> Bigstringaf.length buf - off - in - let ctx = Digestif.SHA1.empty in - - let ctx = - match kind with - | `A -> Digestif.SHA1.feed_string ctx (Fmt.str "commit %d\000" len) - | `B -> Digestif.SHA1.feed_string ctx (Fmt.str "tree %d\000" len) - | `C -> Digestif.SHA1.feed_string ctx (Fmt.str "blob %d\000" len) - | `D -> Digestif.SHA1.feed_string ctx (Fmt.str "tag %d\000" len) - in - let ctx = Digestif.SHA1.feed_bigstring ctx ~off ~len buf in - Digestif.SHA1.get ctx - -let test_map_yield = - Alcotest.test_case "map-yield" `Quick @@ fun () -> - let root = Fpath.v "." - and pack0 = Fpath.v "bomb.pack" - and pack1 = Fpath.v "check.pack" in - - let fiber = - stream_of_file pack0 >>= Thin.verify ~threads:4 ~digest root pack1 access - in - match Lwt_main.run fiber with - | Ok (_n, [], [], _resolveds, _, _uid) -> - Alcotest.(check pass) "map-yield" () () - | Ok _ -> Alcotest.failf "Invalid thin-pack." 
- | Error (`Msg err) -> Alcotest.failf "%s" err diff --git a/test/carton/uid.ml b/test/carton/uid.ml deleted file mode 100644 index d778e9b45..000000000 --- a/test/carton/uid.ml +++ /dev/null @@ -1,17 +0,0 @@ -type t = Digestif.SHA1.t -type ctx = Digestif.SHA1.ctx - -let empty = Digestif.SHA1.empty -let feed = Digestif.SHA1.feed_bigstring -let get = Digestif.SHA1.get -let hash = Hashtbl.hash -let length = Digestif.SHA1.digest_size -let equal = Digestif.SHA1.equal -let pp = Digestif.SHA1.pp -let of_raw_string = Digestif.SHA1.of_raw_string -let to_raw_string = Digestif.SHA1.to_raw_string -let of_hex = Digestif.SHA1.of_hex -let to_hex = Digestif.SHA1.to_hex -let compare = Digestif.SHA1.unsafe_compare -let null = Digestif.SHA1.digest_string "" -let digest = Digestif.SHA1.digest_string