diff --git a/.gitignore b/.gitignore index 2893b5d..d21137e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,8 +17,4 @@ darwin/* *.docdir setup.data -setup.log - -META -*.mldylib -*.mllib +setup.log \ No newline at end of file diff --git a/README.md b/README.md index 74dae2e..2e4dcaf 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ -# Welcome +# Rdr 2.0 - Welcome [![Floobits Status](https://floobits.com/m4b/rdr.svg)](https://floobits.com/m4b/rdr/redirect) +**UPDATE**: + +> `rdr` is now version 2.0, which adds a new "byte coverage" algorithm for returning understood sections/segments with respect to a binary. Check it out with `rdr -c `. +> Moreover, `rdr` is now a binary _and_ a library, which you all can link against and use in your own projects, if you so desire. [elf2json](http://github.com/m4b/elf2json) is a new program demonstrating this. + Welcome to the `rdr` project. `rdr` is an OCaml tool/library for doing cross-platform analysis of binaries. I typically use it for looking up symbol names, finding the address offset, and then running `gdb` or `lldb` to mess around (you should be using both if you even know what you're doing). diff --git a/_oasis b/_oasis index 3592a4d..3812b84 100644 --- a/_oasis +++ b/_oasis @@ -1,16 +1,42 @@ OASISFormat: 0.4 Plugins: META (0.4), DevFiles (0.4) Name: rdr -Version: 1.2 +Version: 2.0.1 Synopsis: Lightweight, cross platform binary parsing and analysis library with no dependencies Authors: m4b +Homepage: http://github.com/m4b/rdr +Maintainers: License: BSD-3-clause +Description:`rdr` is an OCaml tool/library for doing cross-platform analysis of binaries, + by printing headers, locating entry points, showing import and export + symbols, their binary offsets and size, etc. + + It also features a symbol map which allows fast lookups for arbitrary + symbols, and their associated data, on your system + (the default search location are binaries in /usr/lib). + + The latest release also makes `rdr` a package which you can link against + and use in your own projects. + + See the README at http://github.com/m4b/rdr for more details. + + Features: + + * 64-bit Linux and Mach-o binary analysis + * Searchable symbol-map of all the symbols on your system, including binary + offset, size, and exporting library + * Print imports and exports of binaries + * Make pretty graphs, at the binary or symbol map level + * Byte Coverage algorithm which marks byte sequences as understood (or not) + and provides other meta-data + Library "utils" Path: lib/utils BuildTools: ocamlbuild FindLibParent: rdr FindLibName: utils + CompiledObject: best Modules: RdrUtils, Binary, @@ -19,47 +45,38 @@ Library "utils" ByteCoverage, Generics -Library "goblin" - Path: lib/goblin - BuildTools: ocamlbuild - FindLibParent: rdr - FindLibName: goblin - Modules: - Goblin, - GoblinSymbol, - GoblinExport, - GoblinImport - BuildDepends: - rdr.utils - Library "mach" Path: lib/mach BuildTools: ocamlbuild FindLibParent: rdr FindLibName: mach + CompiledObject: best Modules: Mach, MachBindOpcodes, MachCpuTypes, MachFat, MachLoadCommand, + MachLoadCommandTypes, MachConstants, MachExports, MachHeader, MachImports, - MachSegment64, - MachNlist, + MachSection, + MachSymbolTable, MachRebaseOpcodes, - MachVersion + MachVersion, + MachCoverage, + MachLoadCommandMacro BuildDepends: - rdr.utils, - rdr.goblin + rdr.utils Library "elf" Path: lib/elf FindLibParent: rdr FindLibName: elf BuildTools: ocamlbuild + CompiledObject: best Modules: Elf, ElfHeader, @@ -73,10 +90,29 @@ Library "elf" BuildDepends: rdr.utils +Library "goblin" + Path: lib/goblin + BuildTools: ocamlbuild + FindLibParent: rdr + FindLibName: goblin + CompiledObject: best + Modules: + Goblin, + GoblinSymbol, + GoblinExport, + GoblinImport, + GoblinMach, + GoblinElf + BuildDepends: + rdr.utils, + rdr.mach, + rdr.elf + Library "rdr" Path: lib FindLibName: rdr BuildTools: ocamlbuild + CompiledObject: best Modules: LibRdr BuildDepends: @@ -89,6 +125,7 @@ Library "rdrutils" Path: src BuildTools: ocamlbuild Install: false + CompiledObject: best Modules: Config, Command, diff --git a/_tags b/_tags index c39f19b..91b829a 100644 --- a/_tags +++ b/_tags @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 561c0ff1381a99f143e3c3b7c8381ca7) +# DO NOT EDIT (digest: 28b9a39932677973ac2e4a14f90407c6) # Ignore VCS directories, you can use the same kind of rule outside # OASIS_START/STOP if you want to exclude directories that contains # useless stuff for the build process @@ -16,16 +16,17 @@ true: annot, bin_annot "_darcs": not_hygienic # Library utils "lib/utils/utils.cmxs": use_utils -# Library goblin -"lib/goblin/goblin.cmxs": use_goblin -: use_utils # Library mach "lib/mach/mach.cmxs": use_mach -: use_goblin : use_utils # Library elf "lib/elf/elf.cmxs": use_elf : use_utils +# Library goblin +"lib/goblin/goblin.cmxs": use_goblin +: use_elf +: use_mach +: use_utils # Library rdr "lib/rdr.cmxs": use_rdr : use_elf diff --git a/lib/LibRdr.ml b/lib/LibRdr.ml index 67000e6..e2dc963 100644 --- a/lib/LibRdr.ml +++ b/lib/LibRdr.ml @@ -2,3 +2,97 @@ module Elf = Elf module Mach = Mach module Goblin = Goblin module Utils = RdrUtils + +module Object = struct + type t = | Mach of bytes | Elf of bytes | PE of bytes | Unknown of string * string + let get ?verbose:(verbose=false) filename = + let ic = open_in_bin filename in + if (in_channel_length ic < 4) then + (* 4 bytes, less than any magic number we're looking for *) + begin + close_in ic; + Unknown (filename, "less than 4 bytes") + end + else + (* BEGIN Binary cases *) + let magic = Input.input_i32be ic in + if (verbose) then + Printf.printf "opening %s with magic: 0x%x\n" filename magic; + (* MACH FAT *) + if (magic = Mach.Fat.kFAT_MAGIC) (* cafe babe *) then + let nfat_arch = Input.input_i32be ic in + if (nfat_arch > 4) then (* hack to avoid java class file errors which have same magic num *) + begin + close_in ic; + Unknown (filename, "mach fat too many archs (probably a java class file)") + end + else + let sizeof_arch_bytes = nfat_arch * Mach.Fat.sizeof_fat_arch in + let fat_arch_bytes = Bytes.create sizeof_arch_bytes in + really_input ic fat_arch_bytes 0 sizeof_arch_bytes; + let offset = + Mach.Fat.get_x86_64_binary_offset fat_arch_bytes nfat_arch in + match offset with + | Some (offset, size) -> + seek_in ic offset; + let magic = Input.input_i32be ic in + if (magic = Mach.Header.kMH_CIGAM_64) then + begin + seek_in ic offset; + let binary = Bytes.create size in + really_input ic binary 0 size; + close_in ic; + Mach binary + end + else + begin + close_in ic; Unknown (filename, "mach fat has no 64 bit binaries") + end + | None -> + close_in ic; + Printf.eprintf " ERROR, bad binary: %s\n" filename; + Unknown (filename, "mach fat has no binaries") + (* backwards cause we read the 32bit int big E style *) + (* MACH *) + else if (magic = Mach.Header.kMH_CIGAM_64) then + begin + seek_in ic 0; + let binary = Bytes.create (in_channel_length ic) in + really_input ic binary 0 (in_channel_length ic); + close_in ic; + Mach binary + end + (* ELF *) + else if (magic = Elf.Header.kMAGIC_ELF) then + begin + seek_in ic 0; + let binary = Bytes.create (in_channel_length ic) in + really_input ic binary 0 (in_channel_length ic); + close_in ic; + if (Elf.Header.check_64bit binary) then + Elf binary + else + Unknown (filename, "elf binary is not 64-bit") + end + (* PE *) + (* + else if (magic = Elf.Header.kMAGIC_ELF) then + begin +seek_in ic 0; +let binary = Bytes.create (in_channel_length ic) in +really_input ic binary 0 (in_channel_length ic); +close_in ic; +if (Elf.Header.check_64bit binary) then +Elf binary +else +Unknown filename + end + *) + else + begin + close_in ic; + if (verbose) then + Printf.printf "ignoring binary: %s\n" filename; + Unknown (filename, "unknown magic number") + end +end diff --git a/lib/META b/lib/META new file mode 100644 index 0000000..cfa6e4d --- /dev/null +++ b/lib/META @@ -0,0 +1,59 @@ +# OASIS_START +# DO NOT EDIT (digest: 19245a18a78bafcbdce567c9476d0a71) +version = "2.0.1" +description = +"Lightweight, cross platform binary parsing and analysis library with no dependencies" +requires = "rdr.goblin rdr.utils rdr.mach rdr.elf" +archive(byte) = "rdr.cma" +archive(byte, plugin) = "rdr.cma" +archive(native) = "rdr.cmxa" +archive(native, plugin) = "rdr.cmxs" +exists_if = "rdr.cma" +package "utils" ( + version = "2.0.1" + description = + "Lightweight, cross platform binary parsing and analysis library with no dependencies" + archive(byte) = "utils.cma" + archive(byte, plugin) = "utils.cma" + archive(native) = "utils.cmxa" + archive(native, plugin) = "utils.cmxs" + exists_if = "utils.cma" +) + +package "mach" ( + version = "2.0.1" + description = + "Lightweight, cross platform binary parsing and analysis library with no dependencies" + requires = "rdr.utils" + archive(byte) = "mach.cma" + archive(byte, plugin) = "mach.cma" + archive(native) = "mach.cmxa" + archive(native, plugin) = "mach.cmxs" + exists_if = "mach.cma" +) + +package "goblin" ( + version = "2.0.1" + description = + "Lightweight, cross platform binary parsing and analysis library with no dependencies" + requires = "rdr.utils rdr.mach rdr.elf" + archive(byte) = "goblin.cma" + archive(byte, plugin) = "goblin.cma" + archive(native) = "goblin.cmxa" + archive(native, plugin) = "goblin.cmxs" + exists_if = "goblin.cma" +) + +package "elf" ( + version = "2.0.1" + description = + "Lightweight, cross platform binary parsing and analysis library with no dependencies" + requires = "rdr.utils" + archive(byte) = "elf.cma" + archive(byte, plugin) = "elf.cma" + archive(native) = "elf.cmxa" + archive(native, plugin) = "elf.cmxs" + exists_if = "elf.cma" +) +# OASIS_STOP + diff --git a/lib/elf/Elf.ml b/lib/elf/Elf.ml index 00a46ea..19e7929 100644 --- a/lib/elf/Elf.ml +++ b/lib/elf/Elf.ml @@ -1,5 +1,3 @@ -(* TODO: implement a "byte accountant", which determines percent of known inert/or flagged bytes, and remainder is unknown data/code *) - module Header = ElfHeader module ProgramHeader = ElfProgramHeader module SectionHeader = ElfSectionHeader @@ -9,6 +7,8 @@ module Dynamic = ElfDynamic module SymbolTable = ElfSymbolTable module Coverage = ElfCoverage +let debug = false + type t = { header: Header.t; program_headers: ProgramHeader.t; @@ -18,6 +18,7 @@ type t = { symbol_table: SymbolTable.t; relocations: Reloc.t; is_lib: bool; + is_64: bool; soname: string; interpreter: string; libraries: string list; @@ -28,6 +29,8 @@ type t = { let get ?meta_only:(meta_only=false) binary = let header = Header.get_elf_header64 binary in + let is_64 = Header.is_64bit header.Header.e_ident in + if (debug) then Header.print_elf_header64 header; let program_headers = ProgramHeader.get_program_headers binary @@ -35,10 +38,17 @@ let get ?meta_only:(meta_only=false) binary = header.Header.e_phentsize header.Header.e_phnum in + if (debug) then ProgramHeader.print_program_headers program_headers; let interpreter = ProgramHeader.get_interpreter binary program_headers in + if (debug) then Printf.printf "interpreter: %s\n" interpreter; let slide_sectors = ProgramHeader.get_slide_sectors program_headers in + if (debug) then + begin + Printf.printf "slide sectors\n"; + ProgramHeader.print_slide_sectors slide_sectors; + end; let section_headers = SectionHeader.get_section_headers binary @@ -46,16 +56,26 @@ let get ?meta_only:(meta_only=false) binary = header.Header.e_shentsize header.Header.e_shnum in + if (debug) then SectionHeader.print_section_headers section_headers; let size = Bytes.length binary in + if (debug) then Printf.printf "size: 0x%x\n" size; let is_lib = (Header.is_lib header) in + if (debug) then Printf.printf "is_lib: %b\n" is_lib; let symbol_table = SymbolTable.get_symbol_table binary section_headers in + (* if (debug) then SymbolTable.print_symbol_table symbol_table; *) let _dynamic = Dynamic.get_dynamic binary program_headers in + (* if (debug) then Dynamic.print_dynamic _dynamic; *) let symtab_offset, strtab_offset, strtab_size = Dynamic.get_dynamic_symbol_offset_data _dynamic slide_sectors in + if (debug) then + Printf.printf "symtab_offset: 0x%x strtab_offset: 0x%x strtab_size: 0x%x\n" + symtab_offset strtab_offset strtab_size; + (* broken right here for /usr/lib/go/pkg/tool/linux_amd64/cgo *) let dynamic_strtab = Dynamic.get_dynamic_strtab binary strtab_offset strtab_size in + (* if (debug) then Printf.printf "dynamic_strtab: %s\n" dynamic_strtab; *) let libraries = Dynamic.get_libraries _dynamic dynamic_strtab in let dynamic_symbols = Dynamic.get_dynamic_symbols @@ -75,7 +95,10 @@ let get ?meta_only:(meta_only=false) binary = Dynamic.get_reloc_data _dynamic slide_sectors |> Reloc.get_relocs64 binary in - let byte_coverage = ElfCoverage.compute_byte_coverage header program_headers section_headers in + if (debug) then Reloc.print_relocs64 relocations; + let byte_coverage = + ElfCoverage.compute_byte_coverage header program_headers section_headers size + in (* TODO: fix *) let raw_code = if (meta_only) then Bytes.create 0 @@ -91,6 +114,7 @@ let get ?meta_only:(meta_only=false) binary = symbol_table; relocations; is_lib; + is_64; soname; interpreter; libraries; @@ -106,5 +130,4 @@ let print elf = SymbolTable.print_symbol_table elf.dynamic_symbols; SymbolTable.print_symbol_table elf.symbol_table; Reloc.print_relocs64 elf.relocations; - ByteCoverage.stats elf.byte_coverage elf.size; ByteCoverage.print elf.byte_coverage diff --git a/lib/elf/ElfCoverage.ml b/lib/elf/ElfCoverage.ml index cf1b921..55d9779 100644 --- a/lib/elf/ElfCoverage.ml +++ b/lib/elf/ElfCoverage.ml @@ -2,122 +2,138 @@ open ByteCoverage let debug = false -let compute_section_coverage kind map section = +(* add more, and let melding happen *) +let known_program_headers = + [(ElfProgramHeader.kPT_INTERP, String); + (ElfProgramHeader.kPT_NOTE, String); + (ElfProgramHeader.kPT_DYNAMIC, Symbol); + (ElfProgramHeader.kPT_GNU_EH_FRAME, PlatformSpecific); + ] + +let compute_program_header_coverage kind data ph = + match ph with + | Some ph -> + if (debug) then + Printf.printf "called: %s\n" + (ElfProgramHeader.ptype_to_string + ph.ElfProgramHeader.p_type); + let size = ph.ElfProgramHeader.p_filesz in + let range_start = ph.ElfProgramHeader.p_offset in + let range_end = size + range_start in + let extra = ElfProgramHeader.ptype_to_string ph.ElfProgramHeader.p_type in + ByteCoverage.add + (create_data + ~tag:String + ~r1:range_start + ~r2:range_end + ~extra:extra + ~understood:true + ) data + | None -> data + +let compute_program_header_coverage phs data = + if (ElfProgramHeader.is_empty phs) then + data + else + let f = (fun data (header_type, tag) -> + let header = ElfProgramHeader.get_header header_type phs in + compute_program_header_coverage tag data header + ) in + (fun data -> List.fold_left f data known_program_headers) data + +let known_sections = + [(ElfSectionHeader.kSHT_SYMTAB, SymbolTable); + (ElfSectionHeader.kSHT_STRTAB, StringTable); + (ElfSectionHeader.kSHT_PROGBITS, Code); + (ElfSectionHeader.kSHT_NOBITS, Semantic); + (ElfSectionHeader.kSHT_INIT_ARRAY, Code); + (ElfSectionHeader.kSHT_FINI_ARRAY, Code); + (ElfSectionHeader.kSHT_DYNAMIC, Symbol); + (ElfSectionHeader.kSHT_HASH, Symbol); + (ElfSectionHeader.kSHT_GNU_HASH, Symbol); + (ElfSectionHeader.kSHT_RELA, Rela); + (ElfSectionHeader.kSHT_DYNSYM, SymbolTable); + (ElfSectionHeader.kSHT_NOTE, PlatformSpecific); + (ElfSectionHeader.kSHT_GNU_verdef, PlatformSpecific); + (ElfSectionHeader.kSHT_GNU_verneed, PlatformSpecific); + ] + +let compute_section_coverage stype tag data section = if (debug) then Printf.printf "called: %s\n" (ElfSectionHeader.shtype_to_string section.ElfSectionHeader.sh_type); let size = section.ElfSectionHeader.sh_size in let range_start = section.ElfSectionHeader.sh_offset in - let range_end = size + range_start in + let range_end = + if (stype = ElfSectionHeader.kSHT_NOBITS) then + range_start + else + size + range_start + in let extra = section.ElfSectionHeader.name ^ " // " ^ ElfSectionHeader.shtype_to_string section.ElfSectionHeader.sh_type in - ByteCoverage.Map.add range_start - {size; understood = true; kind; - range_start; range_end; extra} map - -let compute_program_header_coverage phs m = - (* TODO: make this less dry and finish up known coverage *) - if (ElfProgramHeader.is_empty phs) then - m - else - begin - match ElfProgramHeader.get_interpreter_header phs with - | Some ph -> - let size = ph.ElfProgramHeader.p_filesz in - let range_start = ph.ElfProgramHeader.p_offset in - let range_end = size + range_start in - let extra = ElfProgramHeader.ptype_to_string ph.ElfProgramHeader.p_type in - ByteCoverage.Map.add range_start - {size; understood = true; kind = String; - range_start; range_end; extra;} m - | None -> - m - end - |> - begin - match ElfProgramHeader.get_header ElfProgramHeader.kPT_NOTE phs with - | Some ph -> - ( - fun m -> - let size = ph.ElfProgramHeader.p_filesz in - let range_start = ph.ElfProgramHeader.p_offset in - let range_end = size + range_start in - let extra = ElfProgramHeader.ptype_to_string ph.ElfProgramHeader.p_type in - ByteCoverage.Map.add range_start - {size; understood = true; kind = String; - range_start; range_end; extra} m - ) - | None -> - (fun m -> m) - end - |> - begin - match ElfProgramHeader.get_dynamic_program_header phs with - | Some ph -> - ( - fun m -> - let size = ph.ElfProgramHeader.p_filesz in - let range_start = ph.ElfProgramHeader.p_offset in - let range_end = size + range_start in - let extra = ElfProgramHeader.ptype_to_string ph.ElfProgramHeader.p_type in - ByteCoverage.Map.add range_start - {size; understood = true; kind = Symbol; - range_start; range_end; extra} m - ) - | None -> - (fun m -> m) - end + ByteCoverage.add + (create_data + ~tag:tag + ~r1:range_start + ~r2:range_end + ~extra:extra + ~understood:true + ) data -let compute_section_header_coverage h shs m = +(* add a platform specific post process, for e.g. NOBITS *) +let compute_section_header_coverage h shs data = if (ElfSectionHeader.is_empty shs) then - m + data else begin let size = h.ElfHeader.e_shentsize * h.ElfHeader.e_shnum in let range_start = h.ElfHeader.e_shoff in let range_end = size + range_start in - ByteCoverage.Map.add range_start - {size; understood = true; - kind = Meta; - range_start; range_end; - extra = "section headers meta data"} m + ByteCoverage.add + (create_data + ~tag:Meta + ~r1:range_start + ~r2:range_end + ~extra:"Section Headers" + ~understood:true + ) data end |> begin - let known_sections = - [(ElfSectionHeader.kSHT_SYMTAB, Symbol); - (ElfSectionHeader.kSHT_STRTAB, StringTable); - (ElfSectionHeader.kSHT_PROGBITS, Code); - (ElfSectionHeader.kSHT_NOBITS, Code); - (ElfSectionHeader.kSHT_INIT_ARRAY, Code); - (ElfSectionHeader.kSHT_FINI_ARRAY, Code); - (ElfSectionHeader.kSHT_DYNAMIC, Symbol); - (ElfSectionHeader.kSHT_HASH, Symbol); - (ElfSectionHeader.kSHT_RELA, Rela); - ] in - let f = (fun m (section_type, kind) -> + let f = (fun data (section_type, tag) -> let sections = ElfSectionHeader.get_sections section_type shs in - List.fold_left (compute_section_coverage kind) m sections + List.fold_left (compute_section_coverage section_type tag) data sections ) in - (fun m -> List.fold_left f m known_sections) + (fun data -> List.fold_left f data known_sections) end -let compute_byte_coverage h phs shs : ByteCoverage.t = - let size = - h.ElfHeader.e_ehsize + - (h.ElfHeader.e_phentsize * h.ElfHeader.e_phnum) +let compute_byte_coverage h phs shs elf_size : ByteCoverage.t = + let ehsize = h.ElfHeader.e_ehsize in + let phsize = + (h.ElfHeader.e_phentsize * h.ElfHeader.e_phnum) in - let m = - ByteCoverage.Map.add 0 - {size; - understood = true; - kind = Meta; range_start = 0; - range_end = size; - extra = "header + program headers meta data"} - ByteCoverage.Map.empty + ByteCoverage.add + (create_data + ~tag:Meta + ~r1:0 + ~r2:h.ElfHeader.e_ehsize + ~extra:"ELF Header" + ~understood:true + ) + ByteCoverage.empty + |> ByteCoverage.add + (create_data + ~tag:Meta + ~r1:ehsize + ~r2:(phsize+ehsize) + ~extra:"Program Headers" + ~understood:true + ) |> compute_program_header_coverage phs - |> compute_section_header_coverage h shs in m + |> compute_section_header_coverage h shs + |> ByteCoverage.create elf_size diff --git a/lib/elf/ElfProgramHeader.ml b/lib/elf/ElfProgramHeader.ml index 8dc85f7..1a726a1 100644 --- a/lib/elf/ElfProgramHeader.ml +++ b/lib/elf/ElfProgramHeader.ml @@ -1,6 +1,3 @@ -(* TODO: -(1) Memory adjust function TOTALLY BROKEN; see this culprit for reason: /usr/lib/libqgsttools_p.so.1.0.0 *) - open Printf (* @@ -46,6 +43,7 @@ let kPT_LOOS = 0x60000000 (* Start of OS-specific *) let kPT_GNU_EH_FRAME = 0x6474e550 (* GCC .eh_frame_hdr segment *) let kPT_GNU_STACK = 0x6474e551 (* Indicates stack executability *) let kPT_GNU_RELRO = 0x6474e552 (* Read-only after relocation *) +let kPT_PAX_FLAGS = 0x65041580 (* pax security header, _not_ in ELF header *) let kPT_LOSUNW = 0x6ffffffa let kPT_SUNWBSS = 0x6ffffffa (* Sun Specific segment *) let kPT_SUNWSTACK = 0x6ffffffb (* Stack segment *) @@ -54,6 +52,38 @@ let kPT_HIOS = 0x6fffffff (* End of OS-specific *) let kPT_LOPROC = 0x70000000 (* Start of processor-specific *) let kPT_HIPROC = 0x7fffffff (* End of processor-specific *) +let kPT_MIPS_REGINFO = 0x70000000 (* Register usage information *) +let kPT_MIPS_RTPROC = 0x70000001 (* Runtime procedure table. *) +let kPT_MIPS_OPTIONS = 0x70000002 +let kPT_HP_TLS = (kPT_LOOS + 0x0) +let kPT_HP_CORE_NONE = (kPT_LOOS + 0x1) +let kPT_HP_CORE_VERSION = (kPT_LOOS + 0x2) +let kPT_HP_CORE_KERNEL = (kPT_LOOS + 0x3) +let kPT_HP_CORE_COMM = (kPT_LOOS + 0x4) +let kPT_HP_CORE_PROC = (kPT_LOOS + 0x5) +let kPT_HP_CORE_LOADABLE = (kPT_LOOS + 0x6) +let kPT_HP_CORE_STACK = (kPT_LOOS + 0x7) +let kPT_HP_CORE_SHM = (kPT_LOOS + 0x8) +let kPT_HP_CORE_MMF = (kPT_LOOS + 0x9) +let kPT_HP_PARALLEL = (kPT_LOOS + 0x10) +let kPT_HP_FASTBIND = (kPT_LOOS + 0x11) +let kPT_HP_OPT_ANNOT = (kPT_LOOS + 0x12) +let kPT_HP_HSL_ANNOT = (kPT_LOOS + 0x13) +let kPT_HP_STACK = (kPT_LOOS + 0x14) +(* we don't care about this, and it aliases anyway +let kPT_PARISC_ARCHEXT = 0x70000000 +let kPT_PARISC_UNWIND = 0x70000001 +*) +let kPPC64_OPT_TLS = 1 +let kPPC64_OPT_MULTI_TOC = 2 + +let kPT_ARM_EXIDX = (kPT_LOPROC + 1) (* ARM unwind segment. *) +let kPT_IA_64_ARCHEXT = (kPT_LOPROC + 0) (* arch extension bits *) +let kPT_IA_64_UNWIND = (kPT_LOPROC + 1) (* ia64 unwind bits *) +let kPT_IA_64_HP_OPT_ANOT = (kPT_LOOS + 0x12) +let kPT_IA_64_HP_HSL_ANOT = (kPT_LOOS + 0x13) +let kPT_IA_64_HP_STACK = (kPT_LOOS + 0x14) + let sizeof_program_header = 56 (* bytes *) let get_program_header binary offset = @@ -78,27 +108,53 @@ let get_program_header binary offset = let ptype_to_string ptype = match ptype with - | 0 -> "NULL" - | 1 -> "LOAD" - | 2 -> "DYNAMIC" - | 3 -> "INTERP" - | 4 -> "NOTE" - | 5 -> "SHLIB" - | 6 -> "PHDR" - | 7 -> "TLS" - | 8 -> "NUM" - | 0x60000000 -> "LOOS" - | 0x6474e550 -> "GNU_EH_FRAME" - | 0x6474e551 -> "GNU_STACK" - | 0x6474e552 -> "GNU_RELRO" - (* | 0x6ffffffa -> "LOSUNW" *) - | 0x6ffffffa -> "SUNWBSS" - | 0x6ffffffb -> "SUNWSTACK" - | 0x6fffffff -> "HISUNW" - (* | 0x6fffffff -> "HIOS" *) - | 0x70000000 -> "LOPROC" - | 0x7fffffff -> "HIPROC" - | _ -> "UNKNOWN" + | 0 -> "PT_NULL" + | 1 -> "PT_LOAD" + | 2 -> "PT_DYNAMIC" + | 3 -> "PT_INTERP" + | 4 -> "PT_NOTE" + | 5 -> "PT_SHLIB" + | 6 -> "PT_PHDR" + | 7 -> "PT_TLS" + | 8 -> "PT_NUM" + (* | 0x60000000 -> "PT_LOOS" *) + | 0x6474e550 -> "PT_GNU_EH_FRAME" + | 0x6474e551 -> "PT_GNU_STACK" + | 0x6474e552 -> "PT_GNU_RELRO" + | 0x65041580 -> "PT_PAX_FLAGS" + (* | 0x6ffffffa -> "PT_LOSUNW" *) + | 0x6ffffffa -> "PT_SUNWBSS" + | 0x6ffffffb -> "PT_SUNWSTACK" + (* | 0x6fffffff -> "PT_HIOS" *) + | pt when pt = kPT_MIPS_REGINFO -> "PT_MIPS_REGINFO" + | pt when pt = kPT_MIPS_RTPROC -> "PT_MIPS_RTPROC" + | pt when pt = kPT_MIPS_OPTIONS -> "PT_MIPS_OPTIONS" + | pt when pt = kPT_HP_TLS -> "PT_HP_TLS" + | pt when pt = kPT_HP_CORE_NONE -> "PT_HP_CORE_NONE" + | pt when pt = kPT_HP_CORE_VERSION -> "PT_HP_CORE_VERSION" + | pt when pt = kPT_HP_CORE_KERNEL -> "PT_HP_CORE_KERNEL" + | pt when pt = kPT_HP_CORE_COMM -> "PT_HP_CORE_COMM" + | pt when pt = kPT_HP_CORE_PROC -> "PT_HP_CORE_PROC" + | pt when pt = kPT_HP_CORE_LOADABLE -> "PT_HP_CORE_LOADABLE" + | pt when pt = kPT_HP_CORE_STACK -> "PT_HP_CORE_STACK" + | pt when pt = kPT_HP_CORE_SHM -> "PT_HP_CORE_SHM" + | pt when pt = kPT_HP_CORE_MMF -> "PT_HP_CORE_MMF" + | pt when pt = kPT_HP_PARALLEL -> "PT_HP_PARALLEL" + | pt when pt = kPT_HP_FASTBIND -> "PT_HP_FASTBIND" + (* PT_LOOS + 0x12 *) + | pt when pt = kPT_IA_64_HP_OPT_ANOT -> "PT_IA_64_HP_OPT_ANOT" + | pt when pt = kPT_IA_64_HP_HSL_ANOT -> "PT_IA_64_HP_HSL_ANOT" + | pt when pt = kPT_IA_64_HP_STACK -> "PT_IA_64_HP_STACK" + (* PT_LOOS + 0x14 *) + | pt when pt = kPT_HP_OPT_ANNOT -> "PT_HP_OPT_ANNOT" + | pt when pt = kPT_HP_HSL_ANNOT -> "PT_HP_HSL_ANNOT" + | pt when pt = kPT_HP_STACK -> "PT_HP_STACK" + (* | 0x70000000 -> "PT_LOPROC" *) + | pt when pt = kPT_ARM_EXIDX -> "PT_ARM_EXIDX" + | pt when pt = kPT_IA_64_ARCHEXT -> "PT_IA_64_ARCHEXT" + | pt when pt = kPT_IA_64_UNWIND -> "PT_IA_64_UNWIND" + (* | 0x7fffffff -> "PT_HIPROC" *) + | pt -> Printf.sprintf "PT_UNKNOWN 0x%x" pt let flags_to_string flags = match flags with @@ -109,7 +165,7 @@ let flags_to_string flags = | 5 -> "R+X" | 6 -> "RW" | 7 -> "RW+X" - | _ -> "UNKNOWN FLAG" + | f -> Printf.sprintf "FLAG 0x%x" f let is_empty phs = phs = [] @@ -150,7 +206,7 @@ let get_dynamic_program_header phs = get_header kPT_DYNAMIC phs let get_interpreter binary phs = match get_interpreter_header phs with | Some ph -> - Binary.string binary ~maxlen:(ph.p_filesz + ph.p_offset) ph.p_offset + Binary.string binary ~max:ph.p_filesz ph.p_offset | None -> "" type slide_sector = {start_sector: int; end_sector: int; slide: int;} @@ -159,38 +215,64 @@ let is_in_sector offset sector = (* should this be offset <= sector.end_sector ? *) offset >= sector.start_sector && offset < sector.end_sector +let is_contained_in s1 s2 = + s1.start_sector >= s2.start_sector + && s1.end_sector <= s2.end_sector + +let join s1 s2 = + let start_sector = min s1.start_sector s2.start_sector in + let end_sector = max s1.end_sector s2.end_sector in + assert (s1.slide = s2.slide); + let slide = s1.slide in + {start_sector; end_sector; slide} + let print_slide_sector sector = Printf.printf "0x%x: 0x%x - 0x%x\n" sector.slide sector.start_sector sector.end_sector - + +let print_slide_sectors sectors = + List.iter (fun el -> print_slide_sector el) sectors + (* checks to see if the slides are equal; will this hold uniformly? *) module SlideSet = Set.Make( struct type t = slide_sector let compare = - (fun a b -> Pervasives.compare a.slide b.slide) end) + (fun a b -> Pervasives.compare a.slide b.slide) + end) + +module Map = + Map.Make(struct + type t = int + let compare = compare + end) (* finds the vaddr masks *) let get_slide_sectors phs = - List.fold_left (fun acc ph -> - if (ph.p_type = kPT_LOAD) then - let slide = ph.p_vaddr - ph.p_offset in - if (slide <> 0) then - let start_sector = ph.p_vaddr in - let end_sector = start_sector + ph.p_filesz in - SlideSet.add {start_sector; end_sector; slide} acc - else - acc - else - acc - ) SlideSet.empty phs |> SlideSet.elements - -(* This also assumed the leading digit was always the vm addr offset; -but /usr/lib/libqgsttools_p.so.1.0.0 has demonstrated otherwise: -binary offset: 30000 vm: 31000 -will have to approach this in sections; if the offset in question is contained in a vm "covered" area, then subtract the difference, otherwise don't... - *) + let map = + List.fold_left + (fun acc ph -> + let slide = ph.p_vaddr - ph.p_offset in + if (slide <> 0) then + let start_sector = ph.p_vaddr in + let end_sector = start_sector + ph.p_filesz in (* this might need to be ph.p_memsz *) + let s1 = {start_sector; end_sector; slide} in + if (Map.mem slide acc) then + let s2 = Map.find slide acc in + if (is_contained_in s1 s2) then + acc + else + Map.add slide (join s1 s2) acc + else + Map.add slide s1 acc + else + acc + ) Map.empty phs + in + Map.fold (fun k v acc -> v::acc) map [] +(* checks if the offset is in the slide sector, + and adjusts using the sectors slide if so *) let adjust sectors offset = List.fold_left (fun acc sector -> if (is_in_sector offset sector) then @@ -237,7 +319,7 @@ let get_p_type p_type = | 6 -> PT_PHDR | 7 -> PT_TLS | 8 -> PT_NUM - | 0x60000000 -> PT_LOOS + | 0x60000000 -> kPT_LOOS | 0x6474e550 -> PT_GNU_EH_FRAME | 0x6474e551 -> PT_GNU_STACK | 0x6474e552 -> PT_GNU_RELRO diff --git a/lib/elf/META b/lib/elf/META new file mode 100644 index 0000000..c02f85f --- /dev/null +++ b/lib/elf/META @@ -0,0 +1,13 @@ +# OASIS_START +# DO NOT EDIT (digest: dc13e6ca3cd0fe7a183086218f6af14f) +version = "1.1" +description = +"Lightweight, cross platform binary parsing and analysis library with no dependencies" +requires = "goblin utils" +archive(byte) = "elf.cma" +archive(byte, plugin) = "elf.cma" +archive(native) = "elf.cmxa" +archive(native, plugin) = "elf.cmxs" +exists_if = "elf.cma" +# OASIS_STOP + diff --git a/lib/elf/elf.mldylib b/lib/elf/elf.mldylib new file mode 100644 index 0000000..2cb6eb4 --- /dev/null +++ b/lib/elf/elf.mldylib @@ -0,0 +1,12 @@ +# OASIS_START +# DO NOT EDIT (digest: e93d6fd95d53dfc93eab879f82f97465) +Elf +ElfHeader +ElfProgramHeader +ElfSectionHeader +ElfConstants +ElfDynamic +ElfReloc +ElfSymbolTable +ElfCoverage +# OASIS_STOP diff --git a/lib/elf/elf.mllib b/lib/elf/elf.mllib new file mode 100644 index 0000000..2cb6eb4 --- /dev/null +++ b/lib/elf/elf.mllib @@ -0,0 +1,12 @@ +# OASIS_START +# DO NOT EDIT (digest: e93d6fd95d53dfc93eab879f82f97465) +Elf +ElfHeader +ElfProgramHeader +ElfSectionHeader +ElfConstants +ElfDynamic +ElfReloc +ElfSymbolTable +ElfCoverage +# OASIS_STOP diff --git a/lib/goblin/Goblin.ml b/lib/goblin/Goblin.ml index dc57e6e..9ce1b73 100644 --- a/lib/goblin/Goblin.ml +++ b/lib/goblin/Goblin.ml @@ -19,31 +19,29 @@ (* (ab#get_import name).idx *) module Symbol = GoblinSymbol - module Import = GoblinImport - module Export = GoblinExport module StringMap = Map.Make(String) type t = - { - name: string; (* name of this binary used in linking and imports, ID_DYLIB/SONAME, or basename if executable *) - install_name: string; (* fully qualified pathname to the binary/dylib NOTE: OSX ID_DYLIB = PWD of dylib *) - islib: bool; (* are we a library? *) - libs: string array; (* lib string array *) - nlibs: int; (* number of libs *) - imports: Import.t array; (* the import map *) (* if this is an array much simpler *) - nimports: int; (* number of imports *) - exports: Export.t array; (* the export map *) (* if this is an array much simpler *) - nexports: int; (* number of exports *) - code: bytes; (* to bytes array or not to bytes array *) - } - (* pro: if bytes array, then (tol#find import.lib).code.(import.idx) = the symbol's routine *) - (* con: if bytes array, must relocate/translate all references in export symbol code to be indexed in an array *) - (* pro: if not bytes array, then: import.offset <- (find import.name (tol#find import.lib)).address at dynamic bind time - similarly: Bytes.sub (tol#find import.lib).code import.offset import.size = the symbol's routine - *) + { + name: string; (* name of this binary used in linking and imports, ID_DYLIB/SONAME, or basename if executable *) + install_name: string; (* fully qualified pathname to the binary/dylib NOTE: OSX ID_DYLIB = PWD of dylib *) + islib: bool; (* are we a library? *) + libs: string array; (* lib string array *) + nlibs: int; (* number of libs *) + imports: Import.t array; (* the import map *) (* if this is an array much simpler *) + nimports: int; (* number of imports *) + exports: Export.t array; (* the export map *) (* if this is an array much simpler *) + nexports: int; (* number of exports *) + code: bytes; (* to bytes array or not to bytes array *) + } +(* pro: if bytes array, then (tol#find import.lib).code.(import.idx) = the symbol's routine *) +(* con: if bytes array, must relocate/translate all references in export symbol code to be indexed in an array *) +(* pro: if not bytes array, then: import.offset <- (find import.name (tol#find import.lib)).address at dynamic bind time + similarly: Bytes.sub (tol#find import.lib).code import.offset import.size = the symbol's routine +*) let get_symbol collection i = Array.get collection i @@ -52,42 +50,47 @@ let _find pred arr = let rec loop i = if (i >= size) then raise Not_found else - if (pred arr.(i)) then - arr.(i) - else - loop (i + 1) + if (pred arr.(i)) then + arr.(i) + else + loop (i + 1) in loop 0 - + let get_import import = _find (fun symbol -> symbol.Import.name = import) let get_export export = _find (fun symbol -> symbol.Export.name = export) -let iter = +let iter = Array.iter let empty = [||] -let libs_to_string libs = +let libs_to_string libs = let b = Buffer.create @@ Array.length libs in - Array.iteri (fun i elem -> + Array.iteri (fun i elem -> Buffer.add_string b @@ Printf.sprintf "(%d) %s\n" i elem) libs; Buffer.contents b -let imports_to_string imports = +let imports_to_string imports = let b = Buffer.create @@ (Array.length imports) * 15 in (* just ballpark *) - Array.iter (fun import -> - let squiggle = if (import.Import.is_lazy) then "~>" else "->" in - Buffer.add_string b @@ Printf.sprintf "%s (%d) %s %s\n" import.Import.name import.Import.size squiggle import.Import.lib) imports; + Array.iter (fun import -> + Buffer.add_string b @@ Import.to_string import) imports; Buffer.contents b - -let exports_to_string exports = + +let exports_to_string exports = let b = Buffer.create @@ (Array.length exports) * 15 in (* just ballpark *) - Array.iter (fun export -> - Buffer.add_string b @@ Printf.sprintf "%s (%d) -> 0x%x\n" export.Export.name export.Export.size export.Export.offset) exports; + Array.iter (fun export -> + Buffer.add_string b @@ Export.to_string export) exports; Buffer.contents b +let print_exports = + Array.iter Export.print + +let print_imports = + Array.iter Import.print + let to_string goblin = Printf.sprintf "%s (%s):\nLibs (%d):\n%s\nExports (%d):\n%s\nImports (%d):\n%s" goblin.name goblin.install_name goblin.nlibs @@ -96,3 +99,79 @@ let to_string goblin = Printf.sprintf "%s (%s):\nLibs (%d):\n%s\nExports (%d):\n (imports_to_string goblin.imports) goblin.nexports (exports_to_string goblin.exports) + +let print goblin = Printf.printf "%s\n" @@ to_string goblin + +module Mach = struct + include GoblinMach + open MachImports + open MachExports + + (* @invariant sorted, sizecomputed *) + let to_goblin mach install_name = + let name = mach.Mach.name in + let install_name = install_name in + let libs = mach.Mach.libraries in + let nlibs = mach.Mach.nlibraries in + let exports = List.map + (fun export -> + let name = export.name in + let size = export.size in + let offset = export.offset in + {Export.name; size; offset} + ) mach.Mach.exports + in + let exports = Array.of_list exports in + let nexports = mach.Mach.nexports in + let imports = + List.mapi + ( + fun i import -> + let name = import.bi.symbol_name in + let lib = import.dylib in + let is_lazy = import.is_lazy in + let offset = import.offset in + let size = import.size in + {Import.name = name; lib; is_lazy; + idx = i; offset; size} + ) mach.Mach.imports + |> Array.of_list + in + let nimports = mach.Mach.nimports in + let islib = mach.Mach.is_lib in + let code = mach.Mach.raw_code in + {name; install_name; islib; libs; nlibs; + exports; nexports; imports; nimports; code} +end + +(* TODO: add Elf for consistency *) +module Elf = struct + include GoblinElf + exception Unimplemented + let to_goblin elf install_name = raise Unimplemented + (* + +let create_goblin_binary soname install_name libraries islib goblin_exports goblin_imports = + let name = soname in + let install_name = install_name in + let libs = Array.of_list (soname::libraries) in (* to be consistent... for graphing, etc. *) + let nlibs = Array.length libs in + let exports = + Array.of_list + @@ List.map (GoblinSymbol.to_goblin_export) goblin_exports + in + let nexports = Array.length exports in + let imports = + Array.of_list + @@ List.map (GoblinSymbol.to_goblin_import) goblin_imports + in + let nimports = Array.length imports in + (* empty code *) + let code = Bytes.empty in + {Goblin.name; + install_name; islib; libs; nlibs; exports; nexports; + imports; nimports; code} + + *) +(* {name; install_name; islib; libs; nlibs; exports; nexports; imports; nimports; code} *) +end diff --git a/lib/goblin/GoblinElf.ml b/lib/goblin/GoblinElf.ml new file mode 100644 index 0000000..897098e --- /dev/null +++ b/lib/goblin/GoblinElf.ml @@ -0,0 +1,72 @@ +(* TODO add Import, Export, and Symbols/Nlist *) +(* TODO move ToL back to Goblin and tease out deps *) + +(* hacky function to filter imports from exports, etc. *) +(* todo use proper variants here ffs *) + +open ElfSymbolTable + +let get_goblin_kind entry bind stype = + if (entry.ElfSymbolTable.st_value = 0x0 + && entry.ElfSymbolTable.st_shndx = 0 + && entry.ElfSymbolTable.name <> "") (* ignore first \0 entry *) + then + GoblinSymbol.Import + else if (bind = "LOCAL") then + GoblinSymbol.Local + else if ((bind = "GLOBAL" + || (bind = "WEAK" && (stype = "FUNC" + || stype = "IFUNC" + || stype = "OBJECT"))) + && entry.ElfSymbolTable.st_value <> 0) then + GoblinSymbol.Export + else GoblinSymbol.Other + +(* +(* polymorphic variants don't need to be qualified by module + since they are open and the symbol is unique *) +let symbol_entry_to_goblin_symbol + ~tol:tol ~libs:libs ~relocs:relocs (soname,install_name) index entry = + let bind = (ElfSymbolTable.get_bind entry.ElfSymbolTable.st_info |> ElfSymbolTable.symbol_bind_to_string) in + let stype = (ElfSymbolTable.get_type entry.ElfSymbolTable.st_info |> ElfSymbolTable.symbol_type_to_string) in + let name = `Name entry.ElfSymbolTable.name in + let offset = + `Offset ( + if (entry.ElfSymbolTable.st_value = 0) then + (* this _could_ be relatively expensive *) + Elf.Reloc.get_size index relocs + else + entry.ElfSymbolTable.st_value) + in + let size = `Size entry.ElfSymbolTable.st_size in + let kind = `Kind (get_goblin_kind entry bind stype) in + let lib = + (* TODO: this is a complete disaster; *) + match kind with + | `Kind GoblinSymbol.Export -> + `Lib (soname,install_name) + | `Kind GoblinSymbol.Import -> + if (ToL.is_empty tol) then + `Lib ("∅","∅") + else + let l = (ToL.get_libraries ~bin_libs:libs entry.ElfSymbolTable.name tol) in + `Lib (l,l) + | _ -> + `Lib ("","") + in + let data = `PrintableData + (Printf.sprintf + "%s %s" bind stype) in + [name; lib; offset; size; kind; data] + +let symbols_to_goblin ?use_tol:(use_tol=true) ~libs:libs soname dynsyms relocs = + let tol = + try + if (use_tol) then ToL.get() else ToL.empty + with ToL.Not_built -> + ToL.empty + in + List.mapi + (symbol_entry_to_goblin_symbol + ~tol:tol ~libs:libs ~relocs:relocs soname) dynsyms + *) diff --git a/lib/goblin/GoblinExport.ml b/lib/goblin/GoblinExport.ml index 28a8607..8a344bd 100644 --- a/lib/goblin/GoblinExport.ml +++ b/lib/goblin/GoblinExport.ml @@ -1,12 +1,12 @@ -type t = +type t = { name: string; (* name of the exported symbol *) offset: int; (* offset into the containing binary's byte array *) size: int; (* size of the routine, in bytes *) } -let print ?like_goblin:(like_goblin=true) export = - if (like_goblin) then - Printf.printf "0x%-16x %s (%d)\n" export.offset export.name export.size - else - Printf.printf "%s (%d) -> 0x%x\n" export.name export.size export.offset +let to_string export = + Printf.sprintf "%16x %s (%d)" export.offset export.name export.size + +let print export = + Printf.printf "%s\n" @@ to_string export diff --git a/lib/goblin/GoblinImport.ml b/lib/goblin/GoblinImport.ml index b392f6f..343e0fd 100644 --- a/lib/goblin/GoblinImport.ml +++ b/lib/goblin/GoblinImport.ml @@ -1,4 +1,4 @@ -type t = +type t = { name: string; (* name of the imported symbol *) lib: string; (* library which contains the binary *) @@ -7,3 +7,10 @@ type t = mutable offset: int; (* offset into (tol#find import.lib).code *) size: int; (* size of the imported symbol, in bytes *) } + +let to_string import = + let squiggle = if (import.is_lazy) then "~>" else "->" in + Printf.sprintf "%16x %s (%d) %s %s" import.offset import.name import.size squiggle import.lib + +let print import = + Printf.printf "%s\n" @@ to_string import diff --git a/lib/goblin/GoblinMach.ml b/lib/goblin/GoblinMach.ml new file mode 100644 index 0000000..75f1523 --- /dev/null +++ b/lib/goblin/GoblinMach.ml @@ -0,0 +1,350 @@ +(* MACH EXPORT -> GOBLIN *) +module Exports = struct + open MachExports + + let find symbol = + List.find + (fun export -> + (GoblinSymbol.find_symbol_name export) = symbol) + + (* a datum is a single unit, + so data is a list of such possible datums *) + type mach_export_data = + [ + | GoblinSymbol.symbol_datum + (* we extend with mach specific details: *) + | `Reexport of [`As of string * string | `From of string] + | `Stub of int * int + | `Flags of int + ] list + + let export_info_to_mach_export_data name soname = + function + | MachExports.Regular symbol -> + [ + `Name name; + `Offset symbol.address; + `Kind GoblinSymbol.Export; + `Flags symbol.flags; + `Lib (soname, soname); + ] + | MachExports.Reexport symbol -> + begin + match symbol.lib_symbol_name with + | Some name' -> + [ + `Name name; + `Reexport (`As (name',symbol.lib)); + `PrintableData (Printf.sprintf "REEX as %s from %s" name' symbol.lib); + `Kind GoblinSymbol.Export; + `Flags symbol.flags; + `Lib (soname, soname); + ] + | None -> + [ + `Name name; + `Reexport (`From symbol.lib); + `PrintableData (Printf.sprintf "REEX from %s" symbol.lib); + `Kind GoblinSymbol.Export; + `Flags symbol.flags; + `Lib (soname, soname); + ] + end + | MachExports.Stub symbol -> + [ + `Name name; + `Stub (symbol.stub_offset, symbol.resolver_offset); + `PrintableData (Printf.sprintf "STUB 0x%x 0x%x" symbol.stub_offset symbol.resolver_offset); + `Kind GoblinSymbol.Export; + `Flags symbol.flags; + `Lib (soname, soname); + ] + + let rec find_reexport = + function + | [] -> raise Not_found + | (`Reexport _) as reex :: _-> + reex + | _::rest -> find_reexport rest + + let rec find_stub = + function + | [] -> raise Not_found + | `Stub stub :: _ -> + stub + | _::rest -> find_stub rest + + let rec find_flags = + function + | [] -> raise Not_found + | `Flags flags :: _ -> + flags + | _::rest -> find_flags rest + + let mach_export_data_to_export_info data = + try + let reex = find_reexport data in + begin + match reex with + | `Reexport (`As (name, lib)) -> + let lib_symbol_name = Some name in + let flags = find_flags data in + Reexport {lib; lib_symbol_name; flags} + | `Reexport (`From lib) -> + let lib_symbol_name = None in + let flags = find_flags data in + Reexport {lib; lib_symbol_name; flags} + end + with Not_found -> + try + let stub_offset,resolver_offset = find_stub data in + let flags = find_flags data in + Stub {stub_offset;resolver_offset; flags} + with Not_found -> + let address = GoblinSymbol.find_symbol_offset data in + let flags = find_flags data in + Regular {address; flags} + + let mach_export_datum_to_string ?use_kind:(use_kind=true) ?use_flags:(use_flags=false) ?use_lib:(use_lib=true) datum = + match datum with + | #GoblinSymbol.symbol_datum as datum -> + GoblinSymbol.symbol_datum_to_string + ~use_kind:use_kind + ~use_lib:use_lib + ~use_printable:false datum + | `Reexport `As (name,lib) -> + Printf.sprintf "REEX as %s from %s" name lib + | `Reexport `From lib -> + Printf.sprintf "REEX from %s" lib + | `Stub (i1,i2) -> + Printf.sprintf "STUB 0x%x 0x%x" i1 i2 + | `Flags flags -> + if (use_flags) then + Printf.sprintf "FLAGS 0x%x" flags + else "" + + let mach_export_datum_ordinal = + function + | #GoblinSymbol.symbol_datum as datum -> + GoblinSymbol.symbol_datum_ordinal datum + | `Reexport `As (_,_) (* fall through *) + | `Flags _ + | `Reexport `From _ + | `Stub _ -> GoblinSymbol.kORDINAL_RIGHT + + let sort_mach_export_data (data) = + List.sort (fun a b -> + let e1 = mach_export_datum_ordinal a in + let e2 = mach_export_datum_ordinal b in + Pervasives.compare e1 e2 + ) data + + let mach_export_data_to_string + (* should probably be true to show REEX ? *) + ?use_kind:(use_kind=false) + ?use_flags:(use_flags=false) + ?use_lib:(use_lib=true) + (data:mach_export_data) + = + let data = sort_mach_export_data data in + let b = Buffer.create ((List.length data) * 15) in + List.iter + (fun elem -> + Buffer.add_string b + @@ mach_export_datum_to_string + ~use_kind:use_kind + ~use_flags:use_flags + ~use_lib:use_lib + elem; + Buffer.add_string b " " + ) data; + Buffer.contents b + + (* lessening of the data set, ignores mach specific extensions *) + let mach_export_data_to_symbol_data list = + List.filter + ( + function + (* this is sweet *) + | #GoblinSymbol.symbol_datum as datum -> + ignore datum; true + (* ignore the warning, can't use _ + :( we just need to see if it's a subset of goblin symbols, + is all *) + | _ -> false + ) list + + let print exports = + Printf.printf "Exports (%d):\n" @@ List.length exports; + List.iter + (fun symbol -> + mach_export_data_to_string + ~use_kind:false ~use_lib:false symbol + |> Printf.printf "%s\n" + ) exports + + let print_mach_export_data + ?simple:(simple=false) + ?goblin:(goblin=false) + export + = + if (not goblin) then + mach_export_data_to_string + ~use_lib:(not simple) export + |> Printf.printf "%s\n" + else + GoblinSymbol.symbol_data_to_string + ~basic_export:true export + |> Printf.printf "%s\n" + + (* TODO: move this into ReadMach *) + (* + |> GoblinSymbol.sort_symbols + |> GoblinSymbol.compute_size 0x0 (* FIX THIS WITH EXTRA NLIST DATA *) + |> List.rev (* for some reason compute size wasn't reversing... ? *) + *) + + let empty = [] + +end + +(* MACH IMPORT -> GOBLIN *) +module Imports = struct + open MachImports + + type import = + [ + | GoblinSymbol.symbol_datum + (* we extend with mach specific details: *) + | `Flags of int + | `IsLazy of bool (* this is getting too hacky *) + ] list + + open MachLoadCommand + + let import_to_goblin (import:MachImports.import) :import = + [ + `Name import.bi.symbol_name; + `Offset import.offset; + `Kind GoblinSymbol.Import; + `Size import.size; + `Lib (import.dylib, import.dylib); + `Flags import.bi.symbol_flags; + `IsLazy import.is_lazy + ] + + (* + let mach_import_to_goblin libraries segments ~is_lazy:is_lazy (import:bind_information) = + let offset = (List.nth segments import.seg_index).fileoff + import.seg_offset in + let size = if (import.bind_type == MachBindOpcodes.kBIND_TYPE_POINTER) then 8 else 0 in + let libname = libraries.(import.symbol_library_ordinal) in + [ + `Name import.symbol_name; + `Offset offset; + `Kind GoblinSymbol.Import; + `Size size; + `Lib (libname, libname); + `Flags import.symbol_flags; + `IsLazy is_lazy + ] + + + let get_imports binary dyld_info libs segments = + let bind_off = dyld_info.MachLoadCommand.bind_off in + let bind_size = dyld_info.MachLoadCommand.bind_size in + let lazy_bind_off = dyld_info.MachLoadCommand.lazy_bind_off in + let lazy_bind_size = dyld_info.MachLoadCommand.lazy_bind_size in + let non_lazy_bytes = Bytes.sub binary bind_off bind_size in + let lazy_bytes = Bytes.sub binary lazy_bind_off lazy_bind_size in + let non_lazy_imports = bind_interpreter non_lazy_bytes 0 bind_size false in + let lazy_imports = bind_interpreter lazy_bytes 0 lazy_bind_size true in + let nl = List.map + (mach_import_to_goblin libs segments ~is_lazy:false) + non_lazy_imports |> GoblinSymbol.sort_symbols in + let la = List.map + (mach_import_to_goblin libs segments ~is_lazy:true) + lazy_imports |> GoblinSymbol.sort_symbols in + nl,la + *) + let mach_import_data_to_string (data:import) = + GoblinSymbol.symbol_data_to_string data + + let rec is_lazy = + function + | [] -> raise Not_found + | `IsLazy islazy :: _ -> islazy + | _::remainder -> is_lazy remainder + + let import_name = GoblinSymbol.find_symbol_name + + let import_lib import = GoblinSymbol.find_symbol_lib import |> fst + + let print (nlas,las) = + let n1 = List.length nlas in + let n2 = List.length las in + Printf.printf "Imports (%d):\n" @@ (n1 + n2); + Printf.printf " Non-lazy (%d):\n" n1; + List.iter + (fun data -> + GoblinSymbol.print_symbol_data ~with_lib:true data) nlas; + Printf.printf " Lazy (%d):\n" n2; + List.iter + (fun data -> + GoblinSymbol.print_symbol_data ~with_lib:true data) las + + let print_imports_deprecated (nlas, las) = + let n1 = Array.length nlas in + let n2 = Array.length las in + Printf.printf "Imports (%d):\n" @@ (n1 + n2); + Printf.printf "Non-lazy (%d):\n" n1; + Array.iteri (fun i bi -> + Printf.printf "%s\n" @@ bind_information_to_string bi) nlas; + Printf.printf "Lazy (%d):\n" n2; + Array.iter (fun bi -> + Printf.printf "%s\n" @@ bind_information_to_string bi) las + + let empty = [],[] + + let find string array = + let len = Array.length array in + let rec loop i = + if (i >= len) then raise Not_found + else + let symbol = import_name array.(i) in + if (string = symbol) then + array.(i) + else + loop (i + 1) + in loop 0 +end + +(* MACH NLIST -> GOBLIN *) +module SymbolTable = struct + + open MachSymbolTable + + let nlist_flag_to_symbol_kind = + function + | 0xe -> GoblinSymbol.Local + | 0xf -> GoblinSymbol.Export + | 0x1 -> GoblinSymbol.Import + | _ -> GoblinSymbol.Other + + let nlist_to_symbol_data (nlist, symbol) = + let kind = `Kind (nlist_flag_to_symbol_kind @@ (nlist.MachSymbolTable.n_type)) in + let name = `Name (symbol) in + let offset = `Offset (nlist.MachSymbolTable.n_value) in + [name; offset; kind] + + let filter_by_kind kind = List.filter (fun symbol -> try GoblinSymbol.find_symbol_kind symbol = kind with Not_found -> false) + + let print_symbols symbols = + List.iter (GoblinSymbol.print_symbol_data ~like_nlist:true) symbols + + (* +TODO: maps from nlist to goblin + let goblin_symbols = List.map (nlist_to_symbol_data) symbols in + goblin_symbols + + *) +end diff --git a/lib/goblin/META b/lib/goblin/META new file mode 100644 index 0000000..817a667 --- /dev/null +++ b/lib/goblin/META @@ -0,0 +1,13 @@ +# OASIS_START +# DO NOT EDIT (digest: c06468a8578c3570ca857b0f4da94952) +version = "1.1" +description = +"Lightweight, cross platform binary parsing and analysis library with no dependencies" +requires = "utils" +archive(byte) = "goblin.cma" +archive(byte, plugin) = "goblin.cma" +archive(native) = "goblin.cmxa" +archive(native, plugin) = "goblin.cmxs" +exists_if = "goblin.cma" +# OASIS_STOP + diff --git a/lib/goblin/goblin.mldylib b/lib/goblin/goblin.mldylib new file mode 100644 index 0000000..8951295 --- /dev/null +++ b/lib/goblin/goblin.mldylib @@ -0,0 +1,9 @@ +# OASIS_START +# DO NOT EDIT (digest: fb89a7c50e4cb05629e8a79a1b8cc7c6) +Goblin +GoblinSymbol +GoblinExport +GoblinImport +GoblinMach +GoblinElf +# OASIS_STOP diff --git a/lib/goblin/goblin.mllib b/lib/goblin/goblin.mllib new file mode 100644 index 0000000..8951295 --- /dev/null +++ b/lib/goblin/goblin.mllib @@ -0,0 +1,9 @@ +# OASIS_START +# DO NOT EDIT (digest: fb89a7c50e4cb05629e8a79a1b8cc7c6) +Goblin +GoblinSymbol +GoblinExport +GoblinImport +GoblinMach +GoblinElf +# OASIS_STOP diff --git a/lib/mach/META b/lib/mach/META new file mode 100644 index 0000000..7846332 --- /dev/null +++ b/lib/mach/META @@ -0,0 +1,13 @@ +# OASIS_START +# DO NOT EDIT (digest: 3a9fea97bc0f9f339ba064f211798e15) +version = "1.1" +description = +"Lightweight, cross platform binary parsing and analysis library with no dependencies" +requires = "utils goblin" +archive(byte) = "mach.cma" +archive(byte, plugin) = "mach.cma" +archive(native) = "mach.cmxa" +archive(native, plugin) = "mach.cmxs" +exists_if = "mach.cma" +# OASIS_STOP + diff --git a/lib/mach/Mach.ml b/lib/mach/Mach.ml index cb72324..2c983bb 100644 --- a/lib/mach/Mach.ml +++ b/lib/mach/Mach.ml @@ -1,3 +1,5 @@ +(* TODO: add bytecoverage computer *) + module BindOpcodes = MachBindOpcodes module CpuTypes = MachCpuTypes module Fat = MachFat @@ -7,70 +9,94 @@ module Constants = MachConstants module Exports = MachExports module Header = MachHeader module Imports = MachImports -module Segment64 = MachSegment64 -module Nlist = MachNlist +module Section = MachSection +module SymbolTable = MachSymbolTable module RebaseOpcodes = MachRebaseOpcodes module Version = MachVersion +module Coverage = MachCoverage + +open LoadCommand.Types let debug = false type t = { header: Header.t; load_commands: LoadCommand.t; - (* todo make this goblin independent by adding the to_goblin step in the readelf analyzer *) - imports: Imports.mach_import_data array; + imports: Imports.t; nimports: int; - exports: Exports.mach_export_data array; + exports: Exports.t; nexports: int; - nlist: Nlist.t; + nlist: SymbolTable.t; nnlist: int; name: string; - install_name: string; is_lib: bool; libraries: string array; nlibraries: int; size: int; - raw_header: bytes; raw_code: bytes; - raw_dyldinfo: bytes; + byte_coverage: ByteCoverage.t; } -let imports_to_string imports = - let b = Buffer.create (Array.length imports) in - Array.fold_left (fun acc import -> - Buffer.add_string acc - @@ Printf.sprintf "%s" - @@ Imports.mach_import_data_to_string import; - acc - ) b imports |> Buffer.contents - -let exports_to_string exports = - let b = Buffer.create (Array.length exports) in - Array.fold_left (fun acc export -> - Buffer.add_string acc - @@ Printf.sprintf "%s" - @@ Exports.mach_export_data_to_string export; - acc - ) b exports |> Buffer.contents - -let binary_to_string binary = +let binary_to_string binary = let libstr = if (binary.is_lib) then " (LIB)" else "" in - Printf.sprintf "%s%s:\nImports (%d):\n%sExports (%d):\n%s\n" + Printf.sprintf "%s%s:\nImports (%d):\n%sExports (%d):\n%s" binary.name libstr (binary.nimports) - (imports_to_string binary.imports) + (Imports.imports_to_string binary.imports) (binary.nexports) - (exports_to_string binary.exports) + (Exports.exports_to_string binary.exports) +let print binary = + Printf.printf "%s" @@ binary_to_string binary; + ByteCoverage.print binary.byte_coverage -(* TODO: add header -let create_binary (name,install_name) (nls,las) exports islib libs = - (* flatten and condense import info *) - let imports = nls @ las |> Array.of_list in - let nimports = Array.length imports in - let exports = Array.of_list exports in - let nexports = Array.length exports in (* careful here, due to aliasing, if order swapped, in trouble *) - let nlibs = Array.length libs in - let code = Bytes.empty in - {name; install_name; imports; nimports; exports; nexports; islib; libs; nlibs; code} - *) +let get binary = + let size = Bytes.length binary in + let header = Header.get_mach_header binary in + let load_commands = LoadCommand.get_load_commands binary + Header.sizeof_mach_header + header.Header.ncmds + header.Header.sizeofcmds + in + let name = LoadCommand.get_lib_name load_commands (* if "" we're not a dylib *) + in + let segments = LoadCommand.get_segments load_commands in + let libraries = LoadCommand.get_libraries load_commands name in (* TODO: watch this, with the libs.(0) *) + (* move this inside of dyld, need the nlist info to compute locals... *) + let is_lib = header.Header.filetype = Header.kMH_DYLIB in + let nlist = + match LoadCommand.get_load_command LC_SYMTAB load_commands with + | Some (LC_SYMTAB symtab) -> + SymbolTable.get_symbols binary symtab + | _ -> [] + in + let dyld_info = LoadCommand.get_dyld_info load_commands in + let exports, imports = + match dyld_info with + | Some dyld_info -> + (* TODO: add load segment boundaries, and nlists locals as a parameters *) + let exports = + Exports.get_exports binary dyld_info libraries + in + (* TODO: yea, need to fix imports like machExports; send in the libraries, + do all that preprocessing there, and not in create binary *) + let imports = + Imports.get_imports + binary dyld_info libraries segments + in + exports,imports + | None -> + [],[] + in + let nnlist = List.length nlist in + let nimports = List.length imports in + let nexports = List.length exports in + let nlibraries = Array.length libraries in + let raw_code = Bytes.empty in + (* TODO: add bytecoverage computer *) + { + header; load_commands; name; nlist; nnlist; + imports; nimports; exports; nexports; + is_lib; libraries; nlibraries; raw_code; size; + byte_coverage = Coverage.compute header load_commands size; + } diff --git a/lib/mach/MachConstants.ml b/lib/mach/MachConstants.ml index 5454b79..6f6f979 100644 --- a/lib/mach/MachConstants.ml +++ b/lib/mach/MachConstants.ml @@ -24,3 +24,172 @@ let kDYLD_IOS_VERSION_6_1 = 0x00060100 let kDYLD_IOS_VERSION_7_0 = 0x00070000 let kDYLD_IOS_VERSION_7_1 = 0x00070100 let kDYLD_IOS_VERSION_8_0 = 0x00080000 + +(* Segment and Section Constants *) + +(* + * The flags field of a section structure is separated into two parts a section + * type and section attributes. The section types are mutually exclusive (it + * can only have one type) but the section attributes are not (it may have more + * than one attribute). + *) +let kSECTION_TYPE = 0x000000ff (* 256 section types *) +let kSECTION_ATTRIBUTES = 0xffffff00 (* 24 section attributes *) + +(* Constants for the type of a section *) +let kS_REGULAR = 0x0 (* regular section *) +let kS_ZEROFILL = 0x1 (* zero fill on demand section *) +let kS_CSTRING_LITERALS = 0x2 (* section with only literal C strings*) +let kS_4BYTE_LITERALS = 0x3 (* section with only 4 byte literals *) +let kS_8BYTE_LITERALS = 0x4 (* section with only 8 byte literals *) +let kS_LITERAL_POINTERS = 0x5 (* section with only pointers to *) + (* literals *) +(* + * For the two types of symbol pointers sections and the symbol stubs section + * they have indirect symbol table entries. For each of the entries in the + * section the indirect symbol table entries, in corresponding order in the + * indirect symbol table, start at the index stored in the reserved1 field + * of the section structure. Since the indirect symbol table entries + * correspond to the entries in the section the number of indirect symbol table + * entries is inferred from the size of the section divided by the size of the + * entries in the section. For symbol pointers sections the size of the entries + * in the section is 4 bytes and for symbol stubs sections the byte size of the + * stubs is stored in the reserved2 field of the section structure. + *) +let kS_NON_LAZY_SYMBOL_POINTERS = 0x6 (* section with only non-lazy + symbol pointers *) +let kS_LAZY_SYMBOL_POINTERS = 0x7 (* section with only lazy symbol + pointers *) +let kS_SYMBOL_STUBS = 0x8 (* section with only symbol + stubs, byte size of stub in + the reserved2 field *) +let kS_MOD_INIT_FUNC_POINTERS = 0x9 (* section with only function + pointers for initialization*) +let kS_MOD_TERM_FUNC_POINTERS = 0xa (* section with only function + pointers for termination *) +let kS_COALESCED = 0xb (* section contains symbols that + are to be coalesced *) +let kS_GB_ZEROFILL = 0xc (* zero fill on demand section + (that can be larger than 4 + gigabytes) *) +let kS_INTERPOSING = 0xd (* section with only pairs of + function pointers for + interposing *) +let kS_16BYTE_LITERALS = 0xe (* section with only 16 byte + literals *) +let kS_DTRACE_DOF = 0xf (* section contains + DTrace Object Format *) +let kS_LAZY_DYLIB_SYMBOL_POINTERS = 0x10 (* section with only lazy + symbol pointers to lazy + loaded dylibs *) +(* + * Section types to support thread local variables + *) +let kS_THREAD_LOCAL_REGULAR = 0x11 (* template of initial + values for TLVs *) +let kS_THREAD_LOCAL_ZEROFILL = 0x12 (* template of initial + values for TLVs *) +let kS_THREAD_LOCAL_VARIABLES = 0x13 (* TLV descriptors *) +let kS_THREAD_LOCAL_VARIABLE_POINTERS = 0x14 (* pointers to TLV + descriptors *) +let kS_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15 (* functions to call + to initialize TLV + values *) + +(* + * Constants for the section attributes part of the flags field of a section + * structure. + *) +let kSECTION_ATTRIBUTES_USR = 0xff000000 (* User setable attributes *) +let kS_ATTR_PURE_INSTRUCTIONS = 0x80000000 (* section contains only true + machine instructions *) +let kS_ATTR_NO_TOC = 0x40000000 (* section contains coalesced + symbols that are not to be + in a ranlib table of + contents *) +let kS_ATTR_STRIP_STATIC_SYMS = 0x20000000 (* ok to strip static symbols + in this section in files + with the MH_DYLDLINK flag *) +let kS_ATTR_NO_DEAD_STRIP = 0x10000000 (* no dead stripping *) +let kS_ATTR_LIVE_SUPPORT = 0x08000000 (* blocks are live if they + reference live blocks *) +let kS_ATTR_SELF_MODIFYING_CODE = 0x04000000 (* Used with i386 code stubs + written on by dyld *) +(* + * If a segment contains any sections marked with S_ATTR_DEBUG then all + * sections in that segment must have this attribute. No section other than + * a section marked with this attribute may reference the contents of this + * section. A section with this attribute may contain no symbols and must have + * a section type S_REGULAR. The static linker will not copy section contents + * from sections with this attribute into its output file. These sections + * generally contain DWARF debugging info. + *) +let kS_ATTR_DEBUG = 0x02000000 (* debug section *) +let kSECTION_ATTRIBUTES_SYS = 0x00ffff00 (* system setable attributes *) +let kS_ATTR_SOME_INSTRUCTIONS = 0x00000400 (* section contains some + machine instructions *) +let kS_ATTR_EXT_RELOC = 0x00000200 (* section has external + relocation entries *) +let kS_ATTR_LOC_RELOC = 0x00000100 (* section has local + relocation entries *) + + +(* + * The names of segments and sections in them are mostly meaningless to the + * link-editor. But there are few things to support traditional UNIX + * executables that require the link-editor and assembler to use some names + * agreed upon by convention. + * + * The initial protection of the "__TEXT" segment has write protection turned + * off (not writeable). + * + * The link-editor will allocate common symbols at the end of the "__common" + * section in the "__DATA" segment. It will create the section and segment + * if needed. + *) + +(* The currently known segment names and the section names in those segments *) + +let kSEG_PAGEZERO = "__PAGEZERO" (* the pagezero segment which has no *) + (* protections and catches NULL *) + (* references for MH_EXECUTE files *) + + +let kSEG_TEXT = "__TEXT" (* the tradition UNIX text segment *) +let kSECT_TEXT = "__text" (* the real text part of the text *) + (* section no headers, and no padding *) +let kSECT_FVMLIB_INIT0 = "__fvmlib_init0" (* the fvmlib initialization *) + (* section *) +let kSECT_FVMLIB_INIT1 = "__fvmlib_init1" (* the section following the *) + (* fvmlib initialization *) + (* section *) + +let kSEG_DATA = "__DATA" (* the tradition UNIX data segment *) +let kSECT_DATA = "__data" (* the real initialized data section *) + (* no padding, no bss overlap *) +let kSECT_BSS = "__bss" (* the real uninitialized data section*) + (* no padding *) +let kSECT_COMMON = "__common" (* the section common symbols are *) + (* allocated in by the link editor *) + +let kSEG_OBJC = "__OBJC" (* objective-C runtime segment *) +let kSECT_OBJC_SYMBOLS = "__symbol_table" (* symbol table *) +let kSECT_OBJC_MODULES = "__module_info" (* module information *) +let kSECT_OBJC_STRINGS = "__selector_strs" (* string table *) +let kSECT_OBJC_REFS = "__selector_refs" (* string table *) + +let kSEG_ICON = "__ICON" (* the icon segment *) +let kSECT_ICON_HEADER = "__header" (* the icon headers *) +let kSECT_ICON_TIFF = "__tiff" (* the icons in tiff format *) + +let kSEG_LINKEDIT = "__LINKEDIT" (* the segment containing all structs *) + (* created and maintained by the link *) + (* editor. Created with -seglinkedit *) + (* option to ld(1) for MH_EXECUTE and *) + (* FVMLIB file types only *) + +let kSEG_UNIXSTACK = "__UNIXSTACK" (* the unix stack segment *) + +let kSEG_IMPORT = "__IMPORT" (* the segment for the self (dyld) *) + (* modifing code stubs that has read, *) + (* write and execute permissions *) diff --git a/lib/mach/MachCoverage.ml b/lib/mach/MachCoverage.ml new file mode 100644 index 0000000..033573d --- /dev/null +++ b/lib/mach/MachCoverage.ml @@ -0,0 +1,193 @@ +(* +# TODO: + * compute nlist size +*) +open ByteCoverage +open MachLoadCommand +open MachLoadCommand.Types +open MachConstants + +let debug = false + +let compute_header_coverage header dataset = + let mhsize = MachHeader.sizeof_mach_header in + let lcsize = header.MachHeader.sizeofcmds in + let range_start = 0 in + let range_end = mhsize + range_start in + let extra = "Mach Header" in + ByteCoverage.add + (create_data + ~tag:Meta + ~r1:range_start + ~r2:range_end + ~extra:extra + ~understood:true + ) + dataset + |> + ByteCoverage.add + (create_data + ~tag:Meta + ~r1:mhsize + ~r2:(lcsize+mhsize) + ~extra:"LoadCommands" + ~understood:true + ) + +(* granular dyldinfo/LINKEDIT coverage here *) +let compute_dyldinfo_coverage load_commands dataset = + match MachLoadCommand.get_dyld_info load_commands with + | Some dyld_info -> + let r1 = dyld_info.rebase_off in + let r2 = r1 + dyld_info.rebase_size in + let dyldextra = cmd_int_to_string dyld_info.cmd in + let extra = "dyldinfo" in + add + (create_data + ~tag:Rela + ~r1:r1 + ~r2:r2 + ~extra:("rebase // " ^ dyldextra) + ~understood:true) dataset + |> add + (create_data + ~tag:Symbol + ~r1:dyld_info.bind_off + ~r2:(dyld_info.bind_off + dyld_info.bind_size) + ~extra:("bind // " ^ dyldextra) + ~understood:true) + |> add + (create_data + ~tag:SymbolTable + ~r1:dyld_info.weak_bind_off + ~r2:(dyld_info.weak_bind_off + dyld_info.weak_bind_size) + ~extra:("weak bind // " ^ dyldextra) + ~understood:true) + |> add + (create_data + ~tag:SymbolTable + ~r1:dyld_info.lazy_bind_off + ~r2:(dyld_info.lazy_bind_off + dyld_info.lazy_bind_size) + ~extra:("lazy bind // " ^ dyldextra) + ~understood:true) + |> add + (create_data + ~tag:SymbolTable + ~r1:dyld_info.export_off + ~r2:(dyld_info.export_off + dyld_info.export_size) + ~extra:("export // " ^ dyldextra) + ~understood:true) + |> add + (create_data + ~tag:Meta + ~r1:dyld_info.rebase_off + ~r2:(dyld_info.export_off + dyld_info.export_size) + ~extra:extra + ~understood:true) + +| None -> dataset + +let known_sections = + [ + (kSECT_TEXT, Code); + (kSECT_DATA, Data); + (kSECT_COMMON, Semantic); + (kSECT_BSS, Semantic); + ("__cstring", String); + ("__stubs", Rela); + ("__stub_helper", Rela); + ("__nl_symbol_ptr", Symbol); + ("__la_symbol_ptr", Symbol); + ] + +module SectionMap = Map.Make(String) +(* add the known sections to our map *) +let section_map = + List.fold_left (fun acc (sectname, tag) -> + SectionMap.add sectname tag acc + ) SectionMap.empty known_sections + +let compute_section_coverage dataset (section:section_64) = + if (debug) then + Printf.printf "SECTION called: %s\n" section.sectname; + let range_start = section.offset in + let range_end = range_start + section.size in + let tag,understood = + try (SectionMap.find section.sectname section_map),true + with Not_found -> Unknown,false + in + let extra = + section.sectname ^ " // " ^ section.segname ^ + ( + if (tag <> Unknown) then + "" + else + " // Unknown" + ) + in + add (create_data + ~tag:tag + ~r1:range_start + ~r2:range_end + ~extra:extra + ~understood:understood) + dataset + +let compute_segment_coverage tag dataset (segment:segment_command_64) = + let size = segment.filesize in + if (debug) then + Printf.printf "SEGMENT called: %s\n" segment.segname; + let range_start = segment.fileoff in + let range_end = size + range_start in + let extra = + segment.segname ^ " // " ^ + cmd_int_to_string segment.cmd + in + let dataset = + List.fold_left compute_section_coverage + dataset segment.sections + in + add + (create_data + ~tag:tag + ~r1:range_start + ~r2:range_end + ~extra:extra + ~understood:true) + dataset + +(* todo, maybe let tags be a list, or have a single main tag... *) +let known_segments = + [ + (* + (kSEG_TEXT, [Code;Semantic]); + (kSEG_DATA, [Data;Semantic]); + (kSEG_OBJC, [Code;Semantic]); + (kSEG_ICON, [Data;Semantic]); + (kSEG_LINKEDIT, [Meta;Semantic]); + *) + (kSEG_TEXT, Semantic); + (kSEG_DATA, Semantic); + (kSEG_OBJC, Semantic); + (kSEG_ICON, Semantic); + (kSEG_LINKEDIT, Semantic); + ] + +let compute_segment_coverage (segments:segment_command_64 list) dataset = + if (segments = []) then + dataset + else + let f = (fun dataset (section_name, tag) -> + match get_segment_by_name section_name segments with + | Some segment -> + compute_segment_coverage tag dataset segment + | None -> dataset + (* List.fold_left (compute_segment_coverage section_type tag) dataset segments *) + ) in + (fun dataset -> List.fold_left f dataset known_segments) dataset + +let compute header (load_commands:lc list) size = + compute_header_coverage header ByteCoverage.empty + |> compute_dyldinfo_coverage load_commands + |> compute_segment_coverage (get_segments load_commands) + |> ByteCoverage.create size diff --git a/lib/mach/MachExports.ml b/lib/mach/MachExports.ml index 0f2a959..7c8f2d5 100644 --- a/lib/mach/MachExports.ml +++ b/lib/mach/MachExports.ml @@ -1,27 +1,9 @@ (* TODO: - (* TOOO: make export data more generic, in a higher up module *) (1) Weak of regular_symbol_info type probably needs to be added ? (3) /usr/lib/libstdc++.6.0.9.dylib has flag 0xc at many offsets... they're weak *) -(* -for testing -#directory "/Users/matthewbarney/projects/binreader/_build/src/utils/";; -#directory "/Users/matthewbarney/projects/binreader/_build/src/mach/";; -#directory "/Users/matthewbarney/projects/binreader/_build/src/goblin/";; -#load "Binary.cmo";; -#load "InputUtils.cmo";; -#load "Version.cmo";; -#load "Nlist.cmo";; -#load "MachLoadCommand.cmo";; -#load "BindOpcodes.cmo";; -#load "Goblin.Symbol.cmo";; -#load "Leb128.cmo";; -#load "Imports.cmo";; -#load "Exports.cmo";; -#load "Macho.cmo";; -*) open Binary open MachLoadCommand @@ -46,12 +28,19 @@ type symbol_kind = exception Unknown_symbol_kind of string let get_symbol_kind kind = - match kind with + match kind land kEXPORT_SYMBOL_FLAGS_KIND_MASK with | 0x00 -> REGULAR | 0x01 -> THREAD_LOCAL | 0x02 -> ABSOLUTE | _ -> UNKNOWN_SYMBOL_KIND kind +let symbol_kind_to_string = + function + | REGULAR -> "REGULAR" + | ABSOLUTE -> "ABSOLUTE" + | THREAD_LOCAL -> "THREAD_LOCAL" + | UNKNOWN_SYMBOL_KIND i -> Printf.sprintf "UNKNOWN %d" i + (* "If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. The resolver is used by lazy pointers and must be called to get the actual address to use." *) type stub_symbol_info = {stub_offset: int; resolver_offset: int; flags: int} @@ -61,186 +50,25 @@ type reexport_symbol_info = {lib: string; lib_symbol_name: string option; flags: int} type regular_symbol_info = {address: int; flags: int} -type export_symbol_info = +type export_info = | Regular of regular_symbol_info | Reexport of reexport_symbol_info | Stub of stub_symbol_info -module ExportMap = Map.Make(String) - - (* a datum is a single unit, - so data is a list of such possible datums *) -type mach_export_data = - [ - | Goblin.Symbol.symbol_datum - (* we extend with mach specific details: *) - | `Reexport of [`As of string * string | `From of string] - | `Stub of int * int - | `Flags of int - ] list +type export = + { + info: export_info; + name: string; + size: int; + offset: int; + } -type export_map = mach_export_data ExportMap.t +type t = export list exception Unimplemented_symbol_flag of int * string -let export_info_to_mach_export_data name soname = +let export_info_to_string = function - | Regular symbol -> - [ - `Name name; - `Offset symbol.address; - `Kind Goblin.Symbol.Export; - `Flags symbol.flags; - `Lib (soname, soname); - ] - | Reexport symbol -> - begin - match symbol.lib_symbol_name with - | Some name' -> - [ - `Name name; - `Reexport (`As (name',symbol.lib)); - `PrintableData (Printf.sprintf "REEX as %s from %s" name' symbol.lib); - `Kind Goblin.Symbol.Export; - `Flags symbol.flags; - `Lib (soname, soname); - ] - | None -> - [ - `Name name; - `Reexport (`From symbol.lib); - `PrintableData (Printf.sprintf "REEX from %s" symbol.lib); - `Kind Goblin.Symbol.Export; - `Flags symbol.flags; - `Lib (soname, soname); - ] - end - | Stub symbol -> - [ - `Name name; - `Stub (symbol.stub_offset, symbol.resolver_offset); - `PrintableData (Printf.sprintf "STUB 0x%x 0x%x" symbol.stub_offset symbol.resolver_offset); - `Kind Goblin.Symbol.Export; - `Flags symbol.flags; - `Lib (soname, soname); - ] - -let rec find_reexport = - function - | [] -> raise Not_found - | (`Reexport _) as reex :: _-> - reex - | _::rest -> find_reexport rest - -let rec find_stub = - function - | [] -> raise Not_found - | `Stub stub :: _ -> - stub - | _::rest -> find_stub rest - -let rec find_flags = - function - | [] -> raise Not_found - | `Flags flags :: _ -> - flags - | _::rest -> find_flags rest - - let mach_export_data_to_export_info data = - try - let reex = find_reexport data in - begin - match reex with - | `Reexport (`As (name, lib)) -> - let lib_symbol_name = Some name in - let flags = find_flags data in - Reexport {lib; lib_symbol_name; flags} - | `Reexport (`From lib) -> - let lib_symbol_name = None in - let flags = find_flags data in - Reexport {lib; lib_symbol_name; flags} - end - with Not_found -> - try - let stub_offset,resolver_offset = find_stub data in - let flags = find_flags data in - Stub {stub_offset;resolver_offset; flags} - with Not_found -> - let address = Goblin.Symbol.find_symbol_offset data in - let flags = find_flags data in - Regular {address; flags} - -let mach_export_datum_to_string ?use_kind:(use_kind=true) ?use_flags:(use_flags=false) ?use_lib:(use_lib=true) datum = - match datum with - | #Goblin.Symbol.symbol_datum as datum -> - Goblin.Symbol.symbol_datum_to_string - ~use_kind:use_kind - ~use_lib:use_lib - ~use_printable:false datum - | `Reexport `As (name,lib) -> - Printf.sprintf "REEX as %s from %s" name lib - | `Reexport `From lib -> - Printf.sprintf "REEX from %s" lib - | `Stub (i1,i2) -> - Printf.sprintf "STUB 0x%x 0x%x" i1 i2 - | `Flags flags -> - if (use_flags) then - Printf.sprintf "FLAGS 0x%x" flags - else "" - -let mach_export_datum_ordinal = - function - | #Goblin.Symbol.symbol_datum as datum -> - Goblin.Symbol.symbol_datum_ordinal datum - | `Reexport `As (_,_) (* fall through *) - | `Flags _ - | `Reexport `From _ - | `Stub _ -> Goblin.Symbol.kORDINAL_RIGHT - -let sort_mach_export_data (data) = - List.sort (fun a b -> - let e1 = mach_export_datum_ordinal a in - let e2 = mach_export_datum_ordinal b in - Pervasives.compare e1 e2 - ) data - -let mach_export_data_to_string - (* should probably be true to show REEX ? *) - ?use_kind:(use_kind=false) - ?use_flags:(use_flags=false) - ?use_lib:(use_lib=true) - (data:mach_export_data) - = - let data = sort_mach_export_data data in - let b = Buffer.create ((List.length data) * 15) in - List.iter - (fun elem -> - Buffer.add_string b - @@ mach_export_datum_to_string - ~use_kind:use_kind - ~use_flags:use_flags - ~use_lib:use_lib - elem; - Buffer.add_string b " " - ) data; - Buffer.contents b - -(* lessening of the data set, ignores mach specific extensions *) -let mach_export_data_to_symbol_data list = - List.filter - ( - function - (* this is sweet *) - | #Goblin.Symbol.symbol_datum as datum -> - ignore datum; true - (* ignore the warning, can't use _ - :( we just need to see if it's a subset of goblin symbols, - is all *) - | _ -> false - ) list - -let export_info_to_string ei = - match ei with | Regular info -> Printf.sprintf "0x%x REGU" info.address | Reexport info -> @@ -252,81 +80,81 @@ let export_info_to_string ei = Printf.sprintf "from %s -> %s REEX" info.lib original_symbol end | Stub info -> - Printf.sprintf "(0x%x 0x%x) STUB" - info.stub_offset info.resolver_offset + Printf.sprintf "(0x%x 0x%x) STUB" + info.stub_offset info.resolver_offset -let export_map_to_string map = - let b = Buffer.create ((ExportMap.cardinal map) * 15) in - ExportMap.iter - (fun key symbol -> - (* TODO: change this to a different printer, - this will be used disassembling single binaries, - so more mach info, the better *) - Buffer.add_string b - @@ Goblin.Symbol.symbol_data_to_string - ~basic_export:true symbol; - ) map; - Buffer.contents b +let export_to_string export = + Printf.sprintf "%s (0x%x) { %s }\n" + export.name + export.size + (export_info_to_string export.info) -let export_map_to_mach_export_data_list map = - ExportMap.fold (fun key export acc -> export::acc) map [] +let exports_to_string exports = + List.fold_left (fun acc import -> + (Printf.sprintf "%s" @@ export_to_string import) ^ acc + ) "" exports -let mach_export_data_list_to_export_map list = - List.fold_left - (fun acc export -> - ExportMap.add - (Goblin.Symbol.find_symbol_name export) - export acc - ) ExportMap.empty list +let print_export export = + Printf.printf "%s\n" @@ export_to_string export -let print_exports exports = +let print exports = Printf.printf "Exports (%d):\n" @@ List.length exports; - List.iter - (fun symbol -> - mach_export_data_to_string - ~use_kind:false ~use_lib:false symbol - |> Printf.printf "%s\n" - ) exports - -let print_mach_export_data - ?simple:(simple=false) - ?goblin:(goblin=false) - export - = - if (not goblin) then - mach_export_data_to_string - ~use_lib:(not simple) export - |> Printf.printf "%s\n" - else - Goblin.Symbol.symbol_data_to_string - ~basic_export:true export - |> Printf.printf "%s\n" - -let find_map export map = - ExportMap.find export map - -let find symbol = - List.find - (fun export -> - (Goblin.Symbol.find_symbol_name export) = symbol) + List.iter print_export exports let length exports = List.length exports -let length_map = ExportMap.cardinal - -let empty_map = ExportMap.empty +let compute_size exports:t = + let rec loop acc exports = + match exports with + | [] -> acc + | e1::[] -> + (e1::acc) |> List.rev + | e1::(e2::_ as rest) -> + let size = + if (e1.offset = 0x0 || e2.offset = 0x0) then + 0x0 + else + e2.offset - e1.offset + in + loop ({e1 with size}::acc) rest + in + loop [] exports let empty = [] -let fold = List.fold_left - -let fold_map f (map:export_map) = ExportMap.fold f map +let sort = List.sort (fun e1 e2 -> + match e1.info with + | Regular symbol1 -> + begin + match e2.info with + | Regular symbol2 -> + compare symbol1.address symbol2.address + | _ -> + 1 + end + | Reexport symbol1 -> + begin + match e2.info with + | Reexport symbol2 -> + compare symbol1.lib symbol2.lib + | _ -> + -1 + end + | Stub symbol1 -> + begin + match e2.info with + | Stub symbol2 -> + compare symbol1.stub_offset symbol2.stub_offset + | _ -> + -1 + end + ) (* ======================= *) (* BINARY work *) (* ======================= *) -let get_symbol_type bytes libs flags offset = +let get_export bytes libs flags offset = match flags land kEXPORT_SYMBOL_FLAGS_KIND_MASK |> get_symbol_kind with | REGULAR -> @@ -359,13 +187,12 @@ let get_symbol_type bytes libs flags offset = let address, _ = Leb128.get_uleb128 bytes offset in Regular {address; flags} - let interp = false let debug = interp (* current_symbol accumulates the symbol name until we hit a terminal, which we then add to the list as a key to the flags and location *) (* this is the meat of the binary work, reads out the trie encoded and builds the export list *) -let rec get_exports_it bytes base size libs current_symbol pos acc = +let rec get_exports_it bytes base size libs current_symbol pos acc = if (pos >= size) then (* shouldn't happen because we terminate based on the trie structure itself, not pos *) acc @@ -386,8 +213,10 @@ let rec get_exports_it bytes base size libs current_symbol pos acc = let num_children,children_start = Leb128.get_uleb128 bytes (pos + terminal_size) in (* skip past the symbol info to get the number of children *) let flags,pos = Leb128.get_uleb128 bytes pos in if (debug) then Printf.printf "\tTERM %d flags: 0x%x\n" num_children flags; - let export = (get_symbol_type bytes libs flags pos |> (export_info_to_mach_export_data current_symbol libs.(0))) in - if (debug) then begin Printf.printf "\t"; print_mach_export_data export end; + let info = get_export bytes libs flags pos in + let offset = match info with | Regular symbol -> symbol.address | _ -> 0x0 in + let export = {info; name = current_symbol; offset; size = 0} in + if (debug) then begin Printf.printf "\t"; print_export export end; let acc = export::acc in if (num_children = 0) then acc @@ -418,16 +247,12 @@ and get_branches bytes base count current_symbol curr pos branches = if (debug) then Printf.printf "\t(%d) string: %s next_node: 0x%x\n" curr key (base + next_node); get_branches bytes base count current_symbol (curr+1) pos ((key, (base + next_node))::branches) -(* TODO: see todos above *) (* entry point for doing the work *) -let get_exports binary dyld_info libs = - let boundary = (dyld_info.MachLoadCommand.export_size + dyld_info.MachLoadCommand.export_off) in - let base = dyld_info.MachLoadCommand.export_off in +let get_exports binary dyld_info libs :t = + let boundary = (dyld_info.MachLoadCommand.Types.export_size + dyld_info.MachLoadCommand.Types.export_off) in + let base = dyld_info.MachLoadCommand.Types.export_off in if (debug) then Printf.printf "export init: 0x%x 0x%x\n" base boundary; - get_exports_it binary base boundary libs "" base [] - |> Goblin.Symbol.sort_symbols - |> Goblin.Symbol.compute_size 0x0 (* FIX THIS WITH EXTRA NLIST DATA *) - |> List.rev (* for some reason compute size wasn't reversing... ? *) + get_exports_it binary base boundary libs "" base [] |> sort |> compute_size (* ======================== *) diff --git a/lib/mach/MachImports.ml b/lib/mach/MachImports.ml index 692fb4d..7ef08fb 100644 --- a/lib/mach/MachImports.ml +++ b/lib/mach/MachImports.ml @@ -1,7 +1,3 @@ -(* TODO: - (0): sort symbols by address, like exports - *) - open Binary open MachBindOpcodes open MachLoadCommand @@ -11,6 +7,7 @@ open MachLoadCommand symbol flags are undocumented *) +(* TODO: move the special fields (if any) and match them with what's on disk/in c structs? *) type bind_information = { seg_index: int; seg_offset: int; @@ -26,27 +23,31 @@ type import = { bi: bind_information; dylib: string; is_lazy: bool; + offset: int; + size: int; } -type mach_import_data = - [ - | Goblin.Symbol.symbol_datum - (* we extend with mach specific details: *) - | `Flags of int - | `IsLazy of bool (* this is getting too hacky *) - ] list +type t = import list + +let empty = [] let import_to_string import = if (import.is_lazy) then - Printf.sprintf "%s ~> %s\n" import.bi.symbol_name import.dylib + Printf.sprintf "%s ~> %s" import.bi.symbol_name import.dylib else - Printf.sprintf "%s -> %s\n" import.bi.symbol_name import.dylib + Printf.sprintf "%s -> %s" import.bi.symbol_name import.dylib + +let print_import import = + Printf.printf "%s\n" @@ import_to_string import let imports_to_string imports = List.fold_left (fun acc import -> (Printf.sprintf "%s" @@ import_to_string import) ^ acc ) "" imports +let print (imports:t) = + List.iter print_import imports + (* TODO: dyld lazy binder sets bind type to initial record as MachBindOpcodes.kBIND_TYPE_POINTER *) let empty_bind_information = { seg_index = 0; seg_offset = 0x0; bind_type = 0x0; special_dylib = 1; symbol_library_ordinal = 0; symbol_name = ""; symbol_flags = 0; addend = 0} @@ -67,6 +68,21 @@ let bind_information_to_string bi = let print_bind_information bis = List.iteri (fun i bi -> Printf.printf "%s\n" (bind_information_to_string bi)) bis +let sort = + List.sort (fun i1 i2 -> + if (i1.is_lazy) then + if (i2.is_lazy) then + compare i1.offset i2.offset + else + -1 + else + if (i2.is_lazy) then + 1 + else + compare i1.offset i2.offset + ) + + (* interpreter for BIND opcodes: runs on prebound (non lazy) symbols (usually dylib extern consts and extern variables), and lazy symbols (usually dylib functions) @@ -199,90 +215,25 @@ let bind_interpreter bytes pos size is_lazy = done; let seg_offset = !addr in loop pos'' (bind_info::acc) {bind_info with seg_offset} - in loop pos [] bind_info -(* non-lazy: extern [const] or specially requested prebound symbols ? *) +let bind_information_to_import libraries segments ~is_lazy:is_lazy bi = + let offset = (List.nth segments bi.seg_index).MachLoadCommand.Types.fileoff + bi.seg_offset in + let size = if (bi.bind_type == MachBindOpcodes.kBIND_TYPE_POINTER) then 8 else 0 in + let dylib = libraries.(bi.symbol_library_ordinal) in + {bi; dylib; is_lazy; offset; size} -let mach_import_to_goblin libraries segments ~is_lazy:is_lazy (import:bind_information) = - let offset = (List.nth segments import.seg_index).fileoff + import.seg_offset in - let size = if (import.bind_type == MachBindOpcodes.kBIND_TYPE_POINTER) then 8 else 0 in - let libname = libraries.(import.symbol_library_ordinal) in - [ - `Name import.symbol_name; - `Offset offset; - `Kind Goblin.Symbol.Import; - `Size size; - `Lib (libname, libname); - `Flags import.symbol_flags; - `IsLazy is_lazy - ] - let get_imports binary dyld_info libs segments = - let bind_off = dyld_info.MachLoadCommand.bind_off in - let bind_size = dyld_info.MachLoadCommand.bind_size in - let lazy_bind_off = dyld_info.MachLoadCommand.lazy_bind_off in - let lazy_bind_size = dyld_info.MachLoadCommand.lazy_bind_size in + let bind_off = dyld_info.MachLoadCommand.Types.bind_off in + let bind_size = dyld_info.MachLoadCommand.Types.bind_size in + let lazy_bind_off = dyld_info.MachLoadCommand.Types.lazy_bind_off in + let lazy_bind_size = dyld_info.MachLoadCommand.Types.lazy_bind_size in let non_lazy_bytes = Bytes.sub binary bind_off bind_size in let lazy_bytes = Bytes.sub binary lazy_bind_off lazy_bind_size in - let non_lazy_imports = bind_interpreter non_lazy_bytes 0 bind_size false in - let lazy_imports = bind_interpreter lazy_bytes 0 lazy_bind_size true in - let nl = List.map - (mach_import_to_goblin libs segments ~is_lazy:false) - non_lazy_imports |> Goblin.Symbol.sort_symbols in - let la = List.map - (mach_import_to_goblin libs segments ~is_lazy:true) - lazy_imports |> Goblin.Symbol.sort_symbols in - nl,la - -let mach_import_data_to_string (data:mach_import_data) = - Goblin.Symbol.symbol_data_to_string data - -let rec is_lazy = - function - | [] -> raise Not_found - | `IsLazy islazy :: _ -> islazy - | _::remainder -> is_lazy remainder - -let import_name = Goblin.Symbol.find_symbol_name - -let import_lib import = Goblin.Symbol.find_symbol_lib import |> fst - -let print_imports (nlas,las) = - let n1 = List.length nlas in - let n2 = List.length las in - Printf.printf "Imports (%d):\n" @@ (n1 + n2); - Printf.printf " Non-lazy (%d):\n" n1; - List.iter - (fun data -> - Goblin.Symbol.print_symbol_data ~with_lib:true data) nlas; - Printf.printf " Lazy (%d):\n" n2; - List.iter - (fun data -> - Goblin.Symbol.print_symbol_data ~with_lib:true data) las - -let print_imports_deprecated (nlas, las) = - let n1 = Array.length nlas in - let n2 = Array.length las in - Printf.printf "Imports (%d):\n" @@ (n1 + n2); - Printf.printf "Non-lazy (%d):\n" n1; - Array.iteri (fun i bi -> - Printf.printf "%s\n" @@ bind_information_to_string bi) nlas; - Printf.printf "Lazy (%d):\n" n2; - Array.iter (fun bi -> - Printf.printf "%s\n" @@ bind_information_to_string bi) las - -let empty = [],[] - -let find string array = - let len = Array.length array in - let rec loop i = - if (i >= len) then raise Not_found - else - let symbol = import_name array.(i) in - if (string = symbol) then - array.(i) - else - loop (i + 1) - in loop 0 + let non_lazy_bi = bind_interpreter non_lazy_bytes 0 bind_size false in + let lazy_bi = bind_interpreter lazy_bytes 0 lazy_bind_size true in + let nli = List.map (bind_information_to_import libs segments ~is_lazy:false) non_lazy_bi in + let li = List.map (bind_information_to_import libs segments ~is_lazy:true) lazy_bi in + nli@li |> sort +(* non-lazy: extern [const] or specially requested prebound symbols ? *) diff --git a/lib/mach/MachLoadCommand.ml b/lib/mach/MachLoadCommand.ml index 587c01b..c893fa9 100644 --- a/lib/mach/MachLoadCommand.ml +++ b/lib/mach/MachLoadCommand.ml @@ -1,788 +1,199 @@ (* -TODO: - (1) Verify the new load commands: Figure out lazy load of libraries and load weak dylibs +# TODO: + * Figure out lazy load of libraries and load weak dylibs, currently sending them to dylib_command *) -(* -for testing +module Types = MachLoadCommandTypes -#directory "/Users/matthewbarney/projects/binreader/_build/src/utils/";; -#directory "/Users/matthewbarney/projects/binreader/_build/src/mach/";; -#load "Binary.cmo";; -#load "InputUtils.cmo";; -#load "Version.cmo";; -*) - -exception Bad_load_command of int - -type lc = | UUID - | SEGMENT - | SEGMENT_64 - | SYMTAB - | DYSYMTAB - | THREAD - | LOAD_DYLIB - | ID_DYLIB - | PREBOUND_DYLIB - | LOAD_DYLINKER - | ID_DYLINKER - | ROUTINES - | ROUTINES_64 - | TWOLEVEL_HINTS - | SUB_FRAMEWORK - | SUB_UMBRELLA - | SUB_LIBRARY - | SUB_CLIENT - | MAIN - | SYMSEG - | UNIXTHREAD - | VERSION_MIN_IPHONEOS - | VERSION_MIN_MACOSX - | SOURCE_VERSION - (* new *) - | REQ_DYLD - | REEXPORT_DYLIB - | FUNCTION_STARTS - | DATA_IN_CODE - | DYLIB_CODE_SIGN_DRS - | RPATH - | DYLD_INFO_ONLY - | LOAD_UPWARD_DYLIB - | SEGMENT_SPLIT_INFO - | CODE_SIGNATURE - (* newer *) - | LAZY_LOAD_DYLIB - | LC_DYLD_INFO (* not sure what the diff is... luckily this seems very isolated, libsymsea uses it (symantic library) *) - | LC_LOAD_WEAK_DYLIB - -(* - struct segment_command { uint32_t cmd; uint32_t cmdsize; char segname[16]; uint32_t vmaddr; uint32_t vmsize; uint32_t fileoff; uint32_t filesize; vm_prot_t maxprot; vm_prot_t initprot; uint32_t nsects; uint32_t flags; }; - *) - -let lc_to_int = - function - | UUID -> 0x1b - | SEGMENT -> 1 - | SEGMENT_64 -> 0x19 - | SYMTAB -> 0x2 - | DYSYMTAB -> 0xb - | THREAD -> 4 - | LOAD_DYLIB -> 0xc - | ID_DYLIB -> 13 - | PREBOUND_DYLIB -> 16 - | LOAD_DYLINKER -> 0xe - | ID_DYLINKER -> 15 - | ROUTINES -> 17 - | ROUTINES_64 -> 26 - | TWOLEVEL_HINTS -> 22 - | SUB_FRAMEWORK -> 18 - | SUB_UMBRELLA -> 19 - | SUB_LIBRARY -> 21 - | SUB_CLIENT -> 20 - | MAIN -> 0x80000028 - | UNIXTHREAD -> 5 - | SYMSEG -> 3 - | VERSION_MIN_IPHONEOS -> 37 - | VERSION_MIN_MACOSX -> 0x24 - | SOURCE_VERSION -> 0x2A - (* new *) - | REEXPORT_DYLIB -> 0x8000001f - | REQ_DYLD -> 0x80000000 - | FUNCTION_STARTS -> 0x26 - | DATA_IN_CODE -> 0x29 - | DYLIB_CODE_SIGN_DRS -> 0x2B - | RPATH -> 0x8000001c - | DYLD_INFO_ONLY -> 0x80000022 - | LOAD_UPWARD_DYLIB -> 0x80000023 - | SEGMENT_SPLIT_INFO -> 0x1E - | CODE_SIGNATURE -> 0x1D - (* really new *) - | LAZY_LOAD_DYLIB -> 0x20 - | LC_DYLD_INFO -> 0x22 - | LC_LOAD_WEAK_DYLIB -> 0x80000018 - -let lookup_lc = - function - | 0x1b -> UUID - | 1 -> SEGMENT - | 0x19 -> SEGMENT_64 - | 0x2 -> SYMTAB - | 0xb -> DYSYMTAB - | 4 -> THREAD - | 0xc -> LOAD_DYLIB - | 13 -> ID_DYLIB - | 16 -> PREBOUND_DYLIB - | 0xe -> LOAD_DYLINKER - | 15 -> ID_DYLINKER - | 17 -> ROUTINES - | 26 -> ROUTINES_64 - | 22 -> TWOLEVEL_HINTS - | 18 -> SUB_FRAMEWORK - | 19 -> SUB_UMBRELLA - | 21 -> SUB_LIBRARY - | 20 -> SUB_CLIENT - | 0x80000028 -> MAIN (* 0x28 | LC_REQ_DYLD *) - | 5 -> UNIXTHREAD (* this is the same as TIME? *) - | 3 -> SYMSEG - | 37 -> VERSION_MIN_IPHONEOS - | 0x24 -> VERSION_MIN_MACOSX - | 0x2A -> SOURCE_VERSION - (* new *) - | 0x8000001f -> REEXPORT_DYLIB - | 0x80000000 -> REQ_DYLD - | 0x26 -> FUNCTION_STARTS - | 0x29 -> DATA_IN_CODE - | 0x2B -> DYLIB_CODE_SIGN_DRS - | 0x8000001c -> RPATH - | 0x80000022 -> DYLD_INFO_ONLY - | 0x80000023 -> LOAD_UPWARD_DYLIB - | 0x1E -> SEGMENT_SPLIT_INFO - | 0x1D -> CODE_SIGNATURE - (* really new *) - | 0x20 -> LAZY_LOAD_DYLIB - | 0x22 -> LC_DYLD_INFO - | 0x80000018 -> LC_LOAD_WEAK_DYLIB - | i -> raise @@ Bad_load_command i - -let lc_to_string = - function - | UUID -> "UUID" - | SEGMENT -> "SEGMENT" - | SEGMENT_64 -> "SEGMENT_64" - | SYMTAB -> "SYMTAB" - | DYSYMTAB -> "DYSYMTAB" - | THREAD -> "THREAD" - | LOAD_DYLIB -> "LOAD_DYLIB" - | ID_DYLIB -> "ID_DYLIB" - | PREBOUND_DYLIB -> "PREBOUND_DYLIB" - | LOAD_DYLINKER -> "LOAD_DYLINKER" - | ID_DYLINKER -> "ID_DYLINKER" - | ROUTINES -> "ROUTINES" - | ROUTINES_64 -> "ROUTINES_64" - | TWOLEVEL_HINTS -> "TWOLEVEL_HINTS" - | SUB_FRAMEWORK -> "SUB_FRAMEWORK" - | SUB_UMBRELLA -> "SUB_UMBRELLA" - | SUB_LIBRARY -> "SUB_LIBRARY" - | SUB_CLIENT -> "SUB_CLIENT" - | MAIN -> "MAIN" - | UNIXTHREAD -> "UNIXTHREAD" - | SYMSEG -> "SYMSEG" - | VERSION_MIN_IPHONEOS -> "VERSION_MIN_IPHONEOS" - | VERSION_MIN_MACOSX -> "VERSION_MIN_MACOSX" - | SOURCE_VERSION -> "SOURCE_VERSION" - (* new *) - | REEXPORT_DYLIB -> "REEXPORT_DYLIB" - | REQ_DYLD -> "REQ_DYLD" - | FUNCTION_STARTS -> "FUNCTION_STARTS" - | DATA_IN_CODE -> "DATA_IN_CODE" - | DYLIB_CODE_SIGN_DRS -> "DYLIB_CODE_SIGN_DRS" - | RPATH -> "RPATH" - | DYLD_INFO_ONLY -> "DYLD_INFO_ONLY" - | LOAD_UPWARD_DYLIB -> "LOAD_UPWARD_DYLIB" - | SEGMENT_SPLIT_INFO -> "SEGMENT_SPLIT_INFO" - | CODE_SIGNATURE -> "CODE_SIGNATURE" - (* really new *) - | LAZY_LOAD_DYLIB -> "LAZY_LOAD_DYLIB" - | LC_DYLD_INFO -> "LC_DYLD_INFO" - | LC_LOAD_WEAK_DYLIB -> "LC_LOAD_WEAK_DYLIB" - -(* ========================== *) -(* Specific Load Command Structures *) -(* ========================== *) +open MachLoadCommandTypes +open MachLoadCommandMacro -type section = { - sectname: string; (* 16 bytes *) - segname: string; (* 16 bytes *) - (* 4 bytes each *) - addr: int; - size: int; - offset: int; - align: int; - reloff: int; - nreloc: int; - flags: int; - reserved1: int; - reserved2: int; - } - -let sizeof_section = 68 (* bytes *) - -type section_64 = { - sectname: string; (* 16 bytes *) - segname: string; (* 16 bytes *) - addr: int; (* 8 bytes *) - size: int; (* 8 bytes *) - (* 4 bytes each *) - offset: int; - align: int; - reloff: int; - nreloc: int; - flags: int; - reserved1: int; - reserved2: int; - reserved3: int; - } - -let sizeof_section_64 = 80 (* bytes *) +type t = lc list let sections64_to_string sections = - let b = Buffer.create ((Array.length sections) * 32) in + let b = Buffer.create ((List.length sections) * 32) in let indent = "\t\t" in - Array.iteri (fun i section -> - Printf.sprintf "\n%s(%2d) %s addr: 0x%x size: 0x%x\n%soffset: 0x%x align: %d reloff: 0x%x nreloc: 0x%x\n%sflags: 0x%x r1: 0x%x r2: 0x%x r3: 0x%x\n" - indent i section.sectname section.addr section.size - indent section.offset section.align section.reloff section.nreloc - indent section.flags section.reserved1 section.reserved2 section.reserved3 - |> Buffer.add_string b - ) sections; + List.iteri (fun i section -> + Printf.sprintf "\n%s(%2d) %s addr: 0x%x size: 0x%x\n%soffset: 0x%x align: %d reloff: 0x%x nreloc: 0x%x\n%sflags: 0x%x r1: 0x%x r2: 0x%x r3: 0x%x\n" + indent i section.sectname section.addr section.size + indent section.offset section.align section.reloff section.nreloc + indent section.flags section.reserved1 section.reserved2 section.reserved3 + |> Buffer.add_string b + ) sections; Buffer.contents b -type segment_command = { - segname: string; - vmaddr: int; - vmsize: int; - fileoff: int; - filesize: int; - maxprot: int; - initprot: int; - nsects: int; - flags: int; - sections: section array; - } - -let sizeof_segment_command_64 = 48 (* 56 - 8 *) - -type segment_command_64 = { - segname: string; (* 16 bytes *) - vmaddr: int; (* 8 bytes *) - vmsize: int; (* 8 bytes *) - fileoff: int; (* 8 bytes *) - filesize: int; (* 8 bytes *) - maxprot: int; (* 4 int *) - initprot: int; (* 4 int *) - nsects: int; (* 4 *) - flags: int; (* 4 *) - sections: section_64 array; (* extra *) -} - -let sizeof_segment_command_64 = 64 (* 72 - 8 *) - -(* - * This is the second set of the symbolic information which is used to support - * the data structures for the dynamically link editor. - * - * The original set of symbolic information in the symtab_command which contains - * the symbol and string tables must also be present when this load command is - * present. When this load command is present the symbol table is organized - * into three groups of symbols: - * local symbols (static and debugging symbols) - grouped by module - * defined external symbols - grouped by module (sorted by name if not lib) - * undefined external symbols (sorted by name if MH_BINDATLOAD is not set, - * and in order the were seen by the static - * linker if MH_BINDATLOAD is set) - * In this load command there are offsets and counts to each of the three groups - * of symbols. - * - * This load command contains a the offsets and sizes of the following new - * symbolic information tables: - * table of contents - * module table - * reference symbol table - * indirect symbol table - * The first three tables above (the table of contents, module table and - * reference symbol table) are only present if the file is a dynamically linked - * shared library. For executable and object modules, which are files - * containing only one module, the information that would be in these three - * tables is determined as follows: - * table of contents - the defined external symbols are sorted by name - * module table - the file contains only one module so everything in the - * file is part of the module. - * reference symbol table - is the defined and undefined external symbols - * - * For dynamically linked shared library files this load command also contains - * offsets and sizes to the pool of relocation entries for all sections - * separated into two groups: - * external relocation entries - * local relocation entries - * For executable and object modules the relocation entries continue to hang - * off the section structures. - *) -type dysymtab_command = { - ilocalsym: int; (* index to local symbols *) - nlocalsym: int; (* number of local symbols *) - - iextdefsym: int; (* index to externally defined symbols *) - nextdefsym: int; (* number of externally defined symbols *) - - iundefsym: int; (* index to undefined symbols *) - nundefsym: int; (* number of undefined symbols *) - - tocoff: int; (* file offset to table of contents *) - ntoc: int; (* number of entries in table of contents *) - modtaboff: int; (* file offset to module table *) - nmodtab: int; (* number of module table entries *) - - extrefsymoff: int; (* offset to referenced symbol table *) - nextrefsyms: int; (* number of referenced symbol table entries *) - - indirectsymoff: int; (* file offset to the indirect symbol table *) - nindirectsyms: int; (* number of indirect symbol table entries *) - extreloff: int; (* offset to external relocation entries *) - nextrel: int; (* number of external relocation entries *) - locreloff: int; (* offset to local relocation entries *) - nlocrel: int; (* number of local relocation entries *) -} - -let sizeof_dysymtab_command = 72 (* bytes *) - -type symtab_command = { - symoff: int; - nsyms: int; - stroff: int; - strsize: int; -} - -(* -* - * Dynamicly linked shared libraries are identified by two things. The - * pathname (the name of the library as found for execution), and the - * compatibility version number. The pathname must match and the compatibility - * number in the user of the library must be greater than or equal to the - * library being used. The time stamp is used to record the time a library was - * built and copied into user so it can be use to determined if the library used - * at runtime is exactly the same as used to built the program. - * -struct dylib { - union lc_str name; /* library's path name */ - uint32_t timestamp; /* library's build time stamp */ - uint32_t current_version; /* library's current version number */ - uint32_t compatibility_version; /* library's compatibility vers number*/ -}; - *) - -(* - * - * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) - * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. - * An object that uses a dynamically linked shared library also contains a - * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or - * LC_REEXPORT_DYLIB) for each library it uses. - * -struct dylib_command { -uint32_t cmd; /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB, -LC_REEXPORT_DYLIB */ -uint32_t cmdsize; /* includes pathname string */ -struct dylib dylib; /* the library identification */ -}; - *) - -(* so we just use the dylib struct as the dylib command *) -type dylib_command = - { lc_str : string (* offset to zero-terminated string *); (* __LP64__ mentioned, which makes this a char * pointer, but doesn't seem present in 64 bit binaries... *) - timestamp: int; - current_version: int; - compatibility_version: int; - } - -let sizeof_dylib_command = 16 (*bytes*) - -(* -struct dyld_info_command { - uint32_t cmd; /* LC_DYLD_INFO or LC_DYLD_INFO_ONLY */ - uint32_t cmdsize; /* sizeof(struct dyld_info_command) */ - uint32_t rebase_off; /* file offset to rebase info */ - uint32_t rebase_size; /* size of rebase info */ - uint32_t bind_off; /* file offset to binding info */ - uint32_t bind_size; /* size of binding info */ - uint32_t weak_bind_off; /* file offset to weak binding info */ - uint32_t weak_bind_size; /* size of weak binding info */ - uint32_t lazy_bind_off; /* file offset to lazy binding info */ - uint32_t lazy_bind_size; /* size of lazy binding infs */ - uint32_t export_off; /* file offset to lazy binding info */ - uint32_t export_size; /* size of lazy binding infs */ -}; - *) - -type dyld_info_command = - { - rebase_off: int; - rebase_size: int; - bind_off: int; - bind_size: int; - weak_bind_off: int; - weak_bind_size: int; - lazy_bind_off: int; - lazy_bind_size: int; - export_off: int; - export_size: int; - } - -let sizeof_dylib_info = 40 - -(* - * The version_min_command contains the min OS version on which this - * binary was built to run. - * - * LC_VERSION_MIN_MACOSX or LC_VERSION_MIN_IPHONEOS *) -type version_min_command = { - version: int; (* X.Y.Z is encoded in nibbles xxxx.yy.zz *) - sdk: int; (* X.Y.Z is encoded in nibbles xxxx.yy.zz *) -} - -let sizeof_version_min_command = 8 (* bytes *) - -(* - * The entry_point_command is a replacement for thread_command. - * It is used for main executables to specify the location (file offset) - * of main(). If -stack_size was used at link time, the stacksize - * field will contain the stack size need for the main thread. - *) -type entry_point_command = { - entryoff: int; (* uint64_t file (__TEXT) offset of main() *) - stacksize: int ;(* uint64_t if not zero, initial stack size *) -} - -let sizeof_entry_point_command = 16 (* bytes *) - -type id_dylib_command = dylib_command - -type load_command_header = { - cmd: lc; - cmdsize: int; -} - -let sizeof_load_command = 8 - -type dylinker = { - lc_str : string (* offset to zero-terminated string *); - } - -type lc_t = - | SEGMENT_64 of segment_command_64 - | DYLINKER of dylinker - | DYLIB of dylib_command - | SYMTAB of symtab_command - | DYLD_INFO of dyld_info_command - | DYSYMTAB of dysymtab_command - | VERSION of version_min_command - | ENTRY_POINT of entry_point_command - | Unimplemented of bytes - -(* add printing mechanisms here *) -let lc_t_to_string = - function - | SEGMENT_64 segment -> +(* add implemented printing mechanisms here *) +let load_command_to_string lc = + Printf.sprintf "%s (0x%x) %d %s" (cmd_to_string lc.cmd) (cmd_to_int lc.cmd) lc.cmdsize @@ + match lc.t with + | LC_SEGMENT_64 lc -> Printf.sprintf "\n\t%s vmaddr: 0x%x vmsize: 0x%x\n\tfileoff: 0x%x filesize: 0x%x\n\tmaxprot: %d initprot: %d nsects: %d flags: 0x%x\n%s" - segment.segname segment.vmaddr segment.vmsize - segment.fileoff segment.filesize segment.maxprot - segment.initprot segment.nsects segment.flags - (sections64_to_string segment.sections) - - | SYMTAB symtab -> + lc.segname lc.vmaddr lc.vmsize + lc.fileoff lc.filesize lc.maxprot + lc.initprot lc.nsects lc.flags + (sections64_to_string lc.sections) + + | LC_SYMTAB lc -> Printf.sprintf "\n\tsymoff: 0x%x nsyms: %u stroff: 0x%x strsize: %u" - symtab.symoff - symtab.nsyms - symtab.stroff - symtab.strsize + lc.symoff + lc.nsyms + lc.stroff + lc.strsize - | DYSYMTAB dysymtab -> + | LC_DYSYMTAB lc -> Printf.sprintf "\n\tilocalsym: 0x%x nlocalsym: %d iextdefsym: 0x%x nextdefsym: %d\n\tiundefsym: 0x%x nundefsym: %d tocoff: 0x%x ntoc: %d\n\tmodtaboff: 0x%x nmodtab: %d extrefsymoff: 0x%x nextrefsyms: %d\n\tindirectsymoff: 0x%x nindirectsyms: %d extreloff: 0x%x nextrel: %d\n\tlocreloff: 0x%x nlocrel: %d" - dysymtab.ilocalsym - dysymtab.nlocalsym - dysymtab.iextdefsym - dysymtab.nextdefsym - dysymtab.iundefsym - dysymtab.nundefsym - dysymtab.tocoff - dysymtab.ntoc - dysymtab.modtaboff - dysymtab.nmodtab - dysymtab.extrefsymoff - dysymtab.nextrefsyms - dysymtab.indirectsymoff - dysymtab.nindirectsyms - dysymtab.extreloff - dysymtab.nextrel - dysymtab.locreloff - dysymtab.nlocrel - - | DYLD_INFO dyld_info -> + lc.ilocalsym + lc.nlocalsym + lc.iextdefsym + lc.nextdefsym + lc.iundefsym + lc.nundefsym + lc.tocoff + lc.ntoc + lc.modtaboff + lc.nmodtab + lc.extrefsymoff + lc.nextrefsyms + lc.indirectsymoff + lc.nindirectsyms + lc.extreloff + lc.nextrel + lc.locreloff + lc.nlocrel + + | LC_DYLD_INFO_ONLY lc + | LC_DYLD_INFO lc -> Printf.sprintf "\n\trebase_off: 0x%x rebase_size: %u \n\tbind_off: 0x%x bind_size: %u \n\tweak_bind_off: 0x%x weak_bind_size: %u \n\tlazy_bind_off: 0x%x lazy_bind_size: %u \n\texport_off: 0x%x export_size: %u" - dyld_info.rebase_off - dyld_info.rebase_size - dyld_info.bind_off - dyld_info.bind_size - dyld_info.weak_bind_off - dyld_info.weak_bind_size - dyld_info.lazy_bind_off - dyld_info.lazy_bind_size - dyld_info.export_off - dyld_info.export_size - | DYLINKER dylinker -> + lc.rebase_off + lc.rebase_size + lc.bind_off + lc.bind_size + lc.weak_bind_off + lc.weak_bind_size + lc.lazy_bind_off + lc.lazy_bind_size + lc.export_off + lc.export_size + + | LC_LOAD_DYLINKER lc + | LC_ID_DYLINKER lc -> Printf.sprintf "\n\t%s" - dylinker.lc_str - | DYLIB load_dylib -> - Printf.sprintf "\n\t%s" - load_dylib.lc_str - (* verbose version *) - (* Printf.sprintf "\n\tlc_str: %s timestamp: %u current_version: %u compatibility_version: %u" - load_dylib.lc_str - load_dylib.timestamp - load_dylib.current_version - load_dylib.compatibility_version - *) - | VERSION version -> - Printf.sprintf "\n\tversion: %s sdk: %s" (MachVersion.version_to_string version.version) (MachVersion.version_to_string version.sdk) - | ENTRY_POINT ep -> - Printf.sprintf "\n\toffset: 0x%x stacksize: 0x%x" ep.entryoff ep.stacksize - | _ -> - Printf.sprintf "" - -let load_command_to_string (cmd, cmdsize, lc_t) = - Printf.sprintf "%s (0x%x) %d %s" (lc_to_string cmd) (lc_to_int cmd) cmdsize (lc_t_to_string lc_t) + lc.name.str + + | LC_ID_DYLIB lc + | LC_LOAD_UPWARD_DYLIB lc + | LC_LAZY_LOAD_DYLIB lc + | LC_LOAD_WEAK_DYLIB lc + | LC_LOAD_DYLIB lc + | LC_REEXPORT_DYLIB lc -> + (* short version + Printf.sprintf "\n\t%s" + lc.dylib.name.str + *) + Printf.sprintf "\n\tname: %s timestamp: %u current_version: %u compatibility_version: %u" + lc.dylib.name.str + lc.dylib.timestamp + lc.dylib.current_version + lc.dylib.compatibility_version + + | LC_VERSION_MIN_IPHONEOS lc + | LC_VERSION_MIN_MACOSX lc -> + Printf.sprintf "\n\tversion: %s sdk: %s" (MachVersion.version_to_string lc.version) (MachVersion.version_to_string lc.sdk) + + | LC_MAIN lc -> + Printf.sprintf "\n\toffset: 0x%x stacksize: 0x%x" lc.entryoff lc.stacksize + + | lc -> + "" let print_load_command lc = Printf.printf "%s\n" (load_command_to_string lc) -let print_load_commands lcs = +let print_load_commands lcs = List.iteri (fun i lc -> Printf.printf "(%2d): " i; print_load_command lc) lcs; print_string "\n" -let get_load_command_header binary offset = - let cmd = lookup_lc @@ Binary.u32 binary offset in - let cmdsize = Binary.u32 binary (offset + 4) in - cmd,cmdsize +let get_load_command_header binary offset = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + cmd,cmdsize,o -let get_data binary offset size = +let get_load_command_raw binary offset size = Bytes.sub binary offset size -(* specific load command constructors *) -let get_symtable binary = - let symoff = Binary.u32 binary 0 in - let nsyms = Binary.u32 binary 4 in - let stroff = Binary.u32 binary 8 in - let strsize = Binary.u32 binary 12 in - {symoff; nsyms; stroff; strsize;} - -let get_dysymtable binary = - let ilocalsym = Binary.u32 binary 0 in - let nlocalsym = Binary.u32 binary 4 in - - let iextdefsym = Binary.u32 binary 8 in - let nextdefsym = Binary.u32 binary 12 in - - let iundefsym = Binary.u32 binary 16 in - let nundefsym = Binary.u32 binary 20 in - - let tocoff = Binary.u32 binary 24 in - let ntoc = Binary.u32 binary 28 in - let modtaboff = Binary.u32 binary 32 in - let nmodtab = Binary.u32 binary 36 in - - let extrefsymoff = Binary.u32 binary 40 in - let nextrefsyms = Binary.u32 binary 44 in - - let indirectsymoff = Binary.u32 binary 48 in - let nindirectsyms = Binary.u32 binary 52 in - let extreloff = Binary.u32 binary 56 in - let nextrel = Binary.u32 binary 60 in - let locreloff = Binary.u32 binary 64 in - let nlocrel = Binary.u32 binary 68 in - - { - ilocalsym; nlocalsym; iextdefsym; nextdefsym; - iundefsym; nundefsym; tocoff; ntoc; modtaboff; nmodtab; - extrefsymoff; nextrefsyms; - indirectsymoff; nindirectsyms; extreloff; nextrel; locreloff; nlocrel; - } - -let get_dyld_info binary = - let rebase_off = Binary.u32 binary 0 in - let rebase_size = Binary.u32 binary 4 in - let bind_off = Binary.u32 binary 8 in - let bind_size = Binary.u32 binary 12 in - let weak_bind_off = Binary.u32 binary 16 in - let weak_bind_size = Binary.u32 binary 20 in - let lazy_bind_off = Binary.u32 binary 24 in - let lazy_bind_size = Binary.u32 binary 28 in - let export_off = Binary.u32 binary 32 in - let export_size = Binary.u32 binary 36 in - {rebase_off; rebase_size; bind_off; bind_size; weak_bind_off; weak_bind_size; lazy_bind_off; lazy_bind_size; export_off; export_size;} - -(* consider glomming all libs into single binary with 0...N load commands and library indexed at 1... *) -let get_dylib binary = - let lc_str_offset = Binary.u32 binary 0 in - let timestamp = Binary.u32 binary 4 in - let current_version = Binary.u32 binary 8 in - let compatibility_version = Binary.u32 binary 12 in - let lc_str = Binary.string binary (lc_str_offset - sizeof_load_command) in (* technically should use the lc_str_offset but need (lc_str_offset - sizeof_load_command) because offset from start of the load_command and we chopped off the first 8 bytes of the lc*) - {lc_str; timestamp; current_version; compatibility_version;} +let get_segment_by_name segname segments = + try Some (List.find (fun segment -> segment.segname = segname) segments) + with Not_found -> None -(* version for osx and ios *) -let get_version binary = - let version = Binary.u32 binary 0 in - let sdk = Binary.u32 binary 4 in - {version; sdk;} - -let get_main binary = - let entryoff = Binary.u64 binary 0 in - let stacksize = Binary.u64 binary 8 in - {entryoff; stacksize;} - -let get_dylinker binary = - let lc_str_offset = Binary.u32 binary 0 in - let lc_str = Binary.string binary (lc_str_offset - sizeof_load_command) in - {lc_str;} - -let get_section64 binary offset = - (* Printf.printf "initial o: %d\n" o; *) - let sectname = Binary.string binary offset ~maxlen:(15+offset) in - (* Printf.printf "sectname: %s o: %d\n" sectname o; *) - let segname = Binary.string binary (16+offset) ~maxlen:(15+16+offset) in - (* Printf.printf "segname: %s o: %d\n" segname o; *) - let addr,o = Binary.u64o binary (offset+32) in - let size,o = Binary.u64o binary o in - let offset,o = Binary.u32o binary o in - let align,o = Binary.u32o binary o in - let reloff,o = Binary.u32o binary o in - let nreloc,o = Binary.u32o binary o in - let flags,o = Binary.u32o binary o in - let reserved1,o = Binary.u32o binary o in - let reserved2,o = Binary.u32o binary o in - let reserved3,o = Binary.u32o binary o in - {sectname; segname; addr; size; offset; align; reloff; nreloc; flags; reserved1; reserved2; reserved3;} - -let get_sections64 binary nsects offset = - let rec loop count acc = - if (count >= nsects) then - List.rev acc |> Array.of_list - else - let section = get_section64 binary ((sizeof_section_64*count)+offset) in - loop (count+1) (section::acc) - in loop 0 [] - -let get_segment64 binary = - let segname = Binary.string binary 0 ~maxlen:15 in - let vmaddr = Binary.u64 binary 16 in - let vmsize = Binary.u64 binary 24 in - let fileoff = Binary.u64 binary 32 in - let filesize = Binary.u64 binary 40 in - let maxprot = Binary.u32 binary 48 in - let initprot = Binary.u32 binary 52 in - let nsects = Binary.u32 binary 56 in - let flags = Binary.u32 binary 60 in - let sections = get_sections64 binary nsects 64 in - {segname; vmaddr; vmsize; fileoff; filesize; maxprot; initprot; nsects; flags; sections;} - -(* type of load commands; may need to change *) -type t = (lc * int * lc_t) list - -let rec get_load_commands_it binary offset ncmds acc = +let rec get_load_commands_it binary offset ncmds acc = if (ncmds <= 0) then List.rev acc else - let cmd,cmdsize = get_load_command_header binary offset in - let bytes = Bytes.sub binary (offset + sizeof_load_command) (cmdsize - sizeof_load_command) in - let lc_t = - match cmd with - | SEGMENT_64 -> - SEGMENT_64 (get_segment64 bytes) - | LOAD_DYLINKER -> - DYLINKER (get_dylinker bytes) - | SYMTAB -> - SYMTAB (get_symtable bytes) - | DYSYMTAB -> - DYSYMTAB (get_dysymtable bytes) - | DYLD_INFO_ONLY | LC_DYLD_INFO -> - DYLD_INFO (get_dyld_info bytes) - | LOAD_DYLIB | REEXPORT_DYLIB | ID_DYLIB | LOAD_UPWARD_DYLIB | LAZY_LOAD_DYLIB | LC_LOAD_WEAK_DYLIB -> - DYLIB (get_dylib bytes) - | VERSION_MIN_MACOSX | VERSION_MIN_IPHONEOS -> - VERSION (get_version bytes) - | MAIN -> - ENTRY_POINT (get_main bytes) - | _ -> - Unimplemented (get_data binary (offset + sizeof_load_command) (cmdsize - sizeof_load_command)) in - let load_command = (cmd, cmdsize, lc_t) in + let cmdi,cmdsize,_ = get_load_command_header binary offset in + let cmd = cmdi |> to_cmd in + let bytes = binary in + let t = get_t cmd bytes offset in + let load_command = {cmd; cmdsize; t} in get_load_commands_it binary (offset + cmdsize) (ncmds - 1) (load_command::acc) -let get_load_commands binary offset ncmds sizeofcmds = - let load_command_bytes = Bytes.sub binary offset sizeofcmds in - get_load_commands_it load_command_bytes 0 ncmds [] +let get_load_commands binary offset ncmds sizeofcmds : t = + get_load_commands_it binary offset ncmds [] -exception Missing_load_command of string - -(* lc, cmdsize, and lc_t *) -let rec find_load_command lc lcs = +let rec get_load_command cmd (lcs:Types.lc list) :Types.lc_t option = match lcs with - | [] -> raise @@ Missing_load_command (lc_to_string lc) - | ((cmd, _, _) as lc')::lcs -> - if (lc == cmd) then lc' + | [] -> None + | lc::lcs -> + if (lc.cmd = cmd) then + Some lc.t else - find_load_command lc lcs + get_load_command cmd lcs let get_segments lcs = - let rec loop lcs acc = - match lcs with - | [] -> List.rev acc - | (cmd, _, (SEGMENT_64 segment))::lcs -> - loop lcs (segment::acc) - | lc::lcs -> loop lcs acc - in loop lcs [] + List.fold_left (fun acc lc -> + match lc.t with + | LC_SEGMENT_64 segment -> + segment::acc + | _ -> + acc + ) [] lcs |> List.rev let rec get_dyld_info lcs = match lcs with | [] -> None - | (cmd,cmdsize, DYLD_INFO dyldinfo)::lcs -> - if (cmd = DYLD_INFO_ONLY || cmd = LC_DYLD_INFO) then Some dyldinfo - else + | lc::lcs -> + match lc.t with + | LC_DYLD_INFO lc + | LC_DYLD_INFO_ONLY lc -> + Some lc + | _ -> get_dyld_info lcs - | lc::lcs -> get_dyld_info lcs let rec get_lib_name lcs = match lcs with - | [] -> None - | (ID_DYLIB, _, DYLIB id)::lcs -> - Some id - | lc::lcs -> get_lib_name lcs - -let get_libraries lcs = - let rec loop lcs acc = - match lcs with - | [] -> - Array.of_list @@ List.rev acc - | ((cmd, _, _) as lc)::lcs -> - if (cmd = LOAD_DYLIB) then loop lcs (lc::acc) - else - loop lcs acc - in loop lcs [] + | [] -> "" + | lc::lcs -> + match lc.t with + | LC_ID_DYLIB lc -> + lc.dylib.name.str + | _ -> get_lib_name lcs let print_libraries libs = if ((Array.length libs) <> 0) then begin Printf.printf "Libraries (%d)\n" @@ (Array.length libs - 1); Array.iteri (fun i lib -> - if (i <> 0) then - Printf.printf "%s\n" lib) libs + if (i <> 0) then + Printf.printf "%s\n" lib) libs end -let get_libraries lcs self = - let rec loop lcs acc = - match lcs with - | [] -> - Array.of_list @@ List.rev acc - | (cmd, _, DYLIB dylib)::lcs -> - begin - match cmd with - | LOAD_DYLIB | REEXPORT_DYLIB | LOAD_UPWARD_DYLIB | LAZY_LOAD_DYLIB | LC_LOAD_WEAK_DYLIB -> - loop lcs (dylib.lc_str::acc) - | _ -> - loop lcs acc - end - | _::lcs -> - loop lcs acc - in loop lcs [self] - -let cmd (cmd', _, _) = cmd' -let cmdsize (_, cmdsize', _) = cmdsize' -let lc_t (_, _, lc_t') = lc_t' +let get_libraries lcs self = + List.fold_left (fun acc lc -> + match lc.t with + | LC_LOAD_DYLIB lc + | LC_REEXPORT_DYLIB lc + | LC_LOAD_UPWARD_DYLIB lc + | LC_LAZY_LOAD_DYLIB lc + | LC_LOAD_WEAK_DYLIB lc -> + lc.dylib.name.str::acc + | _ -> acc + ) [self] lcs |> List.rev |> Array.of_list diff --git a/lib/mach/MachLoadCommandMacro.ml b/lib/mach/MachLoadCommandMacro.ml new file mode 100644 index 0000000..ffb1fba --- /dev/null +++ b/lib/mach/MachLoadCommandMacro.ml @@ -0,0 +1,496 @@ +open MachLoadCommandTypes +open Binary + +let lc_count = 43 + +let get_load_command binary offset :load_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize = Binary.u32 binary o in + {cmd;cmdsize;} + + +let get_section binary offset :section = + let sectname,o = Binary.stringo binary offset ~num_bytes:16 in + let segname,o = Binary.stringo binary o ~num_bytes:16 in + let addr,o = Binary.u32o binary o in + let size,o = Binary.u32o binary o in + let offset,o = Binary.u32o binary o in + let align,o = Binary.u32o binary o in + let reloff,o = Binary.u32o binary o in + let nreloc,o = Binary.u32o binary o in + let flags,o = Binary.u32o binary o in + let reserved1,o = Binary.u32o binary o in + let reserved2 = Binary.u32 binary o in + {sectname;segname;addr;size;offset;align;reloff;nreloc;flags;reserved1;reserved2;} + +let get_sections binary nsects offset = + let rec loop count acc = + if (count >= nsects) then + List.rev acc + else + let section = get_section binary ((sizeof_section*count)+offset) in + loop (count+1) (section::acc) + in loop 0 [] + +let get_section_64 binary offset :section_64 = + let sectname,o = Binary.stringo binary offset ~num_bytes:16 in + let segname,o = Binary.stringo binary o ~num_bytes:16 in + let addr,o = Binary.u64o binary o in + let size,o = Binary.u64o binary o in + let offset,o = Binary.u32o binary o in + let align,o = Binary.u32o binary o in + let reloff,o = Binary.u32o binary o in + let nreloc,o = Binary.u32o binary o in + let flags,o = Binary.u32o binary o in + let reserved1,o = Binary.u32o binary o in + let reserved2,o = Binary.u32o binary o in + let reserved3 = Binary.u32 binary o in + {sectname;segname;addr;size;offset;align;reloff;nreloc;flags;reserved1;reserved2;reserved3;} + +let get_sections_64 binary nsects offset = + let rec loop count acc = + if (count >= nsects) then + List.rev acc + else + let section = get_section_64 binary ((sizeof_section_64*count)+offset) in + loop (count+1) (section::acc) + in loop 0 [] + +let get_segment_command binary offset :segment_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let segname,o = Binary.stringo binary o ~num_bytes:16 in + let vmaddr,o = Binary.u32o binary o in + let vmsize,o = Binary.u32o binary o in + let fileoff,o = Binary.u32o binary o in + let filesize,o = Binary.u32o binary o in + let maxprot,o = Binary.u32o binary o in + let initprot,o = Binary.u32o binary o in + let nsects,o = Binary.u32o binary o in + let flags,o = Binary.u32o binary o in + let sections = get_sections binary nsects o in + {cmd;cmdsize;segname;vmaddr;vmsize;fileoff;filesize;maxprot;initprot;nsects;flags;sections;} + +let get_segment_command_64 binary offset :segment_command_64 = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let segname,o = Binary.stringo binary o ~num_bytes:16 in + let vmaddr,o = Binary.u64o binary o in + let vmsize,o = Binary.u64o binary o in + let fileoff,o = Binary.u64o binary o in + let filesize,o = Binary.u64o binary o in + let maxprot,o = Binary.u32o binary o in + let initprot,o = Binary.u32o binary o in + let nsects,o = Binary.u32o binary o in + let flags,o = Binary.u32o binary o in + let sections = get_sections_64 binary nsects o in + {cmd;cmdsize;segname;vmaddr;vmsize;fileoff;filesize;maxprot;initprot;nsects;flags;sections;} + +let get_fvmlib binary offset o :fvmlib = + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let name = {offset; str} in + let minor_version,o = Binary.u32o binary o in + let header_addr = Binary.u32 binary o in + {name;minor_version;header_addr;} + +let get_fvmlib_command binary offset :fvmlib_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let fvmlib = get_fvmlib binary offset o in + {cmd;cmdsize;fvmlib;} + +let get_dylib binary offset o :dylib = + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let name = {offset; str} in + let timestamp,o = Binary.u32o binary o in + let current_version,o = Binary.u32o binary o in + let compatibility_version = Binary.u32 binary o in + {name;timestamp;current_version;compatibility_version;} + +let get_dylib_command binary offset :dylib_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let dylib = get_dylib binary offset o in + {cmd;cmdsize;dylib;} + +let get_sub_framework_command binary offset :sub_framework_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let umbrella = {offset; str} in + {cmd;cmdsize;umbrella;} + +let get_sub_client_command binary offset :sub_client_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let client = {offset; str} in + {cmd;cmdsize;client;} + +let get_sub_umbrella_command binary offset :sub_umbrella_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let sub_umbrella = {offset; str} in + {cmd;cmdsize;sub_umbrella;} + +let get_sub_library_command binary offset :sub_library_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let sub_library = {offset; str} in + {cmd;cmdsize;sub_library;} + +let get_prebound_dylib_command binary offset :prebound_dylib_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let name = {offset; str} in + let nmodules,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let linked_modules = {offset; str} in + {cmd;cmdsize;name;nmodules;linked_modules;} + +let get_dylinker_command binary offset :dylinker_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let name = {offset; str} in + {cmd;cmdsize;name;} + +let get_thread_command binary offset :thread_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize = Binary.u32 binary o in + {cmd;cmdsize;} + +let get_routines_command binary offset :routines_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let init_address,o = Binary.u32o binary o in + let init_module,o = Binary.u32o binary o in + let reserved1,o = Binary.u32o binary o in + let reserved2,o = Binary.u32o binary o in + let reserved3,o = Binary.u32o binary o in + let reserved4,o = Binary.u32o binary o in + let reserved5,o = Binary.u32o binary o in + let reserved6 = Binary.u32 binary o in + {cmd;cmdsize;init_address;init_module;reserved1;reserved2;reserved3;reserved4;reserved5;reserved6;} + +let get_routines_command_64 binary offset :routines_command_64 = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let init_address,o = Binary.u64o binary o in + let init_module,o = Binary.u64o binary o in + let reserved1,o = Binary.u64o binary o in + let reserved2,o = Binary.u64o binary o in + let reserved3,o = Binary.u64o binary o in + let reserved4,o = Binary.u64o binary o in + let reserved5,o = Binary.u64o binary o in + let reserved6 = Binary.u64 binary o in + {cmd;cmdsize;init_address;init_module;reserved1;reserved2;reserved3;reserved4;reserved5;reserved6;} + +let get_symtab_command binary offset :symtab_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let symoff,o = Binary.u32o binary o in + let nsyms,o = Binary.u32o binary o in + let stroff,o = Binary.u32o binary o in + let strsize = Binary.u32 binary o in + {cmd;cmdsize;symoff;nsyms;stroff;strsize;} + +let get_dysymtab_command binary offset :dysymtab_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let ilocalsym,o = Binary.u32o binary o in + let nlocalsym,o = Binary.u32o binary o in + let iextdefsym,o = Binary.u32o binary o in + let nextdefsym,o = Binary.u32o binary o in + let iundefsym,o = Binary.u32o binary o in + let nundefsym,o = Binary.u32o binary o in + let tocoff,o = Binary.u32o binary o in + let ntoc,o = Binary.u32o binary o in + let modtaboff,o = Binary.u32o binary o in + let nmodtab,o = Binary.u32o binary o in + let extrefsymoff,o = Binary.u32o binary o in + let nextrefsyms,o = Binary.u32o binary o in + let indirectsymoff,o = Binary.u32o binary o in + let nindirectsyms,o = Binary.u32o binary o in + let extreloff,o = Binary.u32o binary o in + let nextrel,o = Binary.u32o binary o in + let locreloff,o = Binary.u32o binary o in + let nlocrel = Binary.u32 binary o in + {cmd;cmdsize;ilocalsym;nlocalsym;iextdefsym;nextdefsym;iundefsym;nundefsym;tocoff;ntoc;modtaboff;nmodtab;extrefsymoff;nextrefsyms;indirectsymoff;nindirectsyms;extreloff;nextrel;locreloff;nlocrel;} + +let get_dylib_table_of_contents binary offset :dylib_table_of_contents = + let symbol_index,o = Binary.u32o binary offset in + let module_index = Binary.u32 binary o in + {symbol_index;module_index;} + +let get_dylib_module binary offset :dylib_module = + let module_name,o = Binary.u32o binary offset in + let iextdefsym,o = Binary.u32o binary o in + let nextdefsym,o = Binary.u32o binary o in + let irefsym,o = Binary.u32o binary o in + let nrefsym,o = Binary.u32o binary o in + let ilocalsym,o = Binary.u32o binary o in + let nlocalsym,o = Binary.u32o binary o in + let iextrel,o = Binary.u32o binary o in + let nextrel,o = Binary.u32o binary o in + let iinit_iterm,o = Binary.u32o binary o in + let ninit_nterm,o = Binary.u32o binary o in + let objc_module_info_addr,o = Binary.u32o binary o in + let objc_module_info_size = Binary.u32 binary o in + {module_name;iextdefsym;nextdefsym;irefsym;nrefsym;ilocalsym;nlocalsym;iextrel;nextrel;iinit_iterm;ninit_nterm;objc_module_info_addr;objc_module_info_size;} + +let get_dylib_module_64 binary offset :dylib_module_64 = + let module_name,o = Binary.u32o binary offset in + let iextdefsym,o = Binary.u32o binary o in + let nextdefsym,o = Binary.u32o binary o in + let irefsym,o = Binary.u32o binary o in + let nrefsym,o = Binary.u32o binary o in + let ilocalsym,o = Binary.u32o binary o in + let nlocalsym,o = Binary.u32o binary o in + let iextrel,o = Binary.u32o binary o in + let nextrel,o = Binary.u32o binary o in + let iinit_iterm,o = Binary.u32o binary o in + let ninit_nterm,o = Binary.u32o binary o in + let objc_module_info_size,o = Binary.u32o binary o in + let objc_module_info_addr = Binary.u64 binary o in + {module_name;iextdefsym;nextdefsym;irefsym;nrefsym;ilocalsym;nlocalsym;iextrel;nextrel;iinit_iterm;ninit_nterm;objc_module_info_size;objc_module_info_addr;} + +let get_dylib_reference binary offset :dylib_reference = + let isym,o = Binary.stringo binary offset ~num_bytes:24 in + let flags = Binary.u64 binary o in + {isym;flags;} + +let get_twolevel_hints_command binary offset :twolevel_hints_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let offset,o = Binary.u32o binary o in + let nhints = Binary.u32 binary o in + {cmd;cmdsize;offset;nhints;} + +let get_twolevel_hint binary offset :twolevel_hint = + let isub_image,o = Binary.u64o binary offset in + let itoc = Binary.string binary o ~max:24 in + {isub_image;itoc;} + +let get_prebind_cksum_command binary offset :prebind_cksum_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let cksum = Binary.u32 binary o in + {cmd;cmdsize;cksum;} + +let get_uuid_command binary offset :uuid_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let uuid = Binary.string binary o ~max:16 in + {cmd;cmdsize;uuid;} + +let get_rpath_command binary offset :rpath_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let path = {offset; str} in + {cmd;cmdsize;path;} + +let get_linkedit_data_command binary offset :linkedit_data_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let dataoff,o = Binary.u32o binary o in + let datasize = Binary.u32 binary o in + {cmd;cmdsize;dataoff;datasize;} + +let get_encryption_info_command binary offset :encryption_info_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let cryptoff,o = Binary.u32o binary o in + let cryptsize,o = Binary.u32o binary o in + let cryptid = Binary.u32 binary o in + {cmd;cmdsize;cryptoff;cryptsize;cryptid;} + +let get_encryption_info_command_64 binary offset :encryption_info_command_64 = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let cryptoff,o = Binary.u32o binary o in + let cryptsize,o = Binary.u32o binary o in + let cryptid,o = Binary.u32o binary o in + let pad = Binary.u32 binary o in + {cmd;cmdsize;cryptoff;cryptsize;cryptid;pad;} + +let get_version_min_command binary offset :version_min_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let version,o = Binary.u32o binary o in + let sdk = Binary.u32 binary o in + {cmd;cmdsize;version;sdk;} + +let get_dyld_info_command binary offset :dyld_info_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let rebase_off,o = Binary.u32o binary o in + let rebase_size,o = Binary.u32o binary o in + let bind_off,o = Binary.u32o binary o in + let bind_size,o = Binary.u32o binary o in + let weak_bind_off,o = Binary.u32o binary o in + let weak_bind_size,o = Binary.u32o binary o in + let lazy_bind_off,o = Binary.u32o binary o in + let lazy_bind_size,o = Binary.u32o binary o in + let export_off,o = Binary.u32o binary o in + let export_size = Binary.u32 binary o in + {cmd;cmdsize;rebase_off;rebase_size;bind_off;bind_size;weak_bind_off;weak_bind_size;lazy_bind_off;lazy_bind_size;export_off;export_size;} + +let get_linker_option_command binary offset :linker_option_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let count = Binary.u32 binary o in + {cmd;cmdsize;count;} + +let get_symseg_command binary offset :symseg_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let offset,o = Binary.u32o binary o in + let size = Binary.u32 binary o in + {cmd;cmdsize;offset;size;} + +let get_ident_command binary offset :ident_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize = Binary.u32 binary o in + {cmd;cmdsize;} + +let get_fvmfile_command binary offset :fvmfile_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let stroffset,o = Binary.u32o binary o in + let str = Binary.string binary (offset+stroffset) in + let name = {offset; str} in + let header_addr = Binary.u32 binary o in + {cmd;cmdsize;name;header_addr;} + +let get_entry_point_command binary offset :entry_point_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let entryoff,o = Binary.u64o binary o in + let stacksize = Binary.u64 binary o in + {cmd;cmdsize;entryoff;stacksize;} + +let get_source_version_command binary offset :source_version_command = + let cmd,o = Binary.u32o binary offset in + let cmdsize,o = Binary.u32o binary o in + let version = Binary.u64 binary o in + {cmd;cmdsize;version;} + +let get_data_in_code_entry binary offset :data_in_code_entry = + let offset,o = Binary.u32o binary offset in + let length,o = Binary.u16o binary o in + let kind = Binary.u16 binary o in + {offset;length;kind;} + + +let get_t (cmd:cmd) bytes offset = + match cmd with + | LC_SEGMENT -> + LC_SEGMENT (get_segment_command bytes offset) + | LC_SYMTAB -> + LC_SYMTAB (get_symtab_command bytes offset) + | LC_SYMSEG -> + LC_SYMSEG (get_symseg_command bytes offset) + | LC_THREAD -> + LC_THREAD (get_thread_command bytes offset) + | LC_UNIXTHREAD -> + LC_UNIXTHREAD (get_thread_command bytes offset) + | LC_LOADFVMLIB -> + LC_LOADFVMLIB (get_fvmlib_command bytes offset) + | LC_IDFVMLIB -> + LC_IDFVMLIB (get_fvmlib_command bytes offset) + | LC_IDENT -> + LC_IDENT (get_ident_command bytes offset) + | LC_FVMFILE -> + LC_FVMFILE (get_fvmfile_command bytes offset) + | LC_PREPAGE -> + LC_PREPAGE (get_load_command bytes offset) + | LC_DYSYMTAB -> + LC_DYSYMTAB (get_dysymtab_command bytes offset) + | LC_LOAD_DYLIB -> + LC_LOAD_DYLIB (get_dylib_command bytes offset) + | LC_ID_DYLIB -> + LC_ID_DYLIB (get_dylib_command bytes offset) + | LC_LOAD_DYLINKER -> + LC_LOAD_DYLINKER (get_dylinker_command bytes offset) + | LC_ID_DYLINKER -> + LC_ID_DYLINKER (get_dylinker_command bytes offset) + | LC_PREBOUND_DYLIB -> + LC_PREBOUND_DYLIB (get_prebound_dylib_command bytes offset) + | LC_ROUTINES -> + LC_ROUTINES (get_routines_command bytes offset) + | LC_SUB_FRAMEWORK -> + LC_SUB_FRAMEWORK (get_sub_framework_command bytes offset) + | LC_SUB_UMBRELLA -> + LC_SUB_UMBRELLA (get_sub_umbrella_command bytes offset) + | LC_SUB_CLIENT -> + LC_SUB_CLIENT (get_sub_client_command bytes offset) + | LC_SUB_LIBRARY -> + LC_SUB_LIBRARY (get_sub_library_command bytes offset) + | LC_TWOLEVEL_HINTS -> + LC_TWOLEVEL_HINTS (get_twolevel_hints_command bytes offset) + | LC_PREBIND_CKSUM -> + LC_PREBIND_CKSUM (get_prebind_cksum_command bytes offset) + | LC_LOAD_WEAK_DYLIB -> + LC_LOAD_WEAK_DYLIB (get_dylib_command bytes offset) + | LC_SEGMENT_64 -> + LC_SEGMENT_64 (get_segment_command_64 bytes offset) + | LC_ROUTINES_64 -> + LC_ROUTINES_64 (get_routines_command_64 bytes offset) + | LC_UUID -> + LC_UUID (get_uuid_command bytes offset) + | LC_RPATH -> + LC_RPATH (get_rpath_command bytes offset) + | LC_CODE_SIGNATURE -> + LC_CODE_SIGNATURE (get_linkedit_data_command bytes offset) + | LC_SEGMENT_SPLIT_INFO -> + LC_SEGMENT_SPLIT_INFO (get_linkedit_data_command bytes offset) + | LC_REEXPORT_DYLIB -> + LC_REEXPORT_DYLIB (get_dylib_command bytes offset) + | LC_LAZY_LOAD_DYLIB -> + LC_LAZY_LOAD_DYLIB (get_dylib_command bytes offset) + | LC_ENCRYPTION_INFO -> + LC_ENCRYPTION_INFO (get_encryption_info_command bytes offset) + | LC_DYLD_INFO -> + LC_DYLD_INFO (get_dyld_info_command bytes offset) + | LC_DYLD_INFO_ONLY -> + LC_DYLD_INFO_ONLY (get_dyld_info_command bytes offset) + | LC_LOAD_UPWARD_DYLIB -> + LC_LOAD_UPWARD_DYLIB (get_dylib_command bytes offset) + | LC_VERSION_MIN_MACOSX -> + LC_VERSION_MIN_MACOSX (get_version_min_command bytes offset) + | LC_VERSION_MIN_IPHONEOS -> + LC_VERSION_MIN_IPHONEOS (get_version_min_command bytes offset) + | LC_FUNCTION_STARTS -> + LC_FUNCTION_STARTS (get_linkedit_data_command bytes offset) + | LC_DYLD_ENVIRONMENT -> + LC_DYLD_ENVIRONMENT (get_dylinker_command bytes offset) + | LC_MAIN -> + LC_MAIN (get_entry_point_command bytes offset) + | LC_DATA_IN_CODE -> + LC_DATA_IN_CODE (get_linkedit_data_command bytes offset) + | LC_SOURCE_VERSION -> + LC_SOURCE_VERSION (get_source_version_command bytes offset) + | LC_DYLIB_CODE_SIGN_DRS -> + LC_DYLIB_CODE_SIGN_DRS (get_linkedit_data_command bytes offset) + | LC_ENCRYPTION_INFO_64 -> + LC_ENCRYPTION_INFO_64 (get_encryption_info_command_64 bytes offset) + | LC_LINKER_OPTION -> + LC_LINKER_OPTION (get_linkedit_data_command bytes offset) + | LC_LINKER_OPTIMIZATION_HINT -> + LC_LINKER_OPTIMIZATION_HINT (get_linkedit_data_command bytes offset) + diff --git a/lib/mach/MachLoadCommandTypes.ml b/lib/mach/MachLoadCommandTypes.ml new file mode 100644 index 0000000..3155441 --- /dev/null +++ b/lib/mach/MachLoadCommandTypes.ml @@ -0,0 +1,1097 @@ +(* ==================================== *) +(* Load Commands from mach-o/loader.h *) +(* ==================================== *) + +type load_command = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; +} + +let sizeof_load_command = 8 + +(* NOTE: str is _not_ apart of the lc_str struct, but added for convenience *) +type lc_str = { + offset: int [@size 4]; + str: string [@computed]; +} + +let sizeof_lc_str = 4 + +(* +struct section_64 { (* for 64-bit architectures *) + char sectname[16]; (* name of this section *) + char segname[16]; (* segment this section goes in *) + uint64_t addr; (* memory address of this section *) + uint64_t size; (* size in bytes of this section *) + uint32_t offset; (* file offset of this section *) + uint32_t align; (* section alignment (power of 2) *) + uint32_t reloff; (* file offset of relocation entries *) + uint32_t nreloc; (* number of relocation entries *) + uint32_t flags; (* flags (section type and attributes)*) + uint32_t reserved1; (* reserved (for offset or index) *) + uint32_t reserved2; (* reserved (for count or sizeof) *) + uint32_t reserved3; (* reserved *) +}; +*) + +type section = { + sectname: string [@size 16]; + segname: string [@size 16]; + addr: int [@size 4]; + size: int [@size 4]; + offset: int [@size 4]; + align: int [@size 4]; + reloff: int [@size 4]; + nreloc: int [@size 4]; + flags: int [@size 4]; + reserved1: int [@size 4]; + reserved2: int [@size 4]; +} + +let sizeof_section = 68 (* bytes *) + +type section_64 = { + sectname: string [@size 16]; + segname: string [@size 16]; + addr: int [@size 8]; + size: int [@size 8]; + offset: int [@size 4]; + align: int [@size 4]; + reloff: int [@size 4]; + nreloc: int [@size 4]; + flags: int [@size 4]; + reserved1: int [@size 4]; + reserved2: int [@size 4]; + reserved3: int [@size 4]; +} + +let sizeof_section_64 = 80 (* bytes *) + +type segment_command = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + segname: string [@size 16]; + vmaddr: int [@size 4]; + vmsize: int [@size 4]; + fileoff: int [@size 4]; + filesize: int [@size 4]; + maxprot: int [@size 4]; + initprot: int [@size 4]; + nsects: int [@size 4]; + flags: int [@size 4]; + sections: section list [@computed nsects]; +} + +let sizeof_segment_command_64 = 56 + +type segment_command_64 = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + segname: string [@size 16]; + vmaddr: int [@size 8]; + vmsize: int [@size 8]; + fileoff: int [@size 8]; + filesize: int [@size 8]; + maxprot: int [@size 4]; + initprot: int [@size 4]; + nsects: int [@size 4]; + flags: int [@size 4]; + sections: section_64 list [@computed nsects]; +} + +let sizeof_segment_command_64 = 72 + +(* + * Fixed virtual memory shared libraries are identified by two things. The + * target pathname (the name of the library as found for execution), and the + * minor version number. The address of where the headers are loaded is in + * header_addr. (THIS IS OBSOLETE and no longer supported). + *) +type fvmlib = { + name: lc_str [@size 4]; (* library's target pathname *) + minor_version: int [@size 4]; (* library's minor version number *) + header_addr: int [@size 4]; (* library's header address *) +} + +let sizeof_fvmlib = 12 + +(* + * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header) + * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library. + * An object that uses a fixed virtual shared library also contains a + * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses. + * (THIS IS OBSOLETE and no longer supported). + *) +type fvmlib_command = { + cmd: int [@size 4]; (* LC_IDFVMLIB or LC_LOADFVMLIB *) + cmdsize: int [@size 4]; (* includes pathname string *) + fvmlib: fvmlib [@size 4]; (* the library identification *) +} + +let sizeof_fvmlib_command = 20 + +(* +* + * Dynamicly linked shared libraries are identified by two things. The + * pathname (the name of the library as found for execution), and the + * compatibility version number. The pathname must match and the compatibility + * number in the user of the library must be greater than or equal to the + * library being used. The time stamp is used to record the time a library was + * built and copied into user so it can be use to determined if the library used + * at runtime is exactly the same as used to built the program. + * +struct dylib { + union lc_str name; (* library's path name *) + uint32_t timestamp; (* library's build time stamp *) + uint32_t current_version; (* library's current version number *) + uint32_t compatibility_version; (* library's compatibility vers number*) +}; + *) + +(* + * + * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) + * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. + * An object that uses a dynamically linked shared library also contains a + * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or + * LC_REEXPORT_DYLIB) for each library it uses. + * + *) + +type dylib = { + name: lc_str [@size 4]; (* library's path name *) + timestamp: int [@size 4]; (* library's build time stamp *) + current_version: int [@size 4]; (* library's current version number *) + compatibility_version: int [@size 4]; (* library's compatibility vers number*) +} + +let sizeof_dylib = 16 + +type dylib_command = + { + cmd: int [@size 4]; (* LC_ID_DYLIB, LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB *) + cmdsize: int [@size 4]; (* includes pathname string *) + dylib: dylib [@size 16]; (* the library identification *) + } + +let sizeof_dylib_command = 20 (*bytes*) + +(* + * A dynamically linked shared library may be a subframework of an umbrella + * framework. If so it will be linked with "-umbrella umbrella_name" where + * Where "umbrella_name" is the name of the umbrella framework. A subframework + * can only be linked against by its umbrella framework or other subframeworks + * that are part of the same umbrella framework. Otherwise the static link + * editor produces an error and states to link against the umbrella framework. + * The name of the umbrella framework for subframeworks is recorded in the + * following structure. + *) +type sub_framework_command = { + cmd: int [@size 4]; (* LC_SUB_FRAMEWORK *) + cmdsize: int [@size 4]; (* includes umbrella string *) + umbrella: lc_str [@size 4]; (* the umbrella framework name *) +} + +let sizeof_sub_framework_command= 12 + +(* + * For dynamically linked shared libraries that are subframework of an umbrella + * framework they can allow clients other than the umbrella framework or other + * subframeworks in the same umbrella framework. To do this the subframework + * is built with "-allowable_client client_name" and an LC_SUB_CLIENT load + * command is created for each -allowable_client flag. The client_name is + * usually a framework name. It can also be a name used for bundles clients + * where the bundle is built with "-client_name client_name". + *) +type sub_client_command = { + cmd: int [@size 4]; (* LC_SUB_CLIENT *) + cmdsize: int [@size 4]; (* includes client string *) + client: lc_str [@size 4]; (* the client name *) +} + +let sizeof_sub_client_command = 12 + +(* + * A dynamically linked shared library may be a sub_umbrella of an umbrella + * framework. If so it will be linked with "-sub_umbrella umbrella_name" where + * Where "umbrella_name" is the name of the sub_umbrella framework. When + * staticly linking when -twolevel_namespace is in effect a twolevel namespace + * umbrella framework will only cause its subframeworks and those frameworks + * listed as sub_umbrella frameworks to be implicited linked in. Any other + * dependent dynamic libraries will not be linked it when -twolevel_namespace + * is in effect. The primary library recorded by the static linker when + * resolving a symbol in these libraries will be the umbrella framework. + * Zero or more sub_umbrella frameworks may be use by an umbrella framework. + * The name of a sub_umbrella framework is recorded in the following structure. + *) +type sub_umbrella_command = { + cmd: int [@size 4]; (* LC_SUB_UMBRELLA *) + cmdsize: int [@size 4]; (* includes sub_umbrella string *) + sub_umbrella: lc_str [@size 4]; (* the sub_umbrella framework name *) +} + +let sizeof_sub_umbrella_command = 12 + +(* + * A dynamically linked shared library may be a sub_library of another shared + * library. If so it will be linked with "-sub_library library_name" where + * Where "library_name" is the name of the sub_library shared library. When + * staticly linking when -twolevel_namespace is in effect a twolevel namespace + * shared library will only cause its subframeworks and those frameworks + * listed as sub_umbrella frameworks and libraries listed as sub_libraries to + * be implicited linked in. Any other dependent dynamic libraries will not be + * linked it when -twolevel_namespace is in effect. The primary library + * recorded by the static linker when resolving a symbol in these libraries + * will be the umbrella framework (or dynamic library). Zero or more sub_library + * shared libraries may be use by an umbrella framework or (or dynamic library). + * The name of a sub_library framework is recorded in the following structure. + * For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc". + *) +type sub_library_command = { + cmd: int [@size 4]; (* LC_SUB_LIBRARY *) + cmdsize: int [@size 4]; (* includes sub_library string *) + sub_library: lc_str [@size 4]; (* the sub_library name *) +} + +let sizeof_sub_library_command = 12 + +(* + * A program (filetype == MH_EXECUTE) that is + * prebound to its dynamic libraries has one of these for each library that + * the static linker used in prebinding. It contains a bit vector for the + * modules in the library. The bits indicate which modules are bound (1) and + * which are not (0) from the library. The bit for module 0 is the low bit + * of the first byte. So the bit for the Nth module is: + * (linked_modules[N/8] >> N%8) & 1 + *) +type prebound_dylib_command = { + cmd: int [@size 4]; (* LC_PREBOUND_DYLIB *) + cmdsize: int [@size 4]; (* includes strings *) + name: lc_str [@size 4]; (* library's path name *) + nmodules: int [@size 4]; (* number of modules in library *) + linked_modules: lc_str [@size 4]; (* bit vector of linked modules *) +} + +let sizeof_prebound_dylib_command = 20 + +type dylinker_command = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + name: lc_str [@size 4]; +} + +let sizeof_dylinker_command = 12 + +(* + * Thread commands contain machine-specific data structures suitable for + * use in the thread state primitives. The machine specific data structures + * follow the struct thread_command as follows. + * Each flavor of machine specific data structure is preceded by an unsigned + * long constant for the flavor of that data structure, an uint32_t + * that is the count of longs of the size of the state data structure and then + * the state data structure follows. This triple may be repeated for many + * flavors. The constants for the flavors, counts and state data structure + * definitions are expected to be in the header file . + * These machine specific data structures sizes must be multiples of + * 4 bytes The cmdsize reflects the total size of the thread_command + * and all of the sizes of the constants for the flavors, counts and state + * data structures. + * + * For executable objects that are unix processes there will be one + * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor. + * This is the same as a LC_THREAD, except that a stack is automatically + * created (based on the shell's limit for the stack size). Command arguments + * and environment variables are copied onto that stack. + *) +(* unimplemented, see machine/thread_status.h for rest of values: + uint32_t flavor flavor of thread state + uint32_t count count of longs in thread state + struct XXX_thread_state state thread state for this flavor + ... *) + +type thread_command = { + cmd: int [@size 4]; (* LC_THREAD or LC_UNIXTHREAD *) + cmdsize: int [@size 4]; (* total size of this command *) +} + +(* + * The routines command contains the address of the dynamic shared library + * initialization routine and an index into the module table for the module + * that defines the routine. Before any modules are used from the library the + * dynamic linker fully binds the module that defines the initialization routine + * and then calls it. This gets called before any module initialization + * routines (used for C++ static constructors) in the library. + *) +type routines_command = { (* for 32-bit architectures *) + cmd: int [@size 4]; (* LC_ROUTINES *) + cmdsize: int [@size 4]; (* total size of this command *) + init_address: int [@size 4]; (* address of initialization routine *) + init_module: int [@size 4]; (* index into the module table that the init routine is defined in *) + reserved1: int [@size 4]; + reserved2: int [@size 4]; + reserved3: int [@size 4]; + reserved4: int [@size 4]; + reserved5: int [@size 4]; + reserved6: int [@size 4]; +} + +(* + * The 64-bit routines command. Same use as above. + *) +type routines_command_64 = { (* for 64-bit architectures *) + cmd: int [@size 4]; (* LC_ROUTINES_64 *) + cmdsize: int [@size 4]; (* total size of this command *) + init_address: int [@size 8];(* address of initialization routine *) + init_module: int [@size 8]; (* index into the module table that the init routine is defined in 8 bytes each *) + reserved1: int [@size 8]; + reserved2: int [@size 8]; + reserved3: int [@size 8]; + reserved4: int [@size 8]; + reserved5: int [@size 8]; + reserved6: int [@size 8]; +} + +type symtab_command = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + symoff: int [@size 4]; + nsyms: int [@size 4]; + stroff: int [@size 4]; + strsize: int [@size 4]; +} + +let sizeof_symtab_command = 24 + +(* + * This is the second set of the symbolic information which is used to support + * the data structures for the dynamically link editor. + * + * The original set of symbolic information in the symtab_command which contains + * the symbol and string tables must also be present when this load command is + * present. When this load command is present the symbol table is organized + * into three groups of symbols: + * local symbols (static and debugging symbols) - grouped by module + * defined external symbols - grouped by module (sorted by name if not lib) + * undefined external symbols (sorted by name if MH_BINDATLOAD is not set, + * and in order the were seen by the static + * linker if MH_BINDATLOAD is set) + * In this load command there are offsets and counts to each of the three groups + * of symbols. + * + * This load command contains a the offsets and sizes of the following new + * symbolic information tables: + * table of contents + * module table + * reference symbol table + * indirect symbol table + * The first three tables above (the table of contents, module table and + * reference symbol table) are only present if the file is a dynamically linked + * shared library. For executable and object modules, which are files + * containing only one module, the information that would be in these three + * tables is determined as follows: + * table of contents - the defined external symbols are sorted by name + * module table - the file contains only one module so everything in the + * file is part of the module. + * reference symbol table - is the defined and undefined external symbols + * + * For dynamically linked shared library files this load command also contains + * offsets and sizes to the pool of relocation entries for all sections + * separated into two groups: + * external relocation entries + * local relocation entries + * For executable and object modules the relocation entries continue to hang + * off the section structures. + *) +type dysymtab_command = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + ilocalsym: int [@size 4]; (* index to local symbols *) + nlocalsym: int [@size 4]; (* number of local symbols *) + + iextdefsym: int [@size 4]; (* index to externally defined symbols *) + nextdefsym: int [@size 4]; (* number of externally defined symbols *) + + iundefsym: int [@size 4]; (* index to undefined symbols *) + nundefsym: int [@size 4]; (* number of undefined symbols *) + + tocoff: int [@size 4]; (* file offset to table of contents *) + ntoc: int [@size 4]; (* number of entries in table of contents *) + modtaboff: int [@size 4]; (* file offset to module table *) + nmodtab: int [@size 4]; (* number of module table entries *) + + extrefsymoff: int [@size 4]; (* offset to referenced symbol table *) + nextrefsyms: int [@size 4]; (* number of referenced symbol table entries *) + + indirectsymoff: int [@size 4]; (* file offset to the indirect symbol table *) + nindirectsyms: int [@size 4]; (* number of indirect symbol table entries *) + extreloff: int [@size 4]; (* offset to external relocation entries *) + nextrel: int [@size 4]; (* number of external relocation entries *) + locreloff: int [@size 4]; (* offset to local relocation entries *) + nlocrel: int [@size 4]; (* number of local relocation entries *) +} + +let sizeof_dysymtab_command = 80 (* bytes *) + +(* TODO: unimplemented *) +(* a table of contents entry *) +type dylib_table_of_contents = { + symbol_index: int [@size 4]; (* the defined external symbol (index into the symbol table) *) + module_index: int [@size 4]; (* index into the module table this symbol is defined in *) +} + +(* TODO: unimplemented *) +(* a module table entry *) +type dylib_module = { + module_name: int [@size 4]; (* the module name (index into string table) *) + + iextdefsym: int [@size 4]; (*index into externally defined symbols *) + nextdefsym: int [@size 4]; (*number of externally defined symbols *) + irefsym: int [@size 4]; (* index into reference symbol table *) + nrefsym: int [@size 4]; (*number of reference symbol table entries *) + ilocalsym: int [@size 4]; (* index into symbols for local symbols *) + nlocalsym: int [@size 4]; (*number of local symbols *) + + iextrel: int [@size 4]; (* index into external relocation entries *) + nextrel: int [@size 4]; (* number of external relocation entries *) + + iinit_iterm: int [@size 4]; (* low 16 bits are the index into the init section, high 16 bits are the index into the term section *) + ninit_nterm: int [@size 4]; (* low 16 bits are the number of init section entries, high 16 bits are the number of term section entries *) + objc_module_info_addr: int [@size 4]; (* the (__OBJC,__module_info) section *) + objc_module_info_size: int [@size 4]; (* the (__OBJC,__module_info) section *) +} + +(* TODO: unimplemented *) +(* a 64-bit module table entry *) +type dylib_module_64 = { + module_name: int [@size 4]; (* the module name (index into string table) *) + + iextdefsym: int [@size 4]; (* index into externally defined symbols *) + nextdefsym: int [@size 4]; (* number of externally defined symbols *) + irefsym: int [@size 4]; (* index into reference symbol table *) + nrefsym: int [@size 4]; (* number of reference symbol table entries *) + ilocalsym: int [@size 4]; (* index into symbols for local symbols *) + nlocalsym: int [@size 4]; (* number of local symbols *) + + iextrel: int [@size 4]; (* index into external relocation entries *) + nextrel: int [@size 4]; (* number of external relocation entries *) + + iinit_iterm: int [@size 4]; (* low 16 bits are the index into the init section, high 16 bits are the index into the term section *) + ninit_nterm: int [@size 4]; (* low 16 bits are the number of init section entries, high 16 bits are the number of term section entries *) + + objc_module_info_size: int [@size 4]; (* the (__OBJC,__module_info) section *) + objc_module_info_addr: int [@size 8]; (* the (__OBJC,__module_info) section *) +} + +(* + * The entries in the reference symbol table are used when loading the module + * (both by the static and dynamic link editors) and if the module is unloaded + * or replaced. Therefore all external symbols (defined and undefined) are + * listed in the module's reference table. The flags describe the type of + * reference that is being made. The constants for the flags are defined in + * as they are also used for symbol table entries. + *) +(* TODO: unimplemented, BIT FIELDS *) +type dylib_reference = { + isym: bytes [@size 24]; (* 24 bits bit-field index into the symbol table *) + flags: int [@size 8]; (* flags to indicate the type of reference *) +} + +(* + * The twolevel_hints_command contains the offset and number of hints in the + * two-level namespace lookup hints table. + *) +(* TODO: unimplemented *) +type twolevel_hints_command = { + cmd: int [@size 4];(* LC_TWOLEVEL_HINTS *) + cmdsize: int [@size 4]; (* sizeof(struct twolevel_hints_command) *) + offset: int [@size 4]; (* offset to the hint table *) + nhints: int [@size 4]; (* number of hints in the hint table *) +} + +(* + * The entries in the two-level namespace lookup hints table are twolevel_hint + * structs. These provide hints to the dynamic link editor where to start + * looking for an undefined symbol in a two-level namespace image. The + * isub_image field is an index into the sub-images (sub-frameworks and + * sub-umbrellas list) that made up the two-level image that the undefined + * symbol was found in when it was built by the static link editor. If + * isub-image is 0 the the symbol is expected to be defined in library and not + * in the sub-images. If isub-image is non-zero it is an index into the array + * of sub-images for the umbrella with the first index in the sub-images being + * 1. The array of sub-images is the ordered list of sub-images of the umbrella + * that would be searched for a symbol that has the umbrella recorded as its + * primary library. The table of contents index is an index into the + * library's table of contents. This is used as the starting point of the + * binary search or a directed linear search. + *) +(* TODO: unimplemented, BITFIELDS *) +type twolevel_hint = { + isub_image: int [@size 8]; (* index into the sub images *) + itoc: bytes [@size 24]; (* 24 bit field index into the table of contents *) +} + +(* + * The prebind_cksum_command contains the value of the original check sum for + * prebound files or zero. When a prebound file is first created or modified + * for other than updating its prebinding information the value of the check sum + * is set to zero. When the file has it prebinding re-done and if the value of + * the check sum is zero the original check sum is calculated and stored in + * cksum field of this load command in the output file. If when the prebinding + * is re-done and the cksum field is non-zero it is left unchanged from the + * input file. + *) +(* TODO: unimplemented *) +type prebind_cksum_command = { + cmd: int [@size 4]; (* LC_PREBIND_CKSUM *) + cmdsize: int [@size 4]; (* sizeof(struct prebind_cksum_command) *) + cksum: int [@size 4]; (* the check sum or zero *) +} + +(* + * The uuid load command contains a single 128-bit unique random number that + * identifies an object produced by the static link editor. + *) +type uuid_command = { + cmd: int [@size 4]; (* LC_UUID *) + cmdsize: int [@size 4]; (* sizeof(struct uuid_command) *) + uuid: bytes [@size 16]; (* 16 bytes the 128-bit uuid *) +} + +let sizeof_uuid_command = 24 + +(* + * The rpath_command contains a path which at runtime should be added to + * the current run path used to find @rpath prefixed dylibs. + *) +type rpath_command = { + cmd: int [@size 4]; (* LC_RPATH *) + cmdsize: int [@size 4]; (* includes string *) + path: lc_str [@size 4]; (* path to add to run path *) +} + +let sizeof_rpath_command = 12 + +(* + * The linkedit_data_command contains the offsets and sizes of a blob + * of data in the __LINKEDIT segment. + *) +type linkedit_data_command = { + cmd: int [@size 4]; (* LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT. *) + cmdsize: int [@size 4]; (* sizeof(struct linkedit_data_command) *) + dataoff: int [@size 4]; (* file offset of data in __LINKEDIT segment *) + datasize: int [@size 4]; (* file size of data in __LINKEDIT segment *) +} + +let sizeof_linkedit_data_command = 16 + +(* + * The encryption_info_command contains the file offset and size of an + * of an encrypted segment. + *) +type encryption_info_command = { + cmd: int [@size 4]; (* LC_ENCRYPTION_INFO *) + cmdsize: int [@size 4]; (* sizeof(struct encryption_info_command) *) + cryptoff: int [@size 4]; (* file offset of encrypted range *) + cryptsize: int [@size 4]; (* file size of encrypted range *) + cryptid: int [@size 4]; (* which enryption system, 0 means not-encrypted yet *) +} + +let sizeof_encryption_info_command = 20 + +(* + * The encryption_info_command_64 contains the file offset and size of an + * of an encrypted segment (for use in x86_64 targets). + *) +type encryption_info_command_64 = { + cmd: int [@size 4]; (* LC_ENCRYPTION_INFO_64 *) + cmdsize: int [@size 4]; (* sizeof(struct encryption_info_command_64) *) + cryptoff: int [@size 4]; (* file offset of encrypted range *) + cryptsize: int [@size 4]; (* file size of encrypted range *) + cryptid: int [@size 4]; (* which enryption system, 0 means not-encrypted yet *) + pad: int [@size 4]; (* padding to make this struct's size a multiple of 8 bytes *) +} + +let sizeof_encryption_info_command_64 = 24 + +(* + * The version_min_command contains the min OS version on which this + * binary was built to run. + * + * LC_VERSION_MIN_MACOSX or LC_VERSION_MIN_IPHONEOS *) +type version_min_command = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + version: int [@size 4]; (* X.Y.Z is encoded in nibbles xxxx.yy.zz *) + sdk: int [@size 4]; (* X.Y.Z is encoded in nibbles xxxx.yy.zz *) +} + +let sizeof_version_min_command = 16 (* bytes *) + +(* +struct dyld_info_command { + uint32_t cmd; (* LC_DYLD_INFO or LC_DYLD_INFO_ONLY *) + uint32_t cmdsize; (* sizeof(struct dyld_info_command) *) + uint32_t rebase_off; (* file offset to rebase info *) + uint32_t rebase_size; (* size of rebase info *) + uint32_t bind_off; (* file offset to binding info *) + uint32_t bind_size; (* size of binding info *) + uint32_t weak_bind_off; (* file offset to weak binding info *) + uint32_t weak_bind_size; (* size of weak binding info *) + uint32_t lazy_bind_off; (* file offset to lazy binding info *) + uint32_t lazy_bind_size; (* size of lazy binding infs *) + uint32_t export_off; (* file offset to lazy binding info *) + uint32_t export_size; (* size of lazy binding infs *) +}; + *) + +type dyld_info_command = + { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + rebase_off: int [@size 4]; + rebase_size: int [@size 4]; + bind_off: int [@size 4]; + bind_size: int [@size 4]; + weak_bind_off: int [@size 4]; + weak_bind_size: int [@size 4]; + lazy_bind_off: int [@size 4]; + lazy_bind_size: int [@size 4]; + export_off: int [@size 4]; + export_size: int [@size 4]; + } + +let sizeof_dylib_info_command = 48 + +(* + * The linker_option_command contains linker options embedded in object files. + *) +type linker_option_command = { + cmd: int [@size 4]; (* LC_LINKER_OPTION only used in MH_OBJECT filetypes *) + cmdsize: int [@size 4]; + count: int [@size 4]; (* number of strings concatenation of zero terminated UTF8 strings. Zero filled at end to align *) +} + +let sizeof_linker_option_command = 12 + +(* + * The symseg_command contains the offset and size of the GNU style + * symbol table information as described in the header file . + * The symbol roots of the symbol segments must also be aligned properly + * in the file. So the requirement of keeping the offsets aligned to a + * multiple of a 4 bytes translates to the length field of the symbol + * roots also being a multiple of a long. Also the padding must again be + * zeroed. (THIS IS OBSOLETE and no longer supported). + *) +type symseg_command = { + cmd: int [@size 4]; (* LC_SYMSEG *) + cmdsize: int [@size 4]; (* sizeof(struct symseg_command) *) + offset: int [@size 4]; (* symbol segment offset *) + size: int [@size 4]; (* symbol segment size in bytes *) +} + +let sizeof_symseg_command = 16 + +(* + * The ident_command contains a free format string table following the + * ident_command structure. The strings are null terminated and the size of + * the command is padded out with zero bytes to a multiple of 4 bytes/ + * (THIS IS OBSOLETE and no longer supported). + *) +type ident_command = { + cmd: int [@size 4]; (* LC_IDENT *) + cmdsize: int [@size 4]; (* strings that follow this command *) +} + +let sizeof_ident_command = 8 + +(* + * The fvmfile_command contains a reference to a file to be loaded at the + * specified virtual address. (Presently, this command is reserved for + * internal use. The kernel ignores this command when loading a program into + * memory). + *) +type fvmfile_command = { + cmd: int [@size 4]; (* LC_FVMFILE *) + cmdsize: int [@size 4]; (* includes pathname string *) + name: lc_str [@size 4]; (* files pathname *) + header_addr: int [@size 4]; (* files virtual address *) +} + +let sizeof_fvmfile_command = 16 + +(* + * The entry_point_command is a replacement for thread_command. + * It is used for main executables to specify the location (file offset) + * of main(). If -stack_size was used at link time, the stacksize + * field will contain the stack size need for the main thread. + *) +type entry_point_command = { + cmd: int [@size 4]; + cmdsize: int [@size 4]; + entryoff: int [@size 8]; (* uint64_t file __TEXT offset of main *) + stacksize: int [@size 8]; (* uint64_t if not zero, initial stack size *) +} + +let sizeof_entry_point_command = 24 (* bytes *) + +(* + * The source_version_command is an optional load command containing + * the version of the sources used to build the binary. + *) +type source_version_command = { + cmd: int [@size 4]; (* LC_SOURCE_VERSION *) + cmdsize: int [@size 4]; + version: int [@size 8]; (* A.B.C.D.E packed as a24.b10.c10.d10.e10 *) +} + +(* + * The LC_DATA_IN_CODE load commands uses a linkedit_data_command + * to point to an array of data_in_code_entry entries. Each entry + * describes a range of data in a code section. + *) +type data_in_code_entry = { + offset: int [@size 4]; (* from mach_header to start of data range*) + length: int [@size 2]; (* number of bytes in data range *) + kind: int [@size 2]; (* a DICE_KIND_* value *) +} + +(* ==================================== *) +(* END Load Commands *) +(* ==================================== *) + +(* undefined, or internal commands not in loader.h *) +(* +type unimplemented_command = { + cmd: int; (* 4 *) + cmdsize: int; (* 4 we'll just read past the cmdsize and hope that it actually has a cmdsize value...*) +} + *) + +let kLC_REQ_DYLD = 0x80000000 +let kLC_LOAD_WEAK_DYLIB = 0x18 lor kLC_REQ_DYLD +let kLC_RPATH = 0x1c lor kLC_REQ_DYLD +let kLC_REEXPORT_DYLIB = 0x1f lor kLC_REQ_DYLD +let kLC_DYLD_INFO_ONLY = 0x22 lor kLC_REQ_DYLD +let kLC_LOAD_UPWARD_DYLIB = 0x23 lor kLC_REQ_DYLD +let kLC_MAIN = 0x28 lor kLC_REQ_DYLD + +exception Bad_load_command of int * string + +type cmd = + | LC_SEGMENT + | LC_SYMTAB + | LC_SYMSEG + | LC_THREAD + | LC_UNIXTHREAD + | LC_LOADFVMLIB + | LC_IDFVMLIB + | LC_IDENT + | LC_FVMFILE + | LC_PREPAGE + | LC_DYSYMTAB + | LC_LOAD_DYLIB + | LC_ID_DYLIB + | LC_LOAD_DYLINKER + | LC_ID_DYLINKER + | LC_PREBOUND_DYLIB + | LC_ROUTINES + | LC_SUB_FRAMEWORK + | LC_SUB_UMBRELLA + | LC_SUB_CLIENT + | LC_SUB_LIBRARY + | LC_TWOLEVEL_HINTS + | LC_PREBIND_CKSUM + | LC_LOAD_WEAK_DYLIB + | LC_SEGMENT_64 + | LC_ROUTINES_64 + | LC_UUID + | LC_RPATH + | LC_CODE_SIGNATURE + | LC_SEGMENT_SPLIT_INFO + | LC_REEXPORT_DYLIB + | LC_LAZY_LOAD_DYLIB + | LC_ENCRYPTION_INFO + | LC_DYLD_INFO + | LC_DYLD_INFO_ONLY + | LC_LOAD_UPWARD_DYLIB + | LC_VERSION_MIN_MACOSX + | LC_VERSION_MIN_IPHONEOS + | LC_FUNCTION_STARTS + | LC_DYLD_ENVIRONMENT + | LC_MAIN + | LC_DATA_IN_CODE + | LC_SOURCE_VERSION + | LC_DYLIB_CODE_SIGN_DRS + | LC_ENCRYPTION_INFO_64 + | LC_LINKER_OPTION + | LC_LINKER_OPTIMIZATION_HINT + +let cmd_int_to_string = + function + | 0x1 -> "LC_SEGMENT" + | 0x2 -> "LC_SYMTAB" + | 0x3 -> "LC_SYMSEG" + | 0x4 -> "LC_THREAD" + | 0x5 -> "LC_UNIXTHREAD" + | 0x6 -> "LC_LOADFVMLIB" + | 0x7 -> "LC_IDFVMLIB" + | 0x8 -> "LC_IDENT" + | 0x9 -> "LC_FVMFILE" + | 0xa -> "LC_PREPAGE" + | 0xb -> "LC_DYSYMTAB" + | 0xc -> "LC_LOAD_DYLIB" + | 0xd -> "LC_ID_DYLIB" + | 0xe -> "LC_LOAD_DYLINKER" + | 0xf -> "LC_ID_DYLINKER" + | 0x10 -> "LC_PREBOUND_DYLIB" + | 0x11 -> "LC_ROUTINES" + | 0x12 -> "LC_SUB_FRAMEWORK" + | 0x13 -> "LC_SUB_UMBRELLA" + | 0x14 -> "LC_SUB_CLIENT" + | 0x15 -> "LC_SUB_LIBRARY" + | 0x16 -> "LC_TWOLEVEL_HINTS" + | 0x17 -> "LC_PREBIND_CKSUM" + | cmd when cmd = kLC_LOAD_WEAK_DYLIB -> "LC_LOAD_WEAK_DYLIB" + | 0x19 -> "LC_SEGMENT_64" + | 0x1a -> "LC_ROUTINES_64" + | 0x1b -> "LC_UUID" + | cmd when cmd = kLC_RPATH -> "LC_RPATH" + | 0x1d -> "LC_CODE_SIGNATURE" + | 0x1e -> "LC_SEGMENT_SPLIT_INFO" + | cmd when cmd = kLC_REEXPORT_DYLIB -> "LC_REEXPORT_DYLIB" + | 0x20 -> "LC_LAZY_LOAD_DYLIB" + | 0x21 -> "LC_ENCRYPTION_INFO" + | 0x22 -> "LC_DYLD_INFO" + | cmd when cmd = kLC_DYLD_INFO_ONLY -> "LC_DYLD_INFO_ONLY" + | cmd when cmd = kLC_LOAD_UPWARD_DYLIB -> "LC_LOAD_UPWARD_DYLIB" + | 0x24 -> "LC_VERSION_MIN_MACOSX" + | 0x25 -> "LC_VERSION_MIN_IPHONEOS" + | 0x26 -> "LC_FUNCTION_STARTS" + | 0x27 -> "LC_DYLD_ENVIRONMENT" + | cmd when cmd = kLC_MAIN -> "LC_MAIN" + | 0x29 -> "LC_DATA_IN_CODE" + | 0x2A -> "LC_SOURCE_VERSION" + | 0x2B -> "LC_DYLIB_CODE_SIGN_DRS" + | 0x2C -> "LC_ENCRYPTION_INFO_64" + | 0x2D -> "LC_LINKER_OPTION" + | 0x2E -> "LC_LINKER_OPTIMIZATION_HINT" + | cmd -> Printf.sprintf "UKNOWN LOAD COMMAND 0x%x" cmd + +let to_cmd = + function + | 0x1 -> LC_SEGMENT + | 0x2 -> LC_SYMTAB + | 0x3 -> LC_SYMSEG + | 0x4 -> LC_THREAD + | 0x5 -> LC_UNIXTHREAD + | 0x6 -> LC_LOADFVMLIB + | 0x7 -> LC_IDFVMLIB + | 0x8 -> LC_IDENT + | 0x9 -> LC_FVMFILE + | 0xa -> LC_PREPAGE + | 0xb -> LC_DYSYMTAB + | 0xc -> LC_LOAD_DYLIB + | 0xd -> LC_ID_DYLIB + | 0xe -> LC_LOAD_DYLINKER + | 0xf -> LC_ID_DYLINKER + | 0x10 -> LC_PREBOUND_DYLIB + | 0x11 -> LC_ROUTINES + | 0x12 -> LC_SUB_FRAMEWORK + | 0x13 -> LC_SUB_UMBRELLA + | 0x14 -> LC_SUB_CLIENT + | 0x15 -> LC_SUB_LIBRARY + | 0x16 -> LC_TWOLEVEL_HINTS + | 0x17 -> LC_PREBIND_CKSUM + | cmd when cmd = kLC_LOAD_WEAK_DYLIB -> LC_LOAD_WEAK_DYLIB + | 0x19 -> LC_SEGMENT_64 + | 0x1a -> LC_ROUTINES_64 + | 0x1b -> LC_UUID + | cmd when cmd = kLC_RPATH -> LC_RPATH + | 0x1d -> LC_CODE_SIGNATURE + | 0x1e -> LC_SEGMENT_SPLIT_INFO + | cmd when cmd = kLC_REEXPORT_DYLIB -> LC_REEXPORT_DYLIB + | 0x20 -> LC_LAZY_LOAD_DYLIB + | 0x21 -> LC_ENCRYPTION_INFO + | 0x22 -> LC_DYLD_INFO + | cmd when cmd = kLC_DYLD_INFO_ONLY -> LC_DYLD_INFO_ONLY + | cmd when cmd = kLC_LOAD_UPWARD_DYLIB -> LC_LOAD_UPWARD_DYLIB + | 0x24 -> LC_VERSION_MIN_MACOSX + | 0x25 -> LC_VERSION_MIN_IPHONEOS + | 0x26 -> LC_FUNCTION_STARTS + | 0x27 -> LC_DYLD_ENVIRONMENT + | cmd when cmd = kLC_MAIN -> LC_MAIN + | 0x29 -> LC_DATA_IN_CODE + | 0x2A -> LC_SOURCE_VERSION + | 0x2B -> LC_DYLIB_CODE_SIGN_DRS + | 0x2C -> LC_ENCRYPTION_INFO_64 + | 0x2D -> LC_LINKER_OPTION + | 0x2E -> LC_LINKER_OPTIMIZATION_HINT + | cmd -> raise @@ Bad_load_command (cmd,(Printf.sprintf "0x%x" cmd)) + +let cmd_to_int = + function + | LC_SEGMENT -> 0x1 + | LC_SYMTAB -> 0x2 + | LC_SYMSEG -> 0x3 + | LC_THREAD -> 0x4 + | LC_UNIXTHREAD -> 0x5 + | LC_LOADFVMLIB -> 0x6 + | LC_IDFVMLIB -> 0x7 + | LC_IDENT -> 0x8 + | LC_FVMFILE -> 0x9 + | LC_PREPAGE -> 0xa + | LC_DYSYMTAB -> 0xb + | LC_LOAD_DYLIB -> 0xc + | LC_ID_DYLIB -> 0xd + | LC_LOAD_DYLINKER -> 0xe + | LC_ID_DYLINKER -> 0xf + | LC_PREBOUND_DYLIB -> 0x10 + | LC_ROUTINES -> 0x11 + | LC_SUB_FRAMEWORK -> 0x12 + | LC_SUB_UMBRELLA -> 0x13 + | LC_SUB_CLIENT -> 0x14 + | LC_SUB_LIBRARY -> 0x15 + | LC_TWOLEVEL_HINTS -> 0x16 + | LC_PREBIND_CKSUM -> 0x17 + | LC_LOAD_WEAK_DYLIB -> kLC_LOAD_WEAK_DYLIB + | LC_SEGMENT_64 -> 0x19 + | LC_ROUTINES_64 -> 0x1a + | LC_UUID -> 0x1b + | LC_RPATH -> kLC_RPATH + | LC_CODE_SIGNATURE -> 0x1d + | LC_SEGMENT_SPLIT_INFO -> 0x1e + | LC_REEXPORT_DYLIB -> kLC_REEXPORT_DYLIB + | LC_LAZY_LOAD_DYLIB -> 0x20 + | LC_ENCRYPTION_INFO -> 0x21 + | LC_DYLD_INFO -> 0x22 + | LC_DYLD_INFO_ONLY -> kLC_DYLD_INFO_ONLY + | LC_LOAD_UPWARD_DYLIB -> kLC_LOAD_UPWARD_DYLIB + | LC_VERSION_MIN_MACOSX -> 0x24 + | LC_VERSION_MIN_IPHONEOS -> 0x25 + | LC_FUNCTION_STARTS -> 0x26 + | LC_DYLD_ENVIRONMENT -> 0x27 + | LC_MAIN -> kLC_MAIN + | LC_DATA_IN_CODE -> 0x29 + | LC_SOURCE_VERSION -> 0x2A + | LC_DYLIB_CODE_SIGN_DRS -> 0x2B + | LC_ENCRYPTION_INFO_64 -> 0x2C + | LC_LINKER_OPTION -> 0x2D + | LC_LINKER_OPTIMIZATION_HINT -> 0x2E + +let cmd_to_string cmd = cmd_to_int cmd |> cmd_int_to_string + +type lc_t = + (* Constants for the cmd field of all load commands, the type *) + | LC_SEGMENT of segment_command + | LC_SYMTAB of symtab_command + | LC_SYMSEG of symseg_command + | LC_THREAD of thread_command + | LC_UNIXTHREAD of thread_command + | LC_LOADFVMLIB of fvmlib_command + | LC_IDFVMLIB of fvmlib_command + | LC_IDENT of ident_command + | LC_FVMFILE of fvmfile_command + | LC_PREPAGE of load_command + | LC_DYSYMTAB of dysymtab_command + | LC_LOAD_DYLIB of dylib_command + | LC_ID_DYLIB of dylib_command + | LC_LOAD_DYLINKER of dylinker_command + | LC_ID_DYLINKER of dylinker_command + | LC_PREBOUND_DYLIB of prebound_dylib_command + | LC_ROUTINES of routines_command + | LC_SUB_FRAMEWORK of sub_framework_command + | LC_SUB_UMBRELLA of sub_umbrella_command + | LC_SUB_CLIENT of sub_client_command + | LC_SUB_LIBRARY of sub_library_command + | LC_TWOLEVEL_HINTS of twolevel_hints_command + | LC_PREBIND_CKSUM of prebind_cksum_command + | LC_LOAD_WEAK_DYLIB of dylib_command + | LC_SEGMENT_64 of segment_command_64 + | LC_ROUTINES_64 of routines_command_64 + | LC_UUID of uuid_command + | LC_RPATH of rpath_command + | LC_CODE_SIGNATURE of linkedit_data_command + | LC_SEGMENT_SPLIT_INFO of linkedit_data_command + | LC_REEXPORT_DYLIB of dylib_command + | LC_LAZY_LOAD_DYLIB of dylib_command (* not in header file *) + | LC_ENCRYPTION_INFO of encryption_info_command + | LC_DYLD_INFO of dyld_info_command + | LC_DYLD_INFO_ONLY of dyld_info_command + | LC_LOAD_UPWARD_DYLIB of dylib_command (* not in header file *) + | LC_VERSION_MIN_MACOSX of version_min_command + | LC_VERSION_MIN_IPHONEOS of version_min_command + | LC_FUNCTION_STARTS of linkedit_data_command + | LC_DYLD_ENVIRONMENT of dylinker_command + | LC_MAIN of entry_point_command + | LC_DATA_IN_CODE of linkedit_data_command + | LC_SOURCE_VERSION of source_version_command + | LC_DYLIB_CODE_SIGN_DRS of linkedit_data_command + | LC_ENCRYPTION_INFO_64 of encryption_info_command_64 + | LC_LINKER_OPTION of linkedit_data_command + | LC_LINKER_OPTIMIZATION_HINT of linkedit_data_command + | LC_UNIMPLEMENTED of load_command + +(* +let get cmd f = + match cmd with +| 0x1 -> LC_SEGMENT (* segment of this file to be mapped *) +| 0x2 -> LC_SYMTAB (* link-edit stab symbol table info *) +| 0x3 -> LC_SYMSEG (* link-edit gdb symbol table info (obsolete) *) +| 0x4 -> LC_THREAD (* thread *) +| 0x5 -> LC_UNIXTHREAD (* unix thread (includes a stack) *) +| 0x6 -> LC_LOADFVMLIB (* load a specified fixed VM shared library *) +| 0x7 -> LC_IDFVMLIB (* fixed VM shared library identification *) +| 0x8 -> LC_IDENT (* object identification info (obsolete) *) +| 0x9 -> LC_FVMFILE (* fixed VM file inclusion (internal use) *) +| 0xa -> LC_PREPAGE (* prepage command (internal use) *) +| 0xb -> LC_DYSYMTAB (* dynamic link-edit symbol table info *) +| 0xc -> LC_LOAD_DYLIB (* load a dynamically linked shared library *) +| 0xd -> LC_ID_DYLIB (* dynamically linked shared lib ident *) +| 0xe -> LC_LOAD_DYLINKER (* load a dynamic linker *) +| 0xf -> LC_ID_DYLINKER (* dynamic linker identification *) +| 0x10 -> LC_PREBOUND_DYLIB (* modules prebound for a dynamically *) +(* linked shared library *) +| 0x11 -> LC_ROUTINES (* image routines *) +| 0x12 ->LC_SUB_FRAMEWORK (* sub framework *) +| 0x13 -> LC_SUB_UMBRELLA (* sub umbrella *) +| 0x14 -> LC_SUB_CLIENT (* sub client *) +| 0x15 -> LC_SUB_LIBRARY (* sub library *) +| 0x16 -> LC_TWOLEVEL_HINTS (* two-level namespace lookup hints *) +| 0x17 -> LC_PREBIND_CKSUM (* prebind checksum *) +(* + * load a dynamically linked shared library that is allowed to be missing + * (all symbols are weak imported). + *) +| cmd when cmd = kLC_LOAD_WEAK_DYLIB -> LC_LOAD_WEAK_DYLIB +| 0x19 -> LC_SEGMENT_64 (* 64-bit segment of this file to be mapped *) +| 0x1a -> LC_ROUTINES_64 (* 64-bit image routines *) +| 0x1b -> LC_UUID (* the uuid *) + +| cmd when cmd = kLC_RPATH -> LC_RPATH (* runpath additions *) +| 0x1d -> LC_CODE_SIGNATURE (* local of code signature *) +| 0x1e -> LC_SEGMENT_SPLIT_INFO (* local of info to split segments *) +| cmd when cmd = kLC_REEXPORT_DYLIB -> LC_REEXPORT_DYLIB (* load and re-export dylib *) +| 0x20 -> LC_LAZY_LOAD_DYLIB (* delay load of dylib until first use *) +| 0x21 -> LC_ENCRYPTION_INFO (* encrypted segment information *) +| 0x22 -> LC_DYLD_INFO (* compressed dyld information *) +| cmd when cmd = kLC_DYLD_INFO_ONLY -> LC_DYLD_INFO_ONLY (* compressed dyld information only *) +| cmd when cmd = kLC_LOAD_UPWARD_DYLIB -> LC_LOAD_UPWARD_DYLIB (* load upward dylib *) +| 0x24 -> LC_VERSION_MIN_MACOSX (* build for MacOSX min OS version *) +| 0x25 -> LC_VERSION_MIN_IPHONEOS (* build for iPhoneOS min OS version *) +| 0x26 -> LC_FUNCTION_STARTS (* compressed table of function start addresses *) +| 0x27 -> LC_DYLD_ENVIRONMENT (* string for dyld to treat like environment variable *) +| cmd when cmd = kLC_MAIN -> LC_MAIN (* replacement for LC_UNIXTHREAD *) +| 0x29 -> LC_DATA_IN_CODE (* table of non-instructions in __text *) +| 0x2A -> LC_SOURCE_VERSION (* source version used to build binary *) +| 0x2B -> LC_DYLIB_CODE_SIGN_DRS (* Code signing DRs copied from linked dylibs *) +| 0x2C -> LC_ENCRYPTION_INFO_64(* 64-bit encrypted segment information *) +| 0x2D -> LC_LINKER_OPTION (* linker options in MH_OBJECT files *) +| 0x2E -> LC_LINKER_OPTIMIZATION_HINT(* optimization hints in MH_OBJECT files *) +| cmd -> raise @@ Bad_load_command cmd + *) + +type lc = { + cmd: cmd; (* 4 bytes *) + cmdsize: int; (* 4 *) + t: lc_t; +} diff --git a/lib/mach/MachNlist.ml b/lib/mach/MachNlist.ml deleted file mode 100644 index b57ea76..0000000 --- a/lib/mach/MachNlist.ml +++ /dev/null @@ -1,117 +0,0 @@ -open Printf - -open Binary -open MachLoadCommand - -(* -struct nlist_64 { - union { - uint32_t n_strx; /* index into the string table */ - } n_un; - uint8_t n_type; /* type flag, see below */ - uint8_t n_sect; /* section number or NO_SECT */ - uint16_t n_desc; /* see */ - uint64_t n_value; /* value of this symbol (or stab offset) */ -}; - *) - -type nlist = { - strx: int; (* 32 *) - n_type: int; (* byte *) - n_sect: int; (* byte *) - n_desc: int; (* uint16_t *) - n_value: int; (* 64 byte *) -} - -let kNLIST_TYPE_MASK = 0xe -let kNLIST_TYPE_GLOBAL = 0x1 -let kNLIST_TYPE_LOCAL = 0x0 - -type symbol = nlist * string - -let sizeof_nlist = 16 - -(* printing *) - -let nlist_to_string nlist = - sprintf "strx: %4u type: 0x%02x sect: %x desc: 0x%3u value: %x" - nlist.strx - nlist.n_type - nlist.n_sect - nlist.n_desc - nlist.n_value - -let print_nlist nlist = - printf "%s\n" (nlist_to_string nlist) - -let print_nlists nlists = - List.iteri (fun i nlist -> printf "(%3d): " i; print_nlist nlist) nlists; (* because 3 space formatting is _enough_ *) - (* i.e., need to learn format module *) - printf "\n" - -let print_symlist symlist = - List.iteri (fun i (nlist,symname) -> - printf "%-10x %s sect: %x type: %002x desc: 0x%x\n" nlist.n_value symname nlist.n_sect nlist.n_type nlist.n_desc - ) symlist; - printf "\n" - -(* build nlist, symtabs, symlist *) - -let rec get_nlist_it binary offset nsyms acc = - if (nsyms <= 0) then - List.rev acc - else - let strx = u32 binary offset in - let n_type = u8 binary (offset + 4) in - let n_sect = u8 binary (offset + 5) in - let n_desc = u16 binary (offset + 6) in - let n_value = u64 binary (offset + 8) in - let nlist = {strx; n_type; n_sect; n_desc; n_value;} in - get_nlist_it binary (offset + sizeof_nlist) (nsyms - 1) (nlist::acc) - -type t = (nlist * string) list - -let rec get_symlist_it binary strsize nlists acc = - match nlists with - [] -> List.rev acc - | nlist::nlists -> - let index = Bytes.index_from binary nlist.strx '\000' in - let symname = Bytes.sub_string binary nlist.strx (index - nlist.strx) in - get_symlist_it binary strsize nlists ((nlist,symname)::acc) - -let get_symlist binary (cmd, cmdsize, symtab_command) nlists = - match symtab_command with - SYMTAB symtab -> - let str_bytes = Bytes.sub binary symtab.stroff symtab.strsize in - get_symlist_it str_bytes symtab.strsize nlists [] - | _ -> [] - -(* Move this out to remove goblin deps *) -let nlist_flag_to_symbol_kind = - function - | 0xe -> Goblin.Symbol.Local - | 0xf -> Goblin.Symbol.Export - | 0x1 -> Goblin.Symbol.Import - | _ -> Goblin.Symbol.Other - -let nlist_to_symbol_data (nlist, symbol) = - let kind = `Kind (nlist_flag_to_symbol_kind @@ (nlist.n_type)) in - let name = `Name (symbol) in - let offset = `Offset (nlist.n_value) in - [name; offset; kind] - -let get_symbols binary ((_,_, symtab_command) as cmd) = - match symtab_command with - | SYMTAB symtab -> - let nlist_bytes = Bytes.sub binary symtab.symoff (sizeof_nlist * symtab.nsyms) in - let nlists = get_nlist_it nlist_bytes 0 symtab.nsyms [] in - let symbols = get_symlist binary cmd nlists in - (* remove this dep here *) - let goblin_symbols = List.map (nlist_to_symbol_data) symbols in - goblin_symbols - | _ -> [] - -let filter_by_kind kind = List.filter (fun symbol -> try Goblin.Symbol.find_symbol_kind symbol = kind with Not_found -> false) - -let print_symbols symbols = - List.iter (Goblin.Symbol.print_symbol_data ~like_nlist:true) symbols diff --git a/lib/mach/MachSection.ml b/lib/mach/MachSection.ml new file mode 100644 index 0000000..6338e46 --- /dev/null +++ b/lib/mach/MachSection.ml @@ -0,0 +1,28 @@ +(* + * A segment is made up of zero or more sections. Non-MH_OBJECT files have + * all of their segments with the proper sections in each, and padded to the + * specified segment alignment when produced by the link editor. The first + * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header + * and load commands of the object file before its first section. The zero + * fill sections are always last in their segment (in all formats). This + * allows the zeroed segment padding to be mapped into memory where zero fill + * sections might be. The gigabyte zero fill sections, those with the section + * type S_GB_ZEROFILL, can only be in a segment with sections of this type. + * These segments are then placed after all other segments. + * + * The MH_OBJECT format has all of its sections in one segment for + * compactness. There is no padding to a specified segment boundary and the + * mach_header and load commands are not part of the segment. + * + * Sections with the same section name, sectname, going into the same segment, + * segname, are combined by the link editor. The resulting section is aligned + * to the maximum alignment of the combined sections and is the new section's + * alignment. The combined sections are aligned to their original alignment in + * the combined section. Any padded bytes to get the specified alignment are + * zeroed. + * + * The format of the relocation entries referenced by the reloff and nreloc + * fields of the section structure for mach object files is described in the + * header file . + *) + diff --git a/lib/mach/MachSegment64.ml b/lib/mach/MachSegment64.ml deleted file mode 100644 index 8bc63df..0000000 --- a/lib/mach/MachSegment64.ml +++ /dev/null @@ -1,212 +0,0 @@ -(* - * A segment is made up of zero or more sections. Non-MH_OBJECT files have - * all of their segments with the proper sections in each, and padded to the - * specified segment alignment when produced by the link editor. The first - * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header - * and load commands of the object file before its first section. The zero - * fill sections are always last in their segment (in all formats). This - * allows the zeroed segment padding to be mapped into memory where zero fill - * sections might be. The gigabyte zero fill sections, those with the section - * type S_GB_ZEROFILL, can only be in a segment with sections of this type. - * These segments are then placed after all other segments. - * - * The MH_OBJECT format has all of its sections in one segment for - * compactness. There is no padding to a specified segment boundary and the - * mach_header and load commands are not part of the segment. - * - * Sections with the same section name, sectname, going into the same segment, - * segname, are combined by the link editor. The resulting section is aligned - * to the maximum alignment of the combined sections and is the new section's - * alignment. The combined sections are aligned to their original alignment in - * the combined section. Any padded bytes to get the specified alignment are - * zeroed. - * - * The format of the relocation entries referenced by the reloff and nreloc - * fields of the section structure for mach object files is described in the - * header file . - *) - -(* -struct section_64 { (* for 64-bit architectures *) - char sectname[16]; (* name of this section *) - char segname[16]; (* segment this section goes in *) - uint64_t addr; (* memory address of this section *) - uint64_t size; (* size in bytes of this section *) - uint32_t offset; (* file offset of this section *) - uint32_t align; (* section alignment (power of 2) *) - uint32_t reloff; (* file offset of relocation entries *) - uint32_t nreloc; (* number of relocation entries *) - uint32_t flags; (* flags (section type and attributes)*) - uint32_t reserved1; (* reserved (for offset or index) *) - uint32_t reserved2; (* reserved (for count or sizeof) *) - uint32_t reserved3; (* reserved *) -}; - -*) - -(* - * The flags field of a section structure is separated into two parts a section - * type and section attributes. The section types are mutually exclusive (it - * can only have one type) but the section attributes are not (it may have more - * than one attribute). - *) -let kSECTION_TYPE = 0x000000ff (* 256 section types *) -let kSECTION_ATTRIBUTES = 0xffffff00 (* 24 section attributes *) - -(* Constants for the type of a section *) -let kS_REGULAR = 0x0 (* regular section *) -let kS_ZEROFILL = 0x1 (* zero fill on demand section *) -let kS_CSTRING_LITERALS = 0x2 (* section with only literal C strings*) -let kS_4BYTE_LITERALS = 0x3 (* section with only 4 byte literals *) -let kS_8BYTE_LITERALS = 0x4 (* section with only 8 byte literals *) -let kS_LITERAL_POINTERS = 0x5 (* section with only pointers to *) - (* literals *) -(* - * For the two types of symbol pointers sections and the symbol stubs section - * they have indirect symbol table entries. For each of the entries in the - * section the indirect symbol table entries, in corresponding order in the - * indirect symbol table, start at the index stored in the reserved1 field - * of the section structure. Since the indirect symbol table entries - * correspond to the entries in the section the number of indirect symbol table - * entries is inferred from the size of the section divided by the size of the - * entries in the section. For symbol pointers sections the size of the entries - * in the section is 4 bytes and for symbol stubs sections the byte size of the - * stubs is stored in the reserved2 field of the section structure. - *) -let kS_NON_LAZY_SYMBOL_POINTERS = 0x6 (* section with only non-lazy - symbol pointers *) -let kS_LAZY_SYMBOL_POINTERS = 0x7 (* section with only lazy symbol - pointers *) -let kS_SYMBOL_STUBS = 0x8 (* section with only symbol - stubs, byte size of stub in - the reserved2 field *) -let kS_MOD_INIT_FUNC_POINTERS = 0x9 (* section with only function - pointers for initialization*) -let kS_MOD_TERM_FUNC_POINTERS = 0xa (* section with only function - pointers for termination *) -let kS_COALESCED = 0xb (* section contains symbols that - are to be coalesced *) -let kS_GB_ZEROFILL = 0xc (* zero fill on demand section - (that can be larger than 4 - gigabytes) *) -let kS_INTERPOSING = 0xd (* section with only pairs of - function pointers for - interposing *) -let kS_16BYTE_LITERALS = 0xe (* section with only 16 byte - literals *) -let kS_DTRACE_DOF = 0xf (* section contains - DTrace Object Format *) -let kS_LAZY_DYLIB_SYMBOL_POINTERS = 0x10 (* section with only lazy - symbol pointers to lazy - loaded dylibs *) -(* - * Section types to support thread local variables - *) -let kS_THREAD_LOCAL_REGULAR = 0x11 (* template of initial - values for TLVs *) -let kS_THREAD_LOCAL_ZEROFILL = 0x12 (* template of initial - values for TLVs *) -let kS_THREAD_LOCAL_VARIABLES = 0x13 (* TLV descriptors *) -let kS_THREAD_LOCAL_VARIABLE_POINTERS = 0x14 (* pointers to TLV - descriptors *) -let kS_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15 (* functions to call - to initialize TLV - values *) - -(* - * Constants for the section attributes part of the flags field of a section - * structure. - *) -let kSECTION_ATTRIBUTES_USR = 0xff000000 (* User setable attributes *) -let kS_ATTR_PURE_INSTRUCTIONS = 0x80000000 (* section contains only true - machine instructions *) -let kS_ATTR_NO_TOC = 0x40000000 (* section contains coalesced - symbols that are not to be - in a ranlib table of - contents *) -let kS_ATTR_STRIP_STATIC_SYMS = 0x20000000 (* ok to strip static symbols - in this section in files - with the MH_DYLDLINK flag *) -let kS_ATTR_NO_DEAD_STRIP = 0x10000000 (* no dead stripping *) -let kS_ATTR_LIVE_SUPPORT = 0x08000000 (* blocks are live if they - reference live blocks *) -let kS_ATTR_SELF_MODIFYING_CODE = 0x04000000 (* Used with i386 code stubs - written on by dyld *) -(* - * If a segment contains any sections marked with S_ATTR_DEBUG then all - * sections in that segment must have this attribute. No section other than - * a section marked with this attribute may reference the contents of this - * section. A section with this attribute may contain no symbols and must have - * a section type S_REGULAR. The static linker will not copy section contents - * from sections with this attribute into its output file. These sections - * generally contain DWARF debugging info. - *) -let kS_ATTR_DEBUG = 0x02000000 (* debug section *) -let kSECTION_ATTRIBUTES_SYS = 0x00ffff00 (* system setable attributes *) -let kS_ATTR_SOME_INSTRUCTIONS = 0x00000400 (* section contains some - machine instructions *) -let kS_ATTR_EXT_RELOC = 0x00000200 (* section has external - relocation entries *) -let kS_ATTR_LOC_RELOC = 0x00000100 (* section has local - relocation entries *) - - -(* - * The names of segments and sections in them are mostly meaningless to the - * link-editor. But there are few things to support traditional UNIX - * executables that require the link-editor and assembler to use some names - * agreed upon by convention. - * - * The initial protection of the "__TEXT" segment has write protection turned - * off (not writeable). - * - * The link-editor will allocate common symbols at the end of the "__common" - * section in the "__DATA" segment. It will create the section and segment - * if needed. - *) - -(* The currently known segment names and the section names in those segments *) - -let kSEG_PAGEZERO = "__PAGEZERO" (* the pagezero segment which has no *) - (* protections and catches NULL *) - (* references for MH_EXECUTE files *) - - -let kSEG_TEXT = "__TEXT" (* the tradition UNIX text segment *) -let kSECT_TEXT = "__text" (* the real text part of the text *) - (* section no headers, and no padding *) -let kSECT_FVMLIB_INIT0 = "__fvmlib_init0" (* the fvmlib initialization *) - (* section *) -let kSECT_FVMLIB_INIT1 = "__fvmlib_init1" (* the section following the *) - (* fvmlib initialization *) - (* section *) - -let kSEG_DATA = "__DATA" (* the tradition UNIX data segment *) -let kSECT_DATA = "__data" (* the real initialized data section *) - (* no padding, no bss overlap *) -let kSECT_BSS = "__bss" (* the real uninitialized data section*) - (* no padding *) -let kSECT_COMMON = "__common" (* the section common symbols are *) - (* allocated in by the link editor *) - -let kSEG_OBJC = "__OBJC" (* objective-C runtime segment *) -let kSECT_OBJC_SYMBOLS = "__symbol_table" (* symbol table *) -let kSECT_OBJC_MODULES = "__module_info" (* module information *) -let kSECT_OBJC_STRINGS = "__selector_strs" (* string table *) -let kSECT_OBJC_REFS = "__selector_refs" (* string table *) - -let kSEG_ICON = "__ICON" (* the icon segment *) -let kSECT_ICON_HEADER = "__header" (* the icon headers *) -let kSECT_ICON_TIFF = "__tiff" (* the icons in tiff format *) - -let kSEG_LINKEDIT = "__LINKEDIT" (* the segment containing all structs *) - (* created and maintained by the link *) - (* editor. Created with -seglinkedit *) - (* option to ld(1) for MH_EXECUTE and *) - (* FVMLIB file types only *) - -let kSEG_UNIXSTACK = "__UNIXSTACK" (* the unix stack segment *) - -let kSEG_IMPORT = "__IMPORT" (* the segment for the self (dyld) *) - (* modifing code stubs that has read, *) - (* write and execute permissions *) diff --git a/lib/mach/MachSymbolTable.ml b/lib/mach/MachSymbolTable.ml new file mode 100644 index 0000000..c93cbe2 --- /dev/null +++ b/lib/mach/MachSymbolTable.ml @@ -0,0 +1,93 @@ +open Printf + +open Binary +open MachLoadCommand.Types + +(* +struct nlist_64 { + union { + uint32_t n_strx; /* index into the string table */ + } n_un; + uint8_t n_type; /* type flag, see below */ + uint8_t n_sect; /* section number or NO_SECT */ + uint16_t n_desc; /* see */ + uint64_t n_value; /* value of this symbol (or stab offset) */ +}; + *) + +type nlist = { + n_strx: int; (* 4 *) + n_type: int; (* 1 *) + n_sect: int; (* 1 *) + n_desc: int; (* 2 *) + n_value: int; (* 8 *) +} + +let kNLIST_TYPE_MASK = 0xe +let kNLIST_TYPE_GLOBAL = 0x1 +let kNLIST_TYPE_LOCAL = 0x0 + +type symbol = nlist * string + +type t = symbol list + +let sizeof_nlist = 16 + +(* printing *) + +let n_type_to_string n_type = + match n_type land kNLIST_TYPE_MASK with + | 0x0 -> "LOCAL" + | 0x1 -> "GLOBAL" + | other -> Printf.sprintf "OTHER 0x%x" other + +let nlist_to_string nlist = + sprintf "strx: %4u type: 0x%02x sect: %x desc: 0x%3u value: %x" + nlist.n_strx + nlist.n_type + nlist.n_sect + nlist.n_desc + nlist.n_value + +let print_nlist nlist = + printf "%s\n" @@ nlist_to_string nlist + +let print_nlists nlists = + List.iteri (fun i nlist -> printf "(%3d): " i; print_nlist nlist) nlists; (* because 3 space formatting is _enough_ *) + (* i.e., need to learn format module *) + printf "\n" + +let print symlist = + List.iteri (fun i (nlist,symname) -> + printf "%-10x %s sect: %x type: %002x desc: 0x%x\n" nlist.n_value symname nlist.n_sect nlist.n_type nlist.n_desc + ) symlist; + printf "\n" + +(* build nlist, symtabs, symlist *) + +let rec get_nlist_it binary offset nsyms acc = + if (nsyms <= 0) then + List.rev acc + else + let n_strx,o = u32o binary offset in + let n_type,o = u8o binary o in + let n_sect,o = u8o binary o in + let n_desc,o = u16o binary o in + let n_value,o = u64o binary o in + let nlist = {n_strx; n_type; n_sect; n_desc; n_value;} in + get_nlist_it binary o (nsyms - 1) (nlist::acc) + +let rec get_symlist_it binary offset nlists acc = + match nlists with + [] -> List.rev acc + | nlist::nlists -> + let symname = Binary.string binary (offset + nlist.n_strx) in + get_symlist_it binary offset nlists ((nlist,symname)::acc) + +let get_symlist binary symtab nlists = + get_symlist_it binary symtab.stroff nlists [] + +let get_symbols binary symtab = + let nlists = get_nlist_it binary symtab.symoff symtab.nsyms [] in + let symbols = get_symlist binary symtab nlists in + symbols diff --git a/lib/mach/mach.mldylib b/lib/mach/mach.mldylib new file mode 100644 index 0000000..fc27257 --- /dev/null +++ b/lib/mach/mach.mldylib @@ -0,0 +1,19 @@ +# OASIS_START +# DO NOT EDIT (digest: f564cdbce57aab570eaa64260e122d5f) +Mach +MachBindOpcodes +MachCpuTypes +MachFat +MachLoadCommand +MachLoadCommandTypes +MachConstants +MachExports +MachHeader +MachImports +MachSection +MachSymbolTable +MachRebaseOpcodes +MachVersion +MachCoverage +MachLoadCommandMacro +# OASIS_STOP diff --git a/lib/mach/mach.mllib b/lib/mach/mach.mllib new file mode 100644 index 0000000..fc27257 --- /dev/null +++ b/lib/mach/mach.mllib @@ -0,0 +1,19 @@ +# OASIS_START +# DO NOT EDIT (digest: f564cdbce57aab570eaa64260e122d5f) +Mach +MachBindOpcodes +MachCpuTypes +MachFat +MachLoadCommand +MachLoadCommandTypes +MachConstants +MachExports +MachHeader +MachImports +MachSection +MachSymbolTable +MachRebaseOpcodes +MachVersion +MachCoverage +MachLoadCommandMacro +# OASIS_STOP diff --git a/lib/rdr.mldylib b/lib/rdr.mldylib new file mode 100644 index 0000000..38bb930 --- /dev/null +++ b/lib/rdr.mldylib @@ -0,0 +1,4 @@ +# OASIS_START +# DO NOT EDIT (digest: e3ae9c50cee13b0646bcd43ae6ff723e) +LibRdr +# OASIS_STOP diff --git a/lib/rdr.mllib b/lib/rdr.mllib new file mode 100644 index 0000000..38bb930 --- /dev/null +++ b/lib/rdr.mllib @@ -0,0 +1,4 @@ +# OASIS_START +# DO NOT EDIT (digest: e3ae9c50cee13b0646bcd43ae6ff723e) +LibRdr +# OASIS_STOP diff --git a/lib/utils/Binary.ml b/lib/utils/Binary.ml index f1078b0..c5c0a09 100644 --- a/lib/utils/Binary.ml +++ b/lib/utils/Binary.ml @@ -1,3 +1,5 @@ +let debug = false + (* legacy, non-offset producing versions, eventually swap out when generating binary structs/records *) let u64 binary offset = let res = ref (Char.code @@ Bytes.get binary offset) in @@ -45,19 +47,41 @@ let u8o binary offset = (* strings and printing *) -(* max len must be size + offset *) -let string binary ?maxlen:(maxlen=0) offset = - let null_index = Bytes.index_from binary offset '\000' in - let len = if (null_index > maxlen && maxlen > 0) then maxlen else null_index in - if (len <= offset) then "" - else Bytes.sub_string binary offset (len - offset) - -let stringo binary ?maxlen:(maxlen=0) offset = - let null_index = Bytes.index_from binary offset '\000' in - let len = if (null_index > maxlen && maxlen > 0) then maxlen else null_index in - if (len <= offset) then "",offset+1 +let sub binary max offset = + let max_idx = max + offset in + let null_idx = Bytes.index_from binary offset '\000' in + let len = if (null_idx > max_idx && max > 0) then max_idx else null_idx in + if (len <= offset) then "",len + else Bytes.sub_string binary offset (len - offset),len + +let string binary ?max:(max=0) offset = + let str,_ = sub binary max offset in str + +let trim_null string = + let idx = + try + Bytes.index string '\000' + with + | Not_found -> 0 + in + if (idx <> 0) then + Bytes.sub_string string 0 idx + else + string + +let stringo binary ?num_bytes:(count=0) ?max:(max=0) offset = + if (count > 0) then + let o = offset+count in + let string = Bytes.sub_string binary offset count in + let trim = trim_null string in + if (debug) then Printf.printf "trim: %s\n" trim; + if (debug) then Printf.printf "string: %s\n" string; + trim, o else - (Bytes.sub_string binary offset (len - offset)), (len + 1) + let str,len = sub binary max offset in + if (str = "") then "",offset+1 + else + str, (len + 1) let print_bytes binary = let () = Bytes.iter (fun b -> Printf.printf "%x" (Char.code b)) binary in diff --git a/lib/utils/ByteCoverage.ml b/lib/utils/ByteCoverage.ml index 07ef139..5ef58fe 100644 --- a/lib/utils/ByteCoverage.ml +++ b/lib/utils/ByteCoverage.ml @@ -1,61 +1,332 @@ -(* TODO: create a normalize map function *) -(* Add a coverage struct with the map of understood data, and a map of unknown, which is the understood data ranges modulo the size *) -(* add a compute_unknown, from the current map *) +let debug = false -type kind = | Meta | Code | Unknown - | Symbol | String | StringTable - | Rela | PlatformSpecific - | Data +type tag = | Meta + | Code + | Unknown + | Symbol + | SymbolTable + | String + | StringTable + | Rela + | PlatformSpecific + | Data + | Invalid + | Semantic + | Zero -let kind_to_string = +let tag_to_string = function | Meta -> "Meta" | Code -> "Code" | Unknown -> "Unknown" | Symbol -> "Symbol" + | SymbolTable -> "SymbolTable" | String -> "String" | StringTable -> "StringTable" | Rela -> "Rela" | PlatformSpecific -> "PlatformSpecific" | Data -> "Data" + | Invalid -> "Invalid" + | Semantic -> "Semantic" + | Zero -> "Zero" + +(* MUST ADD THESE IF ADDING NEW TAG, probably a way to ensure compile time we're safe... *) +let tags = [ + "Meta";"Code"; + "Unknown";"Symbol"; + "SymbolTable";"String"; + "StringTable";"Rela"; + "PlatformSpecific";"Data"; + "Invalid";"Semantic"; + "Zero";] type data = { size: int; - kind: kind; + tag: tag; range_start: int; range_end: int; extra: string; understood: bool; + container: bool; } -module Map = Map.Make( +(* range specific *) +let is_contained d1 d2 = + (d1.range_start > d2.range_start) && (d1.range_end <= d2.range_end) + || (d1.range_start >= d2.range_start) && (d1.range_end < d2.range_end) +let contains d2 d1 = is_contained d1 d2 +let same_range d1 d2 = (d1.range_start = d2.range_start) && (d1.range_end = d2.range_end) +(* end range specific *) + +(* this is imperfect w.r.t list output sorting *) +let sort a b = + if (a.range_start = b.range_start) then + if (a.range_end = b.range_end) then + (Pervasives.compare a.tag b.tag) + else if (contains a b) then + -1 + else + 1 + else + if (contains a b) then + -1 + else (* we don't consider overlapping instances for now *) + Pervasives.compare a.range_start b.range_start + +(* DataSet specific functions *) +module DataSet = Set.Make( struct - type t=int - let compare= (fun a b -> Pervasives.compare a b) + type t = data + let compare = sort end) -type t = data Map.t +type t = { + data: DataSet.t; + size: int; + total_coverage: int; + total_understood: int; + percent_coverage: float; + percent_understood: float; + tags: string list; +} + +let mem = DataSet.mem +let add data set = DataSet.add data set (* DataSet.add *) +let empty = DataSet.empty (* DataSet.empty *) +let fold = DataSet.fold +let iter = DataSet.iter +let remove = DataSet.remove +let to_list s = DataSet.elements s +(* checks whether the range is redundant (covered) in our set already *) +let is_covered x dataset = + DataSet.exists (fun y -> + y <> x + && (is_contained x y) + ) dataset + +let is_semantic x = x.tag = Semantic + + +(* there exists an equal range in the dataset *) +let is_same_range x dataset = + DataSet.exists (fun y -> same_range x y && x <> y) dataset + +(* is a range which is contained by another range *) +let is_sub_range x dataset = + DataSet.exists (fun y -> + not (is_semantic y) + && is_contained x y + && x <> y) dataset -(* TODO check for inconsistencies in the map; *) -let total_coverage m = - Map.fold (fun key range acc -> - if (range.understood) then +let contains_something x dataset = + DataSet.exists (fun y -> contains x y) dataset + +(* is a "byte island"; + there does not exist a range is contained by it *) +let is_unique x dataset = + not (contains_something x dataset) + && not (is_sub_range x dataset) + +(* checks whether is a top-level container *) +let is_container x dataset = + not (is_semantic x) + && + ((contains_something x dataset + && not (is_sub_range x dataset)) + || is_unique x dataset) + +(* unused *) +let pick_with f dataset = + fold (fun data acc -> + if (f data) then + data + else + acc) + +(* partitions data into largest covering ranges, and redundant data *) +let normalize dataset = + let norm,rest = + DataSet.partition (fun data -> + is_container data dataset) dataset + in + fold (fun el (acc,rest) -> + if (is_same_range el acc) then + acc,(add el rest) + else + (add el acc),rest + ) norm (empty,rest) + +(* finalizes the dataset by setting containerhood: + a container is the first byte range found which is a container + and which hasn't been added to the accumulator yet +*) +let finalize dataset = + fold (fun el acc -> + if (is_container el dataset + && not (is_same_range el acc)) then + add {el with container = true} acc + else + add el acc) dataset empty + +let data_to_string (data:data) = + Printf.sprintf "size: %d tag: %s\n range_start: 0x%x range_end 0x%x understood: %b container: %b extra: %s" + data.size + (tag_to_string data.tag) + data.range_start + data.range_end + data.understood + data.container + data.extra + +let print_data data = + iter (fun data -> Printf.printf "%s\n" (data_to_string data)) data + +let print coverage = + print_data coverage.data; + Printf.printf "Total Coverage: %d / %d = %f\n" + coverage.total_coverage + coverage.size + coverage.percent_coverage; + Printf.printf "Understood Coverage: %d / %d = %f\n" + coverage.total_understood + coverage.size + coverage.percent_understood + +(* creates data; defaults container to false and auto-computes size *) +let create_data + ~tag:tag + ~r1:range_start + ~r2:range_end + ~extra:extra + ~understood:understood = + let size = range_end - range_start in + {size; tag; range_start; range_end; extra; understood; container = false} + +(* count the byte ranges via a condition *) +let count data condition = + fold (fun range acc -> + if (condition range) then range.size + acc else acc - ) m 0 + ) data 0 + +(* counts the coverage, primarily relying on range.container *) +let count_coverage dataset = + let total = count dataset (fun range -> + range.container + && range.tag <> Semantic) + in + let understood = count dataset + (fun range -> range.container + && range.understood + && range.tag <> Semantic) + in + total,understood + +(* @invariant sorted, normalized *) +let compute_unknown dataset size = + let extra = "Unknown // Computed" in + let bindings = to_list dataset in + (* if (debug) then print dataset; *) + let unknown = + if (bindings = []) then + [create_data Unknown 0 size extra false] + else + let rec loop acc = + function + | [] -> acc + | d::[] -> + if (d.range_end >= size) then + acc + else + let data = create_data Unknown d.range_end size extra false in + if (debug) then Printf.printf "END %s\n" (data_to_string data); + data::acc + | d1::(d2::rest as tail)-> + if (d1.range_end = d2.range_start || same_range d1 d2) then + loop acc tail + else + (* if it's semantic, and the first's range end is + greater than the second's start + (it contains it, which is guaranteed by our sorting), we ignore it *) + if (d1.tag = Semantic && d1.range_end >= d2.range_start) then + loop acc tail + else + let data = create_data Unknown d1.range_end d2.range_start extra false in + if (debug) then Printf.printf "NEW %s\n" (data_to_string data); + loop (data::acc) tail + in loop [] bindings + in + (* add the unknown data back into the dataset *) + List.fold_left (fun dataset data -> add data dataset) dataset unknown + +(* the preliminary data comes from an oracle which knows about the binary format, e.g., elf or mach specific counters *) +let create size data = + (* normalize right here, send only the largest covers into compute_unknown, similarly for counting *) + let normalized_data,rest = normalize data in + if (debug) then + begin + print_string "\nSIGIL Normalized ranges\n\n----------------------\n\n"; + print_data normalized_data; + print_string "\nSIGIL Remainder\n\n---------------------------\n\n"; + print_data rest; + print_string "\n\n---------------------------\n\n"; + end; + let data = compute_unknown normalized_data size + |> DataSet.union rest |> finalize in + let total_coverage,total_understood = count_coverage data in + let percent_coverage = (float_of_int total_coverage) /. (float_of_int size) in + let percent_understood = (float_of_int total_understood) /. (float_of_int size) in + { + data; + size; + total_coverage; + total_understood; + percent_coverage; + percent_understood; + tags; + } + +(* UNIT *) +(* unusual sequences, like one range starting in one range and ending in another need to be dealt with *) +(* +let d1 = create_data ~r1:0 ~r2:0x100 ~understood:true ~tag:Meta ~extra:"Container1" +let d2 = create_data ~r1:0 ~r2:0x50 ~understood:true ~tag:Data ~extra:"is_contained11" +let d3 = create_data ~r1:0x50 ~r2:0x75 ~understood:true ~tag:Code ~extra:"is_contained12" +let d4 = create_data ~r1:0x100 ~r2:0x200 ~understood:true ~tag:Meta ~extra:"Container2" +let d5 = create_data ~r1:0x150 ~r2:0x175 ~understood:true ~tag:Code ~extra:"is_contained21" +let d6 = create_data ~r1:0x250 ~r2:0x300 ~understood:true ~tag:Meta ~extra:"Container3" +let d7 = create_data ~r1:0x250 ~r2:0x275 ~understood:true ~tag:Meta ~extra:"is_contained34" +let d8 = create_data ~r1:0x250 ~r2:0x250 ~understood:true ~tag:Meta ~extra:"null_is_contained31" +let d9 = create_data ~r1:0x250 ~r2:0x251 ~understood:true ~tag:Meta ~extra:"small_is_contained32" +let d10 = create_data ~r1:0x251 ~r2:0x252 ~understood:true ~tag:Meta ~extra:"small_is_contained33" +let d11 = create_data ~r1:0 ~r2:0x100 ~understood:true ~tag:Code ~extra:"SameContainer51" +let d12 = create_data ~r1:0x350 ~r2:0x400 ~understood:true ~tag:Meta ~extra:"IslandContainer4" + + +let d0 = create_data ~r1:0x200 ~r2:0x250 ~understood:true ~tag:Meta ~extra:"is_contained31" + +let set1 = + add d1 empty + |> add d2 |> add d3 + |> add d4 |> add d5 + |> add d6 |> add d7 |> add d8 + |> add d9 |> add d10 |> add d11 |> add d12 + +let size = 0x400 -let percent m size = (float_of_int @@ total_coverage m) /. (float_of_int size) +let one,two = normalize set1 +let un1 = compute_unknown one size +let u1 = DataSet.union un1 two +let f1 = finalize u1 -let data_to_string data = - Printf.sprintf "size: %d kind: %s\n range_start: 0x%x range_end 0x%x understood: %b extra: %s" data.size (kind_to_string data.kind) data.range_start data.range_end data.understood data.extra +let t1 = create size set1 -let print = - Map.iter - (fun key data -> Printf.printf "%s\n" (data_to_string data)) +let pd = print_data +let p = print -let stats (map:t) size = - Printf.printf "Coverage: %d / %d = %f\n" - (total_coverage map) - size @@ percent map size; +let m1 = add d0 one +let l1 = m1 |> to_list +*) diff --git a/lib/utils/Leb128.ml b/lib/utils/Leb128.ml index 04969e6..2b6462f 100644 --- a/lib/utils/Leb128.ml +++ b/lib/utils/Leb128.ml @@ -1,33 +1,5 @@ -(* -for testing -#directory "/Users/matthewbarney/git/rdr/_build/lib/utils/";; -#load "Binary.cmo";; - *) - open Binary -let d byte_stream = - let rec loop pos shift acc = - let byte = Char.code @@ List.nth byte_stream pos in - let high_order_bit_is_zero = (byte land 0x80) = 0x0 in - let acc' = acc lor ((byte land 0x7f) lsl shift) in - if (high_order_bit_is_zero) then - acc' - else - loop (pos + 1) (shift + 7) acc' in - loop 0 0 0 - -let d2 byte_stream = - let rec loop pos shift acc = - let byte = Char.code @@ List.nth byte_stream pos in - let high_order_bit_is_zero = (byte land 0x80) = 0x0 in - let acc' = acc lor ((byte land 0x7f) lsl shift) in - if (high_order_bit_is_zero) then - (acc', pos+1) - else - loop (pos + 1) (shift + 7) acc' in - loop 0 0 0 - let decode_uleb128 byte_stream = let rec loop pos shift acc = let byte = Binary.i8 byte_stream pos in @@ -95,7 +67,6 @@ let print_uleb128 bytes = break; } - /* sign bit of byte is second high order bit (0x40) */ if ((shift Char.chr @@ List.nth bytes i) in get_sleb128 b 0 @@ -164,9 +135,6 @@ let unit5 = [0x80; 0x80; 0x80; 0x4f] (* +0x9e00000 -0x6200000 *) let res = -624485 (* -624485 *) let u1 = [0x9b; 0xf1; 0x59]; - - (* let e1 = Bytes.init (List.length unit1) (fun i -> List.nth unit1 i) - - *) - (* printf = "b0d2 10" @ 0x9cc92 in libsystem_c.dylib *) +printf = "b0d2 10" @ 0x9cc92 in libsystem_c.dylib +*) diff --git a/lib/utils/META b/lib/utils/META new file mode 100644 index 0000000..18913bd --- /dev/null +++ b/lib/utils/META @@ -0,0 +1,12 @@ +# OASIS_START +# DO NOT EDIT (digest: e8a47bb4a2b8113d81482c5b08352e15) +version = "1.1" +description = +"Lightweight, cross platform binary parsing and analysis library with no dependencies" +archive(byte) = "utils.cma" +archive(byte, plugin) = "utils.cma" +archive(native) = "utils.cmxa" +archive(native, plugin) = "utils.cmxs" +exists_if = "utils.cma" +# OASIS_STOP + diff --git a/lib/utils/utils.mldylib b/lib/utils/utils.mldylib new file mode 100644 index 0000000..c06498f --- /dev/null +++ b/lib/utils/utils.mldylib @@ -0,0 +1,9 @@ +# OASIS_START +# DO NOT EDIT (digest: c9c5259c4b040c58a7b444e0369cc86d) +RdrUtils +Binary +Input +Leb128 +ByteCoverage +Generics +# OASIS_STOP diff --git a/lib/utils/utils.mllib b/lib/utils/utils.mllib new file mode 100644 index 0000000..c06498f --- /dev/null +++ b/lib/utils/utils.mllib @@ -0,0 +1,9 @@ +# OASIS_START +# DO NOT EDIT (digest: c9c5259c4b040c58a7b444e0369cc86d) +RdrUtils +Binary +Input +Leb128 +ByteCoverage +Generics +# OASIS_STOP diff --git a/myocamlbuild.ml b/myocamlbuild.ml index 23cdcea..430c47a 100644 --- a/myocamlbuild.ml +++ b/myocamlbuild.ml @@ -1,5 +1,5 @@ (* OASIS_START *) -(* DO NOT EDIT (digest: 9a77234dd57e6a02c354cd4b37af0c80) *) +(* DO NOT EDIT (digest: 28638e64b9ea6e73a9746f85ba05ddf4) *) module OASISGettext = struct (* # 22 "src/oasis/OASISGettext.ml" *) @@ -610,9 +610,9 @@ let package_default = MyOCamlbuildBase.lib_ocaml = [ ("utils", ["lib/utils"], []); - ("goblin", ["lib/goblin"], []); ("mach", ["lib/mach"], []); ("elf", ["lib/elf"], []); + ("goblin", ["lib/goblin"], []); ("rdr", ["lib"], []); ("rdrutils", ["src"], []) ]; @@ -621,8 +621,8 @@ let package_default = includes = [ ("src", ["lib/elf"; "lib/goblin"; "lib/mach"; "lib/utils"]); - ("lib/mach", ["lib/goblin"; "lib/utils"]); - ("lib/goblin", ["lib/utils"]); + ("lib/mach", ["lib/utils"]); + ("lib/goblin", ["lib/elf"; "lib/mach"; "lib/utils"]); ("lib/elf", ["lib/utils"]); ("lib", ["lib/elf"; "lib/goblin"; "lib/mach"; "lib/utils"]) ] diff --git a/opam b/opam deleted file mode 100644 index ce142a9..0000000 --- a/opam +++ /dev/null @@ -1,14 +0,0 @@ -opam-version: "1.2" -name: "rdr" -version: "1.1" -license: "BSD" -maintainer: "m4b " -authors: "m4b " -bug-reports: "m4b.github.io@gmail.com" -dev-repo: "git://github.com/m4b/rdr" -homepage: "http://www.m4b.io" -build: [ - [make "build"] -] -depends: "ocamlfind" {build} -available: [ocaml-version >= "4.02"] diff --git a/opam/descr b/opam/descr new file mode 100644 index 0000000..0d02ba6 --- /dev/null +++ b/opam/descr @@ -0,0 +1,25 @@ +Rdr is a cross-platform binary analysis and reverse engineering library, +utilizing a unique symbol map for global analysis. + +`rdr` is an OCaml tool/library for doing cross-platform analysis of binaries, +by printing headers, locating entry points, showing import and export +symbols, their binary offsets and size, etc. + +It also features a symbol map which allows fast lookups for arbitrary +symbols, and their associated data, on your system +(the default search location are binaries in /usr/lib). + +The latest release also makes `rdr` a package which you can link against +and use in your own projects. + +See the README at http://github.com/m4b/rdr for more details. + +Features: + +* 64-bit Linux and Mach-o binary analysis +* Searchable symbol-map of all the symbols on your system, including binary + offset, size, and exporting library +* Print imports and exports of binaries +* Make pretty graphs, at the binary or symbol map level +* Byte Coverage algorithm which marks byte sequences as understood (or not) + and provides other meta-data \ No newline at end of file diff --git a/opam/files/rdr.install b/opam/files/rdr.install new file mode 100644 index 0000000..8e31182 --- /dev/null +++ b/opam/files/rdr.install @@ -0,0 +1,4 @@ +bin: [ + "?_build/src/Rdr.byte" {"rdr"} + "?_build/src/Rdr.native" {"rdr"} +] diff --git a/opam/findlib b/opam/findlib new file mode 100644 index 0000000..cb29315 --- /dev/null +++ b/opam/findlib @@ -0,0 +1 @@ +rdr diff --git a/opam/opam b/opam/opam new file mode 100644 index 0000000..38b2e13 --- /dev/null +++ b/opam/opam @@ -0,0 +1,21 @@ +opam-version: "1.2" +name: "rdr" +version: "2.0.1" +maintainer: "" +authors: [ "m4b" ] +license: "BSD-3-clause" +homepage: "http://github.com/m4b/rdr" +build: [ + ["ocaml" "setup.ml" "-configure" "--prefix" prefix] + ["ocaml" "setup.ml" "-build"] +] +install: ["ocaml" "setup.ml" "-install"] +remove: [ + ["ocamlfind" "remove" "rdr"] +] +depends: [ + "ocamlfind" +] +depopts: [ + "base-unix" +] diff --git a/rdr.install b/rdr.install deleted file mode 100644 index bc415ae..0000000 --- a/rdr.install +++ /dev/null @@ -1 +0,0 @@ -bin:[ "rdr" { "rdr" } ] \ No newline at end of file diff --git a/setup.ml b/setup.ml new file mode 100644 index 0000000..1284c43 --- /dev/null +++ b/setup.ml @@ -0,0 +1,7040 @@ +(* setup.ml generated for the first time by OASIS v0.4.5 *) + +(* OASIS_START *) +(* DO NOT EDIT (digest: 3cb0436180a61a463b13984bd55794ab) *) +(* + Regenerated by OASIS v0.4.5 + Visit http://oasis.forge.ocamlcore.org for more information and + documentation about functions used in this file. +*) +module OASISGettext = struct +(* # 22 "src/oasis/OASISGettext.ml" *) + + + let ns_ str = + str + + + let s_ str = + str + + + let f_ (str: ('a, 'b, 'c, 'd) format4) = + str + + + let fn_ fmt1 fmt2 n = + if n = 1 then + fmt1^^"" + else + fmt2^^"" + + + let init = + [] + + +end + +module OASISContext = struct +(* # 22 "src/oasis/OASISContext.ml" *) + + + open OASISGettext + + + type level = + [ `Debug + | `Info + | `Warning + | `Error] + + + type t = + { + (* TODO: replace this by a proplist. *) + quiet: bool; + info: bool; + debug: bool; + ignore_plugins: bool; + ignore_unknown_fields: bool; + printf: level -> string -> unit; + } + + + let printf lvl str = + let beg = + match lvl with + | `Error -> s_ "E: " + | `Warning -> s_ "W: " + | `Info -> s_ "I: " + | `Debug -> s_ "D: " + in + prerr_endline (beg^str) + + + let default = + ref + { + quiet = false; + info = false; + debug = false; + ignore_plugins = false; + ignore_unknown_fields = false; + printf = printf; + } + + + let quiet = + {!default with quiet = true} + + + let fspecs () = + (* TODO: don't act on default. *) + let ignore_plugins = ref false in + ["-quiet", + Arg.Unit (fun () -> default := {!default with quiet = true}), + s_ " Run quietly"; + + "-info", + Arg.Unit (fun () -> default := {!default with info = true}), + s_ " Display information message"; + + + "-debug", + Arg.Unit (fun () -> default := {!default with debug = true}), + s_ " Output debug message"; + + "-ignore-plugins", + Arg.Set ignore_plugins, + s_ " Ignore plugin's field."; + + "-C", + (* TODO: remove this chdir. *) + Arg.String (fun str -> Sys.chdir str), + s_ "dir Change directory before running."], + fun () -> {!default with ignore_plugins = !ignore_plugins} +end + +module OASISString = struct +(* # 22 "src/oasis/OASISString.ml" *) + + + (** Various string utilities. + + Mostly inspired by extlib and batteries ExtString and BatString libraries. + + @author Sylvain Le Gall + *) + + + let nsplitf str f = + if str = "" then + [] + else + let buf = Buffer.create 13 in + let lst = ref [] in + let push () = + lst := Buffer.contents buf :: !lst; + Buffer.clear buf + in + let str_len = String.length str in + for i = 0 to str_len - 1 do + if f str.[i] then + push () + else + Buffer.add_char buf str.[i] + done; + push (); + List.rev !lst + + + (** [nsplit c s] Split the string [s] at char [c]. It doesn't include the + separator. + *) + let nsplit str c = + nsplitf str ((=) c) + + + let find ~what ?(offset=0) str = + let what_idx = ref 0 in + let str_idx = ref offset in + while !str_idx < String.length str && + !what_idx < String.length what do + if str.[!str_idx] = what.[!what_idx] then + incr what_idx + else + what_idx := 0; + incr str_idx + done; + if !what_idx <> String.length what then + raise Not_found + else + !str_idx - !what_idx + + + let sub_start str len = + let str_len = String.length str in + if len >= str_len then + "" + else + String.sub str len (str_len - len) + + + let sub_end ?(offset=0) str len = + let str_len = String.length str in + if len >= str_len then + "" + else + String.sub str 0 (str_len - len) + + + let starts_with ~what ?(offset=0) str = + let what_idx = ref 0 in + let str_idx = ref offset in + let ok = ref true in + while !ok && + !str_idx < String.length str && + !what_idx < String.length what do + if str.[!str_idx] = what.[!what_idx] then + incr what_idx + else + ok := false; + incr str_idx + done; + if !what_idx = String.length what then + true + else + false + + + let strip_starts_with ~what str = + if starts_with ~what str then + sub_start str (String.length what) + else + raise Not_found + + + let ends_with ~what ?(offset=0) str = + let what_idx = ref ((String.length what) - 1) in + let str_idx = ref ((String.length str) - 1) in + let ok = ref true in + while !ok && + offset <= !str_idx && + 0 <= !what_idx do + if str.[!str_idx] = what.[!what_idx] then + decr what_idx + else + ok := false; + decr str_idx + done; + if !what_idx = -1 then + true + else + false + + + let strip_ends_with ~what str = + if ends_with ~what str then + sub_end str (String.length what) + else + raise Not_found + + + let replace_chars f s = + let buf = Buffer.create (String.length s) in + String.iter (fun c -> Buffer.add_char buf (f c)) s; + Buffer.contents buf + + +end + +module OASISUtils = struct +(* # 22 "src/oasis/OASISUtils.ml" *) + + + open OASISGettext + + + module MapExt = + struct + module type S = + sig + include Map.S + val add_list: 'a t -> (key * 'a) list -> 'a t + val of_list: (key * 'a) list -> 'a t + val to_list: 'a t -> (key * 'a) list + end + + module Make (Ord: Map.OrderedType) = + struct + include Map.Make(Ord) + + let rec add_list t = + function + | (k, v) :: tl -> add_list (add k v t) tl + | [] -> t + + let of_list lst = add_list empty lst + + let to_list t = fold (fun k v acc -> (k, v) :: acc) t [] + end + end + + + module MapString = MapExt.Make(String) + + + module SetExt = + struct + module type S = + sig + include Set.S + val add_list: t -> elt list -> t + val of_list: elt list -> t + val to_list: t -> elt list + end + + module Make (Ord: Set.OrderedType) = + struct + include Set.Make(Ord) + + let rec add_list t = + function + | e :: tl -> add_list (add e t) tl + | [] -> t + + let of_list lst = add_list empty lst + + let to_list = elements + end + end + + + module SetString = SetExt.Make(String) + + + let compare_csl s1 s2 = + String.compare (String.lowercase s1) (String.lowercase s2) + + + module HashStringCsl = + Hashtbl.Make + (struct + type t = string + + let equal s1 s2 = + (String.lowercase s1) = (String.lowercase s2) + + let hash s = + Hashtbl.hash (String.lowercase s) + end) + + module SetStringCsl = + SetExt.Make + (struct + type t = string + let compare = compare_csl + end) + + + let varname_of_string ?(hyphen='_') s = + if String.length s = 0 then + begin + invalid_arg "varname_of_string" + end + else + begin + let buf = + OASISString.replace_chars + (fun c -> + if ('a' <= c && c <= 'z') + || + ('A' <= c && c <= 'Z') + || + ('0' <= c && c <= '9') then + c + else + hyphen) + s; + in + let buf = + (* Start with a _ if digit *) + if '0' <= s.[0] && s.[0] <= '9' then + "_"^buf + else + buf + in + String.lowercase buf + end + + + let varname_concat ?(hyphen='_') p s = + let what = String.make 1 hyphen in + let p = + try + OASISString.strip_ends_with ~what p + with Not_found -> + p + in + let s = + try + OASISString.strip_starts_with ~what s + with Not_found -> + s + in + p^what^s + + + let is_varname str = + str = varname_of_string str + + + let failwithf fmt = Printf.ksprintf failwith fmt + + +end + +module PropList = struct +(* # 22 "src/oasis/PropList.ml" *) + + + open OASISGettext + + + type name = string + + + exception Not_set of name * string option + exception No_printer of name + exception Unknown_field of name * name + + + let () = + Printexc.register_printer + (function + | Not_set (nm, Some rsn) -> + Some + (Printf.sprintf (f_ "Field '%s' is not set: %s") nm rsn) + | Not_set (nm, None) -> + Some + (Printf.sprintf (f_ "Field '%s' is not set") nm) + | No_printer nm -> + Some + (Printf.sprintf (f_ "No default printer for value %s") nm) + | Unknown_field (nm, schm) -> + Some + (Printf.sprintf + (f_ "Field %s is not defined in schema %s") nm schm) + | _ -> + None) + + + module Data = + struct + type t = + (name, unit -> unit) Hashtbl.t + + let create () = + Hashtbl.create 13 + + let clear t = + Hashtbl.clear t + + +(* # 78 "src/oasis/PropList.ml" *) + end + + + module Schema = + struct + type ('ctxt, 'extra) value = + { + get: Data.t -> string; + set: Data.t -> ?context:'ctxt -> string -> unit; + help: (unit -> string) option; + extra: 'extra; + } + + type ('ctxt, 'extra) t = + { + name: name; + fields: (name, ('ctxt, 'extra) value) Hashtbl.t; + order: name Queue.t; + name_norm: string -> string; + } + + let create ?(case_insensitive=false) nm = + { + name = nm; + fields = Hashtbl.create 13; + order = Queue.create (); + name_norm = + (if case_insensitive then + String.lowercase + else + fun s -> s); + } + + let add t nm set get extra help = + let key = + t.name_norm nm + in + + if Hashtbl.mem t.fields key then + failwith + (Printf.sprintf + (f_ "Field '%s' is already defined in schema '%s'") + nm t.name); + Hashtbl.add + t.fields + key + { + set = set; + get = get; + help = help; + extra = extra; + }; + Queue.add nm t.order + + let mem t nm = + Hashtbl.mem t.fields nm + + let find t nm = + try + Hashtbl.find t.fields (t.name_norm nm) + with Not_found -> + raise (Unknown_field (nm, t.name)) + + let get t data nm = + (find t nm).get data + + let set t data nm ?context x = + (find t nm).set + data + ?context + x + + let fold f acc t = + Queue.fold + (fun acc k -> + let v = + find t k + in + f acc k v.extra v.help) + acc + t.order + + let iter f t = + fold + (fun () -> f) + () + t + + let name t = + t.name + end + + + module Field = + struct + type ('ctxt, 'value, 'extra) t = + { + set: Data.t -> ?context:'ctxt -> 'value -> unit; + get: Data.t -> 'value; + sets: Data.t -> ?context:'ctxt -> string -> unit; + gets: Data.t -> string; + help: (unit -> string) option; + extra: 'extra; + } + + let new_id = + let last_id = + ref 0 + in + fun () -> incr last_id; !last_id + + let create ?schema ?name ?parse ?print ?default ?update ?help extra = + (* Default value container *) + let v = + ref None + in + + (* If name is not given, create unique one *) + let nm = + match name with + | Some s -> s + | None -> Printf.sprintf "_anon_%d" (new_id ()) + in + + (* Last chance to get a value: the default *) + let default () = + match default with + | Some d -> d + | None -> raise (Not_set (nm, Some (s_ "no default value"))) + in + + (* Get data *) + let get data = + (* Get value *) + try + (Hashtbl.find data nm) (); + match !v with + | Some x -> x + | None -> default () + with Not_found -> + default () + in + + (* Set data *) + let set data ?context x = + let x = + match update with + | Some f -> + begin + try + f ?context (get data) x + with Not_set _ -> + x + end + | None -> + x + in + Hashtbl.replace + data + nm + (fun () -> v := Some x) + in + + (* Parse string value, if possible *) + let parse = + match parse with + | Some f -> + f + | None -> + fun ?context s -> + failwith + (Printf.sprintf + (f_ "Cannot parse field '%s' when setting value %S") + nm + s) + in + + (* Set data, from string *) + let sets data ?context s = + set ?context data (parse ?context s) + in + + (* Output value as string, if possible *) + let print = + match print with + | Some f -> + f + | None -> + fun _ -> raise (No_printer nm) + in + + (* Get data, as a string *) + let gets data = + print (get data) + in + + begin + match schema with + | Some t -> + Schema.add t nm sets gets extra help + | None -> + () + end; + + { + set = set; + get = get; + sets = sets; + gets = gets; + help = help; + extra = extra; + } + + let fset data t ?context x = + t.set data ?context x + + let fget data t = + t.get data + + let fsets data t ?context s = + t.sets data ?context s + + let fgets data t = + t.gets data + end + + + module FieldRO = + struct + let create ?schema ?name ?parse ?print ?default ?update ?help extra = + let fld = + Field.create ?schema ?name ?parse ?print ?default ?update ?help extra + in + fun data -> Field.fget data fld + end +end + +module OASISMessage = struct +(* # 22 "src/oasis/OASISMessage.ml" *) + + + open OASISGettext + open OASISContext + + + let generic_message ~ctxt lvl fmt = + let cond = + if ctxt.quiet then + false + else + match lvl with + | `Debug -> ctxt.debug + | `Info -> ctxt.info + | _ -> true + in + Printf.ksprintf + (fun str -> + if cond then + begin + ctxt.printf lvl str + end) + fmt + + + let debug ~ctxt fmt = + generic_message ~ctxt `Debug fmt + + + let info ~ctxt fmt = + generic_message ~ctxt `Info fmt + + + let warning ~ctxt fmt = + generic_message ~ctxt `Warning fmt + + + let error ~ctxt fmt = + generic_message ~ctxt `Error fmt + +end + +module OASISVersion = struct +(* # 22 "src/oasis/OASISVersion.ml" *) + + + open OASISGettext + + + + + + type s = string + + + type t = string + + + type comparator = + | VGreater of t + | VGreaterEqual of t + | VEqual of t + | VLesser of t + | VLesserEqual of t + | VOr of comparator * comparator + | VAnd of comparator * comparator + + + + (* Range of allowed characters *) + let is_digit c = + '0' <= c && c <= '9' + + + let is_alpha c = + ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') + + + let is_special = + function + | '.' | '+' | '-' | '~' -> true + | _ -> false + + + let rec version_compare v1 v2 = + if v1 <> "" || v2 <> "" then + begin + (* Compare ascii string, using special meaning for version + * related char + *) + let val_ascii c = + if c = '~' then -1 + else if is_digit c then 0 + else if c = '\000' then 0 + else if is_alpha c then Char.code c + else (Char.code c) + 256 + in + + let len1 = String.length v1 in + let len2 = String.length v2 in + + let p = ref 0 in + + (** Compare ascii part *) + let compare_vascii () = + let cmp = ref 0 in + while !cmp = 0 && + !p < len1 && !p < len2 && + not (is_digit v1.[!p] && is_digit v2.[!p]) do + cmp := (val_ascii v1.[!p]) - (val_ascii v2.[!p]); + incr p + done; + if !cmp = 0 && !p < len1 && !p = len2 then + val_ascii v1.[!p] + else if !cmp = 0 && !p = len1 && !p < len2 then + - (val_ascii v2.[!p]) + else + !cmp + in + + (** Compare digit part *) + let compare_digit () = + let extract_int v p = + let start_p = !p in + while !p < String.length v && is_digit v.[!p] do + incr p + done; + let substr = + String.sub v !p ((String.length v) - !p) + in + let res = + match String.sub v start_p (!p - start_p) with + | "" -> 0 + | s -> int_of_string s + in + res, substr + in + let i1, tl1 = extract_int v1 (ref !p) in + let i2, tl2 = extract_int v2 (ref !p) in + i1 - i2, tl1, tl2 + in + + match compare_vascii () with + | 0 -> + begin + match compare_digit () with + | 0, tl1, tl2 -> + if tl1 <> "" && is_digit tl1.[0] then + 1 + else if tl2 <> "" && is_digit tl2.[0] then + -1 + else + version_compare tl1 tl2 + | n, _, _ -> + n + end + | n -> + n + end + else + begin + 0 + end + + + let version_of_string str = str + + + let string_of_version t = t + + + let version_compare_string s1 s2 = + version_compare (version_of_string s1) (version_of_string s2) + + + let chop t = + try + let pos = + String.rindex t '.' + in + String.sub t 0 pos + with Not_found -> + t + + + let rec comparator_apply v op = + match op with + | VGreater cv -> + (version_compare v cv) > 0 + | VGreaterEqual cv -> + (version_compare v cv) >= 0 + | VLesser cv -> + (version_compare v cv) < 0 + | VLesserEqual cv -> + (version_compare v cv) <= 0 + | VEqual cv -> + (version_compare v cv) = 0 + | VOr (op1, op2) -> + (comparator_apply v op1) || (comparator_apply v op2) + | VAnd (op1, op2) -> + (comparator_apply v op1) && (comparator_apply v op2) + + + let rec string_of_comparator = + function + | VGreater v -> "> "^(string_of_version v) + | VEqual v -> "= "^(string_of_version v) + | VLesser v -> "< "^(string_of_version v) + | VGreaterEqual v -> ">= "^(string_of_version v) + | VLesserEqual v -> "<= "^(string_of_version v) + | VOr (c1, c2) -> + (string_of_comparator c1)^" || "^(string_of_comparator c2) + | VAnd (c1, c2) -> + (string_of_comparator c1)^" && "^(string_of_comparator c2) + + + let rec varname_of_comparator = + let concat p v = + OASISUtils.varname_concat + p + (OASISUtils.varname_of_string + (string_of_version v)) + in + function + | VGreater v -> concat "gt" v + | VLesser v -> concat "lt" v + | VEqual v -> concat "eq" v + | VGreaterEqual v -> concat "ge" v + | VLesserEqual v -> concat "le" v + | VOr (c1, c2) -> + (varname_of_comparator c1)^"_or_"^(varname_of_comparator c2) + | VAnd (c1, c2) -> + (varname_of_comparator c1)^"_and_"^(varname_of_comparator c2) + + + let rec comparator_ge v' = + let cmp v = version_compare v v' >= 0 in + function + | VEqual v + | VGreaterEqual v + | VGreater v -> cmp v + | VLesserEqual _ + | VLesser _ -> false + | VOr (c1, c2) -> comparator_ge v' c1 || comparator_ge v' c2 + | VAnd (c1, c2) -> comparator_ge v' c1 && comparator_ge v' c2 + + +end + +module OASISLicense = struct +(* # 22 "src/oasis/OASISLicense.ml" *) + + + (** License for _oasis fields + @author Sylvain Le Gall + *) + + + + + + type license = string + + + type license_exception = string + + + type license_version = + | Version of OASISVersion.t + | VersionOrLater of OASISVersion.t + | NoVersion + + + + type license_dep_5_unit = + { + license: license; + excption: license_exception option; + version: license_version; + } + + + + type license_dep_5 = + | DEP5Unit of license_dep_5_unit + | DEP5Or of license_dep_5 list + | DEP5And of license_dep_5 list + + + type t = + | DEP5License of license_dep_5 + | OtherLicense of string (* URL *) + + + +end + +module OASISExpr = struct +(* # 22 "src/oasis/OASISExpr.ml" *) + + + + + + open OASISGettext + + + type test = string + + + type flag = string + + + type t = + | EBool of bool + | ENot of t + | EAnd of t * t + | EOr of t * t + | EFlag of flag + | ETest of test * string + + + + type 'a choices = (t * 'a) list + + + let eval var_get t = + let rec eval' = + function + | EBool b -> + b + + | ENot e -> + not (eval' e) + + | EAnd (e1, e2) -> + (eval' e1) && (eval' e2) + + | EOr (e1, e2) -> + (eval' e1) || (eval' e2) + + | EFlag nm -> + let v = + var_get nm + in + assert(v = "true" || v = "false"); + (v = "true") + + | ETest (nm, vl) -> + let v = + var_get nm + in + (v = vl) + in + eval' t + + + let choose ?printer ?name var_get lst = + let rec choose_aux = + function + | (cond, vl) :: tl -> + if eval var_get cond then + vl + else + choose_aux tl + | [] -> + let str_lst = + if lst = [] then + s_ "" + else + String.concat + (s_ ", ") + (List.map + (fun (cond, vl) -> + match printer with + | Some p -> p vl + | None -> s_ "") + lst) + in + match name with + | Some nm -> + failwith + (Printf.sprintf + (f_ "No result for the choice list '%s': %s") + nm str_lst) + | None -> + failwith + (Printf.sprintf + (f_ "No result for a choice list: %s") + str_lst) + in + choose_aux (List.rev lst) + + +end + +module OASISText = struct +(* # 22 "src/oasis/OASISText.ml" *) + + + + type elt = + | Para of string + | Verbatim of string + | BlankLine + + + type t = elt list + +end + +module OASISTypes = struct +(* # 22 "src/oasis/OASISTypes.ml" *) + + + + + + type name = string + type package_name = string + type url = string + type unix_dirname = string + type unix_filename = string + type host_dirname = string + type host_filename = string + type prog = string + type arg = string + type args = string list + type command_line = (prog * arg list) + + + type findlib_name = string + type findlib_full = string + + + type compiled_object = + | Byte + | Native + | Best + + + + type dependency = + | FindlibPackage of findlib_full * OASISVersion.comparator option + | InternalLibrary of name + + + + type tool = + | ExternalTool of name + | InternalExecutable of name + + + + type vcs = + | Darcs + | Git + | Svn + | Cvs + | Hg + | Bzr + | Arch + | Monotone + | OtherVCS of url + + + + type plugin_kind = + [ `Configure + | `Build + | `Doc + | `Test + | `Install + | `Extra + ] + + + type plugin_data_purpose = + [ `Configure + | `Build + | `Install + | `Clean + | `Distclean + | `Install + | `Uninstall + | `Test + | `Doc + | `Extra + | `Other of string + ] + + + type 'a plugin = 'a * name * OASISVersion.t option + + + type all_plugin = plugin_kind plugin + + + type plugin_data = (all_plugin * plugin_data_purpose * (unit -> unit)) list + + +(* # 115 "src/oasis/OASISTypes.ml" *) + + + type 'a conditional = 'a OASISExpr.choices + + + type custom = + { + pre_command: (command_line option) conditional; + post_command: (command_line option) conditional; + } + + + + type common_section = + { + cs_name: name; + cs_data: PropList.Data.t; + cs_plugin_data: plugin_data; + } + + + + type build_section = + { + bs_build: bool conditional; + bs_install: bool conditional; + bs_path: unix_dirname; + bs_compiled_object: compiled_object; + bs_build_depends: dependency list; + bs_build_tools: tool list; + bs_c_sources: unix_filename list; + bs_data_files: (unix_filename * unix_filename option) list; + bs_ccopt: args conditional; + bs_cclib: args conditional; + bs_dlllib: args conditional; + bs_dllpath: args conditional; + bs_byteopt: args conditional; + bs_nativeopt: args conditional; + } + + + + type library = + { + lib_modules: string list; + lib_pack: bool; + lib_internal_modules: string list; + lib_findlib_parent: findlib_name option; + lib_findlib_name: findlib_name option; + lib_findlib_containers: findlib_name list; + } + + + type object_ = + { + obj_modules: string list; + obj_findlib_fullname: findlib_name list option; + } + + + type executable = + { + exec_custom: bool; + exec_main_is: unix_filename; + } + + + type flag = + { + flag_description: string option; + flag_default: bool conditional; + } + + + type source_repository = + { + src_repo_type: vcs; + src_repo_location: url; + src_repo_browser: url option; + src_repo_module: string option; + src_repo_branch: string option; + src_repo_tag: string option; + src_repo_subdir: unix_filename option; + } + + + type test = + { + test_type: [`Test] plugin; + test_command: command_line conditional; + test_custom: custom; + test_working_directory: unix_filename option; + test_run: bool conditional; + test_tools: tool list; + } + + + type doc_format = + | HTML of unix_filename + | DocText + | PDF + | PostScript + | Info of unix_filename + | DVI + | OtherDoc + + + + type doc = + { + doc_type: [`Doc] plugin; + doc_custom: custom; + doc_build: bool conditional; + doc_install: bool conditional; + doc_install_dir: unix_filename; + doc_title: string; + doc_authors: string list; + doc_abstract: string option; + doc_format: doc_format; + doc_data_files: (unix_filename * unix_filename option) list; + doc_build_tools: tool list; + } + + + type section = + | Library of common_section * build_section * library + | Object of common_section * build_section * object_ + | Executable of common_section * build_section * executable + | Flag of common_section * flag + | SrcRepo of common_section * source_repository + | Test of common_section * test + | Doc of common_section * doc + + + + type section_kind = + [ `Library | `Object | `Executable | `Flag | `SrcRepo | `Test | `Doc ] + + + type package = + { + oasis_version: OASISVersion.t; + ocaml_version: OASISVersion.comparator option; + findlib_version: OASISVersion.comparator option; + alpha_features: string list; + beta_features: string list; + name: package_name; + version: OASISVersion.t; + license: OASISLicense.t; + license_file: unix_filename option; + copyrights: string list; + maintainers: string list; + authors: string list; + homepage: url option; + synopsis: string; + description: OASISText.t option; + categories: url list; + + conf_type: [`Configure] plugin; + conf_custom: custom; + + build_type: [`Build] plugin; + build_custom: custom; + + install_type: [`Install] plugin; + install_custom: custom; + uninstall_custom: custom; + + clean_custom: custom; + distclean_custom: custom; + + files_ab: unix_filename list; + sections: section list; + plugins: [`Extra] plugin list; + disable_oasis_section: unix_filename list; + schema_data: PropList.Data.t; + plugin_data: plugin_data; + } + + +end + +module OASISFeatures = struct +(* # 22 "src/oasis/OASISFeatures.ml" *) + + open OASISTypes + open OASISUtils + open OASISGettext + open OASISVersion + + module MapPlugin = + Map.Make + (struct + type t = plugin_kind * name + let compare = Pervasives.compare + end) + + module Data = + struct + type t = + { + oasis_version: OASISVersion.t; + plugin_versions: OASISVersion.t option MapPlugin.t; + alpha_features: string list; + beta_features: string list; + } + + let create oasis_version alpha_features beta_features = + { + oasis_version = oasis_version; + plugin_versions = MapPlugin.empty; + alpha_features = alpha_features; + beta_features = beta_features + } + + let of_package pkg = + create + pkg.OASISTypes.oasis_version + pkg.OASISTypes.alpha_features + pkg.OASISTypes.beta_features + + let add_plugin (plugin_kind, plugin_name, plugin_version) t = + {t with + plugin_versions = MapPlugin.add + (plugin_kind, plugin_name) + plugin_version + t.plugin_versions} + + let plugin_version plugin_kind plugin_name t = + MapPlugin.find (plugin_kind, plugin_name) t.plugin_versions + + let to_string t = + Printf.sprintf + "oasis_version: %s; alpha_features: %s; beta_features: %s; \ + plugins_version: %s" + (OASISVersion.string_of_version t.oasis_version) + (String.concat ", " t.alpha_features) + (String.concat ", " t.beta_features) + (String.concat ", " + (MapPlugin.fold + (fun (_, plg) ver_opt acc -> + (plg^ + (match ver_opt with + | Some v -> + " "^(OASISVersion.string_of_version v) + | None -> "")) + :: acc) + t.plugin_versions [])) + end + + type origin = + | Field of string * string + | Section of string + | NoOrigin + + type stage = Alpha | Beta + + + let string_of_stage = + function + | Alpha -> "alpha" + | Beta -> "beta" + + + let field_of_stage = + function + | Alpha -> "AlphaFeatures" + | Beta -> "BetaFeatures" + + type publication = InDev of stage | SinceVersion of OASISVersion.t + + type t = + { + name: string; + plugin: all_plugin option; + publication: publication; + description: unit -> string; + } + + (* TODO: mutex protect this. *) + let all_features = Hashtbl.create 13 + + + let since_version ver_str = SinceVersion (version_of_string ver_str) + let alpha = InDev Alpha + let beta = InDev Beta + + + let to_string t = + Printf.sprintf + "feature: %s; plugin: %s; publication: %s" + t.name + (match t.plugin with + | None -> "" + | Some (_, nm, _) -> nm) + (match t.publication with + | InDev stage -> string_of_stage stage + | SinceVersion ver -> ">= "^(OASISVersion.string_of_version ver)) + + let data_check t data origin = + let no_message = "no message" in + + let check_feature features stage = + let has_feature = List.mem t.name features in + if not has_feature then + match origin with + | Field (fld, where) -> + Some + (Printf.sprintf + (f_ "Field %s in %s is only available when feature %s \ + is in field %s.") + fld where t.name (field_of_stage stage)) + | Section sct -> + Some + (Printf.sprintf + (f_ "Section %s is only available when features %s \ + is in field %s.") + sct t.name (field_of_stage stage)) + | NoOrigin -> + Some no_message + else + None + in + + let version_is_good ~min_version version fmt = + let version_is_good = + OASISVersion.comparator_apply + version (OASISVersion.VGreaterEqual min_version) + in + Printf.ksprintf + (fun str -> + if version_is_good then + None + else + Some str) + fmt + in + + match origin, t.plugin, t.publication with + | _, _, InDev Alpha -> check_feature data.Data.alpha_features Alpha + | _, _, InDev Beta -> check_feature data.Data.beta_features Beta + | Field(fld, where), None, SinceVersion min_version -> + version_is_good ~min_version data.Data.oasis_version + (f_ "Field %s in %s is only valid since OASIS v%s, update \ + OASISFormat field from '%s' to '%s' after checking \ + OASIS changelog.") + fld where (string_of_version min_version) + (string_of_version data.Data.oasis_version) + (string_of_version min_version) + + | Field(fld, where), Some(plugin_knd, plugin_name, _), + SinceVersion min_version -> + begin + try + let plugin_version_current = + try + match Data.plugin_version plugin_knd plugin_name data with + | Some ver -> ver + | None -> + failwithf + (f_ "Field %s in %s is only valid for the OASIS \ + plugin %s since v%s, but no plugin version is \ + defined in the _oasis file, change '%s' to \ + '%s (%s)' in your _oasis file.") + fld where plugin_name (string_of_version min_version) + plugin_name + plugin_name (string_of_version min_version) + with Not_found -> + failwithf + (f_ "Field %s in %s is only valid when the OASIS plugin %s \ + is defined.") + fld where plugin_name + in + version_is_good ~min_version plugin_version_current + (f_ "Field %s in %s is only valid for the OASIS plugin %s \ + since v%s, update your plugin from '%s (%s)' to \ + '%s (%s)' after checking the plugin's changelog.") + fld where plugin_name (string_of_version min_version) + plugin_name (string_of_version plugin_version_current) + plugin_name (string_of_version min_version) + with Failure msg -> + Some msg + end + + | Section sct, None, SinceVersion min_version -> + version_is_good ~min_version data.Data.oasis_version + (f_ "Section %s is only valid for since OASIS v%s, update \ + OASISFormat field from '%s' to '%s' after checking OASIS \ + changelog.") + sct (string_of_version min_version) + (string_of_version data.Data.oasis_version) + (string_of_version min_version) + + | Section sct, Some(plugin_knd, plugin_name, _), + SinceVersion min_version -> + begin + try + let plugin_version_current = + try + match Data.plugin_version plugin_knd plugin_name data with + | Some ver -> ver + | None -> + failwithf + (f_ "Section %s is only valid for the OASIS \ + plugin %s since v%s, but no plugin version is \ + defined in the _oasis file, change '%s' to \ + '%s (%s)' in your _oasis file.") + sct plugin_name (string_of_version min_version) + plugin_name + plugin_name (string_of_version min_version) + with Not_found -> + failwithf + (f_ "Section %s is only valid when the OASIS plugin %s \ + is defined.") + sct plugin_name + in + version_is_good ~min_version plugin_version_current + (f_ "Section %s is only valid for the OASIS plugin %s \ + since v%s, update your plugin from '%s (%s)' to \ + '%s (%s)' after checking the plugin's changelog.") + sct plugin_name (string_of_version min_version) + plugin_name (string_of_version plugin_version_current) + plugin_name (string_of_version min_version) + with Failure msg -> + Some msg + end + + | NoOrigin, None, SinceVersion min_version -> + version_is_good ~min_version data.Data.oasis_version "%s" no_message + + | NoOrigin, Some(plugin_knd, plugin_name, _), SinceVersion min_version -> + begin + try + let plugin_version_current = + match Data.plugin_version plugin_knd plugin_name data with + | Some ver -> ver + | None -> raise Not_found + in + version_is_good ~min_version plugin_version_current + "%s" no_message + with Not_found -> + Some no_message + end + + + let data_assert t data origin = + match data_check t data origin with + | None -> () + | Some str -> failwith str + + + let data_test t data = + match data_check t data NoOrigin with + | None -> true + | Some str -> false + + + let package_test t pkg = + data_test t (Data.of_package pkg) + + + let create ?plugin name publication description = + let () = + if Hashtbl.mem all_features name then + failwithf "Feature '%s' is already declared." name + in + let t = + { + name = name; + plugin = plugin; + publication = publication; + description = description; + } + in + Hashtbl.add all_features name t; + t + + + let get_stage name = + try + (Hashtbl.find all_features name).publication + with Not_found -> + failwithf (f_ "Feature %s doesn't exist.") name + + + let list () = + Hashtbl.fold (fun _ v acc -> v :: acc) all_features [] + + (* + * Real flags. + *) + + + let features = + create "features_fields" + (since_version "0.4") + (fun () -> + s_ "Enable to experiment not yet official features.") + + + let flag_docs = + create "flag_docs" + (since_version "0.3") + (fun () -> + s_ "Building docs require '-docs' flag at configure.") + + + let flag_tests = + create "flag_tests" + (since_version "0.3") + (fun () -> + s_ "Running tests require '-tests' flag at configure.") + + + let pack = + create "pack" + (since_version "0.3") + (fun () -> + s_ "Allow to create packed library.") + + + let section_object = + create "section_object" beta + (fun () -> + s_ "Implement an object section.") + + + let dynrun_for_release = + create "dynrun_for_release" alpha + (fun () -> + s_ "Make '-setup-update dynamic' suitable for releasing project.") + + + let compiled_setup_ml = + create "compiled_setup_ml" alpha + (fun () -> + s_ "It compiles the setup.ml and speed-up actions done with it.") + + let disable_oasis_section = + create "disable_oasis_section" alpha + (fun () -> + s_ "Allows the OASIS section comments and digest to be omitted in \ + generated files.") + + let no_automatic_syntax = + create "no_automatic_syntax" alpha + (fun () -> + s_ "Disable the automatic inclusion of -syntax camlp4o for packages \ + that matches the internal heuristic (if a dependency ends with \ + a .syntax or is a well known syntax).") +end + +module OASISUnixPath = struct +(* # 22 "src/oasis/OASISUnixPath.ml" *) + + + type unix_filename = string + type unix_dirname = string + + + type host_filename = string + type host_dirname = string + + + let current_dir_name = "." + + + let parent_dir_name = ".." + + + let is_current_dir fn = + fn = current_dir_name || fn = "" + + + let concat f1 f2 = + if is_current_dir f1 then + f2 + else + let f1' = + try OASISString.strip_ends_with ~what:"/" f1 with Not_found -> f1 + in + f1'^"/"^f2 + + + let make = + function + | hd :: tl -> + List.fold_left + (fun f p -> concat f p) + hd + tl + | [] -> + invalid_arg "OASISUnixPath.make" + + + let dirname f = + try + String.sub f 0 (String.rindex f '/') + with Not_found -> + current_dir_name + + + let basename f = + try + let pos_start = + (String.rindex f '/') + 1 + in + String.sub f pos_start ((String.length f) - pos_start) + with Not_found -> + f + + + let chop_extension f = + try + let last_dot = + String.rindex f '.' + in + let sub = + String.sub f 0 last_dot + in + try + let last_slash = + String.rindex f '/' + in + if last_slash < last_dot then + sub + else + f + with Not_found -> + sub + + with Not_found -> + f + + + let capitalize_file f = + let dir = dirname f in + let base = basename f in + concat dir (String.capitalize base) + + + let uncapitalize_file f = + let dir = dirname f in + let base = basename f in + concat dir (String.uncapitalize base) + + +end + +module OASISHostPath = struct +(* # 22 "src/oasis/OASISHostPath.ml" *) + + + open Filename + + + module Unix = OASISUnixPath + + + let make = + function + | [] -> + invalid_arg "OASISHostPath.make" + | hd :: tl -> + List.fold_left Filename.concat hd tl + + + let of_unix ufn = + if Sys.os_type = "Unix" then + ufn + else + make + (List.map + (fun p -> + if p = Unix.current_dir_name then + current_dir_name + else if p = Unix.parent_dir_name then + parent_dir_name + else + p) + (OASISString.nsplit ufn '/')) + + +end + +module OASISSection = struct +(* # 22 "src/oasis/OASISSection.ml" *) + + + open OASISTypes + + + let section_kind_common = + function + | Library (cs, _, _) -> + `Library, cs + | Object (cs, _, _) -> + `Object, cs + | Executable (cs, _, _) -> + `Executable, cs + | Flag (cs, _) -> + `Flag, cs + | SrcRepo (cs, _) -> + `SrcRepo, cs + | Test (cs, _) -> + `Test, cs + | Doc (cs, _) -> + `Doc, cs + + + let section_common sct = + snd (section_kind_common sct) + + + let section_common_set cs = + function + | Library (_, bs, lib) -> Library (cs, bs, lib) + | Object (_, bs, obj) -> Object (cs, bs, obj) + | Executable (_, bs, exec) -> Executable (cs, bs, exec) + | Flag (_, flg) -> Flag (cs, flg) + | SrcRepo (_, src_repo) -> SrcRepo (cs, src_repo) + | Test (_, tst) -> Test (cs, tst) + | Doc (_, doc) -> Doc (cs, doc) + + + (** Key used to identify section + *) + let section_id sct = + let k, cs = + section_kind_common sct + in + k, cs.cs_name + + + let string_of_section sct = + let k, nm = + section_id sct + in + (match k with + | `Library -> "library" + | `Object -> "object" + | `Executable -> "executable" + | `Flag -> "flag" + | `SrcRepo -> "src repository" + | `Test -> "test" + | `Doc -> "doc") + ^" "^nm + + + let section_find id scts = + List.find + (fun sct -> id = section_id sct) + scts + + + module CSection = + struct + type t = section + + let id = section_id + + let compare t1 t2 = + compare (id t1) (id t2) + + let equal t1 t2 = + (id t1) = (id t2) + + let hash t = + Hashtbl.hash (id t) + end + + + module MapSection = Map.Make(CSection) + module SetSection = Set.Make(CSection) + + +end + +module OASISBuildSection = struct +(* # 22 "src/oasis/OASISBuildSection.ml" *) + + +end + +module OASISExecutable = struct +(* # 22 "src/oasis/OASISExecutable.ml" *) + + + open OASISTypes + + + let unix_exec_is (cs, bs, exec) is_native ext_dll suffix_program = + let dir = + OASISUnixPath.concat + bs.bs_path + (OASISUnixPath.dirname exec.exec_main_is) + in + let is_native_exec = + match bs.bs_compiled_object with + | Native -> true + | Best -> is_native () + | Byte -> false + in + + OASISUnixPath.concat + dir + (cs.cs_name^(suffix_program ())), + + if not is_native_exec && + not exec.exec_custom && + bs.bs_c_sources <> [] then + Some (dir^"/dll"^cs.cs_name^"_stubs"^(ext_dll ())) + else + None + + +end + +module OASISLibrary = struct +(* # 22 "src/oasis/OASISLibrary.ml" *) + + + open OASISTypes + open OASISUtils + open OASISGettext + open OASISSection + + + (* Look for a module file, considering capitalization or not. *) + let find_module source_file_exists bs modul = + let possible_base_fn = + List.map + (OASISUnixPath.concat bs.bs_path) + [modul; + OASISUnixPath.uncapitalize_file modul; + OASISUnixPath.capitalize_file modul] + in + (* TODO: we should be able to be able to determine the source for every + * files. Hence we should introduce a Module(source: fn) for the fields + * Modules and InternalModules + *) + List.fold_left + (fun acc base_fn -> + match acc with + | `No_sources _ -> + begin + let file_found = + List.fold_left + (fun acc ext -> + if source_file_exists (base_fn^ext) then + (base_fn^ext) :: acc + else + acc) + [] + [".ml"; ".mli"; ".mll"; ".mly"] + in + match file_found with + | [] -> + acc + | lst -> + `Sources (base_fn, lst) + end + | `Sources _ -> + acc) + (`No_sources possible_base_fn) + possible_base_fn + + + let source_unix_files ~ctxt (cs, bs, lib) source_file_exists = + List.fold_left + (fun acc modul -> + match find_module source_file_exists bs modul with + | `Sources (base_fn, lst) -> + (base_fn, lst) :: acc + | `No_sources _ -> + OASISMessage.warning + ~ctxt + (f_ "Cannot find source file matching \ + module '%s' in library %s") + modul cs.cs_name; + acc) + [] + (lib.lib_modules @ lib.lib_internal_modules) + + + let generated_unix_files + ~ctxt + ~is_native + ~has_native_dynlink + ~ext_lib + ~ext_dll + ~source_file_exists + (cs, bs, lib) = + + let find_modules lst ext = + let find_module modul = + match find_module source_file_exists bs modul with + | `Sources (base_fn, [fn]) when ext <> "cmi" + && Filename.check_suffix fn ".mli" -> + None (* No implementation files for pure interface. *) + | `Sources (base_fn, _) -> + Some [base_fn] + | `No_sources lst -> + OASISMessage.warning + ~ctxt + (f_ "Cannot find source file matching \ + module '%s' in library %s") + modul cs.cs_name; + Some lst + in + List.fold_left + (fun acc nm -> + match find_module nm with + | None -> acc + | Some base_fns -> + List.map (fun base_fn -> base_fn ^"."^ext) base_fns :: acc) + [] + lst + in + + (* The .cmx that be compiled along *) + let cmxs = + let should_be_built = + match bs.bs_compiled_object with + | Native -> true + | Best -> is_native + | Byte -> false + in + if should_be_built then + if lib.lib_pack then + find_modules + [cs.cs_name] + "cmx" + else + find_modules + (lib.lib_modules @ lib.lib_internal_modules) + "cmx" + else + [] + in + + let acc_nopath = + [] + in + + (* The headers and annot/cmt files that should be compiled along *) + let headers = + let sufx = + if lib.lib_pack + then [".cmti"; ".cmt"; ".annot"] + else [".cmi"; ".cmti"; ".cmt"; ".annot"] + in + List.map + begin + List.fold_left + begin fun accu s -> + let dot = String.rindex s '.' in + let base = String.sub s 0 dot in + List.map ((^) base) sufx @ accu + end + [] + end + (find_modules lib.lib_modules "cmi") + in + + (* Compute what libraries should be built *) + let acc_nopath = + (* Add the packed header file if required *) + let add_pack_header acc = + if lib.lib_pack then + [cs.cs_name^".cmi"; cs.cs_name^".cmti"; cs.cs_name^".cmt"] :: acc + else + acc + in + let byte acc = + add_pack_header ([cs.cs_name^".cma"] :: acc) + in + let native acc = + let acc = + add_pack_header + (if has_native_dynlink then + [cs.cs_name^".cmxs"] :: acc + else acc) + in + [cs.cs_name^".cmxa"] :: [cs.cs_name^ext_lib] :: acc + in + match bs.bs_compiled_object with + | Native -> + byte (native acc_nopath) + | Best when is_native -> + byte (native acc_nopath) + | Byte | Best -> + byte acc_nopath + in + + (* Add C library to be built *) + let acc_nopath = + if bs.bs_c_sources <> [] then + begin + ["lib"^cs.cs_name^"_stubs"^ext_lib] + :: + ["dll"^cs.cs_name^"_stubs"^ext_dll] + :: + acc_nopath + end + else + acc_nopath + in + + (* All the files generated *) + List.rev_append + (List.rev_map + (List.rev_map + (OASISUnixPath.concat bs.bs_path)) + acc_nopath) + (headers @ cmxs) + + +end + +module OASISObject = struct +(* # 22 "src/oasis/OASISObject.ml" *) + + + open OASISTypes + open OASISGettext + + + let source_unix_files ~ctxt (cs, bs, obj) source_file_exists = + List.fold_left + (fun acc modul -> + match OASISLibrary.find_module source_file_exists bs modul with + | `Sources (base_fn, lst) -> + (base_fn, lst) :: acc + | `No_sources _ -> + OASISMessage.warning + ~ctxt + (f_ "Cannot find source file matching \ + module '%s' in object %s") + modul cs.cs_name; + acc) + [] + obj.obj_modules + + + let generated_unix_files + ~ctxt + ~is_native + ~source_file_exists + (cs, bs, obj) = + + let find_module ext modul = + match OASISLibrary.find_module source_file_exists bs modul with + | `Sources (base_fn, _) -> [base_fn ^ ext] + | `No_sources lst -> + OASISMessage.warning + ~ctxt + (f_ "Cannot find source file matching \ + module '%s' in object %s") + modul cs.cs_name ; + lst + in + + let header, byte, native, c_object, f = + match obj.obj_modules with + | [ m ] -> (find_module ".cmi" m, + find_module ".cmo" m, + find_module ".cmx" m, + find_module ".o" m, + fun x -> x) + | _ -> ([cs.cs_name ^ ".cmi"], + [cs.cs_name ^ ".cmo"], + [cs.cs_name ^ ".cmx"], + [cs.cs_name ^ ".o"], + OASISUnixPath.concat bs.bs_path) + in + List.map (List.map f) ( + match bs.bs_compiled_object with + | Native -> + native :: c_object :: byte :: header :: [] + | Best when is_native -> + native :: c_object :: byte :: header :: [] + | Byte | Best -> + byte :: header :: []) + + +end + +module OASISFindlib = struct +(* # 22 "src/oasis/OASISFindlib.ml" *) + + + open OASISTypes + open OASISUtils + open OASISGettext + open OASISSection + + + type library_name = name + type findlib_part_name = name + type 'a map_of_findlib_part_name = 'a OASISUtils.MapString.t + + + exception InternalLibraryNotFound of library_name + exception FindlibPackageNotFound of findlib_name + + + type group_t = + | Container of findlib_name * group_t list + | Package of (findlib_name * + common_section * + build_section * + [`Library of library | `Object of object_] * + group_t list) + + + type data = common_section * + build_section * + [`Library of library | `Object of object_] + type tree = + | Node of (data option) * (tree MapString.t) + | Leaf of data + + + let findlib_mapping pkg = + (* Map from library name to either full findlib name or parts + parent. *) + let fndlb_parts_of_lib_name = + let fndlb_parts cs lib = + let name = + match lib.lib_findlib_name with + | Some nm -> nm + | None -> cs.cs_name + in + let name = + String.concat "." (lib.lib_findlib_containers @ [name]) + in + name + in + List.fold_left + (fun mp -> + function + | Library (cs, _, lib) -> + begin + let lib_name = cs.cs_name in + let fndlb_parts = fndlb_parts cs lib in + if MapString.mem lib_name mp then + failwithf + (f_ "The library name '%s' is used more than once.") + lib_name; + match lib.lib_findlib_parent with + | Some lib_name_parent -> + MapString.add + lib_name + (`Unsolved (lib_name_parent, fndlb_parts)) + mp + | None -> + MapString.add + lib_name + (`Solved fndlb_parts) + mp + end + + | Object (cs, _, obj) -> + begin + let obj_name = cs.cs_name in + if MapString.mem obj_name mp then + failwithf + (f_ "The object name '%s' is used more than once.") + obj_name; + let findlib_full_name = match obj.obj_findlib_fullname with + | Some ns -> String.concat "." ns + | None -> obj_name + in + MapString.add + obj_name + (`Solved findlib_full_name) + mp + end + + | Executable _ | Test _ | Flag _ | SrcRepo _ | Doc _ -> + mp) + MapString.empty + pkg.sections + in + + (* Solve the above graph to be only library name to full findlib name. *) + let fndlb_name_of_lib_name = + let rec solve visited mp lib_name lib_name_child = + if SetString.mem lib_name visited then + failwithf + (f_ "Library '%s' is involved in a cycle \ + with regard to findlib naming.") + lib_name; + let visited = SetString.add lib_name visited in + try + match MapString.find lib_name mp with + | `Solved fndlb_nm -> + fndlb_nm, mp + | `Unsolved (lib_nm_parent, post_fndlb_nm) -> + let pre_fndlb_nm, mp = + solve visited mp lib_nm_parent lib_name + in + let fndlb_nm = pre_fndlb_nm^"."^post_fndlb_nm in + fndlb_nm, MapString.add lib_name (`Solved fndlb_nm) mp + with Not_found -> + failwithf + (f_ "Library '%s', which is defined as the findlib parent of \ + library '%s', doesn't exist.") + lib_name lib_name_child + in + let mp = + MapString.fold + (fun lib_name status mp -> + match status with + | `Solved _ -> + (* Solved initialy, no need to go further *) + mp + | `Unsolved _ -> + let _, mp = solve SetString.empty mp lib_name "" in + mp) + fndlb_parts_of_lib_name + fndlb_parts_of_lib_name + in + MapString.map + (function + | `Solved fndlb_nm -> fndlb_nm + | `Unsolved _ -> assert false) + mp + in + + (* Convert an internal library name to a findlib name. *) + let findlib_name_of_library_name lib_nm = + try + MapString.find lib_nm fndlb_name_of_lib_name + with Not_found -> + raise (InternalLibraryNotFound lib_nm) + in + + (* Add a library to the tree. + *) + let add sct mp = + let fndlb_fullname = + let cs, _, _ = sct in + let lib_name = cs.cs_name in + findlib_name_of_library_name lib_name + in + let rec add_children nm_lst (children: tree MapString.t) = + match nm_lst with + | (hd :: tl) -> + begin + let node = + try + add_node tl (MapString.find hd children) + with Not_found -> + (* New node *) + new_node tl + in + MapString.add hd node children + end + | [] -> + (* Should not have a nameless library. *) + assert false + and add_node tl node = + if tl = [] then + begin + match node with + | Node (None, children) -> + Node (Some sct, children) + | Leaf (cs', _, _) | Node (Some (cs', _, _), _) -> + (* TODO: allow to merge Package, i.e. + * archive(byte) = "foo.cma foo_init.cmo" + *) + let cs, _, _ = sct in + failwithf + (f_ "Library '%s' and '%s' have the same findlib name '%s'") + cs.cs_name cs'.cs_name fndlb_fullname + end + else + begin + match node with + | Leaf data -> + Node (Some data, add_children tl MapString.empty) + | Node (data_opt, children) -> + Node (data_opt, add_children tl children) + end + and new_node = + function + | [] -> + Leaf sct + | hd :: tl -> + Node (None, MapString.add hd (new_node tl) MapString.empty) + in + add_children (OASISString.nsplit fndlb_fullname '.') mp + in + + let rec group_of_tree mp = + MapString.fold + (fun nm node acc -> + let cur = + match node with + | Node (Some (cs, bs, lib), children) -> + Package (nm, cs, bs, lib, group_of_tree children) + | Node (None, children) -> + Container (nm, group_of_tree children) + | Leaf (cs, bs, lib) -> + Package (nm, cs, bs, lib, []) + in + cur :: acc) + mp [] + in + + let group_mp = + List.fold_left + (fun mp -> + function + | Library (cs, bs, lib) -> + add (cs, bs, `Library lib) mp + | Object (cs, bs, obj) -> + add (cs, bs, `Object obj) mp + | _ -> + mp) + MapString.empty + pkg.sections + in + + let groups = + group_of_tree group_mp + in + + let library_name_of_findlib_name = + lazy begin + (* Revert findlib_name_of_library_name. *) + MapString.fold + (fun k v mp -> MapString.add v k mp) + fndlb_name_of_lib_name + MapString.empty + end + in + let library_name_of_findlib_name fndlb_nm = + try + MapString.find fndlb_nm (Lazy.force library_name_of_findlib_name) + with Not_found -> + raise (FindlibPackageNotFound fndlb_nm) + in + + groups, + findlib_name_of_library_name, + library_name_of_findlib_name + + + let findlib_of_group = + function + | Container (fndlb_nm, _) + | Package (fndlb_nm, _, _, _, _) -> fndlb_nm + + + let root_of_group grp = + let rec root_lib_aux = + (* We do a DFS in the group. *) + function + | Container (_, children) -> + List.fold_left + (fun res grp -> + if res = None then + root_lib_aux grp + else + res) + None + children + | Package (_, cs, bs, lib, _) -> + Some (cs, bs, lib) + in + match root_lib_aux grp with + | Some res -> + res + | None -> + failwithf + (f_ "Unable to determine root library of findlib library '%s'") + (findlib_of_group grp) + + +end + +module OASISFlag = struct +(* # 22 "src/oasis/OASISFlag.ml" *) + + +end + +module OASISPackage = struct +(* # 22 "src/oasis/OASISPackage.ml" *) + + +end + +module OASISSourceRepository = struct +(* # 22 "src/oasis/OASISSourceRepository.ml" *) + + +end + +module OASISTest = struct +(* # 22 "src/oasis/OASISTest.ml" *) + + +end + +module OASISDocument = struct +(* # 22 "src/oasis/OASISDocument.ml" *) + + +end + +module OASISExec = struct +(* # 22 "src/oasis/OASISExec.ml" *) + + + open OASISGettext + open OASISUtils + open OASISMessage + + + (* TODO: I don't like this quote, it is there because $(rm) foo expands to + * 'rm -f' foo... + *) + let run ~ctxt ?f_exit_code ?(quote=true) cmd args = + let cmd = + if quote then + if Sys.os_type = "Win32" then + if String.contains cmd ' ' then + (* Double the 1st double quote... win32... sigh *) + "\""^(Filename.quote cmd) + else + cmd + else + Filename.quote cmd + else + cmd + in + let cmdline = + String.concat " " (cmd :: args) + in + info ~ctxt (f_ "Running command '%s'") cmdline; + match f_exit_code, Sys.command cmdline with + | None, 0 -> () + | None, i -> + failwithf + (f_ "Command '%s' terminated with error code %d") + cmdline i + | Some f, i -> + f i + + + let run_read_output ~ctxt ?f_exit_code cmd args = + let fn = + Filename.temp_file "oasis-" ".txt" + in + try + begin + let () = + run ~ctxt ?f_exit_code cmd (args @ [">"; Filename.quote fn]) + in + let chn = + open_in fn + in + let routput = + ref [] + in + begin + try + while true do + routput := (input_line chn) :: !routput + done + with End_of_file -> + () + end; + close_in chn; + Sys.remove fn; + List.rev !routput + end + with e -> + (try Sys.remove fn with _ -> ()); + raise e + + + let run_read_one_line ~ctxt ?f_exit_code cmd args = + match run_read_output ~ctxt ?f_exit_code cmd args with + | [fst] -> + fst + | lst -> + failwithf + (f_ "Command return unexpected output %S") + (String.concat "\n" lst) +end + +module OASISFileUtil = struct +(* # 22 "src/oasis/OASISFileUtil.ml" *) + + + open OASISGettext + + + let file_exists_case fn = + let dirname = Filename.dirname fn in + let basename = Filename.basename fn in + if Sys.file_exists dirname then + if basename = Filename.current_dir_name then + true + else + List.mem + basename + (Array.to_list (Sys.readdir dirname)) + else + false + + + let find_file ?(case_sensitive=true) paths exts = + + (* Cardinal product of two list *) + let ( * ) lst1 lst2 = + List.flatten + (List.map + (fun a -> + List.map + (fun b -> a, b) + lst2) + lst1) + in + + let rec combined_paths lst = + match lst with + | p1 :: p2 :: tl -> + let acc = + (List.map + (fun (a, b) -> Filename.concat a b) + (p1 * p2)) + in + combined_paths (acc :: tl) + | [e] -> + e + | [] -> + [] + in + + let alternatives = + List.map + (fun (p, e) -> + if String.length e > 0 && e.[0] <> '.' then + p ^ "." ^ e + else + p ^ e) + ((combined_paths paths) * exts) + in + List.find (fun file -> + (if case_sensitive then + file_exists_case file + else + Sys.file_exists file) + && not (Sys.is_directory file) + ) alternatives + + + let which ~ctxt prg = + let path_sep = + match Sys.os_type with + | "Win32" -> + ';' + | _ -> + ':' + in + let path_lst = OASISString.nsplit (Sys.getenv "PATH") path_sep in + let exec_ext = + match Sys.os_type with + | "Win32" -> + "" :: (OASISString.nsplit (Sys.getenv "PATHEXT") path_sep) + | _ -> + [""] + in + find_file ~case_sensitive:false [path_lst; [prg]] exec_ext + + + (**/**) + let rec fix_dir dn = + (* Windows hack because Sys.file_exists "src\\" = false when + * Sys.file_exists "src" = true + *) + let ln = + String.length dn + in + if Sys.os_type = "Win32" && ln > 0 && dn.[ln - 1] = '\\' then + fix_dir (String.sub dn 0 (ln - 1)) + else + dn + + + let q = Filename.quote + (**/**) + + + let cp ~ctxt ?(recurse=false) src tgt = + if recurse then + match Sys.os_type with + | "Win32" -> + OASISExec.run ~ctxt + "xcopy" [q src; q tgt; "/E"] + | _ -> + OASISExec.run ~ctxt + "cp" ["-r"; q src; q tgt] + else + OASISExec.run ~ctxt + (match Sys.os_type with + | "Win32" -> "copy" + | _ -> "cp") + [q src; q tgt] + + + let mkdir ~ctxt tgt = + OASISExec.run ~ctxt + (match Sys.os_type with + | "Win32" -> "md" + | _ -> "mkdir") + [q tgt] + + + let rec mkdir_parent ~ctxt f tgt = + let tgt = + fix_dir tgt + in + if Sys.file_exists tgt then + begin + if not (Sys.is_directory tgt) then + OASISUtils.failwithf + (f_ "Cannot create directory '%s', a file of the same name already \ + exists") + tgt + end + else + begin + mkdir_parent ~ctxt f (Filename.dirname tgt); + if not (Sys.file_exists tgt) then + begin + f tgt; + mkdir ~ctxt tgt + end + end + + + let rmdir ~ctxt tgt = + if Sys.readdir tgt = [||] then begin + match Sys.os_type with + | "Win32" -> + OASISExec.run ~ctxt "rd" [q tgt] + | _ -> + OASISExec.run ~ctxt "rm" ["-r"; q tgt] + end else begin + OASISMessage.error ~ctxt + (f_ "Cannot remove directory '%s': not empty.") + tgt + end + + + let glob ~ctxt fn = + let basename = + Filename.basename fn + in + if String.length basename >= 2 && + basename.[0] = '*' && + basename.[1] = '.' then + begin + let ext_len = + (String.length basename) - 2 + in + let ext = + String.sub basename 2 ext_len + in + let dirname = + Filename.dirname fn + in + Array.fold_left + (fun acc fn -> + try + let fn_ext = + String.sub + fn + ((String.length fn) - ext_len) + ext_len + in + if fn_ext = ext then + (Filename.concat dirname fn) :: acc + else + acc + with Invalid_argument _ -> + acc) + [] + (Sys.readdir dirname) + end + else + begin + if file_exists_case fn then + [fn] + else + [] + end +end + + +# 2893 "setup.ml" +module BaseEnvLight = struct +(* # 22 "src/base/BaseEnvLight.ml" *) + + + module MapString = Map.Make(String) + + + type t = string MapString.t + + + let default_filename = + Filename.concat + (Sys.getcwd ()) + "setup.data" + + + let load ?(allow_empty=false) ?(filename=default_filename) () = + if Sys.file_exists filename then + begin + let chn = + open_in_bin filename + in + let st = + Stream.of_channel chn + in + let line = + ref 1 + in + let st_line = + Stream.from + (fun _ -> + try + match Stream.next st with + | '\n' -> incr line; Some '\n' + | c -> Some c + with Stream.Failure -> None) + in + let lexer = + Genlex.make_lexer ["="] st_line + in + let rec read_file mp = + match Stream.npeek 3 lexer with + | [Genlex.Ident nm; Genlex.Kwd "="; Genlex.String value] -> + Stream.junk lexer; + Stream.junk lexer; + Stream.junk lexer; + read_file (MapString.add nm value mp) + | [] -> + mp + | _ -> + failwith + (Printf.sprintf + "Malformed data file '%s' line %d" + filename !line) + in + let mp = + read_file MapString.empty + in + close_in chn; + mp + end + else if allow_empty then + begin + MapString.empty + end + else + begin + failwith + (Printf.sprintf + "Unable to load environment, the file '%s' doesn't exist." + filename) + end + + + let rec var_expand str env = + let buff = + Buffer.create ((String.length str) * 2) + in + Buffer.add_substitute + buff + (fun var -> + try + var_expand (MapString.find var env) env + with Not_found -> + failwith + (Printf.sprintf + "No variable %s defined when trying to expand %S." + var + str)) + str; + Buffer.contents buff + + + let var_get name env = + var_expand (MapString.find name env) env + + + let var_choose lst env = + OASISExpr.choose + (fun nm -> var_get nm env) + lst +end + + +# 2998 "setup.ml" +module BaseContext = struct +(* # 22 "src/base/BaseContext.ml" *) + + (* TODO: get rid of this module. *) + open OASISContext + + + let args () = fst (fspecs ()) + + + let default = default + +end + +module BaseMessage = struct +(* # 22 "src/base/BaseMessage.ml" *) + + + (** Message to user, overrid for Base + @author Sylvain Le Gall + *) + open OASISMessage + open BaseContext + + + let debug fmt = debug ~ctxt:!default fmt + + + let info fmt = info ~ctxt:!default fmt + + + let warning fmt = warning ~ctxt:!default fmt + + + let error fmt = error ~ctxt:!default fmt + +end + +module BaseEnv = struct +(* # 22 "src/base/BaseEnv.ml" *) + + open OASISGettext + open OASISUtils + open PropList + + + module MapString = BaseEnvLight.MapString + + + type origin_t = + | ODefault + | OGetEnv + | OFileLoad + | OCommandLine + + + type cli_handle_t = + | CLINone + | CLIAuto + | CLIWith + | CLIEnable + | CLIUser of (Arg.key * Arg.spec * Arg.doc) list + + + type definition_t = + { + hide: bool; + dump: bool; + cli: cli_handle_t; + arg_help: string option; + group: string option; + } + + + let schema = + Schema.create "environment" + + + (* Environment data *) + let env = + Data.create () + + + (* Environment data from file *) + let env_from_file = + ref MapString.empty + + + (* Lexer for var *) + let var_lxr = + Genlex.make_lexer [] + + + let rec var_expand str = + let buff = + Buffer.create ((String.length str) * 2) + in + Buffer.add_substitute + buff + (fun var -> + try + (* TODO: this is a quick hack to allow calling Test.Command + * without defining executable name really. I.e. if there is + * an exec Executable toto, then $(toto) should be replace + * by its real name. It is however useful to have this function + * for other variable that depend on the host and should be + * written better than that. + *) + let st = + var_lxr (Stream.of_string var) + in + match Stream.npeek 3 st with + | [Genlex.Ident "utoh"; Genlex.Ident nm] -> + OASISHostPath.of_unix (var_get nm) + | [Genlex.Ident "utoh"; Genlex.String s] -> + OASISHostPath.of_unix s + | [Genlex.Ident "ocaml_escaped"; Genlex.Ident nm] -> + String.escaped (var_get nm) + | [Genlex.Ident "ocaml_escaped"; Genlex.String s] -> + String.escaped s + | [Genlex.Ident nm] -> + var_get nm + | _ -> + failwithf + (f_ "Unknown expression '%s' in variable expansion of %s.") + var + str + with + | Unknown_field (_, _) -> + failwithf + (f_ "No variable %s defined when trying to expand %S.") + var + str + | Stream.Error e -> + failwithf + (f_ "Syntax error when parsing '%s' when trying to \ + expand %S: %s") + var + str + e) + str; + Buffer.contents buff + + + and var_get name = + let vl = + try + Schema.get schema env name + with Unknown_field _ as e -> + begin + try + MapString.find name !env_from_file + with Not_found -> + raise e + end + in + var_expand vl + + + let var_choose ?printer ?name lst = + OASISExpr.choose + ?printer + ?name + var_get + lst + + + let var_protect vl = + let buff = + Buffer.create (String.length vl) + in + String.iter + (function + | '$' -> Buffer.add_string buff "\\$" + | c -> Buffer.add_char buff c) + vl; + Buffer.contents buff + + + let var_define + ?(hide=false) + ?(dump=true) + ?short_desc + ?(cli=CLINone) + ?arg_help + ?group + name (* TODO: type constraint on the fact that name must be a valid OCaml + id *) + dflt = + + let default = + [ + OFileLoad, (fun () -> MapString.find name !env_from_file); + ODefault, dflt; + OGetEnv, (fun () -> Sys.getenv name); + ] + in + + let extra = + { + hide = hide; + dump = dump; + cli = cli; + arg_help = arg_help; + group = group; + } + in + + (* Try to find a value that can be defined + *) + let var_get_low lst = + let errors, res = + List.fold_left + (fun (errors, res) (o, v) -> + if res = None then + begin + try + errors, Some (v ()) + with + | Not_found -> + errors, res + | Failure rsn -> + (rsn :: errors), res + | e -> + (Printexc.to_string e) :: errors, res + end + else + errors, res) + ([], None) + (List.sort + (fun (o1, _) (o2, _) -> + Pervasives.compare o2 o1) + lst) + in + match res, errors with + | Some v, _ -> + v + | None, [] -> + raise (Not_set (name, None)) + | None, lst -> + raise (Not_set (name, Some (String.concat (s_ ", ") lst))) + in + + let help = + match short_desc with + | Some fs -> Some fs + | None -> None + in + + let var_get_lst = + FieldRO.create + ~schema + ~name + ~parse:(fun ?(context=ODefault) s -> [context, fun () -> s]) + ~print:var_get_low + ~default + ~update:(fun ?context x old_x -> x @ old_x) + ?help + extra + in + + fun () -> + var_expand (var_get_low (var_get_lst env)) + + + let var_redefine + ?hide + ?dump + ?short_desc + ?cli + ?arg_help + ?group + name + dflt = + if Schema.mem schema name then + begin + (* TODO: look suspsicious, we want to memorize dflt not dflt () *) + Schema.set schema env ~context:ODefault name (dflt ()); + fun () -> var_get name + end + else + begin + var_define + ?hide + ?dump + ?short_desc + ?cli + ?arg_help + ?group + name + dflt + end + + + let var_ignore (e: unit -> string) = () + + + let print_hidden = + var_define + ~hide:true + ~dump:false + ~cli:CLIAuto + ~arg_help:"Print even non-printable variable. (debug)" + "print_hidden" + (fun () -> "false") + + + let var_all () = + List.rev + (Schema.fold + (fun acc nm def _ -> + if not def.hide || bool_of_string (print_hidden ()) then + nm :: acc + else + acc) + [] + schema) + + + let default_filename = + BaseEnvLight.default_filename + + + let load ?allow_empty ?filename () = + env_from_file := BaseEnvLight.load ?allow_empty ?filename () + + + let unload () = + env_from_file := MapString.empty; + Data.clear env + + + let dump ?(filename=default_filename) () = + let chn = + open_out_bin filename + in + let output nm value = + Printf.fprintf chn "%s=%S\n" nm value + in + let mp_todo = + (* Dump data from schema *) + Schema.fold + (fun mp_todo nm def _ -> + if def.dump then + begin + try + let value = + Schema.get + schema + env + nm + in + output nm value + with Not_set _ -> + () + end; + MapString.remove nm mp_todo) + !env_from_file + schema + in + (* Dump data defined outside of schema *) + MapString.iter output mp_todo; + + (* End of the dump *) + close_out chn + + + let print () = + let printable_vars = + Schema.fold + (fun acc nm def short_descr_opt -> + if not def.hide || bool_of_string (print_hidden ()) then + begin + try + let value = + Schema.get + schema + env + nm + in + let txt = + match short_descr_opt with + | Some s -> s () + | None -> nm + in + (txt, value) :: acc + with Not_set _ -> + acc + end + else + acc) + [] + schema + in + let max_length = + List.fold_left max 0 + (List.rev_map String.length + (List.rev_map fst printable_vars)) + in + let dot_pad str = + String.make ((max_length - (String.length str)) + 3) '.' + in + + Printf.printf "\nConfiguration: \n"; + List.iter + (fun (name, value) -> + Printf.printf "%s: %s %s\n" name (dot_pad name) value) + (List.rev printable_vars); + Printf.printf "\n%!" + + + let args () = + let arg_concat = + OASISUtils.varname_concat ~hyphen:'-' + in + [ + "--override", + Arg.Tuple + ( + let rvr = ref "" + in + let rvl = ref "" + in + [ + Arg.Set_string rvr; + Arg.Set_string rvl; + Arg.Unit + (fun () -> + Schema.set + schema + env + ~context:OCommandLine + !rvr + !rvl) + ] + ), + "var+val Override any configuration variable."; + + ] + @ + List.flatten + (Schema.fold + (fun acc name def short_descr_opt -> + let var_set s = + Schema.set + schema + env + ~context:OCommandLine + name + s + in + + let arg_name = + OASISUtils.varname_of_string ~hyphen:'-' name + in + + let hlp = + match short_descr_opt with + | Some txt -> txt () + | None -> "" + in + + let arg_hlp = + match def.arg_help with + | Some s -> s + | None -> "str" + in + + let default_value = + try + Printf.sprintf + (f_ " [%s]") + (Schema.get + schema + env + name) + with Not_set _ -> + "" + in + + let args = + match def.cli with + | CLINone -> + [] + | CLIAuto -> + [ + arg_concat "--" arg_name, + Arg.String var_set, + Printf.sprintf (f_ "%s %s%s") arg_hlp hlp default_value + ] + | CLIWith -> + [ + arg_concat "--with-" arg_name, + Arg.String var_set, + Printf.sprintf (f_ "%s %s%s") arg_hlp hlp default_value + ] + | CLIEnable -> + let dflt = + if default_value = " [true]" then + s_ " [default: enabled]" + else + s_ " [default: disabled]" + in + [ + arg_concat "--enable-" arg_name, + Arg.Unit (fun () -> var_set "true"), + Printf.sprintf (f_ " %s%s") hlp dflt; + + arg_concat "--disable-" arg_name, + Arg.Unit (fun () -> var_set "false"), + Printf.sprintf (f_ " %s%s") hlp dflt + ] + | CLIUser lst -> + lst + in + args :: acc) + [] + schema) +end + +module BaseArgExt = struct +(* # 22 "src/base/BaseArgExt.ml" *) + + + open OASISUtils + open OASISGettext + + + let parse argv args = + (* Simulate command line for Arg *) + let current = + ref 0 + in + + try + Arg.parse_argv + ~current:current + (Array.concat [[|"none"|]; argv]) + (Arg.align args) + (failwithf (f_ "Don't know what to do with arguments: '%s'")) + (s_ "configure options:") + with + | Arg.Help txt -> + print_endline txt; + exit 0 + | Arg.Bad txt -> + prerr_endline txt; + exit 1 +end + +module BaseCheck = struct +(* # 22 "src/base/BaseCheck.ml" *) + + + open BaseEnv + open BaseMessage + open OASISUtils + open OASISGettext + + + let prog_best prg prg_lst = + var_redefine + prg + (fun () -> + let alternate = + List.fold_left + (fun res e -> + match res with + | Some _ -> + res + | None -> + try + Some (OASISFileUtil.which ~ctxt:!BaseContext.default e) + with Not_found -> + None) + None + prg_lst + in + match alternate with + | Some prg -> prg + | None -> raise Not_found) + + + let prog prg = + prog_best prg [prg] + + + let prog_opt prg = + prog_best prg [prg^".opt"; prg] + + + let ocamlfind = + prog "ocamlfind" + + + let version + var_prefix + cmp + fversion + () = + (* Really compare version provided *) + let var = + var_prefix^"_version_"^(OASISVersion.varname_of_comparator cmp) + in + var_redefine + ~hide:true + var + (fun () -> + let version_str = + match fversion () with + | "[Distributed with OCaml]" -> + begin + try + (var_get "ocaml_version") + with Not_found -> + warning + (f_ "Variable ocaml_version not defined, fallback \ + to default"); + Sys.ocaml_version + end + | res -> + res + in + let version = + OASISVersion.version_of_string version_str + in + if OASISVersion.comparator_apply version cmp then + version_str + else + failwithf + (f_ "Cannot satisfy version constraint on %s: %s (version: %s)") + var_prefix + (OASISVersion.string_of_comparator cmp) + version_str) + () + + + let package_version pkg = + OASISExec.run_read_one_line ~ctxt:!BaseContext.default + (ocamlfind ()) + ["query"; "-format"; "%v"; pkg] + + + let package ?version_comparator pkg () = + let var = + OASISUtils.varname_concat + "pkg_" + (OASISUtils.varname_of_string pkg) + in + let findlib_dir pkg = + let dir = + OASISExec.run_read_one_line ~ctxt:!BaseContext.default + (ocamlfind ()) + ["query"; "-format"; "%d"; pkg] + in + if Sys.file_exists dir && Sys.is_directory dir then + dir + else + failwithf + (f_ "When looking for findlib package %s, \ + directory %s return doesn't exist") + pkg dir + in + let vl = + var_redefine + var + (fun () -> findlib_dir pkg) + () + in + ( + match version_comparator with + | Some ver_cmp -> + ignore + (version + var + ver_cmp + (fun _ -> package_version pkg) + ()) + | None -> + () + ); + vl +end + +module BaseOCamlcConfig = struct +(* # 22 "src/base/BaseOCamlcConfig.ml" *) + + + open BaseEnv + open OASISUtils + open OASISGettext + + + module SMap = Map.Make(String) + + + let ocamlc = + BaseCheck.prog_opt "ocamlc" + + + let ocamlc_config_map = + (* Map name to value for ocamlc -config output + (name ^": "^value) + *) + let rec split_field mp lst = + match lst with + | line :: tl -> + let mp = + try + let pos_semicolon = + String.index line ':' + in + if pos_semicolon > 1 then + ( + let name = + String.sub line 0 pos_semicolon + in + let linelen = + String.length line + in + let value = + if linelen > pos_semicolon + 2 then + String.sub + line + (pos_semicolon + 2) + (linelen - pos_semicolon - 2) + else + "" + in + SMap.add name value mp + ) + else + ( + mp + ) + with Not_found -> + ( + mp + ) + in + split_field mp tl + | [] -> + mp + in + + let cache = + lazy + (var_protect + (Marshal.to_string + (split_field + SMap.empty + (OASISExec.run_read_output + ~ctxt:!BaseContext.default + (ocamlc ()) ["-config"])) + [])) + in + var_redefine + "ocamlc_config_map" + ~hide:true + ~dump:false + (fun () -> + (* TODO: update if ocamlc change !!! *) + Lazy.force cache) + + + let var_define nm = + (* Extract data from ocamlc -config *) + let avlbl_config_get () = + Marshal.from_string + (ocamlc_config_map ()) + 0 + in + let chop_version_suffix s = + try + String.sub s 0 (String.index s '+') + with _ -> + s + in + + let nm_config, value_config = + match nm with + | "ocaml_version" -> + "version", chop_version_suffix + | _ -> nm, (fun x -> x) + in + var_redefine + nm + (fun () -> + try + let map = + avlbl_config_get () + in + let value = + SMap.find nm_config map + in + value_config value + with Not_found -> + failwithf + (f_ "Cannot find field '%s' in '%s -config' output") + nm + (ocamlc ())) + +end + +module BaseStandardVar = struct +(* # 22 "src/base/BaseStandardVar.ml" *) + + + open OASISGettext + open OASISTypes + open OASISExpr + open BaseCheck + open BaseEnv + + + let ocamlfind = BaseCheck.ocamlfind + let ocamlc = BaseOCamlcConfig.ocamlc + let ocamlopt = prog_opt "ocamlopt" + let ocamlbuild = prog "ocamlbuild" + + + (**/**) + let rpkg = + ref None + + + let pkg_get () = + match !rpkg with + | Some pkg -> pkg + | None -> failwith (s_ "OASIS Package is not set") + + + let var_cond = ref [] + + + let var_define_cond ~since_version f dflt = + let holder = ref (fun () -> dflt) in + let since_version = + OASISVersion.VGreaterEqual (OASISVersion.version_of_string since_version) + in + var_cond := + (fun ver -> + if OASISVersion.comparator_apply ver since_version then + holder := f ()) :: !var_cond; + fun () -> !holder () + + + (**/**) + + + let pkg_name = + var_define + ~short_desc:(fun () -> s_ "Package name") + "pkg_name" + (fun () -> (pkg_get ()).name) + + + let pkg_version = + var_define + ~short_desc:(fun () -> s_ "Package version") + "pkg_version" + (fun () -> + (OASISVersion.string_of_version (pkg_get ()).version)) + + + let c = BaseOCamlcConfig.var_define + + + let os_type = c "os_type" + let system = c "system" + let architecture = c "architecture" + let ccomp_type = c "ccomp_type" + let ocaml_version = c "ocaml_version" + + + (* TODO: Check standard variable presence at runtime *) + + + let standard_library_default = c "standard_library_default" + let standard_library = c "standard_library" + let standard_runtime = c "standard_runtime" + let bytecomp_c_compiler = c "bytecomp_c_compiler" + let native_c_compiler = c "native_c_compiler" + let model = c "model" + let ext_obj = c "ext_obj" + let ext_asm = c "ext_asm" + let ext_lib = c "ext_lib" + let ext_dll = c "ext_dll" + let default_executable_name = c "default_executable_name" + let systhread_supported = c "systhread_supported" + + + let flexlink = + BaseCheck.prog "flexlink" + + + let flexdll_version = + var_define + ~short_desc:(fun () -> "FlexDLL version (Win32)") + "flexdll_version" + (fun () -> + let lst = + OASISExec.run_read_output ~ctxt:!BaseContext.default + (flexlink ()) ["-help"] + in + match lst with + | line :: _ -> + Scanf.sscanf line "FlexDLL version %s" (fun ver -> ver) + | [] -> + raise Not_found) + + + (**/**) + let p name hlp dflt = + var_define + ~short_desc:hlp + ~cli:CLIAuto + ~arg_help:"dir" + name + dflt + + + let (/) a b = + if os_type () = Sys.os_type then + Filename.concat a b + else if os_type () = "Unix" then + OASISUnixPath.concat a b + else + OASISUtils.failwithf (f_ "Cannot handle os_type %s filename concat") + (os_type ()) + (**/**) + + + let prefix = + p "prefix" + (fun () -> s_ "Install architecture-independent files dir") + (fun () -> + match os_type () with + | "Win32" -> + let program_files = + Sys.getenv "PROGRAMFILES" + in + program_files/(pkg_name ()) + | _ -> + "/usr/local") + + + let exec_prefix = + p "exec_prefix" + (fun () -> s_ "Install architecture-dependent files in dir") + (fun () -> "$prefix") + + + let bindir = + p "bindir" + (fun () -> s_ "User executables") + (fun () -> "$exec_prefix"/"bin") + + + let sbindir = + p "sbindir" + (fun () -> s_ "System admin executables") + (fun () -> "$exec_prefix"/"sbin") + + + let libexecdir = + p "libexecdir" + (fun () -> s_ "Program executables") + (fun () -> "$exec_prefix"/"libexec") + + + let sysconfdir = + p "sysconfdir" + (fun () -> s_ "Read-only single-machine data") + (fun () -> "$prefix"/"etc") + + + let sharedstatedir = + p "sharedstatedir" + (fun () -> s_ "Modifiable architecture-independent data") + (fun () -> "$prefix"/"com") + + + let localstatedir = + p "localstatedir" + (fun () -> s_ "Modifiable single-machine data") + (fun () -> "$prefix"/"var") + + + let libdir = + p "libdir" + (fun () -> s_ "Object code libraries") + (fun () -> "$exec_prefix"/"lib") + + + let datarootdir = + p "datarootdir" + (fun () -> s_ "Read-only arch-independent data root") + (fun () -> "$prefix"/"share") + + + let datadir = + p "datadir" + (fun () -> s_ "Read-only architecture-independent data") + (fun () -> "$datarootdir") + + + let infodir = + p "infodir" + (fun () -> s_ "Info documentation") + (fun () -> "$datarootdir"/"info") + + + let localedir = + p "localedir" + (fun () -> s_ "Locale-dependent data") + (fun () -> "$datarootdir"/"locale") + + + let mandir = + p "mandir" + (fun () -> s_ "Man documentation") + (fun () -> "$datarootdir"/"man") + + + let docdir = + p "docdir" + (fun () -> s_ "Documentation root") + (fun () -> "$datarootdir"/"doc"/"$pkg_name") + + + let htmldir = + p "htmldir" + (fun () -> s_ "HTML documentation") + (fun () -> "$docdir") + + + let dvidir = + p "dvidir" + (fun () -> s_ "DVI documentation") + (fun () -> "$docdir") + + + let pdfdir = + p "pdfdir" + (fun () -> s_ "PDF documentation") + (fun () -> "$docdir") + + + let psdir = + p "psdir" + (fun () -> s_ "PS documentation") + (fun () -> "$docdir") + + + let destdir = + p "destdir" + (fun () -> s_ "Prepend a path when installing package") + (fun () -> + raise + (PropList.Not_set + ("destdir", + Some (s_ "undefined by construct")))) + + + let findlib_version = + var_define + "findlib_version" + (fun () -> + BaseCheck.package_version "findlib") + + + let is_native = + var_define + "is_native" + (fun () -> + try + let _s: string = + ocamlopt () + in + "true" + with PropList.Not_set _ -> + let _s: string = + ocamlc () + in + "false") + + + let ext_program = + var_define + "suffix_program" + (fun () -> + match os_type () with + | "Win32" | "Cygwin" -> ".exe" + | _ -> "") + + + let rm = + var_define + ~short_desc:(fun () -> s_ "Remove a file.") + "rm" + (fun () -> + match os_type () with + | "Win32" -> "del" + | _ -> "rm -f") + + + let rmdir = + var_define + ~short_desc:(fun () -> s_ "Remove a directory.") + "rmdir" + (fun () -> + match os_type () with + | "Win32" -> "rd" + | _ -> "rm -rf") + + + let debug = + var_define + ~short_desc:(fun () -> s_ "Turn ocaml debug flag on") + ~cli:CLIEnable + "debug" + (fun () -> "true") + + + let profile = + var_define + ~short_desc:(fun () -> s_ "Turn ocaml profile flag on") + ~cli:CLIEnable + "profile" + (fun () -> "false") + + + let tests = + var_define_cond ~since_version:"0.3" + (fun () -> + var_define + ~short_desc:(fun () -> + s_ "Compile tests executable and library and run them") + ~cli:CLIEnable + "tests" + (fun () -> "false")) + "true" + + + let docs = + var_define_cond ~since_version:"0.3" + (fun () -> + var_define + ~short_desc:(fun () -> s_ "Create documentations") + ~cli:CLIEnable + "docs" + (fun () -> "true")) + "true" + + + let native_dynlink = + var_define + ~short_desc:(fun () -> s_ "Compiler support generation of .cmxs.") + ~cli:CLINone + "native_dynlink" + (fun () -> + let res = + let ocaml_lt_312 () = + OASISVersion.comparator_apply + (OASISVersion.version_of_string (ocaml_version ())) + (OASISVersion.VLesser + (OASISVersion.version_of_string "3.12.0")) + in + let flexdll_lt_030 () = + OASISVersion.comparator_apply + (OASISVersion.version_of_string (flexdll_version ())) + (OASISVersion.VLesser + (OASISVersion.version_of_string "0.30")) + in + let has_native_dynlink = + let ocamlfind = ocamlfind () in + try + let fn = + OASISExec.run_read_one_line + ~ctxt:!BaseContext.default + ocamlfind + ["query"; "-predicates"; "native"; "dynlink"; + "-format"; "%d/%a"] + in + Sys.file_exists fn + with _ -> + false + in + if not has_native_dynlink then + false + else if ocaml_lt_312 () then + false + else if (os_type () = "Win32" || os_type () = "Cygwin") + && flexdll_lt_030 () then + begin + BaseMessage.warning + (f_ ".cmxs generation disabled because FlexDLL needs to be \ + at least 0.30. Please upgrade FlexDLL from %s to 0.30.") + (flexdll_version ()); + false + end + else + true + in + string_of_bool res) + + + let init pkg = + rpkg := Some pkg; + List.iter (fun f -> f pkg.oasis_version) !var_cond + +end + +module BaseFileAB = struct +(* # 22 "src/base/BaseFileAB.ml" *) + + + open BaseEnv + open OASISGettext + open BaseMessage + + + let to_filename fn = + let fn = + OASISHostPath.of_unix fn + in + if not (Filename.check_suffix fn ".ab") then + warning + (f_ "File '%s' doesn't have '.ab' extension") + fn; + Filename.chop_extension fn + + + let replace fn_lst = + let buff = + Buffer.create 13 + in + List.iter + (fun fn -> + let fn = + OASISHostPath.of_unix fn + in + let chn_in = + open_in fn + in + let chn_out = + open_out (to_filename fn) + in + ( + try + while true do + Buffer.add_string buff (var_expand (input_line chn_in)); + Buffer.add_char buff '\n' + done + with End_of_file -> + () + ); + Buffer.output_buffer chn_out buff; + Buffer.clear buff; + close_in chn_in; + close_out chn_out) + fn_lst +end + +module BaseLog = struct +(* # 22 "src/base/BaseLog.ml" *) + + + open OASISUtils + + + let default_filename = + Filename.concat + (Filename.dirname BaseEnv.default_filename) + "setup.log" + + + module SetTupleString = + Set.Make + (struct + type t = string * string + let compare (s11, s12) (s21, s22) = + match String.compare s11 s21 with + | 0 -> String.compare s12 s22 + | n -> n + end) + + + let load () = + if Sys.file_exists default_filename then + begin + let chn = + open_in default_filename + in + let scbuf = + Scanf.Scanning.from_file default_filename + in + let rec read_aux (st, lst) = + if not (Scanf.Scanning.end_of_input scbuf) then + begin + let acc = + try + Scanf.bscanf scbuf "%S %S\n" + (fun e d -> + let t = + e, d + in + if SetTupleString.mem t st then + st, lst + else + SetTupleString.add t st, + t :: lst) + with Scanf.Scan_failure _ -> + failwith + (Scanf.bscanf scbuf + "%l" + (fun line -> + Printf.sprintf + "Malformed log file '%s' at line %d" + default_filename + line)) + in + read_aux acc + end + else + begin + close_in chn; + List.rev lst + end + in + read_aux (SetTupleString.empty, []) + end + else + begin + [] + end + + + let register event data = + let chn_out = + open_out_gen [Open_append; Open_creat; Open_text] 0o644 default_filename + in + Printf.fprintf chn_out "%S %S\n" event data; + close_out chn_out + + + let unregister event data = + if Sys.file_exists default_filename then + begin + let lst = + load () + in + let chn_out = + open_out default_filename + in + let write_something = + ref false + in + List.iter + (fun (e, d) -> + if e <> event || d <> data then + begin + write_something := true; + Printf.fprintf chn_out "%S %S\n" e d + end) + lst; + close_out chn_out; + if not !write_something then + Sys.remove default_filename + end + + + let filter events = + let st_events = + List.fold_left + (fun st e -> + SetString.add e st) + SetString.empty + events + in + List.filter + (fun (e, _) -> SetString.mem e st_events) + (load ()) + + + let exists event data = + List.exists + (fun v -> (event, data) = v) + (load ()) +end + +module BaseBuilt = struct +(* # 22 "src/base/BaseBuilt.ml" *) + + + open OASISTypes + open OASISGettext + open BaseStandardVar + open BaseMessage + + + type t = + | BExec (* Executable *) + | BExecLib (* Library coming with executable *) + | BLib (* Library *) + | BObj (* Library *) + | BDoc (* Document *) + + + let to_log_event_file t nm = + "built_"^ + (match t with + | BExec -> "exec" + | BExecLib -> "exec_lib" + | BLib -> "lib" + | BObj -> "obj" + | BDoc -> "doc")^ + "_"^nm + + + let to_log_event_done t nm = + "is_"^(to_log_event_file t nm) + + + let register t nm lst = + BaseLog.register + (to_log_event_done t nm) + "true"; + List.iter + (fun alt -> + let registered = + List.fold_left + (fun registered fn -> + if OASISFileUtil.file_exists_case fn then + begin + BaseLog.register + (to_log_event_file t nm) + (if Filename.is_relative fn then + Filename.concat (Sys.getcwd ()) fn + else + fn); + true + end + else + registered) + false + alt + in + if not registered then + warning + (f_ "Cannot find an existing alternative files among: %s") + (String.concat (s_ ", ") alt)) + lst + + + let unregister t nm = + List.iter + (fun (e, d) -> + BaseLog.unregister e d) + (BaseLog.filter + [to_log_event_file t nm; + to_log_event_done t nm]) + + + let fold t nm f acc = + List.fold_left + (fun acc (_, fn) -> + if OASISFileUtil.file_exists_case fn then + begin + f acc fn + end + else + begin + warning + (f_ "File '%s' has been marked as built \ + for %s but doesn't exist") + fn + (Printf.sprintf + (match t with + | BExec | BExecLib -> + (f_ "executable %s") + | BLib -> + (f_ "library %s") + | BObj -> + (f_ "object %s") + | BDoc -> + (f_ "documentation %s")) + nm); + acc + end) + acc + (BaseLog.filter + [to_log_event_file t nm]) + + + let is_built t nm = + List.fold_left + (fun is_built (_, d) -> + (try + bool_of_string d + with _ -> + false)) + false + (BaseLog.filter + [to_log_event_done t nm]) + + + let of_executable ffn (cs, bs, exec) = + let unix_exec_is, unix_dll_opt = + OASISExecutable.unix_exec_is + (cs, bs, exec) + (fun () -> + bool_of_string + (is_native ())) + ext_dll + ext_program + in + let evs = + (BExec, cs.cs_name, [[ffn unix_exec_is]]) + :: + (match unix_dll_opt with + | Some fn -> + [BExecLib, cs.cs_name, [[ffn fn]]] + | None -> + []) + in + evs, + unix_exec_is, + unix_dll_opt + + + let of_library ffn (cs, bs, lib) = + let unix_lst = + OASISLibrary.generated_unix_files + ~ctxt:!BaseContext.default + ~source_file_exists:(fun fn -> + OASISFileUtil.file_exists_case (OASISHostPath.of_unix fn)) + ~is_native:(bool_of_string (is_native ())) + ~has_native_dynlink:(bool_of_string (native_dynlink ())) + ~ext_lib:(ext_lib ()) + ~ext_dll:(ext_dll ()) + (cs, bs, lib) + in + let evs = + [BLib, + cs.cs_name, + List.map (List.map ffn) unix_lst] + in + evs, unix_lst + + + let of_object ffn (cs, bs, obj) = + let unix_lst = + OASISObject.generated_unix_files + ~ctxt:!BaseContext.default + ~source_file_exists:(fun fn -> + OASISFileUtil.file_exists_case (OASISHostPath.of_unix fn)) + ~is_native:(bool_of_string (is_native ())) + (cs, bs, obj) + in + let evs = + [BObj, + cs.cs_name, + List.map (List.map ffn) unix_lst] + in + evs, unix_lst + +end + +module BaseCustom = struct +(* # 22 "src/base/BaseCustom.ml" *) + + + open BaseEnv + open BaseMessage + open OASISTypes + open OASISGettext + + + let run cmd args extra_args = + OASISExec.run ~ctxt:!BaseContext.default ~quote:false + (var_expand cmd) + (List.map + var_expand + (args @ (Array.to_list extra_args))) + + + let hook ?(failsafe=false) cstm f e = + let optional_command lst = + let printer = + function + | Some (cmd, args) -> String.concat " " (cmd :: args) + | None -> s_ "No command" + in + match + var_choose + ~name:(s_ "Pre/Post Command") + ~printer + lst with + | Some (cmd, args) -> + begin + try + run cmd args [||] + with e when failsafe -> + warning + (f_ "Command '%s' fail with error: %s") + (String.concat " " (cmd :: args)) + (match e with + | Failure msg -> msg + | e -> Printexc.to_string e) + end + | None -> + () + in + let res = + optional_command cstm.pre_command; + f e + in + optional_command cstm.post_command; + res +end + +module BaseDynVar = struct +(* # 22 "src/base/BaseDynVar.ml" *) + + + open OASISTypes + open OASISGettext + open BaseEnv + open BaseBuilt + + + let init pkg = + (* TODO: disambiguate exec vs other variable by adding exec_VARNAME. *) + (* TODO: provide compile option for library libary_byte_args_VARNAME... *) + List.iter + (function + | Executable (cs, bs, exec) -> + if var_choose bs.bs_build then + var_ignore + (var_redefine + (* We don't save this variable *) + ~dump:false + ~short_desc:(fun () -> + Printf.sprintf + (f_ "Filename of executable '%s'") + cs.cs_name) + (OASISUtils.varname_of_string cs.cs_name) + (fun () -> + let fn_opt = + fold + BExec cs.cs_name + (fun _ fn -> Some fn) + None + in + match fn_opt with + | Some fn -> fn + | None -> + raise + (PropList.Not_set + (cs.cs_name, + Some (Printf.sprintf + (f_ "Executable '%s' not yet built.") + cs.cs_name))))) + + | Library _ | Object _ | Flag _ | Test _ | SrcRepo _ | Doc _ -> + ()) + pkg.sections +end + +module BaseTest = struct +(* # 22 "src/base/BaseTest.ml" *) + + + open BaseEnv + open BaseMessage + open OASISTypes + open OASISExpr + open OASISGettext + + + let test lst pkg extra_args = + + let one_test (failure, n) (test_plugin, cs, test) = + if var_choose + ~name:(Printf.sprintf + (f_ "test %s run") + cs.cs_name) + ~printer:string_of_bool + test.test_run then + begin + let () = + info (f_ "Running test '%s'") cs.cs_name + in + let back_cwd = + match test.test_working_directory with + | Some dir -> + let cwd = + Sys.getcwd () + in + let chdir d = + info (f_ "Changing directory to '%s'") d; + Sys.chdir d + in + chdir dir; + fun () -> chdir cwd + + | None -> + fun () -> () + in + try + let failure_percent = + BaseCustom.hook + test.test_custom + (test_plugin pkg (cs, test)) + extra_args + in + back_cwd (); + (failure_percent +. failure, n + 1) + with e -> + begin + back_cwd (); + raise e + end + end + else + begin + info (f_ "Skipping test '%s'") cs.cs_name; + (failure, n) + end + in + let failed, n = + List.fold_left + one_test + (0.0, 0) + lst + in + let failure_percent = + if n = 0 then + 0.0 + else + failed /. (float_of_int n) + in + let msg = + Printf.sprintf + (f_ "Tests had a %.2f%% failure rate") + (100. *. failure_percent) + in + if failure_percent > 0.0 then + failwith msg + else + info "%s" msg; + + (* Possible explanation why the tests where not run. *) + if OASISFeatures.package_test OASISFeatures.flag_tests pkg && + not (bool_of_string (BaseStandardVar.tests ())) && + lst <> [] then + BaseMessage.warning + "Tests are turned off, consider enabling with \ + 'ocaml setup.ml -configure --enable-tests'" +end + +module BaseDoc = struct +(* # 22 "src/base/BaseDoc.ml" *) + + + open BaseEnv + open BaseMessage + open OASISTypes + open OASISGettext + + + let doc lst pkg extra_args = + + let one_doc (doc_plugin, cs, doc) = + if var_choose + ~name:(Printf.sprintf + (f_ "documentation %s build") + cs.cs_name) + ~printer:string_of_bool + doc.doc_build then + begin + info (f_ "Building documentation '%s'") cs.cs_name; + BaseCustom.hook + doc.doc_custom + (doc_plugin pkg (cs, doc)) + extra_args + end + in + List.iter one_doc lst; + + if OASISFeatures.package_test OASISFeatures.flag_docs pkg && + not (bool_of_string (BaseStandardVar.docs ())) && + lst <> [] then + BaseMessage.warning + "Docs are turned off, consider enabling with \ + 'ocaml setup.ml -configure --enable-docs'" +end + +module BaseSetup = struct +(* # 22 "src/base/BaseSetup.ml" *) + + open BaseEnv + open BaseMessage + open OASISTypes + open OASISSection + open OASISGettext + open OASISUtils + + + type std_args_fun = + package -> string array -> unit + + + type ('a, 'b) section_args_fun = + name * (package -> (common_section * 'a) -> string array -> 'b) + + + type t = + { + configure: std_args_fun; + build: std_args_fun; + doc: ((doc, unit) section_args_fun) list; + test: ((test, float) section_args_fun) list; + install: std_args_fun; + uninstall: std_args_fun; + clean: std_args_fun list; + clean_doc: (doc, unit) section_args_fun list; + clean_test: (test, unit) section_args_fun list; + distclean: std_args_fun list; + distclean_doc: (doc, unit) section_args_fun list; + distclean_test: (test, unit) section_args_fun list; + package: package; + oasis_fn: string option; + oasis_version: string; + oasis_digest: Digest.t option; + oasis_exec: string option; + oasis_setup_args: string list; + setup_update: bool; + } + + + (* Associate a plugin function with data from package *) + let join_plugin_sections filter_map lst = + List.rev + (List.fold_left + (fun acc sct -> + match filter_map sct with + | Some e -> + e :: acc + | None -> + acc) + [] + lst) + + + (* Search for plugin data associated with a section name *) + let lookup_plugin_section plugin action nm lst = + try + List.assoc nm lst + with Not_found -> + failwithf + (f_ "Cannot find plugin %s matching section %s for %s action") + plugin + nm + action + + + let configure t args = + (* Run configure *) + BaseCustom.hook + t.package.conf_custom + (fun () -> + (* Reload if preconf has changed it *) + begin + try + unload (); + load (); + with _ -> + () + end; + + (* Run plugin's configure *) + t.configure t.package args; + + (* Dump to allow postconf to change it *) + dump ()) + (); + + (* Reload environment *) + unload (); + load (); + + (* Save environment *) + print (); + + (* Replace data in file *) + BaseFileAB.replace t.package.files_ab + + + let build t args = + BaseCustom.hook + t.package.build_custom + (t.build t.package) + args + + + let doc t args = + BaseDoc.doc + (join_plugin_sections + (function + | Doc (cs, e) -> + Some + (lookup_plugin_section + "documentation" + (s_ "build") + cs.cs_name + t.doc, + cs, + e) + | _ -> + None) + t.package.sections) + t.package + args + + + let test t args = + BaseTest.test + (join_plugin_sections + (function + | Test (cs, e) -> + Some + (lookup_plugin_section + "test" + (s_ "run") + cs.cs_name + t.test, + cs, + e) + | _ -> + None) + t.package.sections) + t.package + args + + + let all t args = + let rno_doc = + ref false + in + let rno_test = + ref false + in + let arg_rest = + ref [] + in + Arg.parse_argv + ~current:(ref 0) + (Array.of_list + ((Sys.executable_name^" all") :: + (Array.to_list args))) + [ + "-no-doc", + Arg.Set rno_doc, + s_ "Don't run doc target"; + + "-no-test", + Arg.Set rno_test, + s_ "Don't run test target"; + + "--", + Arg.Rest (fun arg -> arg_rest := arg :: !arg_rest), + s_ "All arguments for configure."; + ] + (failwithf (f_ "Don't know what to do with '%s'")) + ""; + + info "Running configure step"; + configure t (Array.of_list (List.rev !arg_rest)); + + info "Running build step"; + build t [||]; + + (* Load setup.log dynamic variables *) + BaseDynVar.init t.package; + + if not !rno_doc then + begin + info "Running doc step"; + doc t [||]; + end + else + begin + info "Skipping doc step" + end; + + if not !rno_test then + begin + info "Running test step"; + test t [||] + end + else + begin + info "Skipping test step" + end + + + let install t args = + BaseCustom.hook + t.package.install_custom + (t.install t.package) + args + + + let uninstall t args = + BaseCustom.hook + t.package.uninstall_custom + (t.uninstall t.package) + args + + + let reinstall t args = + uninstall t args; + install t args + + + let clean, distclean = + let failsafe f a = + try + f a + with e -> + warning + (f_ "Action fail with error: %s") + (match e with + | Failure msg -> msg + | e -> Printexc.to_string e) + in + + let generic_clean t cstm mains docs tests args = + BaseCustom.hook + ~failsafe:true + cstm + (fun () -> + (* Clean section *) + List.iter + (function + | Test (cs, test) -> + let f = + try + List.assoc cs.cs_name tests + with Not_found -> + fun _ _ _ -> () + in + failsafe + (f t.package (cs, test)) + args + | Doc (cs, doc) -> + let f = + try + List.assoc cs.cs_name docs + with Not_found -> + fun _ _ _ -> () + in + failsafe + (f t.package (cs, doc)) + args + | Library _ + | Object _ + | Executable _ + | Flag _ + | SrcRepo _ -> + ()) + t.package.sections; + (* Clean whole package *) + List.iter + (fun f -> + failsafe + (f t.package) + args) + mains) + () + in + + let clean t args = + generic_clean + t + t.package.clean_custom + t.clean + t.clean_doc + t.clean_test + args + in + + let distclean t args = + (* Call clean *) + clean t args; + + (* Call distclean code *) + generic_clean + t + t.package.distclean_custom + t.distclean + t.distclean_doc + t.distclean_test + args; + + (* Remove generated file *) + List.iter + (fun fn -> + if Sys.file_exists fn then + begin + info (f_ "Remove '%s'") fn; + Sys.remove fn + end) + (BaseEnv.default_filename + :: + BaseLog.default_filename + :: + (List.rev_map BaseFileAB.to_filename t.package.files_ab)) + in + + clean, distclean + + + let version t _ = + print_endline t.oasis_version + + + let update_setup_ml, no_update_setup_ml_cli = + let b = ref true in + b, + ("-no-update-setup-ml", + Arg.Clear b, + s_ " Don't try to update setup.ml, even if _oasis has changed.") + + + let default_oasis_fn = "_oasis" + + + let update_setup_ml t = + let oasis_fn = + match t.oasis_fn with + | Some fn -> fn + | None -> default_oasis_fn + in + let oasis_exec = + match t.oasis_exec with + | Some fn -> fn + | None -> "oasis" + in + let ocaml = + Sys.executable_name + in + let setup_ml, args = + match Array.to_list Sys.argv with + | setup_ml :: args -> + setup_ml, args + | [] -> + failwith + (s_ "Expecting non-empty command line arguments.") + in + let ocaml, setup_ml = + if Sys.executable_name = Sys.argv.(0) then + (* We are not running in standard mode, probably the script + * is precompiled. + *) + "ocaml", "setup.ml" + else + ocaml, setup_ml + in + let no_update_setup_ml_cli, _, _ = no_update_setup_ml_cli in + let do_update () = + let oasis_exec_version = + OASISExec.run_read_one_line + ~ctxt:!BaseContext.default + ~f_exit_code: + (function + | 0 -> + () + | 1 -> + failwithf + (f_ "Executable '%s' is probably an old version \ + of oasis (< 0.3.0), please update to version \ + v%s.") + oasis_exec t.oasis_version + | 127 -> + failwithf + (f_ "Cannot find executable '%s', please install \ + oasis v%s.") + oasis_exec t.oasis_version + | n -> + failwithf + (f_ "Command '%s version' exited with code %d.") + oasis_exec n) + oasis_exec ["version"] + in + if OASISVersion.comparator_apply + (OASISVersion.version_of_string oasis_exec_version) + (OASISVersion.VGreaterEqual + (OASISVersion.version_of_string t.oasis_version)) then + begin + (* We have a version >= for the executable oasis, proceed with + * update. + *) + (* TODO: delegate this check to 'oasis setup'. *) + if Sys.os_type = "Win32" then + failwithf + (f_ "It is not possible to update the running script \ + setup.ml on Windows. Please update setup.ml by \ + running '%s'.") + (String.concat " " (oasis_exec :: "setup" :: t.oasis_setup_args)) + else + begin + OASISExec.run + ~ctxt:!BaseContext.default + ~f_exit_code: + (function + | 0 -> + () + | n -> + failwithf + (f_ "Unable to update setup.ml using '%s', \ + please fix the problem and retry.") + oasis_exec) + oasis_exec ("setup" :: t.oasis_setup_args); + OASISExec.run ~ctxt:!BaseContext.default ocaml (setup_ml :: args) + end + end + else + failwithf + (f_ "The version of '%s' (v%s) doesn't match the version of \ + oasis used to generate the %s file. Please install at \ + least oasis v%s.") + oasis_exec oasis_exec_version setup_ml t.oasis_version + in + + if !update_setup_ml then + begin + try + match t.oasis_digest with + | Some dgst -> + if Sys.file_exists oasis_fn && + dgst <> Digest.file default_oasis_fn then + begin + do_update (); + true + end + else + false + | None -> + false + with e -> + error + (f_ "Error when updating setup.ml. If you want to avoid this error, \ + you can bypass the update of %s by running '%s %s %s %s'") + setup_ml ocaml setup_ml no_update_setup_ml_cli + (String.concat " " args); + raise e + end + else + false + + + let setup t = + let catch_exn = + ref true + in + try + let act_ref = + ref (fun _ -> + failwithf + (f_ "No action defined, run '%s %s -help'") + Sys.executable_name + Sys.argv.(0)) + + in + let extra_args_ref = + ref [] + in + let allow_empty_env_ref = + ref false + in + let arg_handle ?(allow_empty_env=false) act = + Arg.Tuple + [ + Arg.Rest (fun str -> extra_args_ref := str :: !extra_args_ref); + + Arg.Unit + (fun () -> + allow_empty_env_ref := allow_empty_env; + act_ref := act); + ] + in + + Arg.parse + (Arg.align + ([ + "-configure", + arg_handle ~allow_empty_env:true configure, + s_ "[options*] Configure the whole build process."; + + "-build", + arg_handle build, + s_ "[options*] Build executables and libraries."; + + "-doc", + arg_handle doc, + s_ "[options*] Build documents."; + + "-test", + arg_handle test, + s_ "[options*] Run tests."; + + "-all", + arg_handle ~allow_empty_env:true all, + s_ "[options*] Run configure, build, doc and test targets."; + + "-install", + arg_handle install, + s_ "[options*] Install libraries, data, executables \ + and documents."; + + "-uninstall", + arg_handle uninstall, + s_ "[options*] Uninstall libraries, data, executables \ + and documents."; + + "-reinstall", + arg_handle reinstall, + s_ "[options*] Uninstall and install libraries, data, \ + executables and documents."; + + "-clean", + arg_handle ~allow_empty_env:true clean, + s_ "[options*] Clean files generated by a build."; + + "-distclean", + arg_handle ~allow_empty_env:true distclean, + s_ "[options*] Clean files generated by a build and configure."; + + "-version", + arg_handle ~allow_empty_env:true version, + s_ " Display version of OASIS used to generate this setup.ml."; + + "-no-catch-exn", + Arg.Clear catch_exn, + s_ " Don't catch exception, useful for debugging."; + ] + @ + (if t.setup_update then + [no_update_setup_ml_cli] + else + []) + @ (BaseContext.args ()))) + (failwithf (f_ "Don't know what to do with '%s'")) + (s_ "Setup and run build process current package\n"); + + (* Build initial environment *) + load ~allow_empty:!allow_empty_env_ref (); + + (** Initialize flags *) + List.iter + (function + | Flag (cs, {flag_description = hlp; + flag_default = choices}) -> + begin + let apply ?short_desc () = + var_ignore + (var_define + ~cli:CLIEnable + ?short_desc + (OASISUtils.varname_of_string cs.cs_name) + (fun () -> + string_of_bool + (var_choose + ~name:(Printf.sprintf + (f_ "default value of flag %s") + cs.cs_name) + ~printer:string_of_bool + choices))) + in + match hlp with + | Some hlp -> + apply ~short_desc:(fun () -> hlp) () + | None -> + apply () + end + | _ -> + ()) + t.package.sections; + + BaseStandardVar.init t.package; + + BaseDynVar.init t.package; + + if t.setup_update && update_setup_ml t then + () + else + !act_ref t (Array.of_list (List.rev !extra_args_ref)) + + with e when !catch_exn -> + error "%s" (Printexc.to_string e); + exit 1 + + +end + + +# 5409 "setup.ml" +module InternalConfigurePlugin = struct +(* # 22 "src/plugins/internal/InternalConfigurePlugin.ml" *) + + + (** Configure using internal scheme + @author Sylvain Le Gall + *) + + + open BaseEnv + open OASISTypes + open OASISUtils + open OASISGettext + open BaseMessage + + + (** Configure build using provided series of check to be done + * and then output corresponding file. + *) + let configure pkg argv = + let var_ignore_eval var = let _s: string = var () in () in + let errors = ref SetString.empty in + let buff = Buffer.create 13 in + + let add_errors fmt = + Printf.kbprintf + (fun b -> + errors := SetString.add (Buffer.contents b) !errors; + Buffer.clear b) + buff + fmt + in + + let warn_exception e = + warning "%s" (Printexc.to_string e) + in + + (* Check tools *) + let check_tools lst = + List.iter + (function + | ExternalTool tool -> + begin + try + var_ignore_eval (BaseCheck.prog tool) + with e -> + warn_exception e; + add_errors (f_ "Cannot find external tool '%s'") tool + end + | InternalExecutable nm1 -> + (* Check that matching tool is built *) + List.iter + (function + | Executable ({cs_name = nm2}, + {bs_build = build}, + _) when nm1 = nm2 -> + if not (var_choose build) then + add_errors + (f_ "Cannot find buildable internal executable \ + '%s' when checking build depends") + nm1 + | _ -> + ()) + pkg.sections) + lst + in + + let build_checks sct bs = + if var_choose bs.bs_build then + begin + if bs.bs_compiled_object = Native then + begin + try + var_ignore_eval BaseStandardVar.ocamlopt + with e -> + warn_exception e; + add_errors + (f_ "Section %s requires native compilation") + (OASISSection.string_of_section sct) + end; + + (* Check tools *) + check_tools bs.bs_build_tools; + + (* Check depends *) + List.iter + (function + | FindlibPackage (findlib_pkg, version_comparator) -> + begin + try + var_ignore_eval + (BaseCheck.package ?version_comparator findlib_pkg) + with e -> + warn_exception e; + match version_comparator with + | None -> + add_errors + (f_ "Cannot find findlib package %s") + findlib_pkg + | Some ver_cmp -> + add_errors + (f_ "Cannot find findlib package %s (%s)") + findlib_pkg + (OASISVersion.string_of_comparator ver_cmp) + end + | InternalLibrary nm1 -> + (* Check that matching library is built *) + List.iter + (function + | Library ({cs_name = nm2}, + {bs_build = build}, + _) when nm1 = nm2 -> + if not (var_choose build) then + add_errors + (f_ "Cannot find buildable internal library \ + '%s' when checking build depends") + nm1 + | _ -> + ()) + pkg.sections) + bs.bs_build_depends + end + in + + (* Parse command line *) + BaseArgExt.parse argv (BaseEnv.args ()); + + (* OCaml version *) + begin + match pkg.ocaml_version with + | Some ver_cmp -> + begin + try + var_ignore_eval + (BaseCheck.version + "ocaml" + ver_cmp + BaseStandardVar.ocaml_version) + with e -> + warn_exception e; + add_errors + (f_ "OCaml version %s doesn't match version constraint %s") + (BaseStandardVar.ocaml_version ()) + (OASISVersion.string_of_comparator ver_cmp) + end + | None -> + () + end; + + (* Findlib version *) + begin + match pkg.findlib_version with + | Some ver_cmp -> + begin + try + var_ignore_eval + (BaseCheck.version + "findlib" + ver_cmp + BaseStandardVar.findlib_version) + with e -> + warn_exception e; + add_errors + (f_ "Findlib version %s doesn't match version constraint %s") + (BaseStandardVar.findlib_version ()) + (OASISVersion.string_of_comparator ver_cmp) + end + | None -> + () + end; + (* Make sure the findlib version is fine for the OCaml compiler. *) + begin + let ocaml_ge4 = + OASISVersion.version_compare + (OASISVersion.version_of_string (BaseStandardVar.ocaml_version())) + (OASISVersion.version_of_string "4.0.0") >= 0 in + if ocaml_ge4 then + let findlib_lt132 = + OASISVersion.version_compare + (OASISVersion.version_of_string (BaseStandardVar.findlib_version())) + (OASISVersion.version_of_string "1.3.2") < 0 in + if findlib_lt132 then + add_errors "OCaml >= 4.0.0 requires Findlib version >= 1.3.2" + end; + + (* FlexDLL *) + if BaseStandardVar.os_type () = "Win32" || + BaseStandardVar.os_type () = "Cygwin" then + begin + try + var_ignore_eval BaseStandardVar.flexlink + with e -> + warn_exception e; + add_errors (f_ "Cannot find 'flexlink'") + end; + + (* Check build depends *) + List.iter + (function + | Executable (_, bs, _) + | Library (_, bs, _) as sct -> + build_checks sct bs + | Doc (_, doc) -> + if var_choose doc.doc_build then + check_tools doc.doc_build_tools + | Test (_, test) -> + if var_choose test.test_run then + check_tools test.test_tools + | _ -> + ()) + pkg.sections; + + (* Check if we need native dynlink (presence of libraries that compile to + * native) + *) + begin + let has_cmxa = + List.exists + (function + | Library (_, bs, _) -> + var_choose bs.bs_build && + (bs.bs_compiled_object = Native || + (bs.bs_compiled_object = Best && + bool_of_string (BaseStandardVar.is_native ()))) + | _ -> + false) + pkg.sections + in + if has_cmxa then + var_ignore_eval BaseStandardVar.native_dynlink + end; + + (* Check errors *) + if SetString.empty != !errors then + begin + List.iter + (fun e -> error "%s" e) + (SetString.elements !errors); + failwithf + (fn_ + "%d configuration error" + "%d configuration errors" + (SetString.cardinal !errors)) + (SetString.cardinal !errors) + end + + +end + +module InternalInstallPlugin = struct +(* # 22 "src/plugins/internal/InternalInstallPlugin.ml" *) + + + (** Install using internal scheme + @author Sylvain Le Gall + *) + + + open BaseEnv + open BaseStandardVar + open BaseMessage + open OASISTypes + open OASISFindlib + open OASISGettext + open OASISUtils + + + let exec_hook = + ref (fun (cs, bs, exec) -> cs, bs, exec) + + + let lib_hook = + ref (fun (cs, bs, lib) -> cs, bs, lib, []) + + + let obj_hook = + ref (fun (cs, bs, obj) -> cs, bs, obj, []) + + + let doc_hook = + ref (fun (cs, doc) -> cs, doc) + + + let install_file_ev = + "install-file" + + + let install_dir_ev = + "install-dir" + + + let install_findlib_ev = + "install-findlib" + + + let win32_max_command_line_length = 8000 + + + let split_install_command ocamlfind findlib_name meta files = + if Sys.os_type = "Win32" then + (* Arguments for the first command: *) + let first_args = ["install"; findlib_name; meta] in + (* Arguments for remaining commands: *) + let other_args = ["install"; findlib_name; "-add"] in + (* Extract as much files as possible from [files], [len] is + the current command line length: *) + let rec get_files len acc files = + match files with + | [] -> + (List.rev acc, []) + | file :: rest -> + let len = len + 1 + String.length file in + if len > win32_max_command_line_length then + (List.rev acc, files) + else + get_files len (file :: acc) rest + in + (* Split the command into several commands. *) + let rec split args files = + match files with + | [] -> + [] + | _ -> + (* Length of "ocamlfind install [META|-add]" *) + let len = + List.fold_left + (fun len arg -> + len + 1 (* for the space *) + String.length arg) + (String.length ocamlfind) + args + in + match get_files len [] files with + | ([], _) -> + failwith (s_ "Command line too long.") + | (firsts, others) -> + let cmd = args @ firsts in + (* Use -add for remaining commands: *) + let () = + let findlib_ge_132 = + OASISVersion.comparator_apply + (OASISVersion.version_of_string + (BaseStandardVar.findlib_version ())) + (OASISVersion.VGreaterEqual + (OASISVersion.version_of_string "1.3.2")) + in + if not findlib_ge_132 then + failwithf + (f_ "Installing the library %s require to use the \ + flag '-add' of ocamlfind because the command \ + line is too long. This flag is only available \ + for findlib 1.3.2. Please upgrade findlib from \ + %s to 1.3.2") + findlib_name (BaseStandardVar.findlib_version ()) + in + let cmds = split other_args others in + cmd :: cmds + in + (* The first command does not use -add: *) + split first_args files + else + ["install" :: findlib_name :: meta :: files] + + + let install pkg argv = + + let in_destdir = + try + let destdir = + destdir () + in + (* Practically speaking destdir is prepended + * at the beginning of the target filename + *) + fun fn -> destdir^fn + with PropList.Not_set _ -> + fun fn -> fn + in + + let install_file ?tgt_fn src_file envdir = + let tgt_dir = + in_destdir (envdir ()) + in + let tgt_file = + Filename.concat + tgt_dir + (match tgt_fn with + | Some fn -> + fn + | None -> + Filename.basename src_file) + in + (* Create target directory if needed *) + OASISFileUtil.mkdir_parent + ~ctxt:!BaseContext.default + (fun dn -> + info (f_ "Creating directory '%s'") dn; + BaseLog.register install_dir_ev dn) + tgt_dir; + + (* Really install files *) + info (f_ "Copying file '%s' to '%s'") src_file tgt_file; + OASISFileUtil.cp ~ctxt:!BaseContext.default src_file tgt_file; + BaseLog.register install_file_ev tgt_file + in + + (* Install data into defined directory *) + let install_data srcdir lst tgtdir = + let tgtdir = + OASISHostPath.of_unix (var_expand tgtdir) + in + List.iter + (fun (src, tgt_opt) -> + let real_srcs = + OASISFileUtil.glob + ~ctxt:!BaseContext.default + (Filename.concat srcdir src) + in + if real_srcs = [] then + failwithf + (f_ "Wildcard '%s' doesn't match any files") + src; + List.iter + (fun fn -> + install_file + fn + (fun () -> + match tgt_opt with + | Some s -> + OASISHostPath.of_unix (var_expand s) + | None -> + tgtdir)) + real_srcs) + lst + in + + let make_fnames modul sufx = + List.fold_right + begin fun sufx accu -> + (String.capitalize modul ^ sufx) :: + (String.uncapitalize modul ^ sufx) :: + accu + end + sufx + [] + in + + (** Install all libraries *) + let install_libs pkg = + + let files_of_library (f_data, acc) data_lib = + let cs, bs, lib, lib_extra = + !lib_hook data_lib + in + if var_choose bs.bs_install && + BaseBuilt.is_built BaseBuilt.BLib cs.cs_name then + begin + let acc = + (* Start with acc + lib_extra *) + List.rev_append lib_extra acc + in + let acc = + (* Add uncompiled header from the source tree *) + let path = + OASISHostPath.of_unix bs.bs_path + in + List.fold_left + begin fun acc modul -> + begin + try + [List.find + OASISFileUtil.file_exists_case + (List.map + (Filename.concat path) + (make_fnames modul [".mli"; ".ml"]))] + with Not_found -> + warning + (f_ "Cannot find source header for module %s \ + in library %s") + modul cs.cs_name; + [] + end + @ + List.filter + OASISFileUtil.file_exists_case + (List.map + (Filename.concat path) + (make_fnames modul [".annot";".cmti";".cmt"])) + @ acc + end + acc + lib.lib_modules + in + + let acc = + (* Get generated files *) + BaseBuilt.fold + BaseBuilt.BLib + cs.cs_name + (fun acc fn -> fn :: acc) + acc + in + + let f_data () = + (* Install data associated with the library *) + install_data + bs.bs_path + bs.bs_data_files + (Filename.concat + (datarootdir ()) + pkg.name); + f_data () + in + + (f_data, acc) + end + else + begin + (f_data, acc) + end + and files_of_object (f_data, acc) data_obj = + let cs, bs, obj, obj_extra = + !obj_hook data_obj + in + if var_choose bs.bs_install && + BaseBuilt.is_built BaseBuilt.BObj cs.cs_name then + begin + let acc = + (* Start with acc + obj_extra *) + List.rev_append obj_extra acc + in + let acc = + (* Add uncompiled header from the source tree *) + let path = + OASISHostPath.of_unix bs.bs_path + in + List.fold_left + begin fun acc modul -> + begin + try + [List.find + OASISFileUtil.file_exists_case + (List.map + (Filename.concat path) + (make_fnames modul [".mli"; ".ml"]))] + with Not_found -> + warning + (f_ "Cannot find source header for module %s \ + in object %s") + modul cs.cs_name; + [] + end + @ + List.filter + OASISFileUtil.file_exists_case + (List.map + (Filename.concat path) + (make_fnames modul [".annot";".cmti";".cmt"])) + @ acc + end + acc + obj.obj_modules + in + + let acc = + (* Get generated files *) + BaseBuilt.fold + BaseBuilt.BObj + cs.cs_name + (fun acc fn -> fn :: acc) + acc + in + + let f_data () = + (* Install data associated with the object *) + install_data + bs.bs_path + bs.bs_data_files + (Filename.concat + (datarootdir ()) + pkg.name); + f_data () + in + + (f_data, acc) + end + else + begin + (f_data, acc) + end + + in + + (* Install one group of library *) + let install_group_lib grp = + (* Iterate through all group nodes *) + let rec install_group_lib_aux data_and_files grp = + let data_and_files, children = + match grp with + | Container (_, children) -> + data_and_files, children + | Package (_, cs, bs, `Library lib, children) -> + files_of_library data_and_files (cs, bs, lib), children + | Package (_, cs, bs, `Object obj, children) -> + files_of_object data_and_files (cs, bs, obj), children + in + List.fold_left + install_group_lib_aux + data_and_files + children + in + + (* Findlib name of the root library *) + let findlib_name = + findlib_of_group grp + in + + (* Determine root library *) + let root_lib = + root_of_group grp + in + + (* All files to install for this library *) + let f_data, files = + install_group_lib_aux (ignore, []) grp + in + + (* Really install, if there is something to install *) + if files = [] then + begin + warning + (f_ "Nothing to install for findlib library '%s'") + findlib_name + end + else + begin + let meta = + (* Search META file *) + let _, bs, _ = + root_lib + in + let res = + Filename.concat bs.bs_path "META" + in + if not (OASISFileUtil.file_exists_case res) then + failwithf + (f_ "Cannot find file '%s' for findlib library %s") + res + findlib_name; + res + in + let files = + (* Make filename shorter to avoid hitting command max line length + * too early, esp. on Windows. + *) + let remove_prefix p n = + let plen = String.length p in + let nlen = String.length n in + if plen <= nlen && String.sub n 0 plen = p then + begin + let fn_sep = + if Sys.os_type = "Win32" then + '\\' + else + '/' + in + let cutpoint = plen + + (if plen < nlen && n.[plen] = fn_sep then + 1 + else + 0) + in + String.sub n cutpoint (nlen - cutpoint) + end + else + n + in + List.map (remove_prefix (Sys.getcwd ())) files + in + info + (f_ "Installing findlib library '%s'") + findlib_name; + let ocamlfind = ocamlfind () in + let commands = + split_install_command + ocamlfind + findlib_name + meta + files + in + List.iter + (OASISExec.run ~ctxt:!BaseContext.default ocamlfind) + commands; + BaseLog.register install_findlib_ev findlib_name + end; + + (* Install data files *) + f_data (); + + in + + let group_libs, _, _ = + findlib_mapping pkg + in + + (* We install libraries in groups *) + List.iter install_group_lib group_libs + in + + let install_execs pkg = + let install_exec data_exec = + let cs, bs, exec = + !exec_hook data_exec + in + if var_choose bs.bs_install && + BaseBuilt.is_built BaseBuilt.BExec cs.cs_name then + begin + let exec_libdir () = + Filename.concat + (libdir ()) + pkg.name + in + BaseBuilt.fold + BaseBuilt.BExec + cs.cs_name + (fun () fn -> + install_file + ~tgt_fn:(cs.cs_name ^ ext_program ()) + fn + bindir) + (); + BaseBuilt.fold + BaseBuilt.BExecLib + cs.cs_name + (fun () fn -> + install_file + fn + exec_libdir) + (); + install_data + bs.bs_path + bs.bs_data_files + (Filename.concat + (datarootdir ()) + pkg.name) + end + in + List.iter + (function + | Executable (cs, bs, exec)-> + install_exec (cs, bs, exec) + | _ -> + ()) + pkg.sections + in + + let install_docs pkg = + let install_doc data = + let cs, doc = + !doc_hook data + in + if var_choose doc.doc_install && + BaseBuilt.is_built BaseBuilt.BDoc cs.cs_name then + begin + let tgt_dir = + OASISHostPath.of_unix (var_expand doc.doc_install_dir) + in + BaseBuilt.fold + BaseBuilt.BDoc + cs.cs_name + (fun () fn -> + install_file + fn + (fun () -> tgt_dir)) + (); + install_data + Filename.current_dir_name + doc.doc_data_files + doc.doc_install_dir + end + in + List.iter + (function + | Doc (cs, doc) -> + install_doc (cs, doc) + | _ -> + ()) + pkg.sections + in + + install_libs pkg; + install_execs pkg; + install_docs pkg + + + (* Uninstall already installed data *) + let uninstall _ argv = + List.iter + (fun (ev, data) -> + if ev = install_file_ev then + begin + if OASISFileUtil.file_exists_case data then + begin + info + (f_ "Removing file '%s'") + data; + Sys.remove data + end + else + begin + warning + (f_ "File '%s' doesn't exist anymore") + data + end + end + else if ev = install_dir_ev then + begin + if Sys.file_exists data && Sys.is_directory data then + begin + if Sys.readdir data = [||] then + begin + info + (f_ "Removing directory '%s'") + data; + OASISFileUtil.rmdir ~ctxt:!BaseContext.default data + end + else + begin + warning + (f_ "Directory '%s' is not empty (%s)") + data + (String.concat + ", " + (Array.to_list + (Sys.readdir data))) + end + end + else + begin + warning + (f_ "Directory '%s' doesn't exist anymore") + data + end + end + else if ev = install_findlib_ev then + begin + info (f_ "Removing findlib library '%s'") data; + OASISExec.run ~ctxt:!BaseContext.default + (ocamlfind ()) ["remove"; data] + end + else + failwithf (f_ "Unknown log event '%s'") ev; + BaseLog.unregister ev data) + (* We process event in reverse order *) + (List.rev + (BaseLog.filter + [install_file_ev; + install_dir_ev; + install_findlib_ev])) + + +end + + +# 6273 "setup.ml" +module OCamlbuildCommon = struct +(* # 22 "src/plugins/ocamlbuild/OCamlbuildCommon.ml" *) + + + (** Functions common to OCamlbuild build and doc plugin + *) + + + open OASISGettext + open BaseEnv + open BaseStandardVar + open OASISTypes + + + + + type extra_args = string list + + + let ocamlbuild_clean_ev = "ocamlbuild-clean" + + + let ocamlbuildflags = + var_define + ~short_desc:(fun () -> "OCamlbuild additional flags") + "ocamlbuildflags" + (fun () -> "") + + + (** Fix special arguments depending on environment *) + let fix_args args extra_argv = + List.flatten + [ + if (os_type ()) = "Win32" then + [ + "-classic-display"; + "-no-log"; + "-no-links"; + "-install-lib-dir"; + (Filename.concat (standard_library ()) "ocamlbuild") + ] + else + []; + + if not (bool_of_string (is_native ())) || (os_type ()) = "Win32" then + [ + "-byte-plugin" + ] + else + []; + args; + + if bool_of_string (debug ()) then + ["-tag"; "debug"] + else + []; + + if bool_of_string (tests ()) then + ["-tag"; "tests"] + else + []; + + if bool_of_string (profile ()) then + ["-tag"; "profile"] + else + []; + + OASISString.nsplit (ocamlbuildflags ()) ' '; + + Array.to_list extra_argv; + ] + + + (** Run 'ocamlbuild -clean' if not already done *) + let run_clean extra_argv = + let extra_cli = + String.concat " " (Array.to_list extra_argv) + in + (* Run if never called with these args *) + if not (BaseLog.exists ocamlbuild_clean_ev extra_cli) then + begin + OASISExec.run ~ctxt:!BaseContext.default + (ocamlbuild ()) (fix_args ["-clean"] extra_argv); + BaseLog.register ocamlbuild_clean_ev extra_cli; + at_exit + (fun () -> + try + BaseLog.unregister ocamlbuild_clean_ev extra_cli + with _ -> + ()) + end + + + (** Run ocamlbuild, unregister all clean events *) + let run_ocamlbuild args extra_argv = + (* TODO: enforce that target in args must be UNIX encoded i.e. toto/index.html + *) + OASISExec.run ~ctxt:!BaseContext.default + (ocamlbuild ()) (fix_args args extra_argv); + (* Remove any clean event, we must run it again *) + List.iter + (fun (e, d) -> BaseLog.unregister e d) + (BaseLog.filter [ocamlbuild_clean_ev]) + + + (** Determine real build directory *) + let build_dir extra_argv = + let rec search_args dir = + function + | "-build-dir" :: dir :: tl -> + search_args dir tl + | _ :: tl -> + search_args dir tl + | [] -> + dir + in + search_args "_build" (fix_args [] extra_argv) + + +end + +module OCamlbuildPlugin = struct +(* # 22 "src/plugins/ocamlbuild/OCamlbuildPlugin.ml" *) + + + (** Build using ocamlbuild + @author Sylvain Le Gall + *) + + + open OASISTypes + open OASISGettext + open OASISUtils + open OASISString + open BaseEnv + open OCamlbuildCommon + open BaseStandardVar + open BaseMessage + + + + + + let cond_targets_hook = + ref (fun lst -> lst) + + + let build extra_args pkg argv = + (* Return the filename in build directory *) + let in_build_dir fn = + Filename.concat + (build_dir argv) + fn + in + + (* Return the unix filename in host build directory *) + let in_build_dir_of_unix fn = + in_build_dir (OASISHostPath.of_unix fn) + in + + let cond_targets = + List.fold_left + (fun acc -> + function + | Library (cs, bs, lib) when var_choose bs.bs_build -> + begin + let evs, unix_files = + BaseBuilt.of_library + in_build_dir_of_unix + (cs, bs, lib) + in + + let tgts = + List.flatten + (List.filter + (fun l -> l <> []) + (List.map + (List.filter + (fun fn -> + ends_with ~what:".cma" fn + || ends_with ~what:".cmxs" fn + || ends_with ~what:".cmxa" fn + || ends_with ~what:(ext_lib ()) fn + || ends_with ~what:(ext_dll ()) fn)) + unix_files)) + in + + match tgts with + | _ :: _ -> + (evs, tgts) :: acc + | [] -> + failwithf + (f_ "No possible ocamlbuild targets for library %s") + cs.cs_name + end + + | Object (cs, bs, obj) when var_choose bs.bs_build -> + begin + let evs, unix_files = + BaseBuilt.of_object + in_build_dir_of_unix + (cs, bs, obj) + in + + let tgts = + List.flatten + (List.filter + (fun l -> l <> []) + (List.map + (List.filter + (fun fn -> + ends_with ".cmo" fn + || ends_with ".cmx" fn)) + unix_files)) + in + + match tgts with + | _ :: _ -> + (evs, tgts) :: acc + | [] -> + failwithf + (f_ "No possible ocamlbuild targets for object %s") + cs.cs_name + end + + | Executable (cs, bs, exec) when var_choose bs.bs_build -> + begin + let evs, unix_exec_is, unix_dll_opt = + BaseBuilt.of_executable + in_build_dir_of_unix + (cs, bs, exec) + in + + let target ext = + let unix_tgt = + (OASISUnixPath.concat + bs.bs_path + (OASISUnixPath.chop_extension + exec.exec_main_is))^ext + in + let evs = + (* Fix evs, we want to use the unix_tgt, without copying *) + List.map + (function + | BaseBuilt.BExec, nm, lst when nm = cs.cs_name -> + BaseBuilt.BExec, nm, + [[in_build_dir_of_unix unix_tgt]] + | ev -> + ev) + evs + in + evs, [unix_tgt] + in + + (* Add executable *) + let acc = + match bs.bs_compiled_object with + | Native -> + (target ".native") :: acc + | Best when bool_of_string (is_native ()) -> + (target ".native") :: acc + | Byte + | Best -> + (target ".byte") :: acc + in + acc + end + + | Library _ | Object _ | Executable _ | Test _ + | SrcRepo _ | Flag _ | Doc _ -> + acc) + [] + (* Keep the pkg.sections ordered *) + (List.rev pkg.sections); + in + + (* Check and register built files *) + let check_and_register (bt, bnm, lst) = + List.iter + (fun fns -> + if not (List.exists OASISFileUtil.file_exists_case fns) then + failwithf + (fn_ + "Expected built file %s doesn't exist." + "None of expected built files %s exists." + (List.length fns)) + (String.concat (s_ " or ") (List.map (Printf.sprintf "'%s'") fns))) + lst; + (BaseBuilt.register bt bnm lst) + in + + (* Run the hook *) + let cond_targets = !cond_targets_hook cond_targets in + + (* Run a list of target... *) + run_ocamlbuild (List.flatten (List.map snd cond_targets) @ extra_args) argv; + (* ... and register events *) + List.iter check_and_register (List.flatten (List.map fst cond_targets)) + + + let clean pkg extra_args = + run_clean extra_args; + List.iter + (function + | Library (cs, _, _) -> + BaseBuilt.unregister BaseBuilt.BLib cs.cs_name + | Executable (cs, _, _) -> + BaseBuilt.unregister BaseBuilt.BExec cs.cs_name; + BaseBuilt.unregister BaseBuilt.BExecLib cs.cs_name + | _ -> + ()) + pkg.sections + + +end + +module OCamlbuildDocPlugin = struct +(* # 22 "src/plugins/ocamlbuild/OCamlbuildDocPlugin.ml" *) + + + (* Create documentation using ocamlbuild .odocl files + @author Sylvain Le Gall + *) + + + open OASISTypes + open OASISGettext + open OASISMessage + open OCamlbuildCommon + open BaseStandardVar + + + + + type run_t = + { + extra_args: string list; + run_path: unix_filename; + } + + + let doc_build run pkg (cs, doc) argv = + let index_html = + OASISUnixPath.make + [ + run.run_path; + cs.cs_name^".docdir"; + "index.html"; + ] + in + let tgt_dir = + OASISHostPath.make + [ + build_dir argv; + OASISHostPath.of_unix run.run_path; + cs.cs_name^".docdir"; + ] + in + run_ocamlbuild (index_html :: run.extra_args) argv; + List.iter + (fun glb -> + BaseBuilt.register + BaseBuilt.BDoc + cs.cs_name + [OASISFileUtil.glob ~ctxt:!BaseContext.default + (Filename.concat tgt_dir glb)]) + ["*.html"; "*.css"] + + + let doc_clean run pkg (cs, doc) argv = + run_clean argv; + BaseBuilt.unregister BaseBuilt.BDoc cs.cs_name + + +end + + +# 6651 "setup.ml" +open OASISTypes;; + +let setup_t = + { + BaseSetup.configure = InternalConfigurePlugin.configure; + build = OCamlbuildPlugin.build []; + test = []; + doc = []; + install = InternalInstallPlugin.install; + uninstall = InternalInstallPlugin.uninstall; + clean = [OCamlbuildPlugin.clean]; + clean_test = []; + clean_doc = []; + distclean = []; + distclean_test = []; + distclean_doc = []; + package = + { + oasis_version = "0.4"; + ocaml_version = None; + findlib_version = None; + alpha_features = []; + beta_features = []; + name = "rdr"; + version = "2.0.1"; + license = + OASISLicense.DEP5License + (OASISLicense.DEP5Unit + { + OASISLicense.license = "BSD-3-clause"; + excption = None; + version = OASISLicense.NoVersion + }); + license_file = None; + copyrights = []; + maintainers = [""]; + authors = ["m4b"]; + homepage = Some "http://github.com/m4b/rdr"; + synopsis = + "Lightweight, cross platform binary parsing and analysis library with no dependencies"; + description = + Some + [ + OASISText.Para + "`rdr` is an OCaml tool/library for doing cross-platform analysis of binaries, by printing headers, locating entry points, showing import and export symbols, their binary offsets and size, etc. It also features a symbol map which allows fast lookups for arbitrary symbols, and their associated data, on your system (the default search location are binaries in /usr/lib). The latest release also makes `rdr` a package which you can link against and use in your own projects. See the README at http://github.com/m4b/rdr for more details. Features: * 64-bit Linux and Mach-o binary analysis * Searchable symbol-map of all the symbols on your system, including binary"; + OASISText.Verbatim " offset, size, and exporting library"; + OASISText.Para + "* Print imports and exports of binaries * Make pretty graphs, at the binary or symbol map level * Byte Coverage algorithm which marks byte sequences as understood (or not)"; + OASISText.Verbatim " and provides other meta-data" + ]; + categories = []; + conf_type = (`Configure, "internal", Some "0.4"); + conf_custom = + { + pre_command = [(OASISExpr.EBool true, None)]; + post_command = [(OASISExpr.EBool true, None)] + }; + build_type = (`Build, "ocamlbuild", Some "0.4"); + build_custom = + { + pre_command = [(OASISExpr.EBool true, None)]; + post_command = [(OASISExpr.EBool true, None)] + }; + install_type = (`Install, "internal", Some "0.4"); + install_custom = + { + pre_command = [(OASISExpr.EBool true, None)]; + post_command = [(OASISExpr.EBool true, None)] + }; + uninstall_custom = + { + pre_command = [(OASISExpr.EBool true, None)]; + post_command = [(OASISExpr.EBool true, None)] + }; + clean_custom = + { + pre_command = [(OASISExpr.EBool true, None)]; + post_command = [(OASISExpr.EBool true, None)] + }; + distclean_custom = + { + pre_command = [(OASISExpr.EBool true, None)]; + post_command = [(OASISExpr.EBool true, None)] + }; + files_ab = []; + sections = + [ + Library + ({ + cs_name = "utils"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, true)]; + bs_path = "lib/utils"; + bs_compiled_object = Best; + bs_build_depends = []; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + { + lib_modules = + [ + "RdrUtils"; + "Binary"; + "Input"; + "Leb128"; + "ByteCoverage"; + "Generics" + ]; + lib_pack = false; + lib_internal_modules = []; + lib_findlib_parent = Some "rdr"; + lib_findlib_name = Some "utils"; + lib_findlib_containers = [] + }); + Library + ({ + cs_name = "mach"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, true)]; + bs_path = "lib/mach"; + bs_compiled_object = Best; + bs_build_depends = [InternalLibrary "utils"]; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + { + lib_modules = + [ + "Mach"; + "MachBindOpcodes"; + "MachCpuTypes"; + "MachFat"; + "MachLoadCommand"; + "MachLoadCommandTypes"; + "MachConstants"; + "MachExports"; + "MachHeader"; + "MachImports"; + "MachSection"; + "MachSymbolTable"; + "MachRebaseOpcodes"; + "MachVersion"; + "MachCoverage"; + "MachLoadCommandMacro" + ]; + lib_pack = false; + lib_internal_modules = []; + lib_findlib_parent = Some "rdr"; + lib_findlib_name = Some "mach"; + lib_findlib_containers = [] + }); + Library + ({ + cs_name = "elf"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, true)]; + bs_path = "lib/elf"; + bs_compiled_object = Best; + bs_build_depends = [InternalLibrary "utils"]; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + { + lib_modules = + [ + "Elf"; + "ElfHeader"; + "ElfProgramHeader"; + "ElfSectionHeader"; + "ElfConstants"; + "ElfDynamic"; + "ElfReloc"; + "ElfSymbolTable"; + "ElfCoverage" + ]; + lib_pack = false; + lib_internal_modules = []; + lib_findlib_parent = Some "rdr"; + lib_findlib_name = Some "elf"; + lib_findlib_containers = [] + }); + Library + ({ + cs_name = "goblin"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, true)]; + bs_path = "lib/goblin"; + bs_compiled_object = Best; + bs_build_depends = + [ + InternalLibrary "utils"; + InternalLibrary "mach"; + InternalLibrary "elf" + ]; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + { + lib_modules = + [ + "Goblin"; + "GoblinSymbol"; + "GoblinExport"; + "GoblinImport"; + "GoblinMach"; + "GoblinElf" + ]; + lib_pack = false; + lib_internal_modules = []; + lib_findlib_parent = Some "rdr"; + lib_findlib_name = Some "goblin"; + lib_findlib_containers = [] + }); + Library + ({ + cs_name = "rdr"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, true)]; + bs_path = "lib"; + bs_compiled_object = Best; + bs_build_depends = + [ + InternalLibrary "goblin"; + InternalLibrary "utils"; + InternalLibrary "mach"; + InternalLibrary "elf" + ]; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + { + lib_modules = ["LibRdr"]; + lib_pack = false; + lib_internal_modules = []; + lib_findlib_parent = None; + lib_findlib_name = Some "rdr"; + lib_findlib_containers = [] + }); + Library + ({ + cs_name = "rdrutils"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, false)]; + bs_path = "src"; + bs_compiled_object = Best; + bs_build_depends = + [ + InternalLibrary "elf"; + InternalLibrary "mach"; + InternalLibrary "goblin"; + InternalLibrary "utils"; + FindlibPackage ("str", None); + FindlibPackage ("unix", None) + ]; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + { + lib_modules = + [ + "Config"; + "Command"; + "Object"; + "Graph"; + "Storage"; + "SymbolMap"; + "ReadMach"; + "ReadElf"; + "ToL" + ]; + lib_pack = false; + lib_internal_modules = []; + lib_findlib_parent = None; + lib_findlib_name = None; + lib_findlib_containers = [] + }); + Executable + ({ + cs_name = "rdr"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, true)]; + bs_path = "src"; + bs_compiled_object = Best; + bs_build_depends = + [ + InternalLibrary "rdrutils"; + FindlibPackage ("str", None) + ]; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + {exec_custom = false; exec_main_is = "Rdr.ml"}) + ]; + plugins = + [(`Extra, "META", Some "0.4"); (`Extra, "DevFiles", Some "0.4")]; + disable_oasis_section = []; + schema_data = PropList.Data.create (); + plugin_data = [] + }; + oasis_fn = Some "_oasis"; + oasis_version = "0.4.5"; + oasis_digest = Some "¸Z\028µ\006ÑB\130n\141\019\004bÕÎ\128"; + oasis_exec = None; + oasis_setup_args = []; + setup_update = false + };; + +let setup () = BaseSetup.setup setup_t;; + +# 7039 "setup.ml" +(* OASIS_STOP *) +let () = setup ();; diff --git a/src/Config.ml b/src/Config.ml index 10fa826..b3668e8 100644 --- a/src/Config.ml +++ b/src/Config.ml @@ -1,4 +1,4 @@ -type basic_config = +type t = { (* internal *) analyze: bool; @@ -10,11 +10,12 @@ type basic_config = install_name: string; (* analysis *) verbose: bool; (* -v *) - print_headers: bool; (* -h *) + print_headers: bool; (* -h *) print_nlist: bool; (* -s *) - print_libraries: bool; (* -l *) + print_libraries: bool; (* -l *) print_exports: bool; (* -e *) - print_imports: bool; (* -i *) + print_imports: bool; (* -i *) + print_coverage: bool; (* -c *) disassemble: bool; (* -D *) (* building *) use_map: bool; (* -m *) diff --git a/src/Graph.ml b/src/Graph.ml index 49e87af..68a095c 100644 --- a/src/Graph.ml +++ b/src/Graph.ml @@ -6,8 +6,8 @@ TODO: (* for testing -#directory "/Users/matthewbarney/projects/binreader/_build/src/utils/";; -#directory "/Users/matthewbarney/projects/binreader/_build/src/mach/";; +#directory "/Users/matthewbarney/git/rdr/_build/src/utils/";; +#directory "/Users/matthewbarney/git/rdr/_build/src/mach/";; #load "Binary.cmo";; #load "InputUtils.cmo";; #load "Version.cmo";; @@ -72,35 +72,6 @@ let get_html_exports_header name fullname nexports = let html_footer = " \n>];\n" -let get_html_export_row symbol_name export libraries = - let size = try Goblin.Symbol.find_symbol_size export |> Printf.sprintf "%d" with Not_found -> "" in - (* i'm being lazy as shit and converting it back *) - match Mach.Exports.mach_export_data_to_export_info export with - | Regular info -> - Printf.sprintf " - %s%s0x%x - -" symbol_name symbol_name size info.address - | Reexport info -> - begin - match info.lib_symbol_name with - | Some str -> - Printf.sprintf " - %s%s%s
@ %s - -" symbol_name symbol_name size str info.lib - | None -> - Printf.sprintf " - %s%s@ %s - -" symbol_name symbol_name size info.lib - end - | Stub info -> - Printf.sprintf " - %s%s0x%x , 0x%x - -" symbol_name symbol_name size info.stub_offset info.resolver_offset - (* libs *) let get_html_libs_header name fullname nlibs = @@ -133,9 +104,9 @@ let get_html_imports_header name fullname nimports = " name fullname nimports let get_html_import_row name import = - let is_lazy = Mach.Imports.is_lazy import in - let name = Mach.Imports.import_name import in - let lib = Mach.Imports.import_lib import in + let is_lazy = Goblin.Mach.Imports.is_lazy import in + let name = Goblin.Mach.Imports.import_name import in + let lib = Goblin.Mach.Imports.import_lib import in let color = if (is_lazy) then "#e0ffda" else "#ffffff" in Printf.sprintf " %s%s @@ -143,8 +114,42 @@ let get_html_import_row name import = " color name name color lib (* end *) +(* +let get_html_export_row symbol_name export libraries = + (* let size = try Goblin.Symbol.find_symbol_size export |> Printf.sprintf "%d" with Not_found -> "" in *) + (* i'm being lazy as shit and converting it back *) + (* match Goblin.Mach.Exports.mach_export_data_to_export_info export with *) + match export with + | Regular info -> + Printf.sprintf " + %s%s0x%x + +" symbol_name symbol_name size info.address + | Reexport info -> + begin + match info.lib_symbol_name with + | Some str -> + Printf.sprintf " + %s%s%s
@ %s + +" symbol_name symbol_name size str info.lib + | None -> + Printf.sprintf " + %s%s@ %s + +" symbol_name symbol_name size info.lib + end + | Stub info -> + Printf.sprintf " + %s%s0x%x , 0x%x + +" symbol_name symbol_name size info.stub_offset info.resolver_offset + + +open Mach + (* todo: add header and footer to a wrapper function? *) -let mach_to_html_dot (binary:mach_binary) draw_imports draw_libs = +let mach_to_html_dot (binary:Mach.t) draw_imports draw_libs = let b = Buffer.create @@ binary.nexports * 16 in let name = to_dot_name binary.name in let header = Printf.sprintf "digraph %s {\n" name in @@ -160,9 +165,9 @@ node [shape=plaintext]\n"; begin let nodename = Printf.sprintf "%s_libs" name in Buffer.add_string b @@ Printf.sprintf "{ rank=same; 0->%s [style=invis]}" nodename; - Buffer.add_string b @@ get_html_libs_header nodename binary.name binary.nlibs; + Buffer.add_string b @@ get_html_libs_header nodename binary.name binary.nlibraries; (* was a stupid idea to include the binary in the libraries... *) - Array.iteri (fun i lib -> if (i <> 0) then Buffer.add_string b @@ get_html_lib_row name (i - 1) lib) binary.libs; + Array.iteri (fun i lib -> if (i <> 0) then Buffer.add_string b @@ get_html_lib_row name (i - 1) lib) binary.libraries; Buffer.add_string b html_footer; end; (* end libs *) @@ -170,9 +175,11 @@ node [shape=plaintext]\n"; let nodename = Printf.sprintf "%s_exports" name in Buffer.add_string b @@ Printf.sprintf "{ rank=same; 1->%s [style=invis]}" nodename; Buffer.add_string b @@ get_html_exports_header nodename binary.name binary.nexports; - Array.iter (fun mach_export_data -> - let name = Goblin.Symbol.find_symbol_name mach_export_data in - Buffer.add_string b @@ get_html_export_row name mach_export_data binary.libs + List.iter (fun export -> + (* + let name = Goblin.Symbol.find_symbol_name mach_export_data in *) + let name = export.name in + Buffer.add_string b @@ get_html_export_row name export binary.nlibraries ) binary.exports; Buffer.add_string b html_footer; (* end exports *) @@ -197,9 +204,12 @@ let graph_mach_binary ?draw_imports:(draw_imports=true) ?draw_libs:(draw_libs=tr Printf.fprintf oc "%s" call_graph; close_out oc; graph_with_dot file + *) +(* ====================== *) (* lib dependency graph *) (* [binary, [libs]] *) +(* ====================== *) (* todo add solid background color here *) let lib_header = "digraph lib_deps { diff --git a/src/META b/src/META new file mode 100644 index 0000000..9f140a3 --- /dev/null +++ b/src/META @@ -0,0 +1,13 @@ +# OASIS_START +# DO NOT EDIT (digest: ba5008755c5582c056b0af13e4b5998d) +version = "2.0.1" +description = +"Lightweight, cross platform binary parsing and analysis library with no dependencies" +requires = "rdr.elf rdr.mach rdr.goblin rdr.utils str unix" +archive(byte) = "rdrutils.cma" +archive(byte, plugin) = "rdrutils.cma" +archive(native) = "rdrutils.cmxa" +archive(native, plugin) = "rdrutils.cmxs" +exists_if = "rdrutils.cma" +# OASIS_STOP + diff --git a/src/Object.ml b/src/Object.ml index b52b047..53d4928 100644 --- a/src/Object.ml +++ b/src/Object.ml @@ -1,47 +1,54 @@ open Config - -type t = | Mach of bytes | Elf of bytes | Unknown -exception Unimplemented_binary_type of string +type t = | Mach of bytes | Elf of bytes | Unknown of string -let get_bytes ?verbose:(verbose=false) filename = +exception Unknown_binary_type of string + +let get_bytes ?verbose:(verbose=false) filename = let ic = open_in_bin filename in if (in_channel_length ic < 4) then (* 4 bytes, less than any magic number we're looking for *) - Unknown + begin + close_in ic; Unknown filename + end else let magic = Input.input_i32be ic in if (verbose) then Printf.printf "opening %s with magic: 0x%x\n" filename magic; if (magic = Mach.Fat.kFAT_MAGIC) (* cafe babe *) then let nfat_arch = Input.input_i32be ic in - let sizeof_arch_bytes = nfat_arch * Mach.Fat.sizeof_fat_arch in - let fat_arch_bytes = Bytes.create sizeof_arch_bytes in - really_input ic fat_arch_bytes 0 sizeof_arch_bytes; - let offset = - Mach.Fat.get_x86_64_binary_offset fat_arch_bytes nfat_arch in - match offset with - | Some (offset, size) -> - seek_in ic offset; - let magic = Input.input_i32be ic in - if (magic = Mach.Header.kMH_CIGAM_64) then - begin - seek_in ic offset; - let binary = Bytes.create size in - really_input ic binary 0 size; - close_in ic; - Mach binary - end - else - begin - close_in ic; - Unknown - end - | None -> - close_in ic; - Printf.eprintf "ERROR, bad binary: %s\n" filename; - Unknown - (* backwards cause we read the 32bit int big E style *) + if (nfat_arch > 4) then (* hack to avoid java class file errors which have same magic num *) + begin + close_in ic; + Unknown filename + end + else + let sizeof_arch_bytes = nfat_arch * Mach.Fat.sizeof_fat_arch in + let fat_arch_bytes = Bytes.create sizeof_arch_bytes in + really_input ic fat_arch_bytes 0 sizeof_arch_bytes; + let offset = + Mach.Fat.get_x86_64_binary_offset fat_arch_bytes nfat_arch in + match offset with + | Some (offset, size) -> + seek_in ic offset; + let magic = Input.input_i32be ic in + if (magic = Mach.Header.kMH_CIGAM_64) then + begin + seek_in ic offset; + let binary = Bytes.create size in + really_input ic binary 0 size; + close_in ic; + Mach binary + end + else + begin + close_in ic; Unknown filename + end + | None -> + close_in ic; + Printf.eprintf "ERROR, bad binary: %s\n" filename; + Unknown filename + (* backwards cause we read the 32bit int big E style *) else if (magic = Mach.Header.kMH_CIGAM_64) then begin seek_in ic 0; @@ -59,64 +66,71 @@ let get_bytes ?verbose:(verbose=false) filename = if (Elf.Header.check_64bit binary) then Elf binary else - Unknown + Unknown filename end else begin close_in ic; if (verbose) then Printf.printf "ignoring binary: %s\n" filename; - Unknown + Unknown filename end -(* if I return goblin binaries and destroy all the beautiful structure of each kind of binary, i can have one return type without matches... -so... I should do it, right? - *) let analyze config binary = match binary with | Mach bytes -> - let binary = ReadMach.analyze config bytes in - if (config.search) then - try - ReadMach.find_export_symbol - config.search_term binary - |> Mach.Exports.print_mach_export_data ~simple:true - (* TODO: add find import symbol *) - with Not_found -> - Printf.printf ""; - else - if (config.graph) then + let binary = ReadMach.analyze config bytes in + if (config.search) then + try + ReadMach.find_export + config.search_term binary + |> Goblin.Export.print + (* |> Goblin.Mach.Exports.print_mach_export_data ~simple:true *) + (* TODO: add find import symbol *) + with Not_found -> + Printf.printf ""; + else + if (config.graph) then + Graph.graph_goblin + ~draw_imports:true + ~draw_libs:true binary + @@ Filename.basename config.filename; + + (* if (config.use_goblin) then begin let goblin = ReadMach.to_goblin binary in Graph.graph_goblin - ~draw_imports:true - ~draw_libs:true goblin - @@ Filename.basename config.filename; +~draw_imports:true +~draw_libs:true goblin +@@ Filename.basename config.filename; end else - Graph.graph_mach_binary + *) + (* +Graph.graph_mach_binary ~draw_imports:true ~draw_libs:true binary (Filename.basename config.filename); - (* ===================== *) - (* ELF *) - (* ===================== *) + *) + (* ===================== *) + (* ELF *) + (* ===================== *) | Elf binary -> - (* analyze the binary and print program headers, etc. *) - let binary = ReadElf.analyze config binary in - if (config.search) then - try - ReadElf.find_export_symbol - config.search_term - binary |> Goblin.Export.print - with Not_found -> - Printf.printf ""; - else - if (config.graph) then - Graph.graph_goblin binary - @@ Filename.basename config.filename; - - | Unknown -> - raise @@ Unimplemented_binary_type (Printf.sprintf "Unknown binary %s" config.install_name) + (* analyze the binary and print program headers, etc. *) + let binary = ReadElf.analyze config binary in + if (config.search) then + try + ReadElf.find_export_symbol + config.search_term + binary |> Goblin.Export.print + with Not_found -> + Printf.printf ""; + else + if (config.graph) then + Graph.graph_goblin binary + @@ Filename.basename config.filename; + + | Unknown string -> + raise @@ Unknown_binary_type (Printf.sprintf "Unknown binary %s" config.install_name) diff --git a/src/Rdr.ml b/src/Rdr.ml index 485ca78..f7c9557 100644 --- a/src/Rdr.ml +++ b/src/Rdr.ml @@ -3,6 +3,8 @@ which would essentially build a map of the entire system, or something like that *) +let version = "2.0" + open Config (* because only has a record type *) type os = Darwin | Linux | Other @@ -22,6 +24,7 @@ let print_headers = ref false let print_libraries = ref false let print_exports = ref false let print_imports = ref false +let print_coverage = ref false let use_goblin = ref false let recursive = ref false let write_symbols = ref false @@ -34,6 +37,8 @@ let anonarg = ref "" let disassemble = ref false let search_term_string = ref "" +let print_version = ref false + let get_config () = let analyze = not (!use_map || !marshal_symbols) in let search = !search_term_string <> "" in @@ -59,7 +64,8 @@ let get_config () = print_headers = !print_headers; print_libraries = !print_libraries; print_exports = !print_exports; - print_imports = !print_imports; + print_imports = !print_imports; + print_coverage = !print_coverage; disassemble = !disassemble; use_map = !use_map; recursive = !recursive; @@ -68,7 +74,7 @@ let get_config () = base_symbol_map_directories = !base_symbol_map_directories; framework_directories = !framework_directories; graph = !graph; - filename = !anonarg; (* TODO: this should be Filname.basename, but unchanged for now *) + filename = !anonarg; (* TODO: this should be Filename.basename, but unchanged for now *) search_term = !search_term_string; use_goblin = !use_goblin; } @@ -102,10 +108,12 @@ let main = ("-F", Arg.String (set_framework_directories), "(OSX Only) String of space or colon separated base framework directories to additionally search when building the symbol map"); ("-r", Arg.Set recursive, "Recursively search directories for binaries; use with -b"); ("-v", Arg.Set verbose, "Print all the things"); + ("--version", Arg.Set print_version, "Print the version and exit"); ("-h", Arg.Set print_headers, "Print the header"); ("-l", Arg.Set print_libraries, "Print the dynamic libraries"); ("-e", Arg.Set print_exports, "Print the exported symbols"); ("-i", Arg.Set print_imports, "Print the imported symbols"); + ("-c", Arg.Set print_coverage, "Print the byte coverage"); ("-s", Arg.Set print_nlist, "Print the symbol table, if present"); ("-f", Arg.Set_string search_term_string, "Find symbol in binary"); ("-b", Arg.Set marshal_symbols, "Build a symbol map and write to $(HOME)/.rdr/tol; default directory is /usr/lib, change with -d"); @@ -115,17 +123,30 @@ let main = ("-D", Arg.Set disassemble, "Disassemble all found symbols"); ("--dis", Arg.Set disassemble, "Disassemble all found symbols"); ] in - let usage_msg = "usage: rdr [-r] [-b] [-m] [-d] [-g] [-G --goblin] [-v | -l | -e | -i] [ | ]\noptions:" in + let usage_msg = "usage: rdr [-r] [-b] [-m] [-d] [-g] [-G --goblin] [-v | -l | -e | -i] []\noptions:" in Arg.parse speclist set_anon_argument usage_msg; + if (!print_version) then + begin + Printf.printf "v%s\n" version; + exit 0 + end + else (* BEGIN program init *) Storage.create_dot_directory (); (* make our .rdr/ if we haven't already *) let config = get_config () in if (config.analyze && config.filename = "") then - begin - Printf.eprintf "Error: no path to binary given\n"; - Arg.usage speclist usage_msg; - exit 1; - end; + if (config.verbose) then + begin + (* hack to print version *) + Printf.printf "v%s\n" version; + exit 0 + end + else + begin + Printf.eprintf "Error: no path to binary given\n"; + Arg.usage speclist usage_msg; + exit 1; + end; if (config.use_map) then (* -m *) SymbolMap.use_symbol_map config diff --git a/src/ReadElf.ml b/src/ReadElf.ml index b2fe2f4..f63d47a 100644 --- a/src/ReadElf.ml +++ b/src/ReadElf.ml @@ -136,11 +136,18 @@ let analyze config binary = ~verbose:(config.verbose || config.print_headers) elf.Elf.header; Elf.ProgramHeader.print_program_headers elf.Elf.program_headers; - Elf.SectionHeader.print_section_headers elf.Elf.section_headers + Elf.SectionHeader.print_section_headers elf.Elf.section_headers; + Elf.Dynamic.print_dynamic elf.Elf._dynamic; + if (elf.Elf.interpreter <> "") then + Printf.printf "Interpreter: %s\n" elf.Elf.interpreter end; - if (config.print_headers) then Elf.Dynamic.print_dynamic elf.Elf._dynamic; - if (config.print_nlist) then - symbols_to_goblin ~use_tol:config.use_tol ~libs:elf.Elf.libraries (soname,config.install_name) elf.Elf.symbol_table elf.Elf.relocations + if (config.verbose || config.print_nlist) then + symbols_to_goblin + ~use_tol:config.use_tol + ~libs:elf.Elf.libraries + (soname,config.install_name) + elf.Elf.symbol_table + elf.Elf.relocations |> Goblin.Symbol.sort_symbols |> List.iter (Goblin.Symbol.print_symbol_data ~like_nlist:true); @@ -159,7 +166,9 @@ let analyze config binary = begin Printf.printf "Imports (%d)\n" (List.length goblin_imports); List.iter (Goblin.Symbol.print_symbol_data ~with_lib:true) goblin_imports - end + end; + if (config.verbose || config.print_coverage) then + ByteCoverage.print elf.Elf.byte_coverage end; (* ============== *) (* create goblin binary *) diff --git a/src/ReadMach.ml b/src/ReadMach.ml index c22e5af..b853c76 100644 --- a/src/ReadMach.ml +++ b/src/ReadMach.ml @@ -1,156 +1,46 @@ -(* TODO: - (0) add load segment boundaries, and nlists locals as a parameters to the compute size - (1) compute final sizes after imports, locals, - and exports are glommed into a goblin symbol soup, using all the information available +(* + #TODO + * Implement byte coverage + * add load segment boundaries, and nlists locals as a parameters to the compute size + * compute final sizes after imports, locals, and exports are glommed into a goblin symbol soup, using all the information available *) open Printf open Mach.LoadCommand -open Config (* only contains a record *) +open Goblin.Export +open Goblin.Import +open Config -type mach_binary = { - name: string; - install_name: string; - imports: Mach.Imports.mach_import_data array; - nimports: int; - exports: Mach.Exports.mach_export_data array; - nexports: int; - islib: bool; - libs: string array; - nlibs: int; - code: bytes; -} - -let imports_to_string imports = - let b = Buffer.create (Array.length imports) in - Array.fold_left (fun acc import -> - Buffer.add_string acc - @@ Printf.sprintf "%s" - @@ Mach.Imports.mach_import_data_to_string import; - acc - ) b imports |> Buffer.contents - -let exports_to_string exports = - let b = Buffer.create (Array.length exports) in - Array.fold_left (fun acc export -> - Buffer.add_string acc - @@ Printf.sprintf "%s" - @@ Mach.Exports.mach_export_data_to_string export; - acc - ) b exports |> Buffer.contents - -let binary_to_string binary = - let libstr = if (binary.islib) then " (LIB)" else "" in - Printf.sprintf "%s%s:\nMach.Imports (%d):\n%sMach.Exports (%d):\n%s\n" - binary.name libstr - (binary.nimports) - (imports_to_string binary.imports) - (binary.nexports) - (exports_to_string binary.exports) - -let debug = false - -let create_binary (name,install_name) (nls,las) exports islib libs = - (* flatten and condense import info *) - let imports = nls @ las |> Array.of_list in - let nimports = Array.length imports in - let exports = Array.of_list exports in - let nexports = Array.length exports in (* careful here, due to aliasing, if order swapped, in trouble *) - let nlibs = Array.length libs in - let code = Bytes.empty in - {name; install_name; imports; nimports; exports; nexports; islib; libs; nlibs; code} - -let to_goblin mach = - let name = mach.name in - let install_name = mach.install_name in - let libs = mach.libs in - let nlibs = mach.nlibs in - let exports = - Array.init (mach.nexports) - (fun i -> - let export = mach.exports.(i) in - (Mach.Exports.mach_export_data_to_symbol_data export - |> Goblin.Symbol.to_goblin_export)) - in - let nexports = mach.nexports in - let imports = - Array.init (mach.nimports) - (fun i -> - let import = mach.imports.(i) in - let name = Goblin.Symbol.find_symbol_name import in - let lib = Goblin.Symbol.find_symbol_lib import |> fst in - let is_lazy = Mach.Imports.is_lazy import in - let idx = i in - let offset = Goblin.Symbol.find_symbol_offset import in - let size = Goblin.Symbol.find_symbol_size import in - {Goblin.Import.name = name; lib; is_lazy; idx; offset; size}) in - let nimports = mach.nimports in - let islib = mach.islib in - let code = mach.code in - {Goblin.name; install_name; islib; libs; nlibs; exports; nexports; imports; nimports; code} - -let analyze config binary = - let mach_header = Mach.Header.get_mach_header binary in - let lcs = Mach.LoadCommand.get_load_commands binary Mach.Header.sizeof_mach_header mach_header.Mach.Header.ncmds mach_header.Mach.Header.sizeofcmds in +let analyze config binary = + let mach = Mach.get binary in + let goblin = Goblin.Mach.to_goblin mach config.install_name in if (not config.silent) then begin - if (not config.search) then Mach.Header.print_header mach_header; - if (config.verbose || config.print_headers) then Mach.LoadCommand.print_load_commands lcs + if (not config.search) then Mach.Header.print_header mach.Mach.header; + if (config.verbose || config.print_headers) then Mach.LoadCommand.print_load_commands mach.Mach.load_commands; + if (config.verbose || config.print_libraries) then Mach.LoadCommand.print_libraries mach.Mach.libraries; + if (config.verbose || config.print_exports) then Goblin.print_exports goblin.Goblin.exports; + if (config.verbose || config.print_imports) then Goblin.print_imports goblin.Goblin.imports; + if (config.print_nlist) then Mach.SymbolTable.print mach.Mach.nlist; + if (config.print_coverage) then + ByteCoverage.print mach.Mach.byte_coverage end; - let name = - match Mach.LoadCommand.get_lib_name lcs with - | Some dylib -> - dylib.lc_str - | _ -> config.name (* we're not a dylib *) - in - let install_name = config.install_name in - (* lib.(0) = install_name *) - let segments = Mach.LoadCommand.get_segments lcs in - let libraries = Mach.LoadCommand.get_libraries lcs install_name in - (* move this inside of dyld, need the nlist info to compute locals... *) - let islib = mach_header.Mach.Header.filetype = Mach.Header.kMH_DYLIB in - let dyld_info = Mach.LoadCommand.get_dyld_info lcs in - match dyld_info with - | Some dyld_info -> - (* TODO: add load segment boundaries, and nlists locals as a parameters *) - let symbols = - try - let symtab = Mach.LoadCommand.find_load_command Mach.LoadCommand.SYMTAB lcs in - Mach.Nlist.get_symbols binary symtab - with Not_found -> - [] - in - let locals = Mach.Nlist.filter_by_kind Goblin.Symbol.Local symbols in - ignore locals; - let exports = Mach.Exports.get_exports binary dyld_info libraries in - (* TODO: yea, need to fix imports like machExports; send in the libraries, - do all that preprocessing there, and not in create binary *) - let imports = Mach.Imports.get_imports binary dyld_info libraries segments in - if (not config.silent) then - begin - if (config.verbose || config.print_libraries) then Mach.LoadCommand.print_libraries libraries; - if (config.verbose || config.print_exports) then Mach.Exports.print_exports exports; - if (config.verbose || config.print_imports) then Mach.Imports.print_imports imports; - if (config.print_nlist) then Mach.Nlist.print_symbols symbols; - end; - (* TODO: compute final sizes here, after imports, locals, - and exports are glommed into a goblin soup, using all the information available*) - create_binary (name,install_name) imports exports islib libraries - | None -> - if (config.verbose && not config.silent) then Printf.printf "No dyld_info_only\n"; - create_binary (name,install_name) Mach.Imports.empty Mach.Exports.empty islib libraries + goblin -let find_export_symbol symbol binary = - let len = binary.nexports in +(* this will lookup the binary in the goblin binary *) +let find_export symbol (binary:Goblin.t) = + let len = binary.Goblin.nexports in let rec loop i = if (i >= len) then raise Not_found - else if (Goblin.Symbol.find_symbol_name binary.exports.(i) = symbol) then - binary.exports.(i) + else if (binary.Goblin.exports.(i).name = symbol) then + binary.Goblin.exports.(i) else loop (i + 1) in loop 0 - +(* let find_import_symbol symbol binary = Mach.Imports.find symbol binary.imports + *) + diff --git a/src/SymbolMap.ml b/src/SymbolMap.ml index 4633106..54957d7 100644 --- a/src/SymbolMap.ml +++ b/src/SymbolMap.ml @@ -201,7 +201,7 @@ let build_polymorphic_map config = end else let lib = Stack.pop libstack in - let bytes = Object.get_bytes ~verbose:verbose lib in + let bytes = try Object.get_bytes ~verbose:verbose lib with _ -> Object.Unknown lib in let name = Filename.basename lib in let install_name = lib in let config = {config with silent=true; verbose=false; name; install_name; filename=lib} in @@ -209,10 +209,11 @@ let build_polymorphic_map config = (* could do a |> ReadMach.to_goblin here ? --- better yet, to goblin, then map building code after to avoid DRY violations *) | Object.Mach binary -> let binary = ReadMach.analyze config binary in - let imports = binary.ReadMach.imports in + let imports = binary.Goblin.imports in Array.iter (fun import -> - let symbol = Mach.Imports.import_name import in + (* let symbol = Goblin.Mach.Imports.import_name import in *) + let symbol = import.Goblin.Import.name in if (Hashtbl.mem tbl symbol) then let count = Hashtbl.find tbl symbol in Hashtbl.replace tbl symbol (count + 1) @@ -220,11 +221,13 @@ let build_polymorphic_map config = Hashtbl.add tbl symbol 1 ) imports; (* let symbols = Mach.Exports.export_map_to_mach_export_data_list binary.ReadMach.exports in *) - let symbols = binary.ReadMach.exports in + let symbols = binary.Goblin.exports in (* now we fold over the export -> polymorphic variant list of [mach_export_data] mappings returned from above *) let map' = Array.fold_left - (fun acc data -> + (fun acc data -> + let data = Goblin.Symbol.from_goblin_export + data ~libname:binary.Goblin.name ~libinstall_name:binary.Goblin.install_name in (* this is bad, not checking for weird state of no export symbol name, but since i construct the data it isn't possible... right? *) let symbol = Goblin.Symbol.find_symbol_name data in try @@ -239,7 +242,7 @@ let build_polymorphic_map config = ToL.add symbol [data] acc ) map symbols in - loop map' ((binary.ReadMach.name, binary.ReadMach.libs)::lib_deps) + loop map' ((binary.Goblin.name, binary.Goblin.libs)::lib_deps) | Object.Elf binary -> (* hurr durr iman elf *) let binary = ReadElf.analyze config binary in @@ -293,7 +296,7 @@ let polymorphic_list_to_string list = | export::exports -> Buffer.add_string b (* need to not use mach export to string? *) - @@ Mach.Exports.mach_export_data_to_string + @@ Goblin.Mach.Exports.mach_export_data_to_string ~use_flags:false export; Buffer.add_string b "\n"; loop exports diff --git a/src/ToL.ml b/src/ToL.ml index e06f752..a3dfe5a 100644 --- a/src/ToL.ml +++ b/src/ToL.ml @@ -3,7 +3,7 @@ module SystemSymbolMap = Map.Make(String) -type t = [ `Kind of Goblin.Symbol.symbol_kind +type t = [ `Kind of GoblinSymbol.symbol_kind | `Lib of string * string | `Name of string | `Offset of int @@ -50,7 +50,7 @@ let get_libraries ?bin_libs:(bin_libs=[]) symbol map = Generics.list_with_stringer ~newline:true ~omit_singleton_braces:true (fun symbol -> - let libname,libinstall_name = Goblin.Symbol.find_symbol_lib symbol in + let libname,libinstall_name = GoblinSymbol.find_symbol_lib symbol in libname (* TODO: check if correct *) ) symbols @@ -63,7 +63,7 @@ let print_map map = Printf.printf "%s -> %s\n" key @@ (Generics.list_with_stringer (fun export -> - Goblin.Symbol.find_symbol_lib export |> fst) + GoblinSymbol.find_symbol_lib export |> fst) (* TODO: check if correct *) values)) map diff --git a/src/rdrutils.mldylib b/src/rdrutils.mldylib new file mode 100644 index 0000000..795e241 --- /dev/null +++ b/src/rdrutils.mldylib @@ -0,0 +1,12 @@ +# OASIS_START +# DO NOT EDIT (digest: 6f355e939c9a50c3db107ae9169b1bbd) +Config +Command +Object +Graph +Storage +SymbolMap +ReadMach +ReadElf +ToL +# OASIS_STOP diff --git a/src/rdrutils.mllib b/src/rdrutils.mllib new file mode 100644 index 0000000..795e241 --- /dev/null +++ b/src/rdrutils.mllib @@ -0,0 +1,12 @@ +# OASIS_START +# DO NOT EDIT (digest: 6f355e939c9a50c3db107ae9169b1bbd) +Config +Command +Object +Graph +Storage +SymbolMap +ReadMach +ReadElf +ToL +# OASIS_STOP