diff --git a/Project.toml b/Project.toml
index ef452bf..3708ffc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -27,7 +27,8 @@ julia = "1.10"
 [apps.juliac]
 
 [extras]
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test"]
+test = ["Test", "JSON"]
diff --git a/src/JuliaC.jl b/src/JuliaC.jl
index fda774f..7ec7641 100644
--- a/src/JuliaC.jl
+++ b/src/JuliaC.jl
@@ -25,6 +25,7 @@ Base.@kwdef mutable struct ImageRecipe
     c_sources::Vector{String} = String[]
     cflags::Vector{String} = String[]
     extra_objects::Vector{String} = String[]
+    export_abi::Union{String, Nothing} = nothing
 end
 
 Base.@kwdef mutable struct LinkRecipe
@@ -74,6 +75,7 @@ function _print_usage(io::IO=stdout)
     println(io, " --privatize Privatize bundled libjulia (Unix)")
     println(io, " --trim[=mode] Strip IR/metadata (e.g. --trim=safe)")
     println(io, " --compile-ccallable Export ccallable entrypoints")
+    println(io, " --export-abi <file> Emit type / function information for the ABI (in JSON format)")
     println(io, " --experimental Forwarded to Julia (needed for --trim)")
     println(io, " --verbose Print commands and timings")
     println(io, " -h, --help Show this help")
@@ -113,6 +115,10 @@ function _parse_cli_args(args::Vector{String})
             push!(image_recipe.julia_args, arg)
         elseif arg == "--compile-ccallable"
             image_recipe.add_ccallables = true
+        elseif arg == "--export-abi"
+            i == length(args) && error("--export-abi requires an argument")
+            image_recipe.export_abi = args[i+1]
+            i += 1
         elseif startswith(arg, "--project")
             if occursin('=', arg)
                 proj = split(arg, '='; limit=2)[2]
diff --git a/src/abi_export.jl b/src/abi_export.jl
new file mode 100644
index 0000000..c62e36f
--- /dev/null
+++ b/src/abi_export.jl
@@ -0,0 +1,241 @@
+# Types that are accepted in an exported ABI even though they are defined in Core.
+const C_friendly_types = Base.IdSet{Any}([ # a few of these are redundant to make it easier to maintain
+    Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64, Bool,
+    Cvoid, Cint, Cshort, Clong, Cuint, Cushort, Culong, Cssize_t, Csize_t,
+    Cchar, Cwchar_t, Cstring, Cwstring,
+    RawFD,
+])
+
+# Add `T` and every type reachable from it (pointees, type parameters, field types)
+# to `types`. Errors on types that cannot be described in the ABI: Core internals
+# outside of `C_friendly_types`, and any reachable type that is not a `DataType`.
+function recursively_add_types!(types::Base.IdSet{DataType}, @nospecialize(T::DataType))
+    T in types && return types
+    while T.name === Ptr.body.name
+        push!(types, T)
+        T = T.parameters[1] # unwrap Ptr{...}
+        T isa DataType || error("invalid type for juliac: ", T) # e.g. Ptr{Union{...}}
+        T in types && return types
+    end
+    if T.name.module === Core && T ∉ C_friendly_types
+        error("invalid type for juliac: ", T) # exclude internals (they may change)
+    end
+    push!(types, T)
+    for list in (T.parameters, fieldtypes(T))
+        for S in list
+            S isa Type || continue # skip value parameters such as the `3` in `Foo{3}`
+            S isa DataType || error("invalid type for juliac: ", S)
+            recursively_add_types!(types, S)
+        end
+    end
+    return types
+end
+
+# Output stream plus the numeric ID assigned to each emitted type.
+struct TypeEmitter
+    io::IO
+    type_ids::IdDict{Any,Int}
+end
+
+# Return `s` as a double-quoted JSON string literal with quotes, backslashes and controls escaped.
+function escape_string_json(s::AbstractString)
+    iob = IOBuffer()
+    print(iob, '"')
+    for c in s
+        if c == '"'
+            print(iob, "\\\"")
+        elseif c == '\\'
+            print(iob, "\\\\")
+        elseif c == '\b'
+            print(iob, "\\b")
+        elseif c == '\f'
+            print(iob, "\\f")
+        elseif c == '\n'
+            print(iob, "\\n")
+        elseif c == '\r'
+            print(iob, "\\r")
+        elseif c == '\t'
+            print(iob, "\\t")
+        elseif '\x00' <= c <= '\x1f'
+            print(iob, "\\u", lpad(string(UInt16(c), base=16), 4, '0'))
+        else
+            # explicit check instead of `@assert`: assertions may be disabled
+            isvalid(c) || throw(ArgumentError("invalid unicode character"))
+            print(iob, c)
+        end
+    end
+    print(iob, '"')
+    return String(take!(iob))
+end
+
+# JSON string literal holding the compact printed form of `dt`.
+function type_name_json(@nospecialize(dt::DataType))
+    return escape_string_json(repr(dt; context=:compact=>true))
+end
+
+# JSON string literal holding the name of field number `field` of `dt`.
+function field_name_json(@nospecialize(dt::DataType), field::Int)
+    name = String(fieldname(dt, field))
+    return escape_string_json(name)
+end
+
+# Emit the JSON object describing the pointer type `dt` (no trailing newline).
+function emit_pointer_info!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    pointee_type_id = ctx.type_ids[dt.parameters[1]]
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println(" \"id\": ", ctx.type_ids[dt], ",")
+        indented_println(" \"kind\": \"pointer\",")
+        indented_println(" \"name\": ", type_name_json(dt), ",")
+        indented_println(" \"pointee_type_id\": ", pointee_type_id)
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+# Emit a one-line JSON object describing field number `field` of `dt` (no trailing newline).
+function emit_field_info!(ctx::TypeEmitter, @nospecialize(dt::DataType), field::Int; indent::Int = 0)
+    desc = Base.DataTypeFieldDesc(dt)[field]
+    type_id = ctx.type_ids[fieldtype(dt, field)]
+    print(ctx.io, " " ^ indent)
+    print(ctx.io, "{")
+    print(ctx.io, " \"name\": ", field_name_json(dt, field), ",")
+    print(ctx.io, " \"type_id\": ", type_id, ",")
+    print(ctx.io, " \"offset\": ", desc.offset, ",")
+    print(ctx.io, " \"isptr\": ", desc.isptr, ",")
+    print(ctx.io, " \"isfieldatomic\": ", Base.isfieldatomic(dt, field))
+    print(ctx.io, " }")
+end
+
+# Emit the JSON object describing the struct type `dt`, including all of its fields.
+function emit_struct_info!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    type_id = ctx.type_ids[dt]
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println(" \"id\": ", type_id, ",")
+        indented_println(ismutabletype(dt) ? " \"kind\": \"mutable struct\"," : " \"kind\": \"struct\",")
+        indented_println(" \"name\": ", type_name_json(dt), ",")
+        indented_println(" \"size\": ", Core.sizeof(dt), ",")
+        indented_println(" \"alignment\": ", Base.datatype_alignment(dt), ",")
+        indented_println(" \"fields\": [")
+        for i = 1:Base.datatype_nfields(dt)
+            emit_field_info!(ctx, dt, i; indent = indent + 4)
+            println(ctx.io, i == Base.datatype_nfields(dt) ? "" : ",")
+        end
+        indented_println(" ]")
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+# Emit the JSON object describing the primitive type `dt` (no trailing newline).
+function emit_primitive_type!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    type_id = ctx.type_ids[dt]
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println(" \"id\": ", type_id, ",")
+        indented_println(" \"kind\": \"primitive\",")
+        indented_println(" \"name\": ", type_name_json(dt), ",")
+        indented_println(" \"signed\": ", (dt <: Signed), ",")
+        indented_println(" \"bits\": ", 8 * Base.packedsize(dt), ",") # size for reinterpret / in-register
+        indented_println(" \"size\": ", Base.aligned_sizeof(dt), ",") # size with padding / in-memory
+        indented_println(" \"alignment\": ", Base.datatype_alignment(dt))
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+# Pick the emitter for `dt`: pointer, primitive, or (by elimination) struct.
+function emit_type_info!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    if dt.name === Ptr.body.name
+        emit_pointer_info!(ctx, dt; indent)
+    elseif Base.isprimitivetype(dt)
+        emit_primitive_type!(ctx, dt; indent)
+    else
+        emit_struct_info!(ctx, dt; indent)
+    end
+end
+
+# Emit the JSON object for one ccallable method: symbol, printed signature, arguments, return type.
+function emit_method_info!(ctx::TypeEmitter, method::Core.Method; indent::Int = 0)
+    (rt, sig) = method.ccallable
+    (name, symbol) = let
+        symbol = length(method.ccallable) > 2 ? Symbol(method.ccallable[3]) : method.name
+        iob = IOBuffer()
+        print(IOContext(iob, :print_method_signature_only => true), method)
+        str = String(take!(iob))
+        if symbol !== method.name && startswith(str, String(method.name))
+            # Make a best-effort attempt to use the exported name
+            #
+            # Note: the `startswith` check is to make sure we support 'functor's in arg0,
+            # which Base.@ccallable supports as long as they are singletons.
+            str = replace(str, String(method.name) => String(symbol); count = 1)
+        end
+        (str, String(symbol))
+    end
+
+    argnames = String.(Base.method_argnames(method))
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println(" \"symbol\": ", escape_string_json(symbol), ",")
+        indented_println(" \"name\": ", escape_string_json(name), ",")
+        indented_println(" \"arguments\": [")
+        for i in 2:length(sig.parameters)
+            print(ctx.io, " " ^ (indent + 4))
+            print(ctx.io, "{")
+            print(ctx.io, " \"name\": ", escape_string_json(argnames[i]), ",")
+            print(ctx.io, " \"type_id\": ", ctx.type_ids[sig.parameters[i]])
+            println(ctx.io, i == length(sig.parameters) ? " }" : " },")
+        end
+        indented_println(" ],")
+        indented_println(" \"returns\": { \"type_id\": ", ctx.type_ids[rt], " }")
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+# Emit the whole ABI document: assign type IDs, then write the function list and the type list.
+function emit_abi_info!(ctx::TypeEmitter, exported::Vector{Core.Method}, types::Base.IdSet{DataType})
+    println(ctx.io, "{")
+
+    # assign an ID to each type, so that we can refer to them
+    for (i, T) in enumerate(types)
+        ctx.type_ids[T] = i
+    end
+
+    # print exported functions
+    println(ctx.io, " \"functions\": [")
+    for (i, method) in enumerate(exported)
+        emit_method_info!(ctx, method; indent = 4)
+        println(ctx.io, i == length(exported) ? "" : ",")
+    end
+    println(ctx.io, " ],")
+
+    # print type / structure information
+    println(ctx.io, " \"types\": [")
+    for (i, T) in enumerate(types)
+        emit_type_info!(ctx, T; indent = 4)
+        println(ctx.io, i == length(types) ? "" : ",")
+    end
+    println(ctx.io, " ]")
+
+    println(ctx.io, "}")
+end
+
+# Collect every ccallable method and the types it references, then write the ABI JSON to `io`.
+function write_abi_metadata(io::IO)
+    types = Base.IdSet{DataType}()
+
+    # discover all exported methods + any types they reference
+    exported = Core.Method[]
+    Base.visit(Core.methodtable) do method
+        if isdefined(method, :ccallable)
+            push!(exported, method)
+            (rt, sig) = method.ccallable
+            for T in sig.parameters[2:end]
+                recursively_add_types!(types, T)
+            end
+            recursively_add_types!(types, rt)
+        end
+    end
+
+    # print the discovered ABI info
+    ctx = TypeEmitter(io, IdDict{Any,Int}())
+    emit_abi_info!(ctx, exported, types)
+end
diff --git a/src/compiling.jl b/src/compiling.jl
index 1d745c2..3c34b55 100644
--- a/src/compiling.jl
+++ b/src/compiling.jl
@@ -108,6 +108,9 @@ function compile_products(recipe::ImageRecipe)
     if recipe.use_loaded_libs
         cmd = `$cmd --use-loaded-libs`
     end
+    if recipe.export_abi !== nothing
+        cmd = `$cmd --export-abi $(recipe.export_abi)`
+    end
 
     # Threading
     cmd = addenv(cmd, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1, env_overrides...)
diff --git a/src/scripts/juliac-buildscript.jl b/src/scripts/juliac-buildscript.jl
index 8483b92..b6688bf 100644
--- a/src/scripts/juliac-buildscript.jl
+++ b/src/scripts/juliac-buildscript.jl
@@ -27,11 +27,13 @@ end
 # --output- : One of: exe | lib | sysimage | o | bc. Controls entrypoint setup.
 # --compile-ccallable : Export ccallable entrypoints (for shared libraries).
 # --use-loaded-libs : Enable Libdl.dlopen override to reuse existing loads.
-source_path, output_type, add_ccallables, use_loaded_libs = let
+# --export-abi <file> : Emit JSON ABI spec to <file> (the next argument)
+source_path, output_type, add_ccallables, use_loaded_libs, export_abi = let
     source_path = ""
     output_type = ""
     add_ccallables = false
     use_loaded_libs = false
+    export_abi = nothing
     it = Iterators.Stateful(ARGS)
     for arg in it
         if startswith(arg, "--source=")
@@ -46,10 +48,12 @@ source_path, output_type, add_ccallables, use_loaded_libs = let
             add_ccallables = true
         elseif arg == "--use-loaded-libs"
             use_loaded_libs = true
+        elseif arg == "--export-abi"
+            export_abi = popfirst!(it)
         end
     end
     source_path == "" && error("Missing required --source ")
-    (source_path, output_type, add_ccallables, use_loaded_libs)
+    (source_path, output_type, add_ccallables, use_loaded_libs, export_abi)
 end
 
 # Load user code
@@ -130,6 +134,14 @@ let usermod
     end
 end
 
+if export_abi !== nothing
+    include(joinpath(@__DIR__, "..", "abi_export.jl"))
+    Core.@latestworld
+    open(export_abi, "w") do io
+        write_abi_metadata(io)
+    end
+end
+
 # Run the verifier in the current world (before build-script modifications),
 # so that error messages and types print in their usual way.
 Core.Compiler._verify_trim_world_age[] = Base.get_world_counter()
diff --git a/test/cli.jl b/test/cli.jl
index 7994ba8..621386b 100644
--- a/test/cli.jl
+++ b/test/cli.jl
@@ -1,8 +1,7 @@
+using JSON
+
 function run_juliac_cli(args::Vector{String})
-    cmd = `$(Base.julia_cmd()) --startup-file=no --history-file=no --project=$(ROOT) -m JuliaC`
-    for a in args
-        cmd = `$cmd $a`
-    end
+    cmd = `$(Base.julia_cmd()) --startup-file=no --history-file=no --project=$(ROOT) -m JuliaC $args`
     run(cmd)
 end
 
@@ -26,6 +25,69 @@ end
     print_tree_with_sizes(outdir)
 end
 
+@testset "ABI export" begin
+    outdir = mktempdir()
+    libout = joinpath(outdir, "libsimple")
+    abiout = joinpath(outdir, "bindinginfo_libsimple.json")
+    cliargs = String[
+        "--output-lib", libout,
+        "--compile-ccallable",
+        "--trim=safe",
+        joinpath(@__DIR__, "libsimple.jl"),
+        "--export-abi",
+        abiout,
+        "--verbose",
+    ]
+    run_juliac_cli(cliargs)
+    str = read(abiout, String)
+    abi = JSON.parse(str)
+
+    # `copyto_and_sum` should have been exported
+    @test any(Bool[func["symbol"] == "copyto_and_sum" for func in abi["functions"]])
+
+    # `CVector{Float32}` should have been exported with the correct info
+    @test any(Bool[type["name"] == "CVector{Float32}" for type in abi["types"]])
+    CVector_Float32 = abi["types"][findfirst(type["name"] == "CVector{Float32}" for type in abi["types"])]
+    @test length(CVector_Float32["fields"]) == 2
+    @test CVector_Float32["fields"][1]["offset"] == 0
+    @test CVector_Float32["fields"][2]["offset"] == sizeof(UInt)
+    @test abi["types"][CVector_Float32["fields"][1]["type_id"]]["name"] == "Int32"
+    @test abi["types"][CVector_Float32["fields"][2]["type_id"]]["name"] == "Ptr{Float32}"
+    @test CVector_Float32["size"] == sizeof(UInt) * 2
+
+    # `CVectorPair{Float32}` should have been exported with the correct info
+    @test any(Bool[type["name"] == "CVectorPair{Float32}" for type in abi["types"]])
+    CVectorPair_Float32 = abi["types"][findfirst(type["name"] == "CVectorPair{Float32}" for type in abi["types"])]
+    @test length(CVectorPair_Float32["fields"]) == 2
+    @test CVectorPair_Float32["fields"][1]["offset"] == 0
+    @test CVectorPair_Float32["fields"][2]["offset"] == sizeof(UInt) * 2
+    @test abi["types"][CVectorPair_Float32["fields"][1]["type_id"]]["name"] == "CVector{Float32}"
+    @test abi["types"][CVectorPair_Float32["fields"][2]["type_id"]]["name"] == "CVector{Float32}"
+    @test CVectorPair_Float32["size"] == sizeof(UInt) * 4
+
+    # `CTree{Float64}` should have been exported with the correct info
+    @test any(Bool[type["name"] == "CTree{Float64}" for type in abi["types"]])
+    CTree_Float64_id = findfirst(type["name"] == "CTree{Float64}" for type in abi["types"])
+    CTree_Float64 = abi["types"][CTree_Float64_id]
+    @test length(CTree_Float64["fields"]) == 1
+    @test CTree_Float64["fields"][1]["offset"] == 0
+    CVector_CTree_Float64 = abi["types"][CTree_Float64["fields"][1]["type_id"]]
+    @test CVector_CTree_Float64["name"] == "CVector{CTree{Float64}}"
+    @test CTree_Float64["size"] == sizeof(UInt) * 2
+
+    # `CVector{CTree{Float64}}` should have been exported with the correct info
+    @test length(CVector_CTree_Float64["fields"]) == 2
+    @test CVector_CTree_Float64["fields"][1]["offset"] == 0
+    @test CVector_CTree_Float64["fields"][2]["offset"] == sizeof(UInt)
+    @test abi["types"][CVector_CTree_Float64["fields"][1]["type_id"]]["name"] == "Int32"
+    @test abi["types"][CVector_CTree_Float64["fields"][2]["type_id"]]["name"] == "Ptr{CTree{Float64}}"
+    @test CVector_CTree_Float64["size"] == sizeof(UInt) * 2
+
+    # `Ptr{CTree{Float64}}` should refer (recursively) back to the original type id
+    Ptr_CTree_Float64 = abi["types"][CVector_CTree_Float64["fields"][2]["type_id"]]
+    @test Ptr_CTree_Float64["pointee_type_id"] == CTree_Float64_id
+end
+
 @testset "CLI library privatize end-to-end" begin
     outdir = mktempdir()
     libout = joinpath(outdir, "libpriv")
diff --git a/test/libsimple.jl b/test/libsimple.jl
new file mode 100644
index 0000000..64126ff
--- /dev/null
+++ b/test/libsimple.jl
@@ -0,0 +1,58 @@
+module SimpleLib
+# Test the logging of entrypoints and types in a C-callable Julia library.
+
+struct CVector{T}
+    length::Cint
+    data::Ptr{T}
+end
+
+struct CVectorPair{T}
+    from::CVector{T}
+    to::CVector{T}
+end
+
+struct MyTwoVec
+    x::Int32
+    y::Int32
+end
+
+struct CTree{T}
+    # test that recursive datatypes work as expected
+    children::CVector{CTree{T}}
+end
+
+Base.@ccallable "tree_size" function size(tree::CTree{Float64})::Int64
+    children = unsafe_wrap(Array, tree.children.data, tree.children.length)
+    # Return the size of this sub-tree
+    return sum(Int64[
+        size(child)
+        for child in children
+    ]; init=1)
+end
+
+Base.@ccallable "copyto_and_sum" function badname(fromto::CVectorPair{Float32})::Float32
+    from, to = unsafe_wrap(Array, fromto.from.data, fromto.from.length), unsafe_wrap(Array, fromto.to.data, fromto.to.length)
+    copyto!(to, from)
+    return sum(to)
+end
+
+Base.@ccallable function countsame(list::Ptr{MyTwoVec}, n::Int32)::Int32
+    list = unsafe_wrap(Array, list, n)
+    count = 0
+    for v in list
+        count += v.x == v.y
+    end
+    return count
+end
+
+export countsame, copyto_and_sum
+
+# FIXME? varargs
+# Base.@ccallable function printints(x::Cint...)::Nothing
+#     for i in 1:length(x)
+#         print(x[i], " ")
+#     end
+#     println()
+# end
+
+end