UAI reference solution comparison tests (#41)

mroavi · web-flow · commit e5ae5a2cb0fa · 2023-07-11T21:20:10.000+08:00
* Update Artifacts.toml

* Add MAP UAI reference solution comparison test

* Implement `read_query_file`

* Make `get_instance_filepaths` return the query file path as well

* Add a `queryvars` field to UAIInstance composite type

* Add MMAP UAI reference solution comparison test

* Add comments beside MMAP problems that fail the test

* Disable Promedus MAR tests

* Treat reading the PR solution file as a separate case and simplify tests

* Refactor `read_solution_file` function

* Print the name of the problem being tested in all "UAI Reference Solution Comparison" testsets

* Move `get_problems_names` function to `test/utils.jl`

* Fix typo in function name

* Parse PR solution as Float64

* Minor

* Enable Pedigree MAR UAI reference tests

* Add PR UAI reference solution comparison test

* Update the UAI file format URL
diff --git a/benchmark/bench_map.jl b/benchmark/bench_map.jl
@@ -6,7 +6,7 @@ using Artifacts
 
 const SUITE = BenchmarkGroup()
 
-model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
 problem = read_instance(model_filepath; evidence_filepath, solution_filepath)
 
 optimizer = TreeSA(ntrials = 1, niters = 2, βs = 1:0.1:40)
diff --git a/benchmark/bench_mar.jl b/benchmark/bench_mar.jl
@@ -8,7 +8,7 @@ using Artifacts
 
 const SUITE = BenchmarkGroup()
 
-model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
 problem = read_instance(model_filepath; evidence_filepath, solution_filepath)
 
 optimizer = TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)
diff --git a/benchmark/bench_mmap.jl b/benchmark/bench_mmap.jl
@@ -6,7 +6,7 @@ using Artifacts
 
 const SUITE = BenchmarkGroup()
 
-model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
 problem = read_instance(model_filepath; evidence_filepath, solution_filepath)
 optimizer = TreeSA(ntrials = 1, niters = 2, βs = 1:0.1:40)
 
diff --git a/src/Core.jl b/src/Core.jl
@@ -24,6 +24,7 @@ $(TYPEDEF)
 
 * `obsvars` is a vector of observed variables,
 * `obsvals` is a vector of observed values,
+* `queryvars` is a vector of query variables,
 * `reference_solution` is a vector with the reference solution.
 """
 struct UAIInstance{ET, FT <: Factor{ET}}
@@ -34,7 +35,8 @@ struct UAIInstance{ET, FT <: Factor{ET}}
 
     obsvars::Vector{Int}
     obsvals::Vector{Int}
-    reference_solution::Union{Vector{Vector{ET}}, Vector{Int}}
+    queryvars::Vector{Int}
+    reference_solution::Union{Vector{Vector{ET}}, Vector{Int}, Float64}
 end
 
 """
diff --git a/src/utils.jl b/src/utils.jl
@@ -5,7 +5,7 @@ Parse the problem instance found in `model_filepath` defined in the UAI model
 format.
 
 The UAI file formats are defined in:
-https://personal.utdallas.edu/~vibhav.gogate/uai16-evaluation/uaiformat.html
+https://uaicompetition.github.io/uci-2022/file-formats/
 """
 function read_model_file(model_filepath; factor_eltype = Float64)
     # Read the uai file into an array of lines
@@ -69,7 +69,7 @@ Return the observed variables and values in `evidence_filepath`. If the passed
 file path is an empty string, return empty vectors.
 
 The UAI file formats are defined in:
-https://personal.utdallas.edu/~vibhav.gogate/uai16-evaluation/uaiformat.html
+https://uaicompetition.github.io/uci-2022/file-formats/
 """
 function read_evidence_file(evidence_filepath::AbstractString)
 
@@ -93,35 +93,67 @@ function read_evidence_file(evidence_filepath::AbstractString)
     return obsvars, obsvals
 end
 
+"""
+$(TYPEDSIGNATURES)
+
+Return the query variables listed in `query_filepath`, converted to 1-based
+indices. If the passed file path is an empty string, return an empty vector.
+
+The UAI file formats are defined in:
+https://uaicompetition.github.io/uci-2022/file-formats/
+"""
+function read_query_file(query_filepath::AbstractString)
+    isempty(query_filepath) && return Int64[]
+
+    # Read the first line of the uai query file
+    line = open(query_filepath) do file
+        readlines(file)
+    end |> first
+
+    # Separate the number of query vars and their indices
+    nqueryvars, queryvars_zero_based = split(line) |> x -> parse.(Int, x) |> x -> (x[1], x[2:end])
+
+    # Convert to 1-based indexing
+    queryvars = queryvars_zero_based .+ 1
+
+    @assert nqueryvars == length(queryvars)
+
+    return queryvars
+end
+
 function read_solution_file(solution_filepath::AbstractString; factor_eltype = Float64)
+
     result = Vector{factor_eltype}[]
     extension = splitext(solution_filepath)[2]
+
+    # Read the solution file into an array of lines
+    rawlines = open(solution_filepath) do file
+        readlines(file)
+    end
+
     if extension == ".MAR"
-        return read_mar_solution_file(solution_filepath; factor_eltype)
-    elseif extension == ".MAP" || extension == ".MMAP" || extension == ".PR"
-      # Return the last line of the file as a vector of integers
-      result = open(solution_filepath) do file
-          readlines(file)
-      end |> last |> split |> x -> parse.(Int, x)
+        result = parse_mar_solution_file(rawlines; factor_eltype)
+    elseif extension == ".MAP" || extension == ".MMAP"
+        # Parse all elements of the last line except the first as a vector of integers
+        result = last(rawlines) |> split |> x -> x[2:end] |> x -> parse.(Int, x)
+    elseif extension == ".PR"
+        # Parse the number in the last line as a floating-point number
+        result = last(rawlines) |> x -> parse(Float64, x)
     end
+
     return result
 end
 
 """
 $(TYPEDSIGNATURES)
 
-Return the marginals of all variables. The order of the variables is the same
-as in the model
+Parse the marginals of all variables from `rawlines`, the lines of a UAI MAR
+solution file. The order of the variables is the same as in the model definition.
 
 The UAI file formats are defined in:
-https://personal.utdallas.edu/~vibhav.gogate/uai16-evaluation/uaiformat.html
+https://uaicompetition.github.io/uci-2022/file-formats/
 """
-function read_mar_solution_file(solution_filepath::AbstractString; factor_eltype = Float64)
-
-    # Read the uai mar file into an array of lines
-    rawlines = open(solution_filepath) do file
-        readlines(file)
-    end
+function parse_mar_solution_file(rawlines::Vector{String}; factor_eltype = Float64)
 
     parsed_margs = split(rawlines[2]) |> x -> x[2:end] |> x -> parse.(factor_eltype, x)
 
@@ -192,13 +224,15 @@ Read a UAI problem instance from a file.
 function read_instance(
     model_filepath::AbstractString;
     evidence_filepath::AbstractString = "",
+    query_filepath::AbstractString = "",
     solution_filepath::AbstractString = "",
     eltype = Float64
 )::UAIInstance
     nvars, cards, ncliques, factors = read_model_file(model_filepath; factor_eltype = eltype)
     obsvars, obsvals = read_evidence_file(evidence_filepath)
+    queryvars = read_query_file(query_filepath)
     reference_solution = isempty(solution_filepath) ? Vector{eltype}[] : read_solution_file(solution_filepath)
-    return UAIInstance(nvars, ncliques, cards, factors, obsvars, obsvals, reference_solution)
+    return UAIInstance(nvars, ncliques, cards, factors, obsvars, obsvals, queryvars, reference_solution)
 end
 
 function read_instance_from_string(uai::AbstractString; eltype = Float64)::UAIInstance
diff --git a/test/Artifacts.toml b/test/Artifacts.toml
@@ -1,6 +1,6 @@
 [uai2014]
-git-tree-sha1 = "d05c5e541cc06f1cb4d8e24c1067e07472e36c24"
+git-tree-sha1 = "199ed43697fe22447c6c64a939b222fd4073f2d0"
 
     [[uai2014.download]]
-    sha256 = "73c91cd68931aec562499ab66ed2326771b829aa715e790609c6a1b86c9a9ad8"
+    sha256 = "5d93ced227cff3eb2ae7feb77dcb6c780212b47a0c0355dda8439de6f5b9d369"
     url = "https://github.com/mroavi/uai-2014-inference-competition/raw/main/uai2014.tar.gz"
diff --git a/test/cuda.jl b/test/cuda.jl
@@ -4,7 +4,7 @@ using TensorInference, CUDA
 CUDA.allowscalar(false)
 
 @testset "gradient-based tensor network solvers" begin
-    model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
     instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
 
     # does not optimize over open vertices
@@ -21,7 +21,7 @@ CUDA.allowscalar(false)
 end
 
 @testset "map" begin
-    model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
     instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
 
     # does not optimize over open vertices
@@ -36,7 +36,7 @@ end
 end
 
 @testset "mmap" begin
-    model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
     instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
 
     optimizer = TreeSA(ntrials = 1, niters = 2, βs = 1:0.1:40)
diff --git a/test/map.jl b/test/map.jl
@@ -3,7 +3,7 @@ using OMEinsum
 using TensorInference
 
 @testset "gradient-based tensor network solvers" begin
-    model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
     instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
 
     # does not optimize over open vertices
@@ -14,3 +14,13 @@ using TensorInference
     @test log_probability(tn, config) ≈ logp
     @test maximum_logp(tn)[] ≈ logp
 end
+
+@testset "UAI Reference Solution Comparison" begin
+    problem_name = "Promedas_70"
+    @info "Testing: $problem_name"
+    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths(problem_name, "MAP")
+    instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
+    tn = TensorNetworkModel(instance; optimizer = TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100))
+    _, solution = most_probable_config(tn)
+    @test solution == instance.reference_solution
+end
diff --git a/test/mar.jl b/test/mar.jl
@@ -12,7 +12,7 @@ using TensorInference
 end
 
 @testset "cached, rescaled contract" begin
-    model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
     instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
     ref_sol = instance.reference_solution
     optimizer = TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)
@@ -34,39 +34,27 @@ end
     @test isapprox(ti_sol, ref_sol; atol = 1e-5)
 end
 
-function get_problems_names(problem_set::String)
-    # Capture the problem names that belong to the current problem_set
-    regex = Regex("($(problem_set)_\\d*)(\\.uai)\$")
-    return readdir(joinpath(artifact"uai2014", "MAR"); sort = false) |>
-           x -> map(y -> match(regex, y), x) |> # apply regex
-                x -> filter(!isnothing, x) |> # filter out `nothing` values
-                     x -> map(first, x) # get the first capture of each element
-end
-
-@testset "gradient-based tensor network solvers" begin
+@testset "UAI Reference Solution Comparison" begin
     problem_sets = [
         #("Alchemy", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)),
         #("CSP", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)),
         #("DBN", KaHyParBipartite(sc_target = 25)),
         #("Grids", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)), # greedy also works
         #("linkage", TreeSA(ntrials = 3, niters = 20, βs = 0.1:0.1:40)), # linkage_15 fails
         #("ObjectDetection", TreeSA(ntrials = 1, niters = 5, βs = 1:0.1:100)),
-        #("Pedigree", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)), # greedy also works
-        ("Promedus", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)), # greedy also works
+        ("Pedigree", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)), # greedy also works
+        #("Promedus", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)), # greedy also works
         #("relational", TreeSA(ntrials=1, niters=5, βs=0.1:0.1:100)),
         ("Segmentation", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100))  # greedy also works
     ]
-
     for (problem_set, optimizer) in problem_sets
-        @testset "$(problem_set) problem_set" begin
-
+        @testset "$(problem_set) problem set" begin
             # Capture the problem names that belong to the current problem set
-            problem_names = get_problems_names(problem_set)
-
+            problem_names = get_problem_names(problem_set, "MAR")
             for problem_name in problem_names
                 @info "Testing: $problem_name"
                 @testset "$(problem_name)" begin
-                    model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths(problem_name, "MAR")
+                    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths(problem_name, "MAR")
                     instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
                     ref_sol = instance.reference_solution
                     obsvars = instance.obsvars
diff --git a/test/mmap.jl b/test/mmap.jl
@@ -8,24 +8,42 @@ using TensorInference
 end
 
 @testset "gradient-based tensor network solvers" begin
-    model_filepath, evidence_filepath, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
+    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths("Promedus_14", "MAR")
     instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
 
     optimizer = TreeSA(ntrials = 1, niters = 2, βs = 1:0.1:40)
     tn_ref = TensorNetworkModel(instance; optimizer)
-    # does not marginalize any var
+
+    # Does not marginalize any var
     mmap = MMAPModel(instance; marginalized = Int[], optimizer)
     @debug(mmap)
     @test maximum_logp(tn_ref) ≈ maximum_logp(mmap)
 
-    # marginalize all vars
+    # Marginalize all vars
     mmap2 = MMAPModel(instance; marginalized = collect(1:(instance.nvars)), optimizer)
     @debug(mmap2)
     @test Array(probability(tn_ref))[] ≈ exp(maximum_logp(mmap2)[])
 
-    # does not optimize over open vertices
+    # Does not optimize over open vertices
     mmap3 = MMAPModel(instance; marginalized = [2, 4, 6], optimizer)
     @debug(mmap3)
     logp, config = most_probable_config(mmap3)
     @test log_probability(mmap3, config) ≈ logp
-end
+
+end
+
+@testset "UAI Reference Solution Comparison" begin
+    problems = [
+        ("Segmentation_12", TreeSA(ntrials = 1, niters = 2, βs = 1:0.1:40)),
+        # ("Segmentation_13", TreeSA(ntrials = 1, niters = 2, βs = 1:0.1:40)), # fails!
+        # ("Segmentation_14", TreeSA(ntrials = 1, niters = 2, βs = 1:0.1:40))  # fails!
+    ]
+    for (problem_name, optimizer) in problems
+      @info "Testing: $problem_name"
+      model_filepath, evidence_filepath, query_filepath, solution_filepath = get_instance_filepaths(problem_name, "MMAP")
+      instance = read_instance(model_filepath; evidence_filepath, query_filepath, solution_filepath)
+      model = MMAPModel(instance; marginalized = setdiff(1:(instance.nvars), instance.queryvars), optimizer)
+      _, solution = most_probable_config(model)
+      @test solution == instance.reference_solution
+    end
+end
diff --git a/test/pr.jl b/test/pr.jl
@@ -0,0 +1,34 @@
+using Test
+using OMEinsum
+using KaHyPar
+using TensorInference
+
+@testset "UAI Reference Solution Comparison" begin
+    problem_sets = [
+        #("Alchemy", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)), # fails
+        #("CSP", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)),
+        #("DBN", KaHyParBipartite(sc_target = 25)),
+        #("Grids", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)), # fails
+        #("linkage", TreeSA(ntrials = 3, niters = 20, βs = 0.1:0.1:40)), # fails
+        #("ObjectDetection", TreeSA(ntrials = 1, niters = 5, βs = 1:0.1:100)),
+        ("Pedigree", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)),
+        #("Promedus", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100)),
+        #("relational", TreeSA(ntrials=1, niters=5, βs=0.1:0.1:100)), # fails
+        ("Segmentation", TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.1:100))
+    ]
+    for (problem_set_name, optimizer) in problem_sets
+        @testset "$(problem_set_name) problem set" begin
+            problem_names = get_problem_names(problem_set_name, "PR")
+            for problem_name in problem_names
+                @testset "$(problem_name)" begin
+                    @info "Testing: $problem_name"
+                    model_filepath, evidence_filepath, _, solution_filepath = get_instance_filepaths(problem_name, "PR")
+                    instance = read_instance(model_filepath; evidence_filepath, solution_filepath)
+                    tn = TensorNetworkModel(instance; optimizer)
+                    solution = probability(tn) |> first |> log10
+                    @test isapprox(solution, instance.reference_solution; atol = 1e-3)
+                end
+            end
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -17,6 +17,10 @@ end
     include("mmap.jl")
 end
 
+@testset "PR" begin
+    include("pr.jl")
+end
+
 using CUDA
 if CUDA.functional()
     include("cuda.jl")
diff --git a/test/utils.jl b/test/utils.jl
@@ -10,6 +10,18 @@ task.
 function get_instance_filepaths(problem_name::AbstractString, task::AbstractString)
     model_filepath = joinpath(artifact"uai2014", task, problem_name * ".uai")
     evidence_filepath = joinpath(artifact"uai2014", task, problem_name * ".uai.evid")
+    query_filepath = joinpath(artifact"uai2014", task, problem_name * ".uai.query")
     solution_filepath = joinpath(artifact"uai2014", task, problem_name * ".uai." * task)
-    return model_filepath, evidence_filepath, solution_filepath
+    return model_filepath, evidence_filepath, query_filepath, solution_filepath
+end
+
+"""
+Return the names of the problems in `problem_set` whose `.uai` files are found under `task`.
+"""
+function get_problem_names(problem_set::AbstractString, task::AbstractString)
+    regex = Regex("($(problem_set)_\\d*)(\\.uai)\$")
+    return readdir(joinpath(artifact"uai2014", task); sort = false) |>
+           x -> map(y -> match(regex, y), x) |> # apply regex
+                x -> filter(!isnothing, x) |> # filter out `nothing` values
+                     x -> map(first, x) # get the first capture of each element
 end