Added the No-U-Turn sampler #132

Open

wants to merge 22 commits into base: advancedHMC

Changes from 3 commits

Commits (22)
bd90b03
changed name of previous hmc routine, changed to column major order
Red-Portal Dec 21, 2019
891eb85
Added effective size diagnostic, StatsBase dependecy
Red-Portal Dec 21, 2019
7df6981
added the No-U-Turn sampler, nuts, based on AdvancedHMC.jl
Red-Portal Dec 21, 2019
f968d5c
CompatHelper: bump compat for "Distributions" to "0.22"
github-actions[bot] Jan 6, 2020
26bf6a5
fix Documenter versions conflict
maximerischard Jan 12, 2020
cb72748
CompatHelper: bump compat for "SpecialFunctions" to "0.10"
github-actions[bot] Jan 20, 2020
3d1d1e0
Update Project.toml
fairbrot Jan 22, 2020
9561252
add a sinusoid to underlying function in optim tests
maximerischard Jan 25, 2020
6090cb7
CompatHelper: bump compat for "Optim" to "0.20"
github-actions[bot] Jan 23, 2020
1bf5f47
CompatHelper: bump compat for "RecipesBase" to "0.8"
github-actions[bot] Feb 6, 2020
db4b978
Merge branch 'master' of https://github.com/STOR-i/GaussianProcesses.…
Red-Portal Feb 10, 2020
0c56059
removed effective sample size calculation, StatsBase dependency
Red-Portal Feb 10, 2020
aee587e
removed StatsBase dependency
Red-Portal Feb 10, 2020
5c7b351
removed effective sample size calculation, StatsBase dependency
Red-Portal Feb 10, 2020
9a4ebc7
Merge branch 'advancedHMC' of github.com:Red-Portal/GaussianProcesses…
Red-Portal Feb 10, 2020
4f5d333
fixed bug in the elliptical slice implementation
Red-Portal Feb 10, 2020
a2ccbfb
Merge pull request #138 from STOR-i/compathelper/new_version/2020-02-…
chris-nemeth Feb 10, 2020
f8b65e3
Merge pull request #139 from Red-Portal/master
thomaspinder Feb 11, 2020
139a91b
Update notebook with ESS
thomaspinder Feb 11, 2020
40f3017
Version update
fairbrot Feb 12, 2020
a80e44c
fixed StanHMCAdaptor deprication warning
Red-Portal Mar 11, 2020
f3795e7
Merge branch 'master' of https://github.com/STOR-i/GaussianProcesses.…
Red-Portal Mar 11, 2020
1 change: 1 addition & 0 deletions Project.toml
@@ -23,6 +23,7 @@ ScikitLearnBase = "6e75b9c4-186b-50bd-896f-2d2496a4843e"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

3 changes: 2 additions & 1 deletion src/GaussianProcesses.jl
@@ -6,6 +6,7 @@ using StatsFuns, SpecialFunctions

using LinearAlgebra, Printf, Random, Statistics
import Statistics: mean, cov
import StatsBase: autocor!
import Base: size
import PDMats: dim, Matrix, diag, pdadd!, *, \, inv, logdet, eigmax, eigmin, whiten!, unwhiten!, quad, quad!, invquad, invquad!, X_A_Xt, Xt_A_X, X_invA_Xt, Xt_invA_X

@@ -14,7 +15,7 @@ import PDMats: dim, Matrix, diag, pdadd!, *, \, inv, logdet, eigmax, eigmin, whi
export GPBase, GP, GPE, GPA, ElasticGPE, Approx, predict_f, predict_y, Kernel, Likelihood, CompositeKernel, SumKernel, ProdKernel, Masked, FixedKernel, fix, Noise, Const, SE, SEIso, SEArd, Periodic, Poly, RQ, RQIso, RQArd, Lin, LinIso, LinArd, Matern, Mat12Iso, Mat12Ard, Mat32Iso, Mat32Ard, Mat52Iso, Mat52Ard, #kernel functions
MeanZero, MeanConst, MeanLin, MeanPoly, SumMean, ProdMean, MeanPeriodic, #mean functions
GaussLik, BernLik, ExpLik, StuTLik, PoisLik, BinLik, #likelihood functions
mcmc, ess, lss, optimize!, vi, var_exp, dv_var_exp, elbo, initialise_Q, #inference functions
mcmc, ess, hmc, nuts, nuts_hamiltonian, lss, optimize!, vi, var_exp, dv_var_exp, elbo, initialise_Q, #inference functions
set_priors!,set_params!, update_target!, autodiff, update_Q!
using ForwardDiff: GradientConfig, Dual, partials, copyto!, Chunk
import ForwardDiff: seed!
131 changes: 115 additions & 16 deletions src/mcmc.jl
@@ -1,13 +1,111 @@
using ProgressMeter

"""
mcmc(gp::GPBase; kwargs...)
effective_sample_size(chain::AbstractMatrix)

Runs Hamiltonian Monte Carlo algorithm for estimating the hyperparameters of Gaussian process `GPE` and the latent function in the case of `GPA`.
Routine for computing the effective sample size, following

Gelman, Andrew, et al., 2013. Bayesian Data Analysis, 3rd ed.

The samples are assumed to be in column-major order.
"""
function mcmc(gp::GPBase; nIter::Int=1000, burn::Int=1, thin::Int=1, ε::Float64=0.1,

function effective_sample_size(X::AbstractMatrix)
Review comment (Contributor): Why does compute_ess need to be defined inside of effective_sample_size?

Reply (Contributor, PR author): The name is indeed confusing. I'll change it.

function compute_ess(ρ_scalar)
τ_inv = 1 + 2 * ρ_scalar[1]
K = 2
for k = 2:2:N-2
Δ = ρ_scalar[k] + ρ_scalar[k + 1]
if Δ < 0
break
else
τ_inv += 2*Δ
end
end
return min(1 / τ_inv, one(τ_inv))
end

N = size(X, 1)
lags = collect(1:N-1)
ρ = zeros(length(lags), size(X, 2))
autocor!(ρ, X, lags)
return N * [compute_ess(ρ[:,i]) for i = 1:size(X,2)]
end
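
For orientation, the truncation in compute_ess above implements, per column of `X`, the standard estimator from the reference in the docstring; roughly,

\[
\widehat{\mathrm{ESS}} \approx \min\!\left(\frac{N}{1 + 2\sum_{t=1}^{T}\hat{\rho}_t},\; N\right),
\]

where the sum over lag autocorrelations is cut off at the first even lag at which a consecutive pair sums to a negative value, and the result is capped at N.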

function mcmc(gp::GPBase; nIter::Int=1000, burn::Int=100, thin::Int=1, ε::Float64=0.1,
Lmin::Int=5, Lmax::Int=15, lik::Bool=true, noise::Bool=true,
domean::Bool=true, kern::Bool=true)
return hmc(gp, nIter=nIter, burn=burn, thin=thin, ε=ε, Lmin=Lmin, Lmax=Lmax,
lik=lik, noise=noise, domean=domean, kern=kern)
end

"""
nuts_hamiltonian(gp::GPBase; metric=DiagEuclideanMetric(...), kwargs...)

Generate the AdvancedHMC.jl Hamiltonian for the GP target, so that a custom
metric or Hamiltonian can be constructed once and passed to `nuts`.
"""
function nuts_hamiltonian(gp::GPBase; lik::Bool=true, noise::Bool=true, domean::Bool=true, kern::Bool=true,
metric=DiagEuclideanMetric(num_params(gp; get_params_kwargs(gp, domean=domean, kern=kern, noise=noise, lik=lik)...)))
precomp = init_precompute(gp)
params_kwargs = get_params_kwargs(gp; domean=domean, kern=kern, noise=noise, lik=lik)
function calc_target_and_dtarget!(θ::AbstractVector)
set_params!(gp, θ; params_kwargs...)
# Cholesky exceptions are handled by DynamicHMC
update_target_and_dtarget!(gp, precomp; params_kwargs...)
return (gp.target, gp.dtarget)
end

function calc_target!(θ::AbstractVector)
set_params!(gp, θ; params_kwargs...)
update_target!(gp; params_kwargs...)
return gp.target
end
return Hamiltonian(metric, calc_target!, calc_target_and_dtarget!)
end
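
A minimal sketch of the intended use, mirroring the `Use` testset near the end of this diff; the data `X`, `y` and the SE kernel here are illustrative stand-ins, not taken from the PR:

    using GaussianProcesses, Distributions, AdvancedHMC

    X = rand(1, 20)                        # illustrative data, 1 feature x 20 observations
    y = sin.(vec(X)) .+ 0.1 .* randn(20)
    gp = GP(X, y, MeanZero(), SE(0.0, 0.0))
    set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for _ in 1:2])

    # Build the Hamiltonian once with a non-default metric, then hand it to `nuts`
    kwargs = GaussianProcesses.get_params_kwargs(gp; domean=true, kern=true, noise=true, lik=true)
    metric = AdvancedHMC.DenseEuclideanMetric(GaussianProcesses.num_params(gp; kwargs...))
    h = nuts_hamiltonian(gp, metric=metric)

    chain = nuts(gp, nIter=1000, burn=100, metric=metric, hamiltonian=h, progress=false)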

"""
nuts(gp::GPBase; kwargs...)

Runs the No-U-Turn sampler (NUTS) from AdvancedHMC.jl for estimating the hyperparameters of
a Gaussian process `GPE`, or the latent function in the case of `GPA`.
Refer to AdvancedHMC.jl for details of the keyword options.
"""
function nuts(gp::GPBase; nIter::Int=1000, burn::Int=100, thin::Int=1,
lik::Bool=true, noise::Bool=true, domean::Bool=true, kern::Bool=true,
metric=DiagEuclideanMetric(num_params(gp; get_params_kwargs(gp, domean=domean, kern=kern, noise=noise, lik=lik)...)),
hamiltonian=nuts_hamiltonian(gp; metric=metric),
ε::Float64=find_good_eps(hamiltonian, get_params(gp; get_params_kwargs(gp, domean=domean, kern=kern, noise=noise, lik=lik)...)),
maxDepth::Int64=10, δ::Float64=0.8, integrator=Leapfrog(ε),
proposals=NUTS{MultinomialTS, GeneralisedNoUTurn}(integrator, maxDepth),
adaptor=StanHMCAdaptor(burn, Preconditioner(metric), NesterovDualAveraging(δ, integrator)),
progress=true)
params_kwargs = get_params_kwargs(gp; domean=domean, kern=kern, noise=noise, lik=lik)
θ_init = get_params(gp; params_kwargs...)
dim = length(θ_init)
post, stats = sample(hamiltonian, proposals, θ_init, nIter - burn, adaptor,
burn; drop_warmup=true, progress=progress, verbose=false)
post = hcat(post...)
post = post[:,1:thin:end]
set_params!(gp, θ_init; params_kwargs...)

step_stats = [[step_stat.acceptance_rate, step_stat.tree_depth] for step_stat in stats]
avg_accept, avg_depth = mean(step_stats)
ε = stats[end-1].step_size
@printf("Number of iterations = %d, Thinning = %d, Burn-in = %d \n", nIter,thin,burn)
@printf("Step size = %f, Average tree depth = %f \n", ε,avg_depth)
@printf("Acceptance rate: %f \n", avg_accept)
@printf("Average effective sample size: %f\n", mean(effective_sample_size(post')))
return post
end
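
A minimal call with the defaults, assuming a `gp::GPBase` whose priors are already set (e.g. the sketch above); the returned matrix holds one retained draw per column:

    chain = nuts(gp, nIter=1000, burn=200)
    size(chain, 2)   # number of retained (post-burn-in, thinned) samples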

"""
hmc(gp::GPBase; kwargs...)

Runs the Hamiltonian Monte Carlo algorithm for estimating the hyperparameters of
a Gaussian process `GPE`, or the latent function in the case of `GPA`.
"""
function hmc(gp::GPBase; nIter::Int=1000, burn::Int=100, thin::Int=1, ε::Float64=0.1,
Lmin::Int=5, Lmax::Int=15, lik::Bool=true, noise::Bool=true,
domean::Bool=true, kern::Bool=true)
precomp = init_precompute(gp)
params_kwargs = get_params_kwargs(gp; domean=domean, kern=kern, noise=noise, lik=lik)
count = 0
@@ -33,8 +131,8 @@ function mcmc(gp::GPBase; nIter::Int=1000, burn::Int=1, thin::Int=1, ε::Float64
θ_cur = get_params(gp; params_kwargs...)
D = length(θ_cur)
leapSteps = 0 #accumulator to track number of leap-frog steps
post = Array{Float64}(undef, nIter, D) #posterior samples
post[1,:] = θ_cur
post = Array{Float64}(undef, D, nIter) #posterior samples
post[:,1] = θ_cur

@assert calc_target!(gp, θ_cur)
target_cur, grad_cur = gp.target, gp.dtarget
@@ -61,7 +159,7 @@ function mcmc(gp::GPBase; nIter::Int=1000, burn::Int=1, thin::Int=1, ε::Float64
ν -= 0.5*ε * grad

if reject
post[t,:] = θ_cur
post[:,t] = θ_cur
else
α = target - 0.5 * ν'ν - target_cur + 0.5 * ν_cur'ν_cur
u = log(rand())
@@ -72,19 +170,19 @@ function mcmc(gp::GPBase; nIter::Int=1000, burn::Int=1, thin::Int=1, ε::Float64
target_cur = target
grad_cur = grad
end
post[t,:] = θ_cur
post[:,t] = θ_cur
end
end
post = post[burn:thin:end,:]
post = post[:,burn:thin:end]
set_params!(gp, θ_cur; params_kwargs...)
@printf("Number of iterations = %d, Thinning = %d, Burn-in = %d \n", nIter,thin,burn)
@printf("Step size = %f, Average number of leapfrog steps = %f \n", ε,leapSteps/nIter)
println("Number of function calls: ", count)
@printf("Acceptance rate: %f \n", num_acceptances/nIter)
return post'
@printf("Average effective sample size: %f\n", mean(effective_sample_size(post')))
return post
end
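
For comparison, a sketch of the renamed fixed-step-size routine on the same assumed `gp`; `ε`, `Lmin` and `Lmax` are the leapfrog knobs from the signature above:

    chain = hmc(gp, nIter=1000, burn=100, thin=2, ε=0.05, Lmin=5, Lmax=15)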


"""
ess(gp::GPBase; kwargs...)

@@ -95,7 +193,7 @@ Journal of Machine Learning Research 9 (2010): 541-548.

Requires hyperparameter priors to be Gaussian.
"""
function ess(gp::GPE; nIter::Int=1000, burn::Int=1, thin::Int=1, lik::Bool=true,
function ess(gp::GPE; nIter::Int=1000, burn::Int=100, thin::Int=1, lik::Bool=true,
noise::Bool=true, domean::Bool=true, kern::Bool=true)
params_kwargs = get_params_kwargs(gp; domean=domean, kern=kern, noise=noise, lik=lik)
count = 0
@@ -141,20 +239,21 @@ function ess(gp::GPE; nIter::Int=1000, burn::Int=1, thin::Int=1, lik::Bool=true,
total_proposals = 0
θ_cur = get_params(gp; params_kwargs...)
D = length(θ_cur)
post = Array{Float64}(undef, nIter, D)
post = Array{Float64}(undef, D, nIter)

for i = 1:nIter
θ_cur, num_proposals = sample!(θ_cur)
post[i,:] = θ_cur
post[:,i] = θ_cur
total_proposals += num_proposals
end

post = post[burn:thin:end,:]
post = post[:,burn:thin:end]
set_params!(gp, θ_cur; params_kwargs...)
@printf("Number of iterations = %d, Thinning = %d, Burn-in = %d \n", nIter,thin,burn)
println("Number of function calls: ", count)
@printf("Acceptance rate: %f \n", nIter / total_proposals)
return post'
@printf("Average effective sample size: %f\n", mean(effective_sample_size(post')))
return post
end
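
A sketch mirroring the elliptical-slice testset at the end of this diff, assuming `X`, `y` and `kern` as in the test file; only the log-noise carries the required Gaussian prior here:

    gpess = GP(X, y, MeanZero(), kern)   # constructor is an illustrative assumption
    set_priors!(gpess.logNoise, [Distributions.Normal(-1.0, 1.0)])
    ess_chain = ess(gpess)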


56 changes: 53 additions & 3 deletions test/mcmc.jl
@@ -1,5 +1,6 @@
module TestMCMC
using GaussianProcesses, Distributions
using AdvancedHMC
using Test, Random

Random.seed!(1)
@@ -13,18 +14,67 @@ Random.seed!(1)
kern = RQ(1.0, 1.0, 1.0)

# Just checks that it doesn't crash
@testset "Legacy MCMC" begin
@testset "Without likelihood" begin
gp = GP(X, y, MeanZero(), kern)
set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for i in 1:3])
global hmc_chain = mcmc(gp, ε=0.05)
end

@testset "With likelihood" begin
lik = GaussLik(-1.0)
gp = GP(X, y, MeanZero(), kern, lik)
set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for i in 1:3])
mcmc(gp, ε=0.05)
end
end

@testset "HMC" begin
@testset "Without likelihood" begin
gp = GP(X, y, MeanZero(), kern)
set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for i in 1:3])
global hmc_chain = mcmc(gp)
global hmc_chain = hmc(gp, ε=0.05)
end

@testset "With likelihood" begin
lik = GaussLik(-1.0)
gp = GP(X, y, MeanZero(), kern, lik)
set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for i in 1:3])
mcmc(gp)
hmc(gp, ε=0.05)
end
end

@testset "AdvancedHMC" begin
@testset "Without likelihood" begin
gp = GP(X, y, MeanZero(), kern)
set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for i in 1:3])
global hmc_chain = nuts(gp, progress=false)
end

@testset "With likelihood" begin
lik = GaussLik(-1.0)
gp = GP(X, y, MeanZero(), kern, lik)
set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for i in 1:3])
nuts(gp, nIter=1000, burn=200, progress=true)
end

@testset "Use" begin
lik = GaussLik(-1.0)
gp = GP(X, y, MeanZero(), kern, lik)
set_priors!(gp.kernel, [Distributions.Normal(-1.0, 1.0) for i in 1:3])
kwargs = GaussianProcesses.get_params_kwargs(
gp; domean=true, kern=true, noise=true, lik=true)

metric = AdvancedHMC.DenseEuclideanMetric(
GaussianProcesses.num_params(gp; kwargs...))
hamiltonian = nuts_hamiltonian(gp, metric=metric)
ε = 0.1
integrator = AdvancedHMC.Leapfrog(ε)
prop = AdvancedHMC.NUTS{SliceTS, ClassicNoUTurn}(integrator)
adaptor = AdvancedHMC.NaiveHMCAdaptor(
Preconditioner(metric), NesterovDualAveraging(0.8, integrator))
nuts(gp, nIter=1000, burn=100, metric=metric, hamiltonian=hamiltonian,
ε=ε, integrator=integrator, proposals=prop, adaptor=adaptor, progress=false)
end
end

@@ -34,6 +84,6 @@ Random.seed!(1)
set_priors!(gpess.logNoise, [Distributions.Normal(-1.0, 1.0)])
global ess_chain = ess(gpess)
end

end
end