diff --git a/.github/workflows/generate_website.yml b/.github/workflows/generate_website.yml
index 94e51e1..cc1cf7e 100644
--- a/.github/workflows/generate_website.yml
+++ b/.github/workflows/generate_website.yml
@@ -53,6 +53,8 @@ jobs:
       - name: Setup keys
         id: keys
         run: uv run ad.py setup
+        env:
+          DATADEPS_ALWAYS_ACCEPT: "true"
 
   run-models:
     runs-on: ubuntu-latest
@@ -87,6 +89,7 @@ jobs:
         run: uv run ad.py run --model ${{ matrix.model }}
         env:
           ADTYPE_KEYS: ${{ needs.setup-keys.outputs.adtype_keys }}
+          DATADEPS_ALWAYS_ACCEPT: "true"
 
       - name: Output matrix values
         id: output-matrix
diff --git a/Project.toml b/Project.toml
index c4b461f..73e7df8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,13 +3,16 @@ ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
+MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/main.jl b/main.jl
index 30282fb..aa5b6c6 100644
--- a/main.jl
+++ b/main.jl
@@ -35,6 +35,7 @@ end
 # These imports tend to get used a lot in models
 using DynamicPPL: @model, to_submodel
 using Distributions
+using DistributionsAD: filldist, arraydist
 using LinearAlgebra
 
 include("models/assume_beta.jl")
@@ -74,6 +75,10 @@ include("models/observe_submodel.jl")
 include("models/pdb_eight_schools_centered.jl")
 include("models/pdb_eight_schools_noncentered.jl")
 
+include("models/dppl_gauss_unknown.jl")
+include("models/dppl_high_dim_gauss.jl")
+include("models/dppl_naive_bayes.jl")
+
 # The entry point to this script itself begins here
 if ARGS == ["--list-model-keys"]
     foreach(println, sort(collect(keys(MODELS))))
diff --git a/models/dppl_gauss_unknown.jl b/models/dppl_gauss_unknown.jl
new file mode 100644
index 0000000..a79a212
--- /dev/null
+++ b/models/dppl_gauss_unknown.jl
@@ -0,0 +1,12 @@
+n = 10_000
+s = abs(rand()) + 0.5
+y = randn() .+ s * randn(n)
+
+@model function dppl_gauss_unknown(y)
+    N = length(y)
+    m ~ Normal(0, 1)
+    s ~ truncated(Cauchy(0, 5); lower=0)
+    y ~ filldist(Normal(m, s), N)
+end
+
+@register dppl_gauss_unknown(y)
diff --git a/models/dppl_high_dim_gauss.jl b/models/dppl_high_dim_gauss.jl
new file mode 100644
index 0000000..2275b6a
--- /dev/null
+++ b/models/dppl_high_dim_gauss.jl
@@ -0,0 +1,5 @@
+@model function dppl_high_dim_gauss(D)
+    m ~ filldist(Normal(0, 1), D)
+end
+
+@register dppl_high_dim_gauss(10_000)
diff --git a/models/dppl_naive_bayes.jl b/models/dppl_naive_bayes.jl
new file mode 100644
index 0000000..3ed7f4f
--- /dev/null
+++ b/models/dppl_naive_bayes.jl
@@ -0,0 +1,27 @@
+using MLDatasets: MNIST
+using MultivariateStats: fit, PCA, transform
+
+# Load MNIST images and labels
+features = MNIST(split=:train).features
+nrows, ncols, nimages = size(features)
+image_raw = Float64.(reshape(features, (nrows * ncols, nimages)))
+labels = MNIST(split=:train).targets .+ 1
+C = 10  # Number of labels
+
+# Preprocess the images by reducing dimensionality
+D = 40
+pca = fit(PCA, image_raw; maxoutdim=D)
+image = transform(pca, image_raw)
+
+# Take only the first 1000 images and vectorise
+N = 1000
+image_subset = image[:, 1:N]'
+image_vec = vec(image_subset[:, :])
+labels = labels[1:N]
+
+@model dppl_naive_bayes(image_vec, labels, C, D) = begin
+    m ~ filldist(Normal(0, 10), C, D)
+    image_vec ~ MvNormal(vec(m[labels, :]), I)
+end
+
+@register dppl_naive_bayes(image_vec, labels, C, D)