From 6b34003b003fa624aad9075e098b08df020d719d Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Tue, 6 Sep 2022 13:18:40 +0200
Subject: [PATCH 1/2] Reverse dispersion entropy

---
 src/Entropies.jl                             |  1 +
 src/dispersion/reverse_dispersion_entropy.jl | 95 ++++++++++++++++++++
 test/runtests.jl                             | 32 +++++++
 3 files changed, 128 insertions(+)
 create mode 100644 src/dispersion/reverse_dispersion_entropy.jl

diff --git a/src/Entropies.jl b/src/Entropies.jl
index c936d32d5..c7e2a8de7 100644
--- a/src/Entropies.jl
+++ b/src/Entropies.jl
@@ -8,6 +8,7 @@ module Entropies
     include("wavelet/wavelet.jl")
     include("nearest_neighbors/nearest_neighbors.jl")
     include("dispersion/dispersion_entropy.jl")
+    include("dispersion/reverse_dispersion_entropy.jl")
     include("tsallis/tsallis.jl")

 end
diff --git a/src/dispersion/reverse_dispersion_entropy.jl b/src/dispersion/reverse_dispersion_entropy.jl
new file mode 100644
index 000000000..fa92b3554
--- /dev/null
+++ b/src/dispersion/reverse_dispersion_entropy.jl
@@ -0,0 +1,95 @@
+export ReverseDispersion
+export entropy_reverse_dispersion
+export distance_to_whitenoise
+
+"""
+    ReverseDispersion(; s = GaussianSymbolization(n_categories = 5), m = 2, τ = 1, check_unique = false)
+
+A probability estimator using the reverse dispersion entropy technique from
+Li et al. (2019)[^Li2019].
+
+Although the reverse dispersion entropy is not intended as a probability estimator per se,
+it requires a step where probabilities are explicitly computed. Hence, we provide
+`ReverseDispersion` as a probability estimator.
+
+See [`entropy_reverse_dispersion`](@ref) for the meaning of parameters.
+
+!!! info
+    This estimator is only available for probability estimation.
+
+[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
+    complexity measure for sensor signal. Sensors, 19(23), 5203.
+"""
+Base.@kwdef struct ReverseDispersion <: ProbabilitiesEstimator
+    s = GaussianSymbolization(n_categories = 5)
+    m = 2
+    τ = 1
+    check_unique = false
+end
+
+function distance_to_whitenoise(𝐩::Probabilities, N, m)
+    # We can safely skip non-occurring symbols, because they don't contribute
+    # to the sum in eq. 3 in Li et al. (2019).
+    return sum(𝐩[i]^2 for i in eachindex(𝐩)) - 1/(N^m)
+end
+
+function probabilities(x::AbstractVector, est::ReverseDispersion)
+    if est.check_unique
+        if length(unique(x)) == 1
+            symbols = repeat([1], length(x))
+        else
+            symbols = symbolize(x, est.s)
+        end
+    else
+        symbols = symbolize(x, est.s)
+    end
+    m, τ = est.m, est.τ
+    τs = tuple((x for x in 0:-τ:-(m-1)*τ)...)
+    dispersion_patterns = genembed(symbols, τs, ones(m))
+    N = length(x)
+    𝐩 = Probabilities(dispersion_histogram(dispersion_patterns, N, est.m, est.τ))
+end
+
+"""
+    entropy_reverse_dispersion(x::AbstractVector;
+        s = GaussianSymbolization(n_categories = 5),
+        m = 2, τ = 1, normalize = true, check_unique = true)
+
+Estimate reverse dispersion entropy (Li et al., 2019)[^Li2019].
+
+Relative frequencies of dispersion patterns are computed using
+the symbolization scheme `s` with embedding dimension `m` and embedding delay `τ`.
+Recommended parameter values[^Li2019] are `m ∈ [2, 3]`, `τ = 1`, and
+`n_categories ∈ [3, 4, …, 8]` for the Gaussian mapping (defaults to 5).
+The total number of possible symbols is `n_categories^m`.
+If `normalize == true`, the result is normalized to `[0, 1]`.
+
+## Input data
+
+The input must have more than one unique element for the Gaussian mapping to be
+well-defined. If `check_unique == true` (default), then it is checked that the input has
+more than one unique value. If `check_unique == false` and the input only has one
+unique element, then an `InexactError` is thrown.
+
+See also: [`ReverseDispersion`](@ref).
+
+[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
+    complexity measure for sensor signal. Sensors, 19(23), 5203.
+"""
+function entropy_reverse_dispersion(x::AbstractVector{T};
+        s = GaussianSymbolization(n_categories = 5),
+        m = 2, τ = 1, normalize = true,
+        check_unique = true) where T <: Real
+
+    est = ReverseDispersion(s = s, m = m, τ = τ, check_unique = check_unique)
+    𝐩 = probabilities(x, est)
+    Hrde = distance_to_whitenoise(𝐩, s.n_categories, m)
+
+    if normalize
+        # The factor `f` considers *all* possible symbols (also non-occurring).
+        f = s.n_categories^m
+        return Hrde / (1 - (1/f))
+    else
+        return Hrde
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 850b7a072..8694efa5d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -381,4 +381,36 @@ end
         p = Probabilities(repeat([1/5], 5))
         @assert round(tsallisentropy(p, q = -1/2, k = 1), digits = 2) ≈ 6.79
     end
+
+    @testset "Reverse dispersion entropy" begin
+        est = ReverseDispersion()
+        @test Probabilities(est) isa Probabilities
+
+        # RDE is minimal when all probabilities are equal. Normalized RDE should then → 0.
+        m, n_categories = 3, 5
+        ps = Probabilities(repeat([1/n_categories^m], n_categories^m))
+        rde_eq = Entropies.distance_to_whitenoise(ps, n_categories, m)
+        @test round(rde_eq, digits = 10) ≈ 0.0
+
+        # RDE measures deviation from white noise, so for long enough
+        # time series, normalized values should approach zero.
+        rde = entropy_reverse_dispersion(rand(100000), m = 5, normalize = true)
+        @test round(rde, digits = 3) ≈ 0.0
+
+        # RDE is maximal when all symbol *embedding vectors* are equal.
+        # Normalized RDE should then → 1. This situation arises
+        # when the input only has one unique element.
+        # Note: the input repeat([1, 2], 10), for example, would *not* give equal
+        # probabilities, because symbolization occurs *before* the embedding, slightly
+        # skewing the probabilities due to data entries lost during embedding.
+        x = repeat([1.0], 100)
+        rde_max = entropy_reverse_dispersion(x, normalize = true)
+        @test rde_max ≈ 1.0
+
+
+        # In all situations except those above, RDE ∈ (0.0, 1.0).
+        x = repeat([1, 2, 3, 4, 5, 4, 3, 2, 1, 0], 100)
+        res = entropy_reverse_dispersion(x, normalize = true)
+        @test 0.0 < res < 1.0
+    end
 end

From c86980a3cf75a8fc007e31730c13b1cb9a3d3183 Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Tue, 6 Sep 2022 13:39:55 +0200
Subject: [PATCH 2/2] Fix test

---
 test/runtests.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 8694efa5d..5cc4b2b22 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -383,8 +383,9 @@ end
     end
 
     @testset "Reverse dispersion entropy" begin
+        x = rand(100)
         est = ReverseDispersion()
-        @test Probabilities(est) isa Probabilities
+        @test probabilities(x, est) isa Probabilities
 
         # RDE is minimal when all probabilities are equal. Normalized RDE should then → 0.
         m, n_categories = 3, 5
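
Below is a minimal usage sketch, not part of the patch itself, showing how the API introduced here might be called. It assumes the `Entropies` module exports `entropy_reverse_dispersion`, `ReverseDispersion`, `probabilities`, and `GaussianSymbolization` exactly as in the diff above; the input data and parameter choices are illustrative only.

using Entropies

# Noisy signal: the dispersion-pattern distribution is close to that of white
# noise, so the normalized reverse dispersion entropy should be close to 0.
x = rand(10_000)
h_noise = entropy_reverse_dispersion(x; s = GaussianSymbolization(n_categories = 5),
    m = 2, τ = 1, normalize = true)

# Constant signal: all embedding vectors are identical, which is maximally far
# from white noise, so the normalized value should be close to 1.
y = repeat([1.0], 1000)
h_const = entropy_reverse_dispersion(y; normalize = true)

# The underlying dispersion-pattern probabilities can also be obtained directly
# through the `ReverseDispersion` probabilities estimator.
est = ReverseDispersion(s = GaussianSymbolization(n_categories = 5), m = 2, τ = 1)
p = probabilities(x, est)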