From 6b34003b003fa624aad9075e098b08df020d719d Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Tue, 6 Sep 2022 13:18:40 +0200
Subject: [PATCH 1/2] Reverse dispersion entropy

---
 src/Entropies.jl                             |  1 +
 src/dispersion/reverse_dispersion_entropy.jl | 95 ++++++++++++++++++++
 test/runtests.jl                             | 32 +++++++
 3 files changed, 128 insertions(+)
 create mode 100644 src/dispersion/reverse_dispersion_entropy.jl

diff --git a/src/Entropies.jl b/src/Entropies.jl
index c936d32d5..c7e2a8de7 100644
--- a/src/Entropies.jl
+++ b/src/Entropies.jl
@@ -8,6 +8,7 @@ module Entropies
     include("wavelet/wavelet.jl")
     include("nearest_neighbors/nearest_neighbors.jl")
     include("dispersion/dispersion_entropy.jl")
+    include("dispersion/reverse_dispersion_entropy.jl")
     include("tsallis/tsallis.jl")

 end
diff --git a/src/dispersion/reverse_dispersion_entropy.jl b/src/dispersion/reverse_dispersion_entropy.jl
new file mode 100644
index 000000000..fa92b3554
--- /dev/null
+++ b/src/dispersion/reverse_dispersion_entropy.jl
@@ -0,0 +1,95 @@
+export ReverseDispersion
+export entropy_reverse_dispersion
+export distance_to_whitenoise
+
+"""
+    ReverseDispersion(; s = GaussianSymbolization(n_categories = 5), m = 2, τ = 1, check_unique = false)
+
+A probability estimator using the reverse dispersion entropy technique from
+Li et al. (2019)[^Li2019].
+
+Although the reverse dispersion entropy is not intended as a probability estimator per se,
+it requires a step where probabilities are explicitly computed. Hence, we provide
+`ReverseDispersion` as a probability estimator.
+
+See [`entropy_reverse_dispersion`](@ref) for the meaning of parameters.
+
+!!! info
+    This estimator is only available for probability estimation.
+
+[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
+    complexity measure for sensor signal. Sensors, 19(23), 5203.
+"""
+Base.@kwdef struct ReverseDispersion <: ProbabilitiesEstimator
+    s = GaussianSymbolization(n_categories = 5)
+    m = 2
+    τ = 1
+    check_unique = false
+end
+
+function distance_to_whitenoise(𝐩::Probabilities, N, m)
+    # We can safely skip non-occurring symbols, because they don't contribute
+    # to the sum in eq. 3 in Li et al. (2019).
+    return sum(𝐩[i]^2 for i in eachindex(𝐩)) - 1/(N^m)
+end
+
+function probabilities(x::AbstractVector, est::ReverseDispersion)
+    if est.check_unique
+        if length(unique(x)) == 1
+            symbols = repeat([1], length(x))
+        else
+            symbols = symbolize(x, est.s)
+        end
+    else
+        symbols = symbolize(x, est.s)
+    end
+    m, τ = est.m, est.τ
+    τs = tuple((x for x in 0:-τ:-(m-1)*τ)...)
+    dispersion_patterns = genembed(symbols, τs, ones(m))
+    N = length(x)
+    𝐩 = Probabilities(dispersion_histogram(dispersion_patterns, N, est.m, est.τ))
+end
+
+"""
+    entropy_reverse_dispersion(x::AbstractVector;
+        s = GaussianSymbolization(n_categories = 5),
+        m = 2, τ = 1, normalize = true, check_unique = true)
+
+Estimate reverse dispersion entropy (Li et al., 2019)[^Li2019].
+
+Relative frequencies of dispersion patterns are computed using
+the symbolization scheme `s` with embedding dimension `m` and embedding delay `τ`.
+Recommended parameter values[^Li2019] are `m ∈ [2, 3]`, `τ = 1`, and
+`n_categories ∈ [3, 4, …, 8]` for the Gaussian mapping (defaults to 5).
+The total number of possible symbols is `n_categories^m`.
+If `normalize == true`, the result is normalized to `[0, 1]`.
+
+## Input data
+
+The input must have more than one unique element for the Gaussian mapping to be
+well-defined. If `check_unique == true` (default), then it is checked that the input has
+more than one unique value. If `check_unique == false` and the input only has one
+unique element, then an `InexactError` is thrown.
+
+See also: [`ReverseDispersion`](@ref).
+
+[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
+    complexity measure for sensor signal. Sensors, 19(23), 5203.
+"""
+function entropy_reverse_dispersion(x::AbstractVector{T};
+        s = GaussianSymbolization(n_categories = 5),
+        m = 2, τ = 1, normalize = true,
+        check_unique = true) where T <: Real
+
+    est = ReverseDispersion(s = s, m = m, τ = τ, check_unique = check_unique)
+    𝐩 = probabilities(x, est)
+    Hrde = distance_to_whitenoise(𝐩, s.n_categories, m)
+
+    if normalize
+        # The factor `f` considers *all* possible symbols (also non-occurring).
+        f = s.n_categories^m
+        return Hrde / (1 - (1/f))
+    else
+        return Hrde
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 850b7a072..8694efa5d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -381,4 +381,36 @@ end
         p = Probabilities(repeat([1/5], 5))
         @assert round(tsallisentropy(p, q = -1/2, k = 1), digits = 2) ≈ 6.79
     end
+
+    @testset "Reverse dispersion entropy" begin
+        est = ReverseDispersion()
+        @test Probabilities(est) isa Probabilities
+
+        # RDE is minimal when all probabilities are equal. Normalized RDE should then → 0.
+        m, n_categories = 3, 5
+        ps = Probabilities(repeat([1/n_categories^m], n_categories^m))
+        rde_eq = Entropies.distance_to_whitenoise(ps, n_categories, m)
+        @test round(rde_eq, digits = 10) ≈ 0.0
+
+        # RDE measures deviation from white noise, so for long enough
+        # time series, normalized values should approach zero.
+        rde = entropy_reverse_dispersion(rand(100000), m = 5, normalize = true)
+        @test round(rde, digits = 3) ≈ 0.0
+
+        # RDE is maximal when all symbol *embedding vectors* are equal.
+        # Normalized RDE should then → 1. This situation arises
+        # when the input only has one unique element.
+        # Note: the input repeat([1, 2], 10), for example, would *not* give equal
+        # probabilities, because symbolization occurs *before* the embedding, slightly
+        # skewing the probabilities due to data entries lost during embedding.
+        x = repeat([1.0], 100)
+        rde_max = entropy_reverse_dispersion(x, normalize = true)
+        @test rde_max ≈ 1.0
+
+
+        # In all situations except those above, RDE ∈ (0.0, 1.0).
+        x = repeat([1, 2, 3, 4, 5, 4, 3, 2, 1, 0], 100)
+        res = entropy_reverse_dispersion(x, normalize = true)
+        @test 0.0 < res < 1.0
+    end
 end

From c86980a3cf75a8fc007e31730c13b1cb9a3d3183 Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Tue, 6 Sep 2022 13:39:55 +0200
Subject: [PATCH 2/2] Fix test

---
 test/runtests.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 8694efa5d..5cc4b2b22 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -383,8 +383,9 @@ end
     end
 
     @testset "Reverse dispersion entropy" begin
+        x = rand(100)
         est = ReverseDispersion()
-        @test Probabilities(est) isa Probabilities
+        @test probabilities(x, est) isa Probabilities
 
         # RDE is minimal when all probabilities are equal. Normalized RDE should then → 0.
         m, n_categories = 3, 5
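
Below is a minimal usage sketch, not part of the patch itself, showing how the API introduced here might be called. It assumes the `Entropies` module exports `entropy_reverse_dispersion`, `ReverseDispersion`, `probabilities`, and `GaussianSymbolization` exactly as in the diff above; the input data and parameter choices are illustrative only.

using Entropies

# Noisy signal: the dispersion-pattern distribution is close to that of white
# noise, so the normalized reverse dispersion entropy should be close to 0.
x = rand(10_000)
h_noise = entropy_reverse_dispersion(x; s = GaussianSymbolization(n_categories = 5),
    m = 2, τ = 1, normalize = true)

# Constant signal: all embedding vectors are identical, which is maximally far
# from white noise, so the normalized value should be close to 1.
y = repeat([1.0], 1000)
h_const = entropy_reverse_dispersion(y; normalize = true)

# The underlying dispersion-pattern probabilities can also be obtained directly
# through the `ReverseDispersion` probabilities estimator.
est = ReverseDispersion(s = GaussianSymbolization(n_categories = 5), m = 2, τ = 1)
p = probabilities(x, est)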