Addressing review comments #99

Merged

1 change: 1 addition & 0 deletions docs/src/complexity_measures.md
@@ -8,6 +8,7 @@

```@docs
reverse_dispersion
distance_to_whitenoise
```

## Disequilibrium
11 changes: 11 additions & 0 deletions docs/src/entropies.md
@@ -13,10 +13,21 @@ entropy_tsallis
```

## Shannon entropy (convenience)

```@docs
entropy_shannon
```

## Normalization

The generic [`entropy_normalized`](@ref) normalizes any entropy value to the entropy of a
uniform distribution. We also provide [maximum entropy](@ref maximum_entropy) functions
that are useful for manual normalization.

```@docs
entropy_normalized
```
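
A minimal usage sketch (assuming the `Dispersion` estimator and the `alphabet_length`/`maxentropy_renyi` helpers documented elsewhere in this PR; exact values depend on the input):

```julia
using Entropies

x = rand(1000)
est = Dispersion()

# Normalize directly against the entropy of a uniform distribution ...
h_norm = entropy_normalized(entropy_renyi, x, est)

# ... or normalize manually via a maximum-entropy helper.
h = entropy_renyi(x, est)
h_norm_manual = h / maxentropy_renyi(alphabet_length(est))
```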

## Indirect entropies
Here we list functions that compute Shannon entropies via alternate means, without explicitly computing a probability distribution and then applying the Shannon formula.

8 changes: 3 additions & 5 deletions docs/src/examples.md
@@ -171,13 +171,11 @@ des = zeros(length(windows))
pes = zeros(length(windows))

m, c = 2, 6
scheme = GaussianSymbolization(c)
est_de = Dispersion(s = scheme, m = m, τ = 1, normalize = true)
est_de = Dispersion(symbolization = GaussianSymbolization(c), m = m, τ = 1)

for (i, window) in enumerate(windows)
rdes[i] = reverse_dispersion(y[window];
s = scheme, m = m, τ = 1, normalize = true)
des[i] = entropy_renyi(y[window], est_de)
rdes[i] = reverse_dispersion(y[window], est_de; normalize = true)
des[i] = entropy_renyi_norm(y[window], est_de)
end

fig = Figure()
14 changes: 14 additions & 0 deletions docs/src/utils.md
@@ -26,3 +26,17 @@ OrdinalPattern
```@docs
Entropies.encode_motif
```

### Normalization

```@docs
alphabet_length
```

#### [Maximum entropy](@id maximum_entropy)

```@docs
maxentropy_tsallis
maxentropy_renyi
maxentropy_shannon
```
87 changes: 36 additions & 51 deletions src/complexity_measures/reverse_dispersion_entropy.jl
@@ -1,91 +1,76 @@
export reverse_dispersion
export distance_to_whitenoise

function distance_to_whitenoise(p::Probabilities, n_classes, m; normalize = false)
# Note: this is not an entropy estimator, so we don't use the entropy_xxx_norm interface
# for normalization, even though we rely on `alphabet_length`.
"""
distance_to_whitenoise(p::Probabilities, estimator::Dispersion; normalize = false)

Compute the distance of the probability distribution `p` from a uniform distribution,
given the parameters of `estimator` (which must be known beforehand).

If `normalize == true`, then normalize the value to the interval `[0, 1]` by using the
parameters of `estimator`.

Used to compute reverse dispersion entropy ([`reverse_dispersion`](@ref);
Li et al., 2019[^Li2019]).

[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
"""
function distance_to_whitenoise(p::Probabilities, est::Dispersion; normalize = false)
# We can safely skip non-occurring symbols, because they don't contribute
# to the sum in eq. 3 in Li et al. (2019)
Hrde = sum(abs2, p) - 1/(n_classes^m)
Hrde = sum(abs2, p) - (1 / alphabet_length(est))

if normalize
# The factor `f` considers *all* possible symbols (also non-occurring)
f = n_classes^m
return Hrde / (1 - (1/f))
return Hrde / (1 - (1 / alphabet_length(est)))
else
return Hrde
end
end
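
A short usage sketch for the new `distance_to_whitenoise` signature (parameter values below are illustrative, not recommendations):

```julia
using Entropies

x = rand(1000)
est = Dispersion(m = 2, symbolization = GaussianSymbolization(c = 3))
p = probabilities(x, est)

# Distance of the dispersion-pattern distribution from white noise,
# normalized to [0, 1] using the alphabet length of `est`.
distance_to_whitenoise(p, est; normalize = true)
```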

# Note again: this is a *complexity measure*, not an entropy estimator, so we don't use
# the entropy_xxx_norm interface for normalization, even though we rely on `alphabet_length`.
"""
reverse_dispersion(x::AbstractVector{T}, s = GaussianSymbolization(c = 5), m = 2, τ = 1,
normalize = true)

reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion();
normalize = true) where T <: Real

Compute the reverse dispersion entropy complexity measure (Li et al., 2019)[^Li2019].

## Algorithm
## Description

Li et al. (2019)[^Li2019] define the reverse dispersion entropy as

```math
H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2.
H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2 =
\\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\dfrac{1}{c^{m}}
```

where the probabilities ``p_i`` are obtained precisely as for the [`Dispersion`](@ref)
probability estimator. Relative frequencies of dispersion patterns are computed using the
symbolization scheme `s`, which defaults to symbolization using the normal cumulative
given `symbolization` scheme, which defaults to symbolization using the normal cumulative
distribution function (NCDF), as implemented by [`GaussianSymbolization`](@ref), using
embedding dimension `m` and embedding delay `τ`.
Recommended parameter values[^Li2018] are `m ∈ [2, 3]`, `τ = 1` for the embedding, and
`c ∈ [3, 4, …, 8]` categories for the Gaussian mapping. If `normalize == true`, then
the reverse dispersion entropy is normalized to `[0, 1]`.
`c ∈ [3, 4, …, 8]` categories for the Gaussian mapping.

If `normalize == true`, then the reverse dispersion entropy is normalized to `[0, 1]`.

The minimum value of ``H_{rde}`` is zero, which occurs precisely when the dispersion
pattern distribution is flat, i.e. when all ``p_i`` are equal to ``1/c^m``.
Because ``H_{rde} \\geq 0``, ``H_{rde}`` can therefore be interpreted as a measure of how far
the dispersion pattern probability distribution is from white noise.

## Example

```jldoctest reverse_dispersion_example; setup = :(using Entropies)
julia> x = repeat([0.5, 0.7, 0.1, -1.0, 1.11, 2.22, 4.4, 0.2, 0.2, 0.1], 10);

julia> c, m = 3, 5;

julia> reverse_dispersion(x, s = GaussianSymbolization(c = c), m = m, normalize = false)
0.11372331532921814
```

!!! note
#### A clarification on notation

With ambiguous notation, Li et al. claim that

``H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2 = \\sum_{i = 1}^{c^m} p_i^2 - \\frac{1}{c^m}.``

But on the right-hand side of the equality, does the constant term appear within or
outside the sum? Using that ``P`` is a probability distribution by
construction (in step 4), we see that the constant must appear *outside* the sum:

```math
\\begin{aligned}
H_{rde} &= \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2
= \\sum_{i=1}^{c^m} p_i^2 - \\frac{2p_i}{c^m} + \\frac{1}{c^{2m}} \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\left(\\sum_{i=1}^{c^m} \\frac{2p_i}{c^m}\\right) + \\left( \\sum_{i=1}^{c^m} \\dfrac{1}{{c^{2m}}} \\right) \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\left(\\frac{2}{c^m} \\sum_{i=1}^{c^m} p_i \\right) + \\dfrac{c^m}{c^{2m}} \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\frac{2}{c^m} (1) + \\dfrac{1}{c^{m}} \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\dfrac{1}{c^{m}}.
\\end{aligned}
```

[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
"""
function reverse_dispersion(x::AbstractVector{T}; s = GaussianSymbolization(5),
m = 2, τ = 1, normalize = true) where T <: Real
est = Dispersion(τ = τ, m = m, s = s)
function reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion();
normalize = true) where T <: Real

p = probabilities(x, est)

# The following step combines distance information with the probabilities, so
# from here on, it is not possible to use `entropy_renyi` or similar methods, because
# we're not dealing with probabilities anymore.
Hrde = distance_to_whitenoise(p, s.c, m)
Hrde = distance_to_whitenoise(p, est, normalize = normalize)
end
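
A usage sketch against the new positional-estimator signature (reusing the input from the doctest above; outputs are not shown because they depend on the estimator parameters):

```julia
using Entropies

x = repeat([0.5, 0.7, 0.1, -1.0, 1.11, 2.22, 4.4, 0.2, 0.2, 0.1], 10)

# Default estimator parameters, normalized to [0, 1]
reverse_dispersion(x)

# Explicit estimator parameters, unnormalized
est = Dispersion(m = 2, symbolization = GaussianSymbolization(c = 3))
reverse_dispersion(x, est; normalize = false)
```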
3 changes: 2 additions & 1 deletion src/entropies/entropies.jl
@@ -3,5 +3,6 @@ include("tsallis.jl")
include("shannon.jl")
include("convenience_definitions.jl")
include("direct_entropies/nearest_neighbors/nearest_neighbors.jl")

# TODO: What else is included here from direct entropies?

include("normalized_entropy.jl")
80 changes: 80 additions & 0 deletions src/entropies/normalized_entropy.jl
@@ -0,0 +1,80 @@
export entropy_normalized

"""
entropy_normalized(f::Function, x, est::ProbabilitiesEstimator, args...; kwargs...)

Convenience syntax for normalizing to the entropy of a uniform probability distribution.
First estimates probabilities as `p::Probabilities = f(x, est, args...; kwargs...)`,
then calls `entropy_normalized(f, p, args...; kwargs...)`.

Normalization is only defined for estimators for which [`alphabet_length`](@ref) is defined,
meaning that the total number of states or symbols is known beforehand.

entropy_normalized(f::Function, p::Probabilities, est::ProbabilitiesEstimator, args...;
kwargs...)

Normalize the entropy, as returned by the entropy function `f` called with the given
arguments (i.e. `f(p, args...; kwargs...)`), to the entropy of a uniform distribution,
inferring [`alphabet_length`](@ref) from `est`.
Comment on lines +13 to +18
Member: I'm not convinced that this method is useful... If the user has the `est`, they may as well use the first method.

Member Author: We can just remove it. The user can just normalize to `log(N)` manually if they need to.

Member Author: Wait, I'm not so sure of that. If we remove this method, then:

Say I have a set of probabilities `p = Probabilities([0.2, 0.2, 0.3, 0.3])` that I have already computed with a known estimator, but don't have the original data for. I now want to compute 1) Renyi entropy and 2) normalized Renyi entropy. Without this method, it is easy to compute 1), but there is no convenient way to compute 2) without re-implementing precisely what this function does myself. That is: I'd always have to start from raw data if I want to compute normalized entropy.

Member (@Datseris, Sep 26, 2022): What do you mean? You do `entropy_renyi(p) / log(alphabet_length(est))`, since you have the estimator but not the original data.

Member Author:

> What do you mean, you do entropy_renyi(p)/log(length(p)). What's the problem :D

See this comment.


entropy_normalized(f::Function, p::Probabilities, args...; kwargs...)

The same as above, but infers the alphabet length by counting how many elements
are in `p` (zero probabilities are counted).

## Examples

Computing normalized entropy from scratch:

```julia
x = rand(100)
entropy_normalized(entropy_renyi, x, Dispersion())
```

Computing normalized entropy from pre-computed probabilities with known parameters:

```julia
x = rand(100)
est = Dispersion(m = 3, symbolization = GaussianSymbolization(c = 4))
p = probabilities(x, est)
entropy_normalized(entropy_renyi, p, est)
```

Computing normalized entropy, assuming there are `N = 10` total states:

```julia
N = 10
p = Probabilities(rand(N))
entropy_normalized(entropy_renyi, p)
```

!!! note "Normalized output range"
For Rényi entropy (e.g. Kumar et al., 1986[^Kumar1986]) and for Tsallis entropy (Tsallis, 1988[^Tsallis1998]),
normalizing to the uniform distribution ensures that the entropy lies in
the interval `[0, 1]`. For other entropies and parameter choices, the resulting entropy
is not guaranteed to lie in `[0, 1]`. It is up to the user to decide whether
normalizing to a uniform distribution makes sense for their use case.


[^Kumar1986]: Kumar, U., Kumar, V., & Kapur, J. N. (1986). Normalized measures of entropy.
International Journal Of General System, 12(1), 55-69.
[^Tsallis1998]: Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics.
Journal of statistical physics, 52(1), 479-487.
"""
function entropy_normalized(f::Function, x, est::ProbEst, args...; kwargs...)
N = alphabet_length(est)
p_uniform = Probabilities(repeat([1/N], N))
return f(x, est, args...; kwargs...) / f(p_uniform, args...; kwargs...)
end

function entropy_normalized(f::Function, x::Probabilities, est::ProbEst, args...; kwargs...)
N = alphabet_length(est)
p_uniform = Probabilities(repeat([1/N], N))
return f(x, args...; kwargs...) / f(p_uniform; kwargs...)
end

function entropy_normalized(f::Function, x::Probabilities, args...; kwargs...)
N = length(x)
p_uniform = Probabilities(repeat([1/N], N))
return f(x, args...; kwargs...) / f(p_uniform; kwargs...)
end
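
A quick sanity-check sketch for the probabilities-only method (alphabet length counted from `p` itself): the normalized entropy of a uniform distribution should be 1.

```julia
using Entropies

N = 8
p_uniform = Probabilities(repeat([1/N], N))
entropy_normalized(entropy_renyi, p_uniform)  # ≈ 1.0
```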
16 changes: 16 additions & 0 deletions src/entropies/renyi.jl
@@ -1,4 +1,5 @@
export entropy_renyi
export maxentropy_renyi

"""
entropy_renyi(p::Probabilities; q = 1.0, base = MathConstants.e)
@@ -30,7 +31,11 @@ also known as Hartley entropy), or the correlation entropy
[^Rényi1960]:
A. Rényi, _Proceedings of the fourth Berkeley Symposium on Mathematics,
Statistics and Probability_, pp 547 (1960)
[^Kumar1986]: Kumar, U., Kumar, V., & Kapur, J. N. (1986). Normalized measures of entropy.
International Journal Of General System, 12(1), 55-69.
[^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948)

See also: [`maxentropy_renyi`](@ref).
"""
function entropy_renyi end

@@ -89,3 +94,14 @@ function entropy_renyi!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e
probabilities!(p, x, est)
entropy_renyi(p; q = q, base = base)
end

"""
maxentropy_renyi(N::Int; base = MathConstants.e)

Convenience function that computes the maximum value of the order-`q` generalized
Rényi entropy for an `N`-element probability distribution, i.e.
``\\log_{base}(N)``, which is useful for normalization when `N` is known.

See also [`entropy_renyi`](@ref), [`entropy_normalized`](@ref).
"""
maxentropy_renyi(N::Int; base = MathConstants.e) = log(base, N)
Member: Why would we need such a function? I'm not convinced that providing a wrapper function over `log` is useful either :D

Member Author (@kahaaga, Sep 26, 2022):
Because maximum entropy in itself is a useful thing to be able to compute. For Renyi entropy, it is trivially log(N), but for other entropies it is not trivial at all. For stretched exponential entropy, for example, which I've prepared in a non-committed PR, the maximum entropy is a function of the parameters of the method and involves a complicated expression with gamma and incomplete gamma functions.
For Tsallis entropy, to be valid for all parameter values, the maximum entropy is no longer log(N) but a more complicated expression too.

I think there should be a convenient way for the user to compute this quantity, to also be able to work with the entropies from a theoretical standpoint. All analytical tests for the stretched exponential entropy, and the example reproducing the original paper, involve using maxentropy_stretched_exponential.

The maximum entropy is a fundamental property of entropies. I don't see why we shouldn't provide this functionality. This is, after all, a package, or "library", that is dedicated specifically to computing entropies and related quantities. I don't think pruning is necessary just because we don't see an immediate use for the method at the moment.

When we end up with a complete library of generalized entropies (I think I've seen ca. 10 different ones so far), it would be weird not to provide functionality to get relevant parameters/values for all of them (i.e. providing maxentropy not just for Renyi, but for all the other entropies too).

Member (@Datseris, Sep 26, 2022): Okay, I see.

How about a compromise between providing such methods and keeping this library from reaching 1,000 exported names, by extending an already existing function: specifically, `maximum`. So the interface would be

maximum(entropy_function, estimator)

and we add dispatch for the entropy_function.

Actually, this whole business makes me wonder whether we should make "entropy types". We can have a single `entropy(...)` function, and "entropy types" such as `Renyi()` and `Tsallis()`. So the Renyi entropy for a given estimator would be `entropy(Renyi(), x, est)`. This way we can actually call `maximum(Renyi(), PermutationX())` instead of passing functions. The entropy types can have arguments such as `Renyi(q)` for the order `q`.

Member: I vote for the last suggestion, and I am now more convinced of the entropy types you suggested at some point but we decided not to go with...

Member: If you also vote for that, I'd like to do it after I merge this PR.

Member Author:

> Actually, this whole business makes me wonder whether we should make "entropy types". We can have a single `entropy(...)` function, and "entropy types" such as `Renyi()` and `Tsallis()`. So the Renyi entropy for a given estimator would be `entropy(Renyi(), x, est)`. This way we can actually call `maximum(Renyi(), PermutationX())` instead of passing functions. The entropy types can have arguments such as `Renyi(q)` for the order `q`.

Yes, going for the dispatch version would perhaps be better once the codebase grows. I wasn't aware to begin with how many types of entropies there actually are, but they are in fact plentiful.

If the rest of this PR looks ok to you, maybe we just merge this PR (in the current state) and the original one to main. Then, as you say, we can implement the multiple dispatch entropy(EntropyType(), Estimator()) version in a separate PR.

If this is merged quickly, I can put together a PR tonight for the new version, because I'd like to get this off the table as soon as possible and arrive at a stable API.
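
For concreteness, a hypothetical sketch of the entropy-type interface floated in this thread; none of these types or methods exist in this PR, and all names below are illustrative only:

```julia
using Entropies

# Hypothetical entropy "types" carrying their own parameters.
abstract type EntropyDefinition end

Base.@kwdef struct Renyi <: EntropyDefinition
    q::Float64 = 1.0
    base::Real = MathConstants.e
end

# Hypothetical single entry point, dispatching on the entropy type.
entropy(e::Renyi, x, est) = entropy_renyi(x, est; q = e.q, base = e.base)

# Hypothetical maximum entropy obtained by extending `Base.maximum`,
# as suggested above.
Base.maximum(e::Renyi, est) = log(e.base, alphabet_length(est))
```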

21 changes: 18 additions & 3 deletions src/entropies/shannon.jl
@@ -1,11 +1,26 @@
export entropy_shannon
export maxentropy_shannon

"""
entropy_shannon(args...; base = MathConstants.e)
Equivalent to `entropy_renyi(args...; base, q = 1)` and provided solely for convenience.
Compute the Shannon entropy, given by

Equivalent to `entropy_renyi(args...; base = base, q = 1)` and provided solely for convenience.
Computes the Shannon entropy, given by
```math
H(p) = - \\sum_i p[i] \\log(p[i])
```

See also: [`maxentropy_shannon`](@ref).
"""
entropy_shannon(args...; base = MathConstants.e) =
entropy_renyi(args...; base = base, q = 1)

"""
maxentropy_shannon(N::Int; base = MathConstants.e)

Convenience function that computes the maximum value of the Shannon entropy, i.e.
``\\log_{base}(N)``, which is useful for normalization when `N` is known.

See also [`entropy_shannon`](@ref), [`entropy_normalized`](@ref).
"""
entropy_shannon(args...; base = MathConstants.e) = entropy_renyi(args...; base, q = 1)
maxentropy_shannon(N::Int; base = MathConstants.e) = maxentropy_renyi(N, base = base)
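
A usage sketch (assumed behaviour, following the definition above: the maximum Shannon entropy of `N` states in base 2 is `log2(N)`):

```julia
using Entropies

maxentropy_shannon(4; base = 2)  # == 2.0
```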