Approximate entropy #72

Merged 24 commits on Oct 24, 2022

9 changes: 8 additions & 1 deletion docs/src/complexity_measures.md
@@ -12,6 +12,12 @@ ReverseDispersion
distance_to_whitenoise
```

## Approximate entropy

```@docs
ApproximateEntropy
```

## Sample entropy

```@docs
@@ -20,8 +26,9 @@ SampleEntropy

## Convenience functions

We provide a few convenience functions for widely used "entropy-like" complexity measures, such as "sample entropy". Other arbitrary specialized convenience functions can easily be defined in a couple lines of code.
We provide a few convenience functions for widely used "entropy-like" complexity measures. Other arbitrary specialized convenience functions can easily be defined in a couple lines of code.

```@docs
approx_entropy
sample_entropy
```
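
The claim above, that specialized convenience functions take only a couple of lines, can be illustrated with a minimal sketch. The wrapper name `reverse_dispersion_entropy` is hypothetical and not part of this PR; it simply forwards keyword arguments to an exported estimator and calls the complexity API:

```julia
using Entropies

# Hypothetical convenience wrapper, analogous in spirit to `approx_entropy`:
# construct an estimator from keyword arguments and delegate to the complexity API.
function reverse_dispersion_entropy(x; m = 2, τ = 1, c = 6)
    est = ReverseDispersion(symbolization = GaussianSymbolization(c), m = m, τ = τ)
    return complexity_normalized(est, x)
end

reverse_dispersion_entropy(rand(1000))
```
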
84 changes: 81 additions & 3 deletions docs/src/examples.md
@@ -225,10 +225,11 @@ des = zeros(length(windows))
pes = zeros(length(windows))

m, c = 2, 6
est_rd = ReverseDispersion(symbolization = GaussianSymbolization(c), m = m, τ = 1)
est_de = Dispersion(symbolization = GaussianSymbolization(c), m = m, τ = 1)

for (i, window) in enumerate(windows)
rdes[i] = reverse_dispersion(y[window], est_de; normalize = true)
rdes[i] = complexity_normalized(est_rd, y[window])
des[i] = entropy_normalized(Renyi(), y[window], est_de)
end

@@ -283,6 +284,85 @@ end
You see that while the direct entropy values of the chaotic and noisy signals change massively with `N`, they are almost the same for the normalized version.
For the regular signals, the entropy decreases nevertheless because the noise contribution of the Fourier computation becomes less significant.

## Approximate entropy

Here, we reproduce the Henon map example with ``R=0.8`` from Pincus (1991),
comparing our values with the relevant values from table 1 of that paper.

We use `DiscreteDynamicalSystem` from `DynamicalSystems.jl` to represent the map,
and the `trajectory` function from the same package to iterate it
for different initial conditions and multiple time series lengths.

Finally, we summarize our results in box plots and compare the values to those
obtained by Pincus (1991).

```@example
using Entropies, DynamicalSystems, CairoMakie

# Equation 13 in Pincus (1991)
function eom_henon(u, p, n)
R = p[1]
x, y = u
dx = R*y + 1 - 1.4*x^2
dy = 0.3*R*x

return SVector{2}(dx, dy)
end

function henon(; u₀ = rand(2), R = 0.8)
DiscreteDynamicalSystem(eom_henon, u₀, [R])
end

ts_lengths = [300, 1000, 2000, 3000]
nreps = 100
apens_08 = [zeros(nreps) for i = 1:length(ts_lengths)]

# For some initial conditions, the Henon map as specified here blows up,
# so we need to check for infinite values.
containsinf(x) = any(isinf.(x))

c = ApproximateEntropy(r = 0.05, m = 2)

for (i, L) in enumerate(ts_lengths)
k = 1
while k <= nreps
sys = henon(u₀ = rand(2), R = 0.8)
t = trajectory(sys, L, Ttr = 5000)

if !any([containsinf(tᵢ) for tᵢ in t])
x, y = columns(t)
apens_08[i][k] = complexity(c, x)
k += 1
end
end
end

fig = Figure()

# Example time series
a1 = Axis(fig[1,1]; xlabel = "Time (t)", ylabel = "Value")
sys = henon(u₀ = [0.5, 0.1], R = 0.8)
x, y = columns(trajectory(sys, 100, Ttr = 500))
lines!(a1, 1:length(x), x, label = "x")
lines!(a1, 1:length(y), y, label = "y")

# Approximate entropy values, compared to those of the original paper (black dots).
a2 = Axis(fig[2, 1];
xlabel = "Time series length (L)",
ylabel = "ApEn(m = 2, r = 0.05)")

# hacky boxplot, but this seems to be how it's done in Makie at the moment
n = length(ts_lengths)
for i = 1:n
boxplot!(a2, fill(ts_lengths[i], length(apens_08[i])), apens_08[i];
width = 200)
end

scatter!(a2, ts_lengths, [0.337, 0.385, NaN, 0.394];
label = "Pincus (1991)", color = :black)
fig
```
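
For a one-off computation, the [`approx_entropy`](@ref) convenience wrapper gives the same result as the `complexity` calls above without constructing the estimator explicitly. A minimal sketch, assuming any scalar-valued timeseries `x`:

```julia
using Entropies

x = rand(1000)  # placeholder timeseries
# Equivalent to complexity(ApproximateEntropy(r = 0.05, m = 2), x)
approx_entropy(x, r = 0.05, m = 2)
```
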

## Sample entropy

Completely regular signals should have sample entropy approaching zero, while
@@ -328,7 +408,6 @@ x_periodic .= (x_periodic .- mean(x_periodic)) ./ std(x_periodic)
rs = 10 .^ range(-1, 0, length = 30)
base = 2
m = 2
c =
hs_U = [complexity_normalized(SampleEntropy(m = m, r = r), x_U) for r in rs]
hs_N = [complexity_normalized(SampleEntropy(m = m, r = r), x_N) for r in rs]
hs_periodic = [complexity_normalized(SampleEntropy(m = m, r = r), x_periodic) for r in rs]
@@ -340,6 +419,5 @@ lines!(a1, rs, hs_U, label = "Uniform noise, U(0, 1)")
lines!(a1, rs, hs_N, label = "Gaussian noise, N(0, 1)")
lines!(a1, rs, hs_periodic, label = "Periodic signal")
axislegend()

fig
```
1 change: 1 addition & 0 deletions src/complexity.jl
@@ -16,6 +16,7 @@ Estimate the complexity measure `c` for [input data](@ref input_data) `x`, where
be any of the following measures:

- [`ReverseDispersion`](@ref).
- [`ApproximateEntropy`](@ref).
- [`SampleEntropy`](@ref).
"""
function complexity(c::C, x) where C <: ComplexityMeasure
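
For context, a brief usage sketch of the dispatch documented above, using only constructors that appear in this PR (the data and radius are illustrative):

```julia
using Entropies

x = rand(1000)

# Radius supplied explicitly through the keyword constructor ...
complexity(ApproximateEntropy(r = 0.2, m = 2), x)

# ... or deduced from the data as 0.2 * std(x) via the data-based constructor.
complexity(SampleEntropy(x), x)
```
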
160 changes: 160 additions & 0 deletions src/complexity_measures/approximate_entropy.jl
@@ -0,0 +1,160 @@
using DelayEmbeddings
using Neighborhood: inrangecount
using Distances
using Statistics

export ApproximateEntropy
export approx_entropy

"""
ApproximateEntropy([x]; r = 0.2std(x), kwargs...)

An estimator for the approximate entropy (ApEn; Pincus, 1991)[^Pincus1991] complexity
measure, used with [`complexity`](@ref).

The keyword argument `r` is mandatory if an input timeseries `x` is not provided.

## Keyword arguments
- `r::Real`: The radius used when querying for nearest neighbors around points. Its value
should be determined from the input data, for example as some proportion of the
standard deviation of the data.
- `m::Int = 2`: The embedding dimension.
- `τ::Int = 1`: The embedding lag.
- `base::Real = MathConstants.e`: The base to use for the logarithm. Pincus (1991) uses the
natural logarithm.
- `metric`: The metric used to compute distances.

## Description

Approximate entropy is defined as

```math
ApEn(m, r) = \\lim_{N \\to \\infty} \\left[ \\phi(x, m, r) - \\phi(x, m + 1, r) \\right].
```

Approximate entropy is estimated for a timeseries `x`, by first embedding `x` using
embedding dimension `m` and embedding lag `τ`, then searching for similar vectors within
tolerance radius `r`, using the estimator described below, with logarithms to the given
`base` (natural logarithm is used in Pincus, 1991).

Specifically, for a finite-length timeseries `x`, an estimator for ``ApEn(m, r)`` is

```math
ApEn(m, r, N) = \\phi(x, m, r, N) - \\phi(x, m + 1, r, N),
```

where `N = length(x)` and

```math
\\phi(x, k, r, N) =
\\dfrac{1}{N-(k-1)\\tau} \\sum_{i=1}^{N - (k-1)\\tau}
\\log{\\left(
\\sum_{j = 1}^{N-(k-1)\\tau} \\dfrac{\\theta(d({\\bf x}_i^k, {\\bf x}_j^k) \\leq r)}{N-(k-1)\\tau}
\\right)}.
```

Here, ``\\theta(\\cdot)`` returns 1 if the argument is true and 0 otherwise,
``d({\\bf x}_i, {\\bf x}_j)`` returns the Chebyshev distance between vectors
``{\\bf x}_i`` and ``{\\bf x}_j``, and the `k`-dimensional embedding vectors are
constructed from the input timeseries ``x(t)`` as

```math
{\\bf x}_i^k = (x(i), x(i+τ), x(i+2τ), \\ldots, x(i+(k-1)\\tau)).
```

!!! note "Flexible embedding lag"
In the original paper, Pincus (1991) fixes `τ = 1`. In our implementation, the normalization
constant is modified to account for embeddings with `τ != 1`.

[^Pincus1991]: Pincus, S. M. (1991). Approximate entropy as a measure of system complexity.
Proceedings of the National Academy of Sciences, 88(6), 2297-2301.
"""
Base.@kwdef struct ApproximateEntropy{I, M, B, R} <: ComplexityMeasure
m::I = 2
τ::I = 1
metric::M = Chebyshev()
base::B = MathConstants.e
r::R

function ApproximateEntropy(m::I, τ::I, metric::M, base::B, r::R) where {I, R, M, B}
m >= 1 || throw(ArgumentError("m must be >= 1. Got m=$(m)."))
r > 0 || throw(ArgumentError("r must be > 0. Got r=$(r)."))
new{I, M, B, R}(m, τ, metric, base, r)
end
function ApproximateEntropy(x::AbstractVector{T}; m::Int = 2, τ::Int = 1,
metric = Chebyshev(), base = MathConstants.e) where T
r = 0.2 * Statistics.std(x)
ApproximateEntropy(m, τ, metric, base, r)
end
end



function complexity(c::ApproximateEntropy, x::AbstractVector{T}) where T
(; m, τ, r, base) = c

# Definition in https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7515030/
if m == 1
return compute_ϕ(x; k = m, r, τ, base)
else
ϕᵐ = compute_ϕ(x; k = m, r, τ, base)
ϕᵐ⁺¹ = compute_ϕ(x; k = m + 1, r, τ, base)
return ϕᵐ - ϕᵐ⁺¹
end
end

"""
compute_ϕ(x::AbstractVector{T}; r = 0.2 * Statistics.std(x), k::Int = 2,
τ::Int = 1, base = MathConstants.e) where T <: Real

Construct the embedding

```math
u = \\{{\\bf u}_i \\}_{i = 1}^{N - (k-1)\\tau} =
\\{[x(i), x(i + \\tau), \\ldots, x(i + (k - 1)\\tau)]\\}_{i = 1}^{N - (k-1)\\tau}
```

and use a tree-and-nearest-neighbor search approach to compute

```math
\\phi^k(r) = \\dfrac{1}{N - (k-1)\\tau} \\sum_{i=1}^{N - (k-1)\\tau} \\log_{b}{(C_i^k(r))},
```

taking logarithms to `base` ``b``, and where

```math
C_i^k(r) = \\textrm{number of } j \\textrm{ such that } d({\\bf u}_i, {\\bf u}_j) \\leq r,
```

where ``d`` is the maximum (Chebyshev) distance, `r` is the tolerance, and `N` is the
length of the original scalar-valued time series `x`.
"""
function compute_ϕ(x::AbstractVector{T}; r = 0.2 * Statistics.std(x), k::Int = 2,
τ::Int = 1, base = MathConstants.e) where T <: Real
τs = 0:τ:(k - 1)*τ
pts = genembed(x, τs)
tree = KDTree(pts, Chebyshev())

# Account for `τ != 1` in the normalization constant.
f = length(x) - (k - 1)*τ

# `inrangecount` counts the query point itself, which is wanted for approximate entropy,
# because there are always neighbors and thus log(0) is never encountered.
ϕ = sum(log(base, inrangecount(tree, pᵢ, r) / f) for pᵢ in pts)

return ϕ / f
end

"""
approx_entropy(x; m = 2, τ = 1, r = 0.2 * Statistics.std(x), base = MathConstants.e)

Convenience syntax for computing the approximate entropy (Pincus, 1991) for timeseries `x`.

This is just a wrapper for `complexity(ApproximateEntropy(; m, τ, r, base), x)` (see
also [`ApproximateEntropy`](@ref)).
"""
function approx_entropy(x; m = 2, τ = 1, r = 0.2 * Statistics.std(x),
base = MathConstants.e)
c = ApproximateEntropy(; m, τ, r, base)
return complexity(c, x)
end
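
To make the ϕ estimator in the docstrings above concrete, here is a deliberately naive sketch that computes it by brute force instead of with the KD-tree used in `compute_ϕ`. It is an illustration under the same conventions (Chebyshev distance, self-matches counted, `N - (k-1)τ` normalization), not the package implementation; `naive_phi` and `apen_naive` are hypothetical names:

```julia
using Statistics

# Naive O(N²) translation of the ϕ estimator: count, for each embedding vector,
# how many vectors (including itself) lie within Chebyshev distance r.
function naive_phi(x::AbstractVector, k::Int, r::Real; τ::Int = 1, base = MathConstants.e)
    N = length(x)
    n = N - (k - 1) * τ                        # number of embedding vectors
    vecs = [x[i:τ:(i + (k - 1) * τ)] for i in 1:n]
    chebyshev(a, b) = maximum(abs.(a .- b))
    ϕ = 0.0
    for vᵢ in vecs
        Cᵢ = count(vⱼ -> chebyshev(vᵢ, vⱼ) <= r, vecs) / n
        ϕ += log(base, Cᵢ)
    end
    return ϕ / n
end

# ApEn(m, r) is then estimated as ϕ(m) - ϕ(m + 1).
apen_naive(x; m = 2, τ = 1, r = 0.2 * std(x)) =
    naive_phi(x, m, r; τ = τ) - naive_phi(x, m + 1, r; τ = τ)
```
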
1 change: 1 addition & 0 deletions src/complexity_measures/complexity_measures.jl
@@ -1,2 +1,3 @@
include("reverse_dispersion_entropy.jl")
include("approximate_entropy.jl")
include("sample_entropy.jl")
18 changes: 11 additions & 7 deletions src/complexity_measures/sample_entropy.jl
@@ -72,14 +72,18 @@ Base.@kwdef struct SampleEntropy{I, R, M} <: ComplexityMeasure
τ::I = 1
metric::M = Chebyshev()
r::R
end
function SampleEntropy(x::AbstractVector; m::Int = 2, τ::Int = 1, metric = Chebyshev())
r = 0.2 * Statistics.std(x)
SampleEntropy(; m, τ, metric, r)
end

function SampleEntropy(; r, m::Int = 2, τ::Int = 1, metric = Chebyshev())
SampleEntropy(; m, τ, metric, r)
function SampleEntropy(m::I, τ::I, metric::M, r::R) where {I, R, M}
m >= 1 || throw(ArgumentError("m must be >= 1. Got m=$(m)."))
r > 0 || throw(ArgumentError("r must be > 0. Got r=$(r)."))
new{I, R, M}(m, τ, metric, r)
end

function SampleEntropy(x::AbstractVector{T}; m::Int = 2, τ::Int = 1,
metric = Chebyshev()) where T
r = 0.2 * Statistics.std(x)
SampleEntropy(m, τ, metric, r)
end
end

# See comment in https://github.com/JuliaDynamics/Entropies.jl/pull/71 for why
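
A quick sketch of the constructor behavior after this change (the values are illustrative; the commented-out calls would throw):

```julia
using Entropies

SampleEntropy(r = 0.2, m = 2)       # ok: radius given explicitly
SampleEntropy(randn(2000); m = 2)   # ok: r deduced from the data as 0.2 * std(x)

# The new inner constructor validates its arguments:
# SampleEntropy(r = -0.1)        # throws ArgumentError: r must be > 0
# SampleEntropy(r = 0.2, m = 0)  # throws ArgumentError: m must be >= 1
```
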
5 changes: 5 additions & 0 deletions test/Project.toml
@@ -0,0 +1,5 @@
[deps]
DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
47 changes: 47 additions & 0 deletions test/complexity_measures/approx_entropy.jl
@@ -0,0 +1,47 @@
using DynamicalSystemsBase
using Statistics

@test_throws UndefKeywordError ApproximateEntropy()
@test_throws ArgumentError complexity(ApproximateEntropy(r = 0.2), Dataset(rand(100, 3)))

# Here, we try to reproduce Pincus' results within reasonable accuracy
# for the Henon map. He doesn't give initial conditions, so we just check that our
# results ± 1σ cover the value he reports for this system at time series length 1000.
# ---------------------------------------------------------------------------
# Equation 13 in Pincus (1991)
function eom_henon(u, p, n)
R = p[1]
x, y = u
dx = R*y + 1 - 1.4*x^2
dy = 0.3*R*x

return SVector{2}(dx, dy)
end
henon(; u₀ = rand(2), R = 0.8) = DiscreteDynamicalSystem(eom_henon, u₀, [R])

# For some initial conditions, the Henon map as specified here blows up,
# so we need to check for infinite values.
containsinf(x) = any(isinf.(x))

function calculate_hs(; nreps = 50, L = 1000)
# Calculate approx entropy for 50 different initial conditions
hs = zeros(nreps)
hs_conv = zeros(nreps)
k = 1
while k <= nreps
sys = henon(u₀ = rand(2), R = 0.8)
t = trajectory(sys, L, Ttr = 5000)

if !any([containsinf(tᵢ) for tᵢ in t])
x = t[:, 1]
hs[k] = complexity(ApproximateEntropy(r = 0.05, m = 2), x)
hs_conv[k] = approx_entropy(x, r = 0.05, m = 2)
k += 1
end
end
return hs, hs_conv
end
hs, hs_conv = calculate_hs()

@test mean(hs) - std(hs) <= 0.385 <= mean(hs) + std(hs)
@test mean(hs_conv) - std(hs_conv) <= 0.385 <= mean(hs_conv) + std(hs_conv)
1 change: 1 addition & 0 deletions test/complexity_measures/complexity_measures.jl
@@ -2,5 +2,6 @@ using Entropies, Test

@testset "Complexity measures" begin
testfile("./reverse_dispersion.jl")
testfile("./approx_entropy.jl")
testfile("./sample_entropy.jl")
end