diff --git a/docs/src/complexity_measures.md b/docs/src/complexity_measures.md
index f5972b954..16c387579 100644
--- a/docs/src/complexity_measures.md
+++ b/docs/src/complexity_measures.md
@@ -12,6 +12,12 @@ ReverseDispersion
 distance_to_whitenoise
 ```
 
+## Approximate entropy
+
+```@docs
+ApproximateEntropy
+```
+
 ## Sample entropy
 
 ```@docs
@@ -20,8 +26,9 @@ SampleEntropy
 
 ## Convenience functions
 
-We provide a few convenience functions for widely used "entropy-like" complexity measures, such as "sample entropy". Other arbitrary specialized convenience functions can easily be defined in a couple lines of code.
+We provide a few convenience functions for widely used "entropy-like" complexity measures. Other specialized convenience functions can easily be defined in a couple of lines of code.
 
 ```@docs
+approx_entropy
 sample_entropy
 ```
diff --git a/docs/src/examples.md b/docs/src/examples.md
index a8b4372ab..604b07437 100644
--- a/docs/src/examples.md
+++ b/docs/src/examples.md
@@ -225,10 +225,11 @@ des = zeros(length(windows))
 pes = zeros(length(windows))
 
 m, c = 2, 6
+est_rd = ReverseDispersion(symbolization = GaussianSymbolization(c), m = m, τ = 1)
 est_de = Dispersion(symbolization = GaussianSymbolization(c), m = m, τ = 1)
 
 for (i, window) in enumerate(windows)
-    rdes[i] = reverse_dispersion(y[window], est_de; normalize = true)
+    rdes[i] = complexity_normalized(est_rd, y[window])
     des[i] = entropy_normalized(Renyi(), y[window], est_de)
 end
 
@@ -283,6 +284,85 @@ end
 
 You see that while the direct entropy values of the chaotic and noisy signals change massively with `N` but they are almost the same for the normalized version. For the regular signals, the entropy decreases nevertheless because the noise contribution of the Fourier computation becomes less significant.
 
+## Approximate entropy
+
+Here, we reproduce the Henon map example with ``R=0.8`` from Pincus (1991),
+comparing our values with the relevant values from Table 1 of that paper.
+
+We use `DiscreteDynamicalSystem` from `DynamicalSystems.jl` to represent the map,
+and the `trajectory` function from the same package to iterate the map
+for different initial conditions and multiple time series lengths.
+
+Finally, we summarize our results in box plots and compare the values to those
+obtained by Pincus (1991).
+
+```@example
+using Entropies, DynamicalSystems, CairoMakie
+
+# Equation 13 in Pincus (1991)
+function eom_henon(u, p, n)
+    R = p[1]
+    x, y = u
+    dx = R*y + 1 - 1.4*x^2
+    dy = 0.3*R*x
+
+    return SVector{2}(dx, dy)
+end
+
+function henon(; u₀ = rand(2), R = 0.8)
+    DiscreteDynamicalSystem(eom_henon, u₀, [R])
+end
+
+ts_lengths = [300, 1000, 2000, 3000]
+nreps = 100
+apens_08 = [zeros(nreps) for i = 1:length(ts_lengths)]
+
+# For some initial conditions, the Henon map as specified here blows up,
+# so we need to check for infinite values.
+containsinf(x) = any(isinf.(x))
+
+c = ApproximateEntropy(r = 0.05, m = 2)
+
+for (i, L) in enumerate(ts_lengths)
+    k = 1
+    while k <= nreps
+        sys = henon(u₀ = rand(2), R = 0.8)
+        t = trajectory(sys, L, Ttr = 5000)
+
+        if !any([containsinf(tᵢ) for tᵢ in t])
+            x, y = columns(t)
+            apens_08[i][k] = complexity(c, x)
+            k += 1
+        end
+    end
+end
+
+fig = Figure()
+
+# Example time series
+a1 = Axis(fig[1,1]; xlabel = "Time (t)", ylabel = "Value")
+sys = henon(u₀ = [0.5, 0.1], R = 0.8)
+x, y = columns(trajectory(sys, 100, Ttr = 500))
+lines!(a1, 1:length(x), x, label = "x")
+lines!(a1, 1:length(y), y, label = "y")
+
+# Approximate entropy values, compared to those of the original paper (black dots).
+a2 = Axis(fig[2, 1];
+    xlabel = "Time series length (L)",
+    ylabel = "ApEn(m = 2, r = 0.05)")
+
+# hacky boxplot, but this seems to be how it's done in Makie at the moment
+n = length(ts_lengths)
+for i = 1:n
+    boxplot!(a2, fill(ts_lengths[i], nreps), apens_08[i];
+        width = 200)
+end
+
+scatter!(a2, ts_lengths, [0.337, 0.385, NaN, 0.394];
+    label = "Pincus (1991)", color = :black)
+fig
+```
+
 ## Sample entropy
 
 Completely regular signals should have sample entropy approaching zero, while
@@ -328,7 +408,6 @@ x_periodic .= (x_periodic .- mean(x_periodic)) ./ std(x_periodic)
 rs = 10 .^ range(-1, 0, length = 30)
 base = 2
 m = 2
-c =
 hs_U = [complexity_normalized(SampleEntropy(m = m, r = r), x_U) for r in rs]
 hs_N = [complexity_normalized(SampleEntropy(m = m, r = r), x_N) for r in rs]
 hs_periodic = [complexity_normalized(SampleEntropy(m = m, r = r), x_periodic) for r in rs]
@@ -340,6 +419,5 @@ lines!(a1, rs, hs_U, label = "Uniform noise, U(0, 1)")
 lines!(a1, rs, hs_N, label = "Gaussian noise, N(0, 1)")
 lines!(a1, rs, hs_periodic, label = "Periodic signal")
 axislegend()
-
 fig
 ```
diff --git a/src/complexity.jl b/src/complexity.jl
index 4e5df004d..55d92cb57 100644
--- a/src/complexity.jl
+++ b/src/complexity.jl
@@ -16,6 +16,7 @@ Estimate the complexity measure `c` for [input data](@ref input_data) `x`, where
 be any of the following measures:
 
 - [`ReverseDispersion`](@ref).
+- [`ApproximateEntropy`](@ref).
 - [`SampleEntropy`](@ref).
 """
 function complexity(c::C, x) where C <: ComplexityMeasure
diff --git a/src/complexity_measures/approximate_entropy.jl b/src/complexity_measures/approximate_entropy.jl
new file mode 100644
index 000000000..5fd491a46
--- /dev/null
+++ b/src/complexity_measures/approximate_entropy.jl
@@ -0,0 +1,160 @@
+using DelayEmbeddings
+using Neighborhood: inrangecount
+using Distances
+using Statistics
+
+export ApproximateEntropy
+export approx_entropy
+
+"""
+    ApproximateEntropy([x]; r = 0.2std(x), kwargs...)
+
+An estimator for the approximate entropy (ApEn; Pincus, 1991)[^Pincus1991] complexity
+measure, used with [`complexity`](@ref).
+
+The keyword argument `r` is mandatory if an input timeseries `x` is not provided.
+
+## Keyword arguments
+- `r::Real`: The radius used when querying for nearest neighbors around points. Its value
+    should be determined from the input data, for example as some proportion of the
+    standard deviation of the data.
+- `m::Int = 2`: The embedding dimension.
+- `τ::Int = 1`: The embedding lag.
+- `base::Real = MathConstants.e`: The base to use for the logarithm. Pincus (1991) uses the
+    natural logarithm.
+- `metric = Chebyshev()`: The metric used to compute distances.
+
+## Description
+
+Approximate entropy is defined as
+
+```math
+ApEn(m, r) = \\lim_{N \\to \\infty} \\left[ \\phi(x, m, r, N) - \\phi(x, m + 1, r, N) \\right].
+```
+
+Approximate entropy is estimated for a timeseries `x` by first embedding `x` using
+embedding dimension `m` and embedding lag `τ`, then searching for similar vectors within
+tolerance radius `r`, using the estimator described below, with logarithms to the given
+`base` (natural logarithm is used in Pincus, 1991).
+
+Specifically, for a finite-length timeseries `x`, an estimator for ``ApEn(m, r)`` is
+
+```math
+ApEn(m, r, N) = \\phi(x, m, r, N) - \\phi(x, m + 1, r, N),
+```
+
+where `N = length(x)` and
+
+```math
+\\phi(x, k, r, N) =
+\\dfrac{1}{N-(k-1)\\tau} \\sum_{i=1}^{N - (k-1)\\tau}
+\\log{\\left(
+    \\sum_{j = 1}^{N-(k-1)\\tau} \\dfrac{\\theta(d({\\bf x}_i^k, {\\bf x}_j^k) \\leq r)}{N-(k-1)\\tau}
+    \\right)}.
+```
+
+Here, ``\\theta(\\cdot)`` returns 1 if the argument is true and 0 otherwise,
+``d({\\bf x}_i, {\\bf x}_j)`` returns the Chebyshev distance between vectors
+``{\\bf x}_i`` and ``{\\bf x}_j``, and the `k`-dimensional embedding vectors are
+constructed from the input timeseries ``x(t)`` as
+
+```math
+{\\bf x}_i^k = (x(i), x(i+\\tau), x(i+2\\tau), \\ldots, x(i+(k-1)\\tau)).
+```
+
+!!! note "Flexible embedding lag"
+    The original paper fixes `τ = 1`. In our implementation, the normalization
+    constant is modified to account for embeddings with `τ != 1`.
+
+[^Pincus1991]: Pincus, S. M. (1991). Approximate entropy as a measure of system complexity.
+    Proceedings of the National Academy of Sciences, 88(6), 2297-2301.
+"""
+Base.@kwdef struct ApproximateEntropy{I, M, B, R} <: ComplexityMeasure
+    m::I = 2
+    τ::I = 1
+    metric::M = Chebyshev()
+    base::B = MathConstants.e
+    r::R
+
+    function ApproximateEntropy(m::I, τ::I, metric::M, base::B, r::R) where {I, R, M, B}
+        m >= 1 || throw(ArgumentError("m must be >= 1. Got m=$(m)."))
+        r > 0 || throw(ArgumentError("r must be > 0. Got r=$(r)."))
+        new{I, M, B, R}(m, τ, metric, base, r)
+    end
+    function ApproximateEntropy(x::AbstractVector{T}; m::Int = 2, τ::Int = 1,
+            metric = Chebyshev(), base = MathConstants.e) where T
+        r = 0.2 * Statistics.std(x)
+        ApproximateEntropy(m, τ, metric, base, r)
+    end
+end
+
+
+
+function complexity(c::ApproximateEntropy, x::AbstractVector{T}) where T
+    (; m, τ, r, base) = c
+
+    # Definition in https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7515030/
+    if m == 1
+        return compute_ϕ(x; k = m, r, τ, base)
+    else
+        ϕᵐ = compute_ϕ(x; k = m, r, τ, base)
+        ϕᵐ⁺¹ = compute_ϕ(x; k = m + 1, r, τ, base)
+        return ϕᵐ - ϕᵐ⁺¹
+    end
+end
+
+"""
+    compute_ϕ(x::AbstractVector{T}; r = 0.2 * Statistics.std(x), k::Int = 2,
+        τ::Int = 1, base = MathConstants.e) where T <: Real
+
+Construct the embedding
+
+```math
+u = \\{ {\\bf u}_i \\}_{i = 1}^{N - (k-1)\\tau} =
+\\{ [x(i), x(i + \\tau), \\ldots, x(i + (k - 1)\\tau)] \\}_{i = 1}^{N - (k-1)\\tau}
+```
+
+and use a tree-and-nearest-neighbor search approach to compute
+
+```math
+\\phi^k(r) = \\dfrac{1}{N - (k-1)\\tau} \\sum_{i=1}^{N - (k-1)\\tau} \\log_{b}{(C_i^k(r))},
+```
+
+taking logarithms to `base` ``b``, and where
+
+```math
+C_i^k(r) = \\textrm{number of } j \\textrm{ such that } d({\\bf u}_i, {\\bf u}_j) < r,
+```
+
+where ``d`` is the maximum (Chebyshev) distance, `r` is the tolerance, and `N` is the
+length of the original scalar-valued time series `x`.
+"""
+function compute_ϕ(x::AbstractVector{T}; r = 0.2 * Statistics.std(x), k::Int = 2,
+        τ::Int = 1, base = MathConstants.e) where T <: Real
+    τs = 0:τ:(k - 1)*τ
+    pts = genembed(x, τs)
+    tree = KDTree(pts, Chebyshev())
+
+    # Account for `τ != 1` in the normalization constant.
+    f = length(x) - (k - 1)*τ
+
+    # `inrangecount` counts the query point itself, which is wanted for approximate entropy,
+    # because there are always neighbors and thus log(0) is never encountered.
+    ϕ = sum(log(base, inrangecount(tree, pᵢ, r) / f) for pᵢ in pts)
+
+    return ϕ / f
+end
+
+"""
+    approx_entropy(x; m = 2, τ = 1, r = 0.2 * Statistics.std(x), base = MathConstants.e)
+
+Convenience syntax for computing the approximate entropy (Pincus, 1991) for timeseries `x`.
+
+This is just a wrapper for `complexity(ApproximateEntropy(; m, τ, r, base), x)` (see
+also [`ApproximateEntropy`](@ref)).
+"""
+function approx_entropy(x; m = 2, τ = 1, r = 0.2 * Statistics.std(x),
+        base = MathConstants.e)
+    c = ApproximateEntropy(; m, τ, r, base)
+    return complexity(c, x)
+end
diff --git a/src/complexity_measures/complexity_measures.jl b/src/complexity_measures/complexity_measures.jl
index 88e62b41c..62d2023b8 100644
--- a/src/complexity_measures/complexity_measures.jl
+++ b/src/complexity_measures/complexity_measures.jl
@@ -1,2 +1,3 @@
 include("reverse_dispersion_entropy.jl")
+include("approximate_entropy.jl")
 include("sample_entropy.jl")
diff --git a/src/complexity_measures/sample_entropy.jl b/src/complexity_measures/sample_entropy.jl
index b8a4f186f..c2c286a05 100644
--- a/src/complexity_measures/sample_entropy.jl
+++ b/src/complexity_measures/sample_entropy.jl
@@ -72,14 +72,18 @@ Base.@kwdef struct SampleEntropy{I, R, M} <: ComplexityMeasure
     τ::I = 1
     metric::M = Chebyshev()
     r::R
-end
-function SampleEntropy(x::AbstractVector; m::Int = 2, τ::Int = 1, metric = Chebyshev())
-    r = 0.2 * Statistics.std(x)
-    SampleEntropy(; m, τ, metric, r)
-end
-function SampleEntropy(; r, m::Int = 2, τ::Int = 1, metric = Chebyshev())
-    SampleEntropy(; m, τ, metric, r)
+    function SampleEntropy(m::I, τ::I, metric::M, r::R) where {I, R, M}
+        m >= 1 || throw(ArgumentError("m must be >= 1. Got m=$(m)."))
+        r > 0 || throw(ArgumentError("r must be > 0. Got r=$(r)."))
+        new{I, R, M}(m, τ, metric, r)
+    end
+
+    function SampleEntropy(x::AbstractVector{T}; m::Int = 2, τ::Int = 1,
+            metric = Chebyshev()) where T
+        r = 0.2 * Statistics.std(x)
+        SampleEntropy(m, τ, metric, r)
+    end
 end
 
 # See comment in https://github.com/JuliaDynamics/Entropies.jl/pull/71 for why
diff --git a/test/Project.toml b/test/Project.toml
new file mode 100644
index 000000000..4e6f0385b
--- /dev/null
+++ b/test/Project.toml
@@ -0,0 +1,5 @@
+[deps]
+DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/complexity_measures/approx_entropy.jl b/test/complexity_measures/approx_entropy.jl
new file mode 100644
index 000000000..9de2b5028
--- /dev/null
+++ b/test/complexity_measures/approx_entropy.jl
@@ -0,0 +1,47 @@
+using DynamicalSystemsBase
+using Statistics
+
+@test_throws UndefKeywordError ApproximateEntropy()
+@test_throws ArgumentError complexity(ApproximateEntropy(r = 0.2), Dataset(rand(100, 3)))
+
+# Here, we try to reproduce Pincus' results within reasonable accuracy
+# for the Henon map. He doesn't give initial conditions, so we just check that our
+# results +- 1σ cover what he gets for this system (for time series length 1000).
+# --------------------------------------------------------------------------- +# Equation 13 in Pincus (1991) +function eom_henon(u, p, n) + R = p[1] + x, y = (u...,) + dx = R*y + 1 - 1.4*x^2 + dy = 0.3*R*x + + return SVector{2}(dx, dy) +end +henon(; u₀ = rand(2), R = 0.8) = DiscreteDynamicalSystem(eom_henon, u₀, [R]) + +# For some initial conditions, the Henon map as specified here blows up, +# so we need to check for infinite values. +containsinf(x) = any(isinf.(x)) + +function calculate_hs(; nreps = 50, L = 1000) + # Calculate approx entropy for 50 different initial conditions + hs = zeros(nreps) + hs_conv = zeros(nreps) + k = 1 + while k <= nreps + sys = henon(u₀ = rand(2), R = 0.8) + t = trajectory(sys, L, Ttr = 5000) + + if !any([containsinf(tᵢ) for tᵢ in t]) + x = t[:, 1] + hs[k] = complexity(ApproximateEntropy(r = 0.05, m = 2), x) + hs_conv[k] = approx_entropy(x, r = 0.05, m = 2) + k += 1 + end + end + return hs, hs_conv +end +hs, hs_conv = calculate_hs() + +@test mean(hs) - std(hs) <= 0.385 <= mean(hs) + std(hs) +@test mean(hs_conv) - std(hs_conv) <= 0.385 <= mean(hs_conv) + std(hs_conv) diff --git a/test/complexity_measures/complexity_measures.jl b/test/complexity_measures/complexity_measures.jl index b96c3ed6f..e90986a37 100644 --- a/test/complexity_measures/complexity_measures.jl +++ b/test/complexity_measures/complexity_measures.jl @@ -2,5 +2,6 @@ using Entropies, Test @testset "Complexity measures" begin testfile("./reverse_dispersion.jl") + testfile("./approx_entropy.jl") testfile("./sample_entropy.jl") end