Finish API #187


Merged
merged 16 commits into from
Dec 19, 2022
5 changes: 3 additions & 2 deletions src/encoding/ordinal_pattern.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
export OrdinalPatternEncoding
#TODO: The docstring here, and probably the source code, needs a full re-write
# based on new `encode` interface.

"""
OrdinalPatternEncoding <: Encoding
OrdinalPatternEncoding(m = 3, τ = 1; lt = est.lt)

An encoding scheme that converts the input time series to ordinal patterns, which are
then encoded to integers using [`encode_motif`](@ref), used with
[`outcomes`](@ref).
then encoded to integers using [`encode`](@ref).

!!! note
`OrdinalPatternEncoding` is intended for symbolizing *time series*. If providing a short vector,
15 changes: 8 additions & 7 deletions src/encodings.jl
@@ -3,8 +3,8 @@ export Encoding, encode, decode
"""
Encoding

The supertype for all encoding schemes. Encodings **always encode elements of
input data into the positive integers**. The encoding API is defined by the
The supertype for all encoding schemes. Encodings always encode elements of
input data into the positive integers. The encoding API is defined by the
functions [`encode`](@ref) and [`decode`](@ref).
Some probability estimators utilize encodings internally.

@@ -17,15 +17,16 @@ Current available encodings are:
abstract type Encoding end

"""
encode(χ, e::Encoding) -> i::Int
encode(c::Encoding, χ) -> i::Int
Encode an element `χ ∈ x` of input data `x` (those given to [`probabilities`](@ref))
using encoding `e`.
using encoding `c`. The special value of `-1` is reserved as a return value for
inappropriate elements `χ` that cannot be encoded according to `c`.
"""
function encode end

"""
decode(i::Int, e::Encoding) -> ω
Decode an encoded element `i::Int` into the outcome it corresponds to `ω ∈ Ω`.
`Ω` is the [`outcome_space`](@ref) of a probabilities estimator that uses encoding `e`.
decode(c::Encoding, i::Int) -> ω
Decode an encoded element `i` into the outcome `ω ∈ Ω` it corresponds to.
`Ω` is the [`outcome_space`](@ref) of a probabilities estimator that uses encoding `c`.
"""
function decode end
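The `encode`/`decode` contract above can be illustrated with a hypothetical toy encoding (invented here for illustration; `SignEncoding` is not part of the package):

```julia
# Hypothetical toy encoding (not from the package) illustrating the
# `encode`/`decode` contract: elements map to positive integers, and the
# reserved value -1 marks elements that cannot be encoded.
abstract type Encoding end

struct SignEncoding <: Encoding end

# Encode χ into a positive integer; NaN cannot be encoded, so return -1.
encode(::SignEncoding, χ::Real) = isnan(χ) ? -1 : (χ < 0 ? 1 : 2)

# Decode an integer back into the outcome it represents.
decode(::SignEncoding, i::Int) = (:negative, :nonnegative)[i]

c = SignEncoding()
encode(c, -3.2)            # 1
decode(c, encode(c, 5.0))  # :nonnegative
encode(c, NaN)             # -1
```

Note that `decode(c, encode(c, χ))` recovers the outcome, not the original element `χ`; many elements may share one outcome.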
2 changes: 1 addition & 1 deletion src/entropies/convenience_definitions.jl
@@ -58,7 +58,7 @@ entropy(Shannon(base), est, x)
See [`WaveletOverlap`](@ref) for more info.
"""
function entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = 2)
est = WaveletOverlap(wavelet)
est = WaveletOverlap(x, wavelet)
entropy(Shannon(; base), est, x)
end

44 changes: 21 additions & 23 deletions src/entropy.jl
@@ -21,12 +21,11 @@ These entropy types are given as inputs to [`entropy`](@ref) and [`entropy_normalized`](@ref).

Mathematically speaking, generalized entropies are just nonnegative functions of
probability distributions that verify certain (entropy-type-dependent) axioms.
Amigó et al., 2018's
[summary paper](https://www.mdpi.com/1099-4300/20/11/813) gives a nice overview.
The summary paper by Amigó et al.[^Amigó2018] gives a nice overview.

[^Amigó2018]:
Amigó, J. M., Balogh, S. G., & Hernández, S. (2018). A brief review of
generalized entropies. Entropy, 20(11), 813.
generalized entropies. [Entropy, 20(11), 813.](https://www.mdpi.com/1099-4300/20/11/813)
"""
abstract type Entropy <: AbstractEntropy end

@@ -57,21 +56,23 @@ abstract type EntropyEstimator <: AbstractEntropy end
###########################################################################################
# Notice that StatsBase.jl exports `entropy` and Wavelets.jl exports `Entropy`.
"""
entropy([e::Entropy,] probs::Probabilities) → h::Real ∈ [0, ∞)
entropy([e::Entropy,] est::ProbabilitiesEstimator, x) → h::Real ∈ [0, ∞)
entropy([e::Entropy,] est::EntropyEstimator, x) → h::Real ∈ [0, ∞)
entropy([e::Entropy,] probs::Probabilities)
entropy([e::Entropy,] est::ProbabilitiesEstimator, x)
entropy([e::Entropy,] est::EntropyEstimator, x)

Compute `h`, a (generalized) [`Entropy`](@ref) of type `e`, in one of three ways:
Compute `h::Real`, which is
a (generalized) entropy defined by `e`, in one of three ways:

1. Directly from existing [`Probabilities`](@ref) `probs`.
2. From input data `x`, by first estimating a probability distribution using the provided
[`ProbabilitiesEstimator`](@ref), then computing entropy from that distribution.
In fact, the second method is just a 2-lines-of-code wrapper that calls
[`probabilities`](@ref) and gives the result to the first method.
[`ProbabilitiesEstimator`](@ref), then computing entropy from that distribution.
In fact, the second method is just a 2-lines-of-code wrapper that calls
[`probabilities`](@ref) and gives the result to the first method.
3. From input data `x`, by using a dedicated [`EntropyEstimator`](@ref) that computes
entropy in a way that doesn't involve explicitly computing probabilities first.
entropy in a way that doesn't involve explicitly computing probabilities first.

The entropy (first argument) is optional. When `est` is a probability estimator,
The entropy definition (first argument) is optional.
When `est` is a probability estimator,
`Shannon()` is used by default. When `est` is a dedicated entropy estimator,
the default entropy type is inferred from the estimator (e.g. [`Kraskov`](@ref)
estimates the [`Shannon`](@ref) entropy).
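The first of the three ways above can be sketched with a toy Shannon definition (a minimal sketch, not the package's implementation; the name `shannon` is invented here):

```julia
# Toy sketch (not the package implementation) of computing a Shannon
# entropy directly from an existing probability vector, as in way 1 above.
shannon(probs; base = 2) = -sum(p -> p > 0 ? p * log(base, p) : zero(p), probs)

probs = [0.25, 0.25, 0.25, 0.25]  # uniform distribution over 4 outcomes
h = shannon(probs)                # ≈ 2 bits, i.e. log2(4)
```

Ways 2 and 3 differ only in whether such a probability vector is estimated explicitly first, or bypassed entirely by a dedicated estimator.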
@@ -123,16 +124,17 @@ function entropy!(s::AbstractVector{Int}, e::Entropy, est::ProbabilitiesEstimator, x)
entropy(e, probs)
end

entropy!(s::AbstractVector{Int}, est::ProbabilitiesEstimator, x) =
function entropy!(s::AbstractVector{Int}, est::ProbabilitiesEstimator, x)
entropy!(s, Shannon(), est, x)
end

###########################################################################################
# API: entropy from entropy estimators
###########################################################################################
# Dispatch for these functions is implemented in individual estimator files in
# `entropies/estimators/`.
function entropy(e::Entropy, est::EntropyEstimator, x)
t = string(typeof(e).name.name)
t = string(nameof(typeof(e)))
throw(ArgumentError("$t entropy not implemented for $(typeof(est)) estimator"))
end

@@ -150,20 +152,16 @@ entropy(est::EntropyEstimator, x; base = 2) = entropy(Shannon(; base), est, x)
# Normalize API
###########################################################################################
"""
entropy_maximum(e::Entropy, est::ProbabilitiesEstimator, x) → m::Real

Return the maximum value `m` of the given entropy type based on the given estimator
and the given input `x` (whose values are not important, but layout and type are).
entropy_maximum(e::Entropy, est::ProbabilitiesEstimator) → m::Real

This function only works if the maximum value is deducable, which is possible only
when the estimator has a known [`total_outcomes`](@ref).
Return the maximum value `m` of the given entropy definition based on the given estimator.

entropy_maximum(e::Entropy, L::Int) → m::Real

Alternatively, compute the maximum entropy from the number of total outcomes `L` directly.
"""
function entropy_maximum(e::Entropy, est::ProbabilitiesEstimator, x)
L = total_outcomes(x, est)
function entropy_maximum(e::Entropy, est::ProbabilitiesEstimator)
L = total_outcomes(est)
return entropy_maximum(e, L)
end
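For the Shannon case, the maximum entropy over `L` outcomes has a closed form, which is what normalization divides by (a minimal sketch with invented names, assuming equiprobable outcomes maximize the entropy):

```julia
# Toy sketch: for Shannon entropy, the maximum over L equally probable
# outcomes is log(base, L); normalized entropy divides by this value.
shannon(probs; base = 2) = -sum(p -> p > 0 ? p * log(base, p) : zero(p), probs)
max_shannon(L::Int; base = 2) = log(base, L)

probs = [0.5, 0.25, 0.25]
h_norm = shannon(probs) / max_shannon(3)  # normalized entropy, lies in [0, 1]
```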
function entropy_maximum(e::Entropy, ::Int)
@@ -182,7 +180,7 @@ Notice that unlike for [`entropy`](@ref), here there is no method
the amount of _possible_ events (i.e., the [`total_outcomes`](@ref)) from `probs`.
"""
function entropy_normalized(e::Entropy, est::ProbabilitiesEstimator, x)
return entropy(e, est, x) / entropy_maximum(e, est, x)
return entropy(e, est, x) / entropy_maximum(e, est)
end
function entropy_normalized(est::ProbabilitiesEstimator, x::Array_or_Dataset)
return entropy_normalized(Shannon(), est, x)
76 changes: 26 additions & 50 deletions src/probabilities.jl
@@ -32,17 +32,18 @@ function Probabilities(x::AbstractVector{<:Integer})
return Probabilities(x ./ s, true)
end


# extend base Vector interface:
for f in (:length, :size, :eachindex, :eltype,
for f in (:length, :size, :eachindex, :eltype, :parent,
:lastindex, :firstindex, :vec, :getindex, :iterate)
@eval Base.$(f)(d::Probabilities, args...) = $(f)(d.p, args...)
end
Base.IteratorSize(::Probabilities) = Base.HasLength()
# Special extension due to the rules of the API
@inline Base.sum(::Probabilities{T}) where T = one(T)

"""
ProbabilitiesEstimator
ProbabilitiesEstimator

The supertype for all probabilities estimators.

In Entropies.jl, probability distributions are estimated from data by defining a set of
@@ -66,6 +67,11 @@ across experimental realizations, by using the outcome as a dictionary key and the
probability as the value for that key (or, alternatively, the key remains the outcome
and one has a vector of probabilities, one for each experimental realization).

We have made the design decision that all probabilities estimators have a well-defined
outcome space when instantiated. For some estimators this means that the input data
`x` must be provided both when instantiating the estimator and when computing
the probabilities.
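This design decision can be sketched with a hypothetical estimator (illustrative names only; `ToyEstimator` is not in the package):

```julia
# Hypothetical sketch of the design decision above: the estimator stores
# the data it needs, so its outcome space is well defined at instantiation,
# without passing `x` again.
struct ToyEstimator{X}
    x::X  # input data retained so the outcome space is known up front
end

outcome_space(est::ToyEstimator) = sort!(unique(est.x))

est = ToyEstimator([2, 1, 2, 3])
outcome_space(est)  # [1, 2, 3], well defined from the estimator alone
```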

All currently implemented probability estimators are:

- [`CountOccurrences`](@ref).
@@ -110,13 +116,14 @@ function probabilities(est::ProbabilitiesEstimator, x)
end

"""
probabilities_and_outcomes(x, est) → (probs, Ω::Vector)
probabilities_and_outcomes(est, x)

Return `probs, Ω`, where `probs = probabilities(x, est)` and
`Ω[i]` is the outcome with probability `probs[i]`.
The element type of `Ω` depends on the estimator.
Return `probs, outs`, where `probs = probabilities(est, x)` and
`outs[i]` is the outcome with probability `probs[i]`.
The element type of `outs` depends on the estimator.
`outs` is a subset of the [`outcome_space`](@ref) of `est`.

See also [`outcomes`](@ref), [`total_outcomes`](@ref), and [`outcome_space`](@ref).
See also [`outcomes`](@ref), [`total_outcomes`](@ref).
"""
function probabilities_and_outcomes(est::ProbabilitiesEstimator, x)
error("`probabilities_and_outcomes` not implemented for estimator $(typeof(est)).")
@@ -136,73 +143,42 @@ function probabilities! end
# Outcome space
###########################################################################################
"""
outcome_space([x,] est::ProbabilitiesEstimator) → Ω
outcome_space(est::ProbabilitiesEstimator) → Ω

Return a container (typically `Vector`) containing all _possible_ outcomes of `est`,
i.e., the outcome space `Ω`.
Only possible for estimators that implement [`total_outcomes`](@ref),
and similarly, for some estimators `x` is not needed. The _values_ of `x` are never needed;
but some times the type and dimensional layout of `x` is.
Return a container containing all _possible_ outcomes of `est`.
"""
function outcome_space(x, est::ProbabilitiesEstimator)
outcome_space(est)
end
function outcome_space(est::ProbabilitiesEstimator)
error(
"`outcome_space(est)` not known/implemented for estimator $(typeof(est))."*
"Try providing some input data, e.g. `outcomes_space(x, est)`."*
"In some cases, this gives the dimensional layout/type information needed "*
"to define the outcome space."
)
error("`outcome_space` not implemented for estimator $(typeof(est)).")
end

"""
total_outcomes([x::Array_or_Dataset,] est::ProbabilitiesEstimator) → Int

Return the size/cardinality of the outcome space ``\\Omega`` defined by the probabilities
estimator `est` imposed on the input data `x`.
total_outcomes(est::ProbabilitiesEstimator)

For some estimators, the total number of outcomes is independent of `x`, in which case
the input `x` is ignored and the method `total_outcomes(est)` is called. If the total
number of states cannot be known a priori, an error is thrown. Primarily used in
[`entropy_normalized`](@ref).

## Examples

```jldoctest setup = :(using Entropies)
julia> est = SymbolicPermutation(m = 4);

julia> total_outcomes(rand(42), est) # same as `factorial(m)` for any `x`
24
```
Return the length (cardinality) of the outcome space ``\\Omega`` of `est`.
"""
function total_outcomes(x, est::ProbabilitiesEstimator)
return length(outcome_space(x, est))
end
total_outcomes(est::ProbabilitiesEstimator) = length(outcome_space(est))

"""
missing_outcomes(x, est::ProbabilitiesEstimator) → n_missing::Int
missing_outcomes(est::ProbabilitiesEstimator, x) → n_missing::Int

Estimate a probability distribution for `x` using the given estimator, then count the number
of missing (i.e. zero-probability) outcomes.

Works for estimators that implement [`total_outcomes`](@ref).

See also: [`MissingDispersionPatterns`](@ref).
"""
function missing_outcomes(x::Array_or_Dataset, est::ProbabilitiesEstimator)
function missing_outcomes(est::ProbabilitiesEstimator, x::Array_or_Dataset)
probs = probabilities(est, x)
L = total_outcomes(x, est)
L = total_outcomes(est)
O = count(!iszero, probs)
return L - O
end
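The counting logic of `missing_outcomes` can be sketched without any estimator machinery (a toy sketch of the arithmetic only):

```julia
# Toy sketch of the counting above: the number of missing outcomes is the
# outcome-space size minus the number of outcomes with nonzero probability.
probs = [0.5, 0.5, 0.0, 0.0]           # distribution over a 4-element outcome space
L = length(probs)                      # stands in for total_outcomes(est)
n_missing = L - count(!iszero, probs)  # 2 outcomes were never observed
```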

"""
outcomes(x, est::ProbabilitiesEstimator)
outcomes(est::ProbabilitiesEstimator, x)
Return all (unique) outcomes contained in `x` according to the given estimator.
Equivalent to `probabilities_and_outcomes(est, x)[2]`, but for some estimators
it may be explicitly extended for better performance.
"""
function outcomes(x, est::ProbabilitiesEstimator)
function outcomes(est::ProbabilitiesEstimator, x)
return probabilities_and_outcomes(est, x)[2]
end
9 changes: 6 additions & 3 deletions src/probabilities_estimators/counting/count_occurences.jl
@@ -1,16 +1,19 @@
export CountOccurrences

"""
CountOccurrences()
CountOccurrences(x)

A probabilities/entropy estimator based on straightforward counting of distinct elements in
a univariate time series or multivariate dataset. This is the same as giving no
estimator to [`probabilities`](@ref).

## Outcome space
The outcome space is the unique sorted values of the input.
Hence, input `x` is needed for a well-defined outcome space.
"""
struct CountOccurrences <: ProbabilitiesEstimator end
struct CountOccurrences{X} <: ProbabilitiesEstimator
x::X
end

function probabilities_and_outcomes(::CountOccurrences, x::Array_or_Dataset)
z = copy(x)
@@ -19,7 +22,7 @@ function probabilities_and_outcomes(::CountOccurrences, x::Array_or_Dataset)
return probs, unique!(z)
end

outcome_space(x, ::CountOccurrences) = sort!(unique(x))
outcome_space(est::CountOccurrences) = sort!(unique(est.x))
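The counting idea behind `CountOccurrences` can be sketched in a few lines of Base Julia (a toy sketch, not the package code; `count_probabilities` is a name invented here):

```julia
# Toy sketch of counting-based probabilities: relative frequencies of the
# distinct elements of `x`, with outcomes returned in sorted order.
function count_probabilities(x)
    outs = sort!(unique(x))
    counts = [count(==(o), x) for o in outs]
    return counts ./ length(x), outs
end

probs, outs = count_probabilities([1, 2, 2, 3])
# probs == [0.25, 0.5, 0.25], outs == [1, 2, 3]
```

The package version avoids the quadratic scan sketched here, but the input/output contract is the same.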

probabilities(::CountOccurrences, x::Array_or_Dataset) = probabilities(x)
function probabilities(x::Array_or_Dataset)
4 changes: 2 additions & 2 deletions src/probabilities_estimators/dispersion/dispersion.jl
@@ -118,11 +118,11 @@ function probabilities_and_outcomes(est::Dispersion, x::AbstractVector{<:Real})
return Probabilities(hist), dispersion_patterns
end

total_outcomes(est::Dispersion)::Int = est.encoding.c ^ est.m

function outcome_space(est::Dispersion)
c, m = 1:est.encoding.c, est.m
cart = CartesianIndices(ntuple(i -> c, m))
V = SVector{m, Int}
return map(i -> V(Tuple(i)), vec(cart))
end
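The `CartesianIndices` construction above can be reproduced with plain tuples (a Base-only sketch; the package returns `SVector`s from StaticArrays instead):

```julia
# Base-only sketch of the outcome-space construction above: all length-m
# patterns over the symbols 1:c, of which there are c^m in total.
c, m = 3, 2
cart = CartesianIndices(ntuple(_ -> 1:c, m))
patterns = map(Tuple, vec(cart))
length(patterns)  # 9 == c^m
```

This makes the performance extension below immediate: `total_outcomes` is `c^m` without materializing the patterns.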
# Performance extension
total_outcomes(est::Dispersion)::Int = total_outcomes(est.encoding) ^ est.m