From 5122bb8ff57516723d934547c5bc8385b1356f33 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 9 Nov 2022 12:56:08 +0100 Subject: [PATCH 01/13] update gitignore --- .DS_Store | Bin 0 -> 6148 bytes docs/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 .DS_Store create mode 100644 docs/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..9ef012718a177fe72f49d979233573ba4a677278 GIT binary patch literal 6148 zcmeHK!AiqG5S?wKO({YT3VK`cS~M++h?fxS4;aydN^MNhV9ZLB+CwSiu0Q0D_&v_- zZi`YqcoESVn0b@gnS^}_I~f2F!6rGp7()M;bExFg(olfiS+UEAbaqlj@&(xElSKwEuWyRtQp3#|a z=rJ5-smg9JHp`#oF){br&)q(q($FiX%JUXgegTdrNWLF!jxlN+C1N4Y0#8|u)~M2PZoAU5&G$Pf2q?! z_y*ZB1I)lj259y}X;S|``g;D?No<$_X5eo!Ae?U44RCL^w=V3GdaXpgLM5TR(%`%V j9o>pCms;^Usuqk(au9usr9q6K@Q;9|fekb8qYS(OFFj6; literal 0 HcmV?d00001 diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..eee7acbe18dc45cfdb9215b6eae1d5a21bf6bdac GIT binary patch literal 6148 zcmeHK%}T>S5T5OiO%b671-&hJE!bMC7B8Xd3mDOZN^MNhV9ZL>*h4Agt}o<^_&m<+ zZl&6K5>#{sX20FpnS}YW>|_8yw9}vtPyql3l`vPvVUEx~>5Sy8rGO~(GXj5@-20L8 zXMEZ4I~kyFR|FqY2qA*$@AvHAM`1Fk)m}s{Unnk=oRYKXTzEHX=ncAqG--GHXS6z2 zD)J}uu748thMmgtzKRFkFz)qrKp6DU<@_{^12t@`G!7CS>zNIw?36o|wb7{FYF6cD zqdBh1QN31cRpr)pYdkJHE9)D3N1fZ~E>@3*Nr6XB%aX+jJmF+2r3bGcM=HKXD@EiX zfj(SK9n>F5bhFv0%KFakR};1Ad`NOa1tT-S3@`)p8L$_qvp9d7t;_&3@Q)dw`$3`- zx)w8o`s%t27)Jsr|wCGyQ4B`%oFrkPhRM-|nm~ga9>*rd`44QBdw)qhD z&BAslLcblKFSR=e*C3C~05kBOfxKB(sQw>*-v7T3;uSN%4E!kuMB%_YXk$vYw$3C+ vwN|2DqLPqbX7DWpC%P1)FO}j|R4r(iG(mJNW(LuM!ao9<1|FD!Z)M;W+cSo3 literal 0 HcmV?d00001 From 5afaee4f9a3134db85ea5c6ef402134d3f701e54 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 9 Nov 2022 12:56:47 +0100 Subject: [PATCH 02/13] Unified signature for `entropy` --- .../new_entropy_template.md | 2 +- docs/src/index.md | 6 +- src/entropies/convenience_definitions.jl | 18 +- src/entropies/entropies.jl | 2 +- .../estimators.jl} | 0 .../nearest_neighbors/KozachenkoLeonenko.jl | 14 +- .../nearest_neighbors/Kraskov.jl | 21 ++- .../nearest_neighbors/Zhu.jl | 16 +- .../nearest_neighbors/ZhuSingh.jl | 12 +- .../nearest_neighbors/nearest_neighbors.jl | 0 .../order_statistics/AlizadehArghami.jl | 15 +- .../order_statistics/Correa.jl | 10 +- .../order_statistics/Ebrahimi.jl | 11 +- .../order_statistics/Vasicek.jl | 10 +- .../order_statistics/order_statistics.jl | 0 src/entropy.jl | 170 +++++++++++------- .../{ => counting}/count_occurences.jl | 0 .../{ => kernel}/kernel_density.jl | 0 .../spatial_permutation.jl | 2 +- .../probabilities_estimators.jl | 4 +- test/entropies/convenience.jl | 12 +- test/entropies/estimators/alizadeharghami.jl | 15 ++ test/entropies/estimators/correa.jl | 15 ++ test/entropies/estimators/ebrahimi.jl | 15 ++ test/entropies/estimators/estimators.jl | 8 + .../estimators/kozachenkoleonenko.jl | 26 +++ test/entropies/estimators/kraskov.jl | 27 +++ test/entropies/estimators/vasicek.jl | 16 ++ test/entropies/estimators/zhu.jl | 19 ++ test/entropies/estimators/zhusingh.jl | 62 +++++++ test/entropies/nearest_neighbors_direct.jl | 155 ---------------- test/entropies/shannon.jl | 29 --- test/entropies/stretched_exponential.jl | 2 +- .../count_occurrences.jl | 4 +- .../dispersion.jl | 4 +- .../diversity.jl | 2 +- .../naive_kernel.jl | 4 +- .../permutation.jl | 12 +- .../permutation_amplitude_aware.jl | 4 +- .../permutation_spatial.jl | 12 +- .../permutation_weighted.jl | 4 +- .../timescales.jl | 4 +- 
.../transfer_operator.jl | 0 .../visitation_frequency.jl | 0 test/runtests.jl | 25 +-- 45 files changed, 431 insertions(+), 358 deletions(-) rename src/entropies/{direct_entropies/direct_entropies.jl => estimators/estimators.jl} (100%) rename src/entropies/{direct_entropies => estimators}/nearest_neighbors/KozachenkoLeonenko.jl (72%) rename src/entropies/{direct_entropies => estimators}/nearest_neighbors/Kraskov.jl (55%) rename src/entropies/{direct_entropies => estimators}/nearest_neighbors/Zhu.jl (85%) rename src/entropies/{direct_entropies => estimators}/nearest_neighbors/ZhuSingh.jl (90%) rename src/entropies/{direct_entropies => estimators}/nearest_neighbors/nearest_neighbors.jl (100%) rename src/entropies/{direct_entropies => estimators}/order_statistics/AlizadehArghami.jl (66%) rename src/entropies/{direct_entropies => estimators}/order_statistics/Correa.jl (84%) rename src/entropies/{direct_entropies => estimators}/order_statistics/Ebrahimi.jl (84%) rename src/entropies/{direct_entropies => estimators}/order_statistics/Vasicek.jl (83%) rename src/entropies/{direct_entropies => estimators}/order_statistics/order_statistics.jl (100%) rename src/probabilities_estimators/{ => counting}/count_occurences.jl (100%) rename src/probabilities_estimators/{ => kernel}/kernel_density.jl (100%) create mode 100644 test/entropies/estimators/alizadeharghami.jl create mode 100644 test/entropies/estimators/correa.jl create mode 100644 test/entropies/estimators/ebrahimi.jl create mode 100644 test/entropies/estimators/estimators.jl create mode 100644 test/entropies/estimators/kozachenkoleonenko.jl create mode 100644 test/entropies/estimators/kraskov.jl create mode 100644 test/entropies/estimators/vasicek.jl create mode 100644 test/entropies/estimators/zhu.jl create mode 100644 test/entropies/estimators/zhusingh.jl delete mode 100644 test/entropies/nearest_neighbors_direct.jl rename test/{estimators => probabilities}/count_occurrences.jl (89%) rename test/{estimators => probabilities}/dispersion.jl (96%) rename test/{estimators => probabilities}/diversity.jl (93%) rename test/{estimators => probabilities}/naive_kernel.jl (86%) rename test/{estimators => probabilities}/permutation.jl (85%) rename test/{estimators => probabilities}/permutation_amplitude_aware.jl (96%) rename test/{estimators => probabilities}/permutation_spatial.jl (89%) rename test/{estimators => probabilities}/permutation_weighted.jl (95%) rename test/{estimators => probabilities}/timescales.jl (88%) rename test/{estimators => probabilities}/transfer_operator.jl (100%) rename test/{estimators => probabilities}/visitation_frequency.jl (100%) diff --git a/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md b/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md index aa9d4611c..20d3d6c4d 100644 --- a/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md +++ b/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md @@ -13,7 +13,7 @@ project [devdocs](https://juliadynamics.github.io/Entropies.jl/dev/devdocs/). Ticking the boxes below will help us provide good feedback and speed up the review process. Partial PRs are welcome too, and we're happy to help if you're stuck on something. -- [ ] The new entropy subtypes `Entropy` or `IndirectEntropy`. +- [ ] The new entropy (estimator) subtypes `Entropy` (`EntropyEstimator`). - [ ] The new entropy has an informative docstring, which is referenced in `docs/src/entropies.md`. - [ ] Relevant sources are cited in the docstring. 
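The commits below unify the `entropy` call signature: the entropy type comes first, then the estimator, then the input data. A minimal usage sketch, assuming the API as modified by this patch (the calls mirror the docstrings and tests changed further down); it is an illustration only and not part of the diff:

```julia
using Entropies

x = randn(10_000)

# Entropy via a ProbabilitiesEstimator: entropy type first, then estimator, then data.
h_perm = entropy(Shannon(base = 2), SymbolicPermutation(m = 3), x)

# Entropy via a dedicated EntropyEstimator, which never constructs an explicit
# probability distribution.
h_kraskov = entropy(Shannon(), Kraskov(k = 3), x)

# The entropy type is optional; `Kraskov` defaults to `Shannon()`.
h_default = entropy(Kraskov(k = 3), x)
```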
diff --git a/docs/src/index.md b/docs/src/index.md index d7a945deb..501fb1ba6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -40,7 +40,11 @@ Thus, any of the implemented [probabilities estimators](@ref probabilities_estim These names are common place, and so in Entropies.jl we provide convenience functions like [`entropy_wavelet`](@ref). However, it should be noted that these functions really aren't anything more than 2-lines-of-code wrappers that call [`entropy`](@ref) with the appropriate [`ProbabilitiesEstimator`](@ref). - There are only a few exceptions to this rule, which are quantities that are able to compute Shannon entropies via alternate means, without explicitly computing some probability distributions. These are `IndirectEntropy` instances, such as [`Kraskov`](@ref). + In addition to `ProbabilitiesEstimators`, we also provide [`EntropyEstimator`](@ref)s, + which compute entropies via alternate means, without explicitly computing some + probability distribution. For example, the [`Kraskov`](@ref) estimator computes Shannon + entropy via a nearest neighbor algorithm, while the [`Vasicek`](@ref) estimator computes + Shannon entropy using order statistics. ### Other complexity measures diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl index d114454a7..415150139 100644 --- a/src/entropies/convenience_definitions.jl +++ b/src/entropies/convenience_definitions.jl @@ -23,11 +23,11 @@ for the weighted/amplitude-aware versions. """ function entropy_permutation(x; base = 2, kwargs...) est = SymbolicPermutation(; kwargs...) - entropy(Shannon(; base), x, est) + entropy(Shannon(; base), est, x) end """ - entropy_spatial_permutation(x, stencil, periodic = true; kwargs...) + entropy_spatial_permutation(x, stencil; periodic = true, kwargs...) Compute the spatial permutation entropy of `x` given the `stencil`. Here `x` must be a matrix or higher dimensional `Array` containing spatial data. This function is just a convenience call to: ```julia est = SpatialSymbolicPermutation(stencil, x, periodic) -entropy(Renyi(;kwargs...), x, est) +entropy(Renyi(;kwargs...), est, x) ``` See [`SpatialSymbolicPermutation`](@ref) for more info, or how to encode stencils. """ -function entropy_spatial_permutation(x, stencil, periodic = true; kwargs...) +function entropy_spatial_permutation(x, stencil; periodic = true, kwargs...) est = SpatialSymbolicPermutation(stencil, x, periodic) - entropy(Renyi(;kwargs...), x, est) + entropy(Renyi(;kwargs...), est, x) end """ Compute the wavelet entropy. This function is just a convenience call to: ```julia est = WaveletOverlap(wavelet) -entropy(Shannon(base), x, est) +entropy(Shannon(base), est, x) ``` See [`WaveletOverlap`](@ref) for more info. """ function entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = 2) est = WaveletOverlap(wavelet) - entropy(Shannon(; base), x, est) + entropy(Shannon(; base), est, x) end """ Compute the dispersion entropy. This function is just a convenience call to: ```julia est = Dispersion(kwargs...) -entropy(Shannon(base), x, est) +entropy(Shannon(base), est, x) ``` See [`Dispersion`](@ref) for more info. """ function entropy_dispersion(x; base = 2, kwargs...) est = Dispersion(kwargs...)
- entropy(Shannon(; base), x, est) + entropy(Shannon(; base), est, x) end diff --git a/src/entropies/entropies.jl b/src/entropies/entropies.jl index 31dde2a4d..967c0e3e8 100644 --- a/src/entropies/entropies.jl +++ b/src/entropies/entropies.jl @@ -3,4 +3,4 @@ include("tsallis.jl") include("curado.jl") include("streched_exponential.jl") include("convenience_definitions.jl") -include("direct_entropies/direct_entropies.jl") +include("estimators/estimators.jl") diff --git a/src/entropies/direct_entropies/direct_entropies.jl b/src/entropies/estimators/estimators.jl similarity index 100% rename from src/entropies/direct_entropies/direct_entropies.jl rename to src/entropies/estimators/estimators.jl diff --git a/src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl b/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl similarity index 72% rename from src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl rename to src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl index d030db30e..6d18229b9 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl +++ b/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl @@ -1,13 +1,13 @@ export KozachenkoLeonenko """ - KozachenkoLeonenko <: IndirectEntropy + KozachenkoLeonenko <: EntropyEstimator KozachenkoLeonenko(; k::Int = 1, w::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(KozachenkoLeonenko(), x)` to -estimate the Shannon entropy of `x` (a multi-dimensional `Dataset`) to the given -`base` using nearest neighbor searches using the method from Kozachenko & -Leonenko[^KozachenkoLeonenko1987], as described in Charzyńska and Gambin[^Charzyńska2016]. +The `KozachenkoLeonenko` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base`, based on nearest neighbor searches +using the method from Kozachenko & Leonenko (1987)[^KozachenkoLeonenko1987], as described in +Charzyńska and Gambin[^Charzyńska2016]. `w` is the Theiler window, which determines if temporal neighbors are excluded during neighbor searches (defaults to `0`, meaning that only the point itself is excluded @@ -15,12 +15,14 @@ when searching for neighbours). In contrast to [`Kraskov`](@ref), this estimator uses only the *closest* neighbor. +See also: [`entropy`](@ref). + [^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. [^KozachenkoLeonenko1987]: Kozachenko, L. F., & Leonenko, N. N. (1987). Sample estimate of the entropy of a random vector. Problemy Peredachi Informatsii, 23(2), 9-16. 
""" -@Base.kwdef struct KozachenkoLeonenko{B} <: IndirectEntropy +@Base.kwdef struct KozachenkoLeonenko{B} <: EntropyEstimator w::Int = 1 base::B = 2 end diff --git a/src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl b/src/entropies/estimators/nearest_neighbors/Kraskov.jl similarity index 55% rename from src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl rename to src/entropies/estimators/nearest_neighbors/Kraskov.jl index eaa961222..693c5dd05 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl +++ b/src/entropies/estimators/nearest_neighbors/Kraskov.jl @@ -1,31 +1,34 @@ export Kraskov """ - Kraskov <: IndirectEntropy + Kraskov <: EntropyEstimator Kraskov(; k::Int = 1, w::Int = 1, base = 2) -An indirect entropy used in [`entropy`](@ref)`(Kraskov(), x)` to estimate the Shannon -entropy of `x` (a multi-dimensional `Dataset`) to the given -`base` using `k`-th nearest neighbor searches as in [^Kraskov2004]. +The `Kraskov` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base`, using the `k`-th nearest neighbor +searches method from [^Kraskov2004]. `w` is the Theiler window, which determines if temporal neighbors are excluded during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). -See also: [`KozachenkoLeonenko`](@ref). +See also: [`entropy`](@ref), [`KozachenkoLeonenko`](@ref). [^Kraskov2004]: Kraskov, A., Stögbauer, H., & Grassberger, P. (2004). Estimating mutual information. Physical review E, 69(6), 066138. """ -Base.@kwdef struct Kraskov{B} <: IndirectEntropy +Base.@kwdef struct Kraskov{B} <: EntropyEstimator k::Int = 1 w::Int = 1 base::B = 2 end -function entropy(e::Kraskov, x::AbstractDataset{D, T}) where {D, T} - (; k, w, base) = e +function entropy(e::Renyi, est::Kraskov, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $typeof(est) estimators" + )) + (; k, w, base) = est N = length(x) ρs = maximum_neighbor_distances(x, w, k) # The estimated entropy has "unit" [nats] @@ -34,3 +37,5 @@ function entropy(e::Kraskov, x::AbstractDataset{D, T}) where {D, T} D/N*sum(log.(MathConstants.e, ρs)) return h / log(base, MathConstants.e) # Convert to target unit end + +entropy(est::Kraskov, x) = entropy(Shannon(), est, x) diff --git a/src/entropies/direct_entropies/nearest_neighbors/Zhu.jl b/src/entropies/estimators/nearest_neighbors/Zhu.jl similarity index 85% rename from src/entropies/direct_entropies/nearest_neighbors/Zhu.jl rename to src/entropies/estimators/nearest_neighbors/Zhu.jl index 1f004236e..abf74a005 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/Zhu.jl +++ b/src/entropies/estimators/nearest_neighbors/Zhu.jl @@ -1,26 +1,28 @@ export Zhu """ - Zhu <: IndirectEntropy + Zhu <: EntropyEstimator Zhu(k = 1, w = 0, base = MathConstants.e) -The `Zhu` indirect entropy estimator (Zhu et al., 2015)[^Zhu2015] estimates the Shannon -entropy of `x` (a multi-dimensional `Dataset`) to the given `base`, by approximating -probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using +The `Zhu` estimator (Zhu et al., 2015)[^Zhu2015] computes the [`Shannon`](@ref) +[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`) to the given `base`, by +approximating probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using using `k` nearest neighbor searches. -This estimator is an extension to [`KozachenkoLeonenko`](@ref). 
- `w` is the Theiler window, which determines if temporal neighbors are excluded during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). +This estimator is an extension to [`KozachenkoLeonenko`](@ref). + +See also: [`entropy`](@ref). + [^Zhu2015]: Zhu, J., Bellanger, J. J., Shu, H., & Le Bouquin Jeannès, R. (2015). Contribution to transfer entropy estimation via the k-nearest-neighbors approach. Entropy, 17(6), 4173-4201. """ -Base.@kwdef struct Zhu{B} <: IndirectEntropy +Base.@kwdef struct Zhu{B} <: EntropyEstimator k::Int = 1 w::Int = 0 base::B = MathConstants.e diff --git a/src/entropies/direct_entropies/nearest_neighbors/ZhuSingh.jl b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl similarity index 90% rename from src/entropies/direct_entropies/nearest_neighbors/ZhuSingh.jl rename to src/entropies/estimators/nearest_neighbors/ZhuSingh.jl index 88cb02b1c..730ec0f3a 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/ZhuSingh.jl +++ b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl @@ -1,16 +1,16 @@ using DelayEmbeddings: minmaxima using SpecialFunctions: digamma -using Entropies: Entropy, IndirectEntropy +using Entropies: Entropy, EntropyEstimator using Neighborhood: KDTree, Chebyshev, bulkisearch, Theiler, NeighborNumber export ZhuSingh """ - ZhuSingh <: IndirectEntropy + ZhuSingh <: EntropyEstimator ZhuSingh(k = 1, w = 0, base = MathConstants.e) -The `ZhuSingh` indirect entropy estimator (Zhu et al., 2015)[^Zhu2015] estimates the Shannon -entropy of `x` (a multi-dimensional `Dataset`) to the given `base`. +The `ZhuSingh` estimator (Zhu et al., 2015)[^Zhu2015] computes the [`Shannon`](@ref) +[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`) to the given `base`. Like [`Zhu`](@ref), this estimator approximates probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using using `k` nearest neighbor searches. However, @@ -21,6 +21,8 @@ This estimator is an extension to the entropy estimator in Singh et al. (2003). during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). +See also: [`entropy`](@ref). + [^Zhu2015]: Zhu, J., Bellanger, J. J., Shu, H., & Le Bouquin Jeannès, R. (2015). Contribution to transfer entropy estimation via the k-nearest-neighbors approach. Entropy, 17(6), @@ -30,7 +32,7 @@ when searching for neighbours). neighbor estimates of entropy. American journal of mathematical and management sciences, 23(3-4), 301-321. 
""" -Base.@kwdef struct ZhuSingh{B} <: IndirectEntropy +Base.@kwdef struct ZhuSingh{B} <: EntropyEstimator k::Int = 1 w::Int = 0 base::B = MathConstants.e diff --git a/src/entropies/direct_entropies/nearest_neighbors/nearest_neighbors.jl b/src/entropies/estimators/nearest_neighbors/nearest_neighbors.jl similarity index 100% rename from src/entropies/direct_entropies/nearest_neighbors/nearest_neighbors.jl rename to src/entropies/estimators/nearest_neighbors/nearest_neighbors.jl diff --git a/src/entropies/direct_entropies/order_statistics/AlizadehArghami.jl b/src/entropies/estimators/order_statistics/AlizadehArghami.jl similarity index 66% rename from src/entropies/direct_entropies/order_statistics/AlizadehArghami.jl rename to src/entropies/estimators/order_statistics/AlizadehArghami.jl index db14ee022..df3ff2a7e 100644 --- a/src/entropies/direct_entropies/order_statistics/AlizadehArghami.jl +++ b/src/entropies/estimators/order_statistics/AlizadehArghami.jl @@ -1,16 +1,17 @@ export AlizadehArghami """ -AlizadehArghami <: IndirectEntropy + AlizadehArghami <: EntropyEstimator AlizadehArghami(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Alizadeh(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Alizadeh & Arghami (2010)[^Alizadeh2010]. +The `AlizadehArghami`estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from Alizadeh & Arghami +(2010)[^Alizadeh2010]. ## Description -The Alizadeh entropy estimator first computes the order statistics +The estimator first computes the +[order statistics](https://en.wikipedia.org/wiki/Order_statistic) ``X_{(1)} \\leq X_{(2)} \\leq \\cdots \\leq X_{(n)}`` for a random sample of length ``n``, i.e. the input timeseries. The entropy for the length-`n` sample is then estimated as the [`Vasicek`](@ref) entropy estimate, plus a correction factor @@ -19,11 +20,13 @@ the [`Vasicek`](@ref) entropy estimate, plus a correction factor H_{A}(m, n) = H_{V}(m, n) + \\dfrac{2}{n}\\left(m \\log_{base}(2) \\right). ``` +See also: [`entropy`](@ref). + [^Alizadeh2010]: Alizadeh, N. H., & Arghami, N. R. (2010). A new estimator of entropy. Journal of the Iranian Statistical Society (JIRSS). """ -@Base.kwdef struct AlizadehArghami{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct AlizadehArghami{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end diff --git a/src/entropies/direct_entropies/order_statistics/Correa.jl b/src/entropies/estimators/order_statistics/Correa.jl similarity index 84% rename from src/entropies/direct_entropies/order_statistics/Correa.jl rename to src/entropies/estimators/order_statistics/Correa.jl index a60908dd3..45c0c9dbf 100644 --- a/src/entropies/direct_entropies/order_statistics/Correa.jl +++ b/src/entropies/estimators/order_statistics/Correa.jl @@ -1,12 +1,12 @@ export Correa """ - Correa <: IndirectEntropy + Correa <: EntropyEstimator Correa(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Correa(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Correa (1995)[^Correa1995]. +The `Correa` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from +Correa (1995)[^Correa1995]. ## Description @@ -29,7 +29,7 @@ where Correa, J. C. (1995). A new estimator of entropy. 
Communications in Statistics-Theory and Methods, 24(10), 2439-2449. """ -@Base.kwdef struct Correa{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct Correa{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end diff --git a/src/entropies/direct_entropies/order_statistics/Ebrahimi.jl b/src/entropies/estimators/order_statistics/Ebrahimi.jl similarity index 84% rename from src/entropies/direct_entropies/order_statistics/Ebrahimi.jl rename to src/entropies/estimators/order_statistics/Ebrahimi.jl index bd552ca14..de944fb9c 100644 --- a/src/entropies/direct_entropies/order_statistics/Ebrahimi.jl +++ b/src/entropies/estimators/order_statistics/Ebrahimi.jl @@ -1,12 +1,13 @@ export Ebrahimi """ - Ebrahimi <: IndirectEntropy + Ebrahimi <: EntropyEstimator Ebrahimi(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Ebrahimi(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Ebrahimi (1994)[^Ebrahimi1994]. +The `Ebrahimi` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from +Ebrahimi (1994)[^Ebrahimi1994]. + ## Description @@ -34,7 +35,7 @@ c_i = Ebrahimi, N., Pflughoeft, K., & Soofi, E. S. (1994). Two measures of sample entropy. Statistics & Probability Letters, 20(3), 225-234. """ -@Base.kwdef struct Ebrahimi{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct Ebrahimi{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end diff --git a/src/entropies/direct_entropies/order_statistics/Vasicek.jl b/src/entropies/estimators/order_statistics/Vasicek.jl similarity index 83% rename from src/entropies/direct_entropies/order_statistics/Vasicek.jl rename to src/entropies/estimators/order_statistics/Vasicek.jl index 7a4454f19..fffcc9c73 100644 --- a/src/entropies/direct_entropies/order_statistics/Vasicek.jl +++ b/src/entropies/estimators/order_statistics/Vasicek.jl @@ -1,12 +1,12 @@ export Vasicek """ - Vasicek <: IndirectEntropy + Vasicek <: EntropyEstimator Vasicek(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Vasicek(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Vasicek (1976)[^Vasicek1976]. +The `Vasicek` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from +Vasicek (1976)[^Vasicek1976]. ## Description @@ -31,7 +31,7 @@ written for this package). Vasicek, O. (1976). A test for normality based on sample entropy. Journal of the Royal Statistical Society: Series B (Methodological), 38(1), 54-59. """ -@Base.kwdef struct Vasicek{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct Vasicek{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end diff --git a/src/entropies/direct_entropies/order_statistics/order_statistics.jl b/src/entropies/estimators/order_statistics/order_statistics.jl similarity index 100% rename from src/entropies/direct_entropies/order_statistics/order_statistics.jl rename to src/entropies/estimators/order_statistics/order_statistics.jl diff --git a/src/entropy.jl b/src/entropy.jl index fc3fbb0bb..fc41b59ab 100644 --- a/src/entropy.jl +++ b/src/entropy.jl @@ -1,67 +1,111 @@ -export Entropy, IndirectEntropy +export Entropy, EntropyEstimator export entropy, entropy_maximum, entropy_normalized, entropy! abstract type AbstractEntropy end -"Supertype of direct entropies. See [`entropy`](@ref)." 
+""" + Entropy + +`Entropy` is the supertype of all (generalized) entropies, and currently implemented +entropy types are: + +- [`Renyi`](@ref). +- [`Tsallis`](@ref). +- [`Shannon`](@ref), which is a subcase of the above two in the limit `q → 1`. +- [`Curado`](@ref). +- [`StretchedExponential`](@ref). + +These entropy types are given as inputs to [`entropy`](@ref) and [`entropy_normalized`]. + +## Description + +Mathematically speaking, generalized entropies are just nonnegative functions of +probability distributions that verify certain (entropy-type-dependent) axioms. +Amigó et al., 2018's +[summary paper](https://www.mdpi.com/1099-4300/20/11/813) gives a nice overview. + +[Amigó2018]: + Amigó, J. M., Balogh, S. G., & Hernández, S. (2018). A brief review of + generalized entropies. Entropy, 20(11), 813. +""" abstract type Entropy <: AbstractEntropy end -"Supertype of inderect entropies. See [`entropy`](@ref)." -abstract type IndirectEntropy <: AbstractEntropy end +""" + EntropyEstimator + +The supertype of all entropy estimators. + +These estimators compute some [`Entropy`](@ref) in various ways that doesn't involve +explicitly estimating a probability distribution. Currently implemented estimators are: + +- [`KozachenkoLeonenko`](@ref) +- [`Kraskov`](@ref) +- [`Zhu`](@ref) +- [`ZhuSingh`](@ref) +- [`Vasicek`](@ref) +- [`Ebrahimi`](@ref) +- [`Correa`](@ref) +- [`AlizadehArghami`](@ref) + +For example, [`entropy`](@ref)`(Shannon(), Kraskov(), x)` computes the Shannon entropy +of the input data `x` using the [`Kraskov`](@ref) `k`-th nearest neighbor estimator. +""" +abstract type EntropyEstimator <: AbstractEntropy end ########################################################################################### -# Direct API +# API: entropy from probabilities ########################################################################################### # Notice that StatsBase.jl exports `entropy` and Wavelets.jl exports `Entropy`. """ - entropy([e::Entropy,] x, est::ProbabilitiesEstimator) → h::Real - entropy([e::Entropy,] probs::Probabilities) → h::Real + entropy([e::Entropy,] probs::Probabilities) → h::Real ∈ [0, ∞) + entropy([e::Entropy,] est::ProbabilitiesEstimator, x) → h::Real ∈ [0, ∞) + entropy([e::Entropy,] est::EntropyEstimator, x) → h::Real ∈ [0, ∞) -Compute a (generalized) entropy `h` from `x` according to the specified -entropy type `e` and the given probability estimator `est`. -Alternatively compute the entropy directly from the existing probabilities `probs`. -In fact, the first method is a 2-lines-of-code wrapper that calls [`probabilities`](@ref) -and gives the result to the second method. +Compute `h`, a (generalized) [`Entropy`](@ref) of type `e`, in one of three ways: -`x` is typically an `Array` or a `Dataset`, see [Input data for Entropies.jl](@ref). +1. Directly from existing [`Probabilities`](@ref) `probs`. +2. From input data `x`, by first estimating a probability distribution using the provided + [`ProbabilitiesEstimator`](@ref), then computing entropy from that distribution. + In fact, the second method is just a 2-lines-of-code wrapper that calls + [`probabilities`](@ref) and gives the result to the first method. +3. From input data `x`, by using a dedicated [`EntropyEstimator`](@ref) that computes + entropy in a way that doesn't involve explicitly computing probabilities first. -The entropy types that support this interface are "direct" entropies. They always -yield an entropy value given a probability distribution. 
-Such entropies are theoretically well-founded and are typically called "generalized -entropies". Currently implemented types are: +The entropy (first argument) is optional. When `est` is a probability estimator, +`Shannon()` is used by default. When `est` is a dedicated entropy estimator, +the default entropy type is inferred from the estimator (e.g. [`Kraskov`](@ref) +estimates the [`Shannon`](@ref) entropy). -- [`Renyi`](@ref). -- [`Tsallis`](@ref). -- [`Shannon`](@ref), which is a subcase of the above two in the limit `q → 1`. -- [`Curado`](@ref). -- [`StretchedExponential`](@ref). +## Input data -The entropy (first argument) is optional: if not given, `Shannon()` is used instead. +`x` is typically an `Array` or a `Dataset`, see [Input data for Entropies.jl](@ref). + +## Maximum entropy and normalized entropy -These entropies also have a well defined maximum value for a given probability estimator. +All entropies `e` have a well defined maximum value for a given probability estimator. To obtain this value one only needs to call the [`entropy_maximum`](@ref) function with the chosen entropy type and probability estimator. Or, one can use [`entropy_normalized`](@ref) to obtain the normalized form of the entropy (divided by the maximum). ## Examples + ```julia x = [rand(Bool) for _ in 1:10000] # coin toss ps = probabilities(x) # gives about [0.5, 0.5] by definition h = entropy(ps) # gives 1, about 1 bit by definition h = entropy(Shannon(), ps) # syntactically equivalent to above -h = entropy(Shannon(), x, CountOccurrences()) # syntactically equivalent to above -h = entropy(x, SymbolicPermutation(;m=3)) # gives about 2, again by definition +h = entropy(Shannon(), CountOccurrences(), x) # syntactically equivalent to above +h = entropy(SymbolicPermutation(;m=3), x) # gives about 2, again by definition h = entropy(Renyi(2.0), ps) # also gives 1, order `q` doesn't matter for coin toss ``` """ -function entropy(e::Entropy, x, est::ProbabilitiesEstimator) +function entropy(e::Entropy, est::ProbabilitiesEstimator, x) ps = probabilities(x, est) return entropy(e, ps) end + # Convenience -function entropy(x::Array_or_Dataset, est::ProbabilitiesEstimator) - entropy(Shannon(), x, est) -end +entropy(est::ProbabilitiesEstimator, x::Array_or_Dataset) = entropy(Shannon(), est, x) entropy(probs::Probabilities) = entropy(Shannon(), probs) """ @@ -74,19 +118,34 @@ The entropy (second argument) is optional: if not given, `Shannon()` is used ins Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). """ -function entropy!(s::AbstractVector{Int}, e::Entropy, x, est::ProbabilitiesEstimator) +function entropy!(s::AbstractVector{Int}, e::Entropy, est::ProbabilitiesEstimator, x) probs = probabilities!(s, x, est) entropy(e, probs) end -entropy!(s::AbstractVector{Int}, x, est::ProbabilitiesEstimator) = - entropy!(s, Shannon(), x, est) +entropy!(s::AbstractVector{Int}, est::ProbabilitiesEstimator, x) = + entropy!(s, Shannon(), est, x) + +########################################################################################### +# API: entropy from entropy estimators +########################################################################################### +# Dispatch for these functions is implemented in individual estimator files in +# `entropies/estimators/`. 
+function entropy(e::Entropy, est::EntropyEstimator, x) end +entropy(est::EntropyEstimator, ::Probabilities) = + error("Entropy estimators like $(nameof(typeof(est))) are not called with probabilities.") +entropy(e::Entropy, est::EntropyEstimator, ::Probabilities) = + error("Entropy estimators like $(nameof(typeof(est))) are not called with probabilities.") + +# +entropy(e::Entropy, est::EntropyEstimator, x::AbstractVector{<:Real}) = + entropy(e, est, Dataset(x)) ########################################################################################### # Normalize API ########################################################################################### """ - entropy_maximum(e::Entropy, x, est::ProbabilitiesEstimator) → m::Real + entropy_maximum(e::Entropy, est::ProbabilitiesEstimator, x) → m::Real Return the maximum value `m` of the given entropy type based on the given estimator and the given input `x` (whose values are not important, but layout and type are). @@ -96,9 +155,9 @@ when the estimator has a known [`total_outcomes`](@ref). entropy_maximum(e::Entropy, L::Int) → m::Real -Alternatively, compute the maximum entropy from the total outcomes `L` directly. +Alternatively, compute the maximum entropy from the number of total outcomes `L` directly. """ -function entropy_maximum(e::Entropy, x, est::ProbabilitiesEstimator) +function entropy_maximum(e::Entropy, est::ProbabilitiesEstimator, x) L = total_outcomes(x, est) return entropy_maximum(e, L) end @@ -107,46 +166,21 @@ function entropy_maximum(e::Entropy, ::Int) end """ - entropy_normalized([e::Entropy,] x, est::ProbabilitiesEstimator) → h̃ ∈ [0, 1] + entropy_normalized([e::Entropy,] est::ProbabilitiesEstimator, x) → h̃ ∈ [0, 1] -Return the normalized entropy of `x`, i.e., the value of [`entropy`](@ref) divided -by the maximum value for `e`, according to the given probability estimator. +Return h̃, the normalized entropy of `x`, i.e. the value of [`entropy`](@ref) divided +by the maximum value for `e`, according to the given probabilities estimator. If `e` is not given, it defaults to `Shannon()`. Notice that unlike for [`entropy`](@ref), here there is no method `entropy_normalized(e::Entropy, probs::Probabilities)` because there is no way to know the amount of _possible_ events (i.e., the [`total_outcomes`](@ref)) from `probs`. """ -function entropy_normalized(e::Entropy, x, est::ProbabilitiesEstimator) - return entropy(e, x, est)/entropy_maximum(e, x, est) -end -function entropy_normalized(x::Array_or_Dataset, est::ProbabilitiesEstimator) - return entropy_normalized(Shannon(), x, est) -end - -########################################################################################### -# Indirect API -########################################################################################### -""" - entropy(e::IndirectEntropy, x) → h::Real - -Compute the entropy of `x`, here named `h`, according to the specified indirect entropy -estimator `e`. - -In contrast to the "typical" way one obtains entropies in the above methods, indirect -entropy estimators compute Shannon entropies via alternate means, without explicitly -computing probability distributions. The available indirect entropies are: - -- [`Kraskov`](@ref). -- [`KozachenkoLeonenko`](@ref). -- [`Vasicek`](@ref). 
-""" -function entropy(::IndirectEntropy, ::Array_or_Dataset) end -function entropy(e::IndirectEntropy, ::Array_or_Dataset, ::ProbabilitiesEstimator) - error("Indirect entropies like $(nameof(typeof(e))) are not called with probabilities.") +function entropy_normalized(e::Entropy, est::ProbabilitiesEstimator, x) + return entropy(e, est, x) / entropy_maximum(e, est, x) end -function entropy(e::IndirectEntropy, ::Probabilities) - error("Indirect entropies $(nameof(typeof(e))) are not called with probabilities.") +function entropy_normalized(est::ProbabilitiesEstimator, x::Array_or_Dataset) + return entropy_normalized(Shannon(), est, x) end ########################################################################################### diff --git a/src/probabilities_estimators/count_occurences.jl b/src/probabilities_estimators/counting/count_occurences.jl similarity index 100% rename from src/probabilities_estimators/count_occurences.jl rename to src/probabilities_estimators/counting/count_occurences.jl diff --git a/src/probabilities_estimators/kernel_density.jl b/src/probabilities_estimators/kernel/kernel_density.jl similarity index 100% rename from src/probabilities_estimators/kernel_density.jl rename to src/probabilities_estimators/kernel/kernel_density.jl diff --git a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl index 0c955a832..eeaf04557 100644 --- a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl @@ -124,7 +124,7 @@ function stencil_to_offsets(stencil::NTuple{2, NTuple{D, T}}) where {D, T} return stencil, D end -function stencil_to_offsets(stencil::Array{Int, D}) where D +function stencil_to_offsets(stencil::AbstractArray{Int, D}) where D # translate D-dim array into stencil of cartesian indices (of dimension D) stencil = [idx - CartesianIndex(Tuple(ones(Int, D))) for idx in findall(Bool.(stencil))] # subtract first coordinate from everything to get a stencil that contains (0,0) diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl index ae6b06406..def2cd4e5 100644 --- a/src/probabilities_estimators/probabilities_estimators.jl +++ b/src/probabilities_estimators/probabilities_estimators.jl @@ -1,7 +1,7 @@ -include("count_occurences.jl") +include("counting/count_occurences.jl") include("histograms/visitation_frequency.jl") include("permutation_ordinal/symbolic.jl") -include("kernel_density.jl") +include("kernel/kernel_density.jl") include("timescales/timescales.jl") include("transfer_operator/transfer_operator.jl") include("dispersion/dispersion.jl") diff --git a/test/entropies/convenience.jl b/test/entropies/convenience.jl index d6ebb3391..ffa82c28a 100644 --- a/test/entropies/convenience.jl +++ b/test/entropies/convenience.jl @@ -5,11 +5,11 @@ import Statistics x = randn(1000) σ = 0.2*Statistics.std(x) # All of the following are API tests. 
-@test entropy_permutation(x) == entropy(x, SymbolicPermutation()) -@test entropy_wavelet(x) == entropy(x, WaveletOverlap()) -@test entropy_dispersion(x) == entropy(x, Dispersion()) +@test entropy_permutation(x) == entropy(SymbolicPermutation(), x) +@test entropy_wavelet(x) == entropy(WaveletOverlap(), x) +@test entropy_dispersion(x) == entropy(Dispersion(), x) -m = rand(50, 50) +x = rand(50, 50) stencil = CartesianIndex.([(0,1), (1,1), (1,0)]) -est = SpatialSymbolicPermutation(stencil, m) -@test entropy_spatial_permutation(m, stencil) == entropy(m, est) \ No newline at end of file +est = SpatialSymbolicPermutation(stencil, x) +@test entropy_spatial_permutation(x, stencil) == entropy(est, x) diff --git a/test/entropies/estimators/alizadeharghami.jl b/test/entropies/estimators/alizadeharghami.jl new file mode 100644 index 000000000..522c32048 --- /dev/null +++ b/test/entropies/estimators/alizadeharghami.jl @@ -0,0 +1,15 @@ +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. +# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. +N = round(0.5*log(2π) + 0.5, digits = 2) + +ea = AlizadehArghami(m = 100, base = 2) +ea_n = AlizadehArghami(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ea, rand(rng, n)), digits = 2) == U +@test round(entropy(ea_n, randn(rng, n)), digits = 2) == N diff --git a/test/entropies/estimators/correa.jl b/test/entropies/estimators/correa.jl new file mode 100644 index 000000000..291a95436 --- /dev/null +++ b/test/entropies/estimators/correa.jl @@ -0,0 +1,15 @@ +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. +# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. +N = round(0.5*log(2π) + 0.5, digits = 2) + +ec = Correa(m = 100, base = 2) +ec_n = Correa(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ec, rand(rng, n)), digits = 2) == U +@test round(entropy(ec_n, randn(rng, n)), digits = 2) == N diff --git a/test/entropies/estimators/ebrahimi.jl b/test/entropies/estimators/ebrahimi.jl new file mode 100644 index 000000000..d2bcaaffa --- /dev/null +++ b/test/entropies/estimators/ebrahimi.jl @@ -0,0 +1,15 @@ +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. +# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. 
+N = round(0.5*log(2π) + 0.5, digits = 2) + +ee = Ebrahimi(m = 100, base = 2) +ee_n = Ebrahimi(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ee, rand(rng, n)), digits = 2) == U +@test round(entropy(ee_n, randn(rng, n)), digits = 2) == N diff --git a/test/entropies/estimators/estimators.jl b/test/entropies/estimators/estimators.jl new file mode 100644 index 000000000..fadb99481 --- /dev/null +++ b/test/entropies/estimators/estimators.jl @@ -0,0 +1,8 @@ +testfile("kozachenkoleonenko.jl") +testfile("kraskov.jl") +testfile("zhu.jl") +testfile("zhusingh.jl") +testfile("alizadeharghami.jl") +testfile("correa.jl") +testfile("ebrahimi.jl") +testfile("vasicek.jl") diff --git a/test/entropies/estimators/kozachenkoleonenko.jl b/test/entropies/estimators/kozachenkoleonenko.jl new file mode 100644 index 000000000..bece82241 --- /dev/null +++ b/test/entropies/estimators/kozachenkoleonenko.jl @@ -0,0 +1,26 @@ +using DelayEmbeddings: genembed +using DelayEmbeddings: Dataset + +m = 4 +τ = 1 +τs = tuple([τ*i for i = 0:m-1]...) +x = rand(250) +D = genembed(x, τs) +est = KozachenkoLeonenko(w = 1) + +@test entropy(est, D) isa Real + +# Analytical test. +XN = Dataset(randn(100000, 1)); +# For normal distribution with mean 0 and std 1, the entropy is +h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats +h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits + +h_XN_kr_base_e = entropy(KozachenkoLeonenko(base = MathConstants.e), XN) +h_XN_kr_base_2 = entropy(KozachenkoLeonenko(base = 2), XN) +# The KozachenkoLeonenko estimator is not as precise as Kraskov, so check that we're +# within +- 3% of the target value +tol_e = h_XN_base_e * 0.03 +tol_2 = h_XN_base_2 * 0.03 +@test h_XN_base_e - tol_e ≤ h_XN_kr_base_e ≤ h_XN_base_e + tol_e +@test h_XN_base_2 - tol_2 ≤ h_XN_kr_base_2 ≤ h_XN_base_2 + tol_2 diff --git a/test/entropies/estimators/kraskov.jl b/test/entropies/estimators/kraskov.jl new file mode 100644 index 000000000..ce24a2946 --- /dev/null +++ b/test/entropies/estimators/kraskov.jl @@ -0,0 +1,27 @@ +using DelayEmbeddings: genembed +using DelayEmbeddings: Dataset + +m = 4 +τ = 1 +τs = tuple([τ*i for i = 0:m-1]...) +x = rand(250) +D = genembed(x, τs) +est = Kraskov(k = 3, w = 1) +e = Shannon() +er = Renyi(q = 1.5) +@test_throws ArgumentError entropy(er, est, D) + + +@test entropy(est, D) isa Real +@test entropy(e, est, D) isa Real + +# Analytical test. +XN = Dataset(randn(100000, 1)); +# For normal distribution with mean 0 and std 1, the entropy is +h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats +h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits + +h_XN_kr_base_e = entropy(Kraskov(k = 3, base = MathConstants.e), XN) +h_XN_kr_base_2 = entropy(Kraskov(k = 3, base = 2), XN) +@test round(h_XN_base_e, digits = 1) == round(h_XN_kr_base_e, digits = 1) +@test round(h_XN_base_2, digits = 1) == round(h_XN_kr_base_2, digits = 1) diff --git a/test/entropies/estimators/vasicek.jl b/test/entropies/estimators/vasicek.jl new file mode 100644 index 000000000..e88116b42 --- /dev/null +++ b/test/entropies/estimators/vasicek.jl @@ -0,0 +1,16 @@ + +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. 
+# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. +N = round(0.5*log(2π) + 0.5, digits = 2) + +ev = Vasicek(m = 100, base = 2) +ev_n = Vasicek(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ev, rand(rng, n)), digits = 2) == U +@test round(entropy(ev_n, randn(rng, n)), digits = 2) == N diff --git a/test/entropies/estimators/zhu.jl b/test/entropies/estimators/zhu.jl new file mode 100644 index 000000000..5ef85edc6 --- /dev/null +++ b/test/entropies/estimators/zhu.jl @@ -0,0 +1,19 @@ +using DelayEmbeddings: Dataset + +# To ensure minimal rectangle volumes are correct, we also test internals directly here. +# It's not feasible to construct an end-product test due to the neighbor searches. +x = Dataset([[-1, -2], [0, -2], [3, 2]]) +y = Dataset([[3, 1], [-5, 1], [3, -2]]) +@test Entropies.volume_minimal_rect([0, 0], x) == 24 +@test Entropies.volume_minimal_rect([0, 0], y) == 40 + +# Analytical tests for the estimated entropy +DN = Dataset(randn(200000, 1)) +hN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 +hN_base_2 = hN_base_e / log(2, MathConstants.e) + +e_base_e = Zhu(k = 3, base = MathConstants.e) +e_base_2 = Zhu(k = 3, base = 2) + +@test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) +@test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) diff --git a/test/entropies/estimators/zhusingh.jl b/test/entropies/estimators/zhusingh.jl new file mode 100644 index 000000000..6f89ce51e --- /dev/null +++ b/test/entropies/estimators/zhusingh.jl @@ -0,0 +1,62 @@ +using Test, Distributions, LinearAlgebra +using DelayEmbeddings: Dataset +using StaticArrays: @SVector + +# Test internals in addition to end-product, because designing an exact end-product +# test is a mess due to the neighbor searches. If these top-level tests fail, then +# the issue is probability related to these functions. 
+nns = Dataset([[-1, -1], [0, -2], [3, 2.0]]) +x = @SVector [0.0, 0.0] +dists = Entropies.maxdists(x, nns) +vol = Entropies.volume_minimal_rect(dists) +ξ = Entropies.n_borderpoints(x, nns, dists) +@test vol == 24.0 +@test ξ == 2.0 + +nns = Dataset([[3, 1], [3, -2], [-5, 1.0]]) +x = @SVector [0.0, 0.0] +dists = Entropies.maxdists(x, nns) +vol = Entropies.volume_minimal_rect(dists) +ξ = Entropies.n_borderpoints(x, nns, dists) +@test vol == 40.0 +@test ξ == 2.0 + +nns = Dataset([[-3, 1], [3, 1], [5, -2.0]]) +x = @SVector [0.0, 0.0] +dists = Entropies.maxdists(x, nns) +vol = Entropies.volume_minimal_rect(dists) +ξ = Entropies.n_borderpoints(x, nns, dists) +@test vol == 40.0 +@test ξ == 1.0 + +# Analytical tests: 1D normal distribution +DN = Dataset(randn(100000, 1)) +σ = 1.0 +hN_base_e = 0.5 * log(MathConstants.e, 2π * σ^2) + 0.5 +hN_base_2 = hN_base_e / log(2, MathConstants.e) + +e_base_e = ZhuSingh(k = 3, base = MathConstants.e) +e_base_2 = ZhuSingh(k = 3, base = 2) + +@test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) +@test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) + +# Analytical test: 3D normal distribution +σs = ones(3) +μs = zeros(3) +𝒩₂ = MvNormal(μs, Diagonal(σs)) +Σ = diagm(σs) +n = length(μs) +h_𝒩₂_base_ℯ = 0.5n * log(ℯ, 2π) + 0.5*log(ℯ, det(Σ)) + 0.5n +h_𝒩₂_base_2 = h_𝒩₂_base_ℯ / log(2, ℯ) + +sample = Dataset(transpose(rand(𝒩₂, 50000))) +hZS_𝒩₂_base_ℯ = entropy(e_base_e, sample) +hZS_𝒩₂_base_2 = entropy(e_base_2, sample) + +# Estimation accuracy decreases for fixed N with increasing edimension, so exact comparison +# isn't useful. Just check that values are within 1% of the target. +tol_ℯ = hZS_𝒩₂_base_ℯ * 0.01 +tol_2 = hZS_𝒩₂_base_2 * 0.01 +@test h_𝒩₂_base_ℯ - tol_ℯ ≤ hZS_𝒩₂_base_ℯ ≤ h_𝒩₂_base_ℯ + tol_ℯ +@test h_𝒩₂_base_2 - tol_2 ≤ hZS_𝒩₂_base_2 ≤ h_𝒩₂_base_2 + tol_2 diff --git a/test/entropies/nearest_neighbors_direct.jl b/test/entropies/nearest_neighbors_direct.jl deleted file mode 100644 index ab22733ec..000000000 --- a/test/entropies/nearest_neighbors_direct.jl +++ /dev/null @@ -1,155 +0,0 @@ -using Test, StaticArrays, LinearAlgebra, Distributions - -@testset "NN - Kraskov" begin - m = 4 - τ = 1 - τs = tuple([τ*i for i = 0:m-1]...) - x = rand(250) - D = genembed(x, τs) - est = Kraskov(k = 3, w = 1) - @test entropy(est, D) isa Real - - # Analytical test. - XN = Dataset(randn(100000, 1)); - # For normal distribution with mean 0 and std 1, the entropy is - h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats - h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits - - h_XN_kr_base_e = entropy(Kraskov(k = 3, base = MathConstants.e), XN) - h_XN_kr_base_2 = entropy(Kraskov(k = 3, base = 2), XN) - @test round(h_XN_base_e, digits = 1) == round(h_XN_kr_base_e, digits = 1) - @test round(h_XN_base_2, digits = 1) == round(h_XN_kr_base_2, digits = 1) -end - -@testset "NN - KozachenkoLeonenko" begin - m = 4 - τ = 1 - τs = tuple([τ*i for i = 0:m-1]...) - x = rand(250) - D = genembed(x, τs) - est = KozachenkoLeonenko(w = 1) - - @test entropy(est, D) isa Real - - # Analytical test. 
- XN = Dataset(randn(100000, 1)); - # For normal distribution with mean 0 and std 1, the entropy is - h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats - h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits - - h_XN_kr_base_e = entropy(KozachenkoLeonenko(base = MathConstants.e), XN) - h_XN_kr_base_2 = entropy(KozachenkoLeonenko(base = 2), XN) - # The KozachenkoLeonenko estimator is not as precise as Kraskov, so check that we're - # within +- 3% of the target value - tol_e = h_XN_base_e * 0.03 - tol_2 = h_XN_base_2 * 0.03 - @test h_XN_base_e - tol_e ≤ h_XN_kr_base_e ≤ h_XN_base_e + tol_e - @test h_XN_base_2 - tol_2 ≤ h_XN_kr_base_2 ≤ h_XN_base_2 + tol_2 -end - -@testset "NN - Zhu" begin - - # To ensure minimal rectangle volumes are correct, we also test internals directly here. - # It's not feasible to construct an end-product test due to the neighbor searches. - x = Dataset([[-1, -2], [0, -2], [3, 2]]) - y = Dataset([[3, 1], [-5, 1], [3, -2]]) - @test Entropies.volume_minimal_rect([0, 0], x) == 24 - @test Entropies.volume_minimal_rect([0, 0], y) == 40 - - # Analytical tests for the estimated entropy - DN = Dataset(randn(200000, 1)) - hN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 - hN_base_2 = hN_base_e / log(2, MathConstants.e) - - e_base_e = Zhu(k = 3, base = MathConstants.e) - e_base_2 = Zhu(k = 3, base = 2) - - @test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) - @test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) -end - -@testset "NN - Zhu" begin - - # To ensure minimal rectangle volumes are correct, we also test internals directly here. - # It's not feasible to construct an end-product test due to the neighbor searches. - x = Dataset([[-1, -2], [0, -2], [3, 2]]) - y = Dataset([[3, 1], [-5, 1], [3, -2]]) - @test Entropies.volume_minimal_rect([0, 0], x) == 24 - @test Entropies.volume_minimal_rect([0, 0], y) == 40 - - # Analytical tests for the estimated entropy - DN = Dataset(randn(200000, 1)) - hN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 - hN_base_2 = hN_base_e / log(2, MathConstants.e) - - e_base_e = Zhu(k = 3, base = MathConstants.e) - e_base_2 = Zhu(k = 3, base = 2) - - @test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) - @test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) -end - - -@testset "NN - ZhuSingh" begin - - using Test, Distributions, LinearAlgebra - # Test internals in addition to end-product, because designing an exact end-product - # test is a mess due to the neighbor searches. If these top-level tests fail, then - # the issue is probability related to these functions. 
- using Test - nns = Dataset([[-1, -1], [0, -2], [3, 2.0]]) - x = @SVector [0.0, 0.0] - dists = Entropies.maxdists(x, nns) - vol = Entropies.volume_minimal_rect(dists) - ξ = Entropies.n_borderpoints(x, nns, dists) - @test vol == 24.0 - @test ξ == 2.0 - - nns = Dataset([[3, 1], [3, -2], [-5, 1.0]]) - x = @SVector [0.0, 0.0] - dists = Entropies.maxdists(x, nns) - vol = Entropies.volume_minimal_rect(dists) - ξ = Entropies.n_borderpoints(x, nns, dists) - @test vol == 40.0 - @test ξ == 2.0 - - nns = Dataset([[-3, 1], [3, 1], [5, -2.0]]) - x = @SVector [0.0, 0.0] - dists = Entropies.maxdists(x, nns) - vol = Entropies.volume_minimal_rect(dists) - ξ = Entropies.n_borderpoints(x, nns, dists) - @test vol == 40.0 - @test ξ == 1.0 - - # Analytical tests: 1D normal distribution - DN = Dataset(randn(100000, 1)) - σ = 1.0 - hN_base_e = 0.5 * log(MathConstants.e, 2π * σ^2) + 0.5 - hN_base_2 = hN_base_e / log(2, MathConstants.e) - - e_base_e = ZhuSingh(k = 3, base = MathConstants.e) - e_base_2 = ZhuSingh(k = 3, base = 2) - - @test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) - @test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) - - # Analytical test: 3D normal distribution - σs = ones(3) - μs = zeros(3) - 𝒩₂ = MvNormal(μs, Diagonal(σs)) - Σ = diagm(σs) - n = length(μs) - h_𝒩₂_base_ℯ = 0.5n * log(ℯ, 2π) + 0.5*log(ℯ, det(Σ)) + 0.5n - h_𝒩₂_base_2 = h_𝒩₂_base_ℯ / log(2, ℯ) - - sample = Dataset(transpose(rand(𝒩₂, 50000))) - hZS_𝒩₂_base_ℯ = entropy(e_base_e, sample) - hZS_𝒩₂_base_2 = entropy(e_base_2, sample) - - # Estimation accuracy decreases for fixed N with increasing edimension, so exact comparison - # isn't useful. Just check that values are within 1% of the target. - tol_ℯ = hZS_𝒩₂_base_ℯ * 0.01 - tol_2 = hZS_𝒩₂_base_2 * 0.01 - @test h_𝒩₂_base_ℯ - tol_ℯ ≤ hZS_𝒩₂_base_ℯ ≤ h_𝒩₂_base_ℯ + tol_ℯ - @test h_𝒩₂_base_2 - tol_2 ≤ hZS_𝒩₂_base_2 ≤ h_𝒩₂_base_2 + tol_2 -end diff --git a/test/entropies/shannon.jl b/test/entropies/shannon.jl index b6141dc80..f32cdc591 100644 --- a/test/entropies/shannon.jl +++ b/test/entropies/shannon.jl @@ -15,32 +15,3 @@ xp = Probabilities(x) # or minimal when only one probability is nonzero and equal to 1.0 @test entropy(Shannon(), Probabilities([1.0, 0.0, 0.0, 0.0])) ≈ 0.0 - -# ----------------- -# Direct estimators -# ----------------- -# We just check if the estimator converge to true values for some distributions with -# analytically derivable entropy. -# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 -U = 0.00 -# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. 
-N = round(0.5*log(2π) + 0.5, digits = 2) - -ev = Vasicek(m = 100, base = 2) -ee = Ebrahimi(m = 100, base = 2) -ec = Correa(m = 100, base = 2) -ea = AlizadehArghami(m = 100, base = 2) -ev_n = Vasicek(m = 100, base = MathConstants.e) -ee_n = Ebrahimi(m = 100, base = MathConstants.e) -ec_n = Correa(m = 100, base = MathConstants.e) -ea_n = AlizadehArghami(m = 100, base = MathConstants.e) - -n = 1000000 -@test round(entropy(ev, rand(rng, n)), digits = 2) == U -@test round(entropy(ee, rand(rng, n)), digits = 2) == U -@test round(entropy(ec, rand(rng, n)), digits = 2) == U -@test round(entropy(ea, rand(rng, n)), digits = 2) == U -@test round(entropy(ee_n, randn(rng, n)), digits = 2) == N -@test round(entropy(ev_n, randn(rng, n)), digits = 2) == N -@test round(entropy(ec_n, randn(rng, n)), digits = 2) == N -@test round(entropy(ea_n, randn(rng, n)), digits = 2) == N diff --git a/test/entropies/stretched_exponential.jl b/test/entropies/stretched_exponential.jl index d7c411f00..0d19a8e28 100644 --- a/test/entropies/stretched_exponential.jl +++ b/test/entropies/stretched_exponential.jl @@ -19,5 +19,5 @@ b = 2 # probability distribution. x = [repeat([0, 1], 5); 0] est = SymbolicPermutation(m = 2) -@test entropy(StretchedExponential(η = η, base = b), x, est) ≈ +@test entropy(StretchedExponential(η = η, base = b), est, x) ≈ entropy_maximum(StretchedExponential(η = η, base = b), total_outcomes(est)) diff --git a/test/estimators/count_occurrences.jl b/test/probabilities/count_occurrences.jl similarity index 89% rename from test/estimators/count_occurrences.jl rename to test/probabilities/count_occurrences.jl index 423b0e71c..17b47c8f8 100644 --- a/test/estimators/count_occurrences.jl +++ b/test/probabilities/count_occurrences.jl @@ -24,8 +24,8 @@ end # Renyi of coin toss is 1 bit, and for two coin tosses is two bits # Result doesn't depend on `q` due to uniformity of the PDF. for q in (0.5, 1.0, 2.0) - h = entropy(Renyi(q), x, CountOccurrences()) + h = entropy(Renyi(q), CountOccurrences(), x) @test 0.99 < h < 1.01 - h = entropy(Renyi(q), D, CountOccurrences()) + h = entropy(Renyi(q), CountOccurrences(), D) @test 1.99 < h < 2.01 end diff --git a/test/estimators/dispersion.jl b/test/probabilities/dispersion.jl similarity index 96% rename from test/estimators/dispersion.jl rename to test/probabilities/dispersion.jl index 25c9935df..46171fcd2 100644 --- a/test/estimators/dispersion.jl +++ b/test/probabilities/dispersion.jl @@ -42,14 +42,14 @@ using Entropies, Test # There is probably a typo in Rostaghi & Azami (2016). They state that the # non-normalized dispersion entropy is 1.8642. However, with identical probabilies, # we obtain non-normalized dispersion entropy of 1.8462. - res = entropy(Renyi(base = MathConstants.e, q = 1), x, est) + res = entropy(Renyi(base = MathConstants.e, q = 1), est, x) @test round(res, digits = 4) == 1.8462 # Again, probabilities are identical up to this point, but the values we get differ # slightly from the paper. They get normalized DE of 0.85, but we get 0.84. 0.85 is # the normalized DE you'd get by manually normalizing the (erroneous) value from # their previous step. 
- res_norm = entropy_normalized(Renyi(base = MathConstants.e, q = 1), x, est) + res_norm = entropy_normalized(Renyi(base = MathConstants.e, q = 1), est, x) @test round(res_norm, digits = 2) == 0.84 end end diff --git a/test/estimators/diversity.jl b/test/probabilities/diversity.jl similarity index 93% rename from test/estimators/diversity.jl rename to test/probabilities/diversity.jl index 535e367c1..6cdc01acd 100644 --- a/test/estimators/diversity.jl +++ b/test/probabilities/diversity.jl @@ -5,7 +5,7 @@ m = 3 τ = 1 est = Diversity(; nbins, m, τ) @test probabilities(x, est) == [0.5, 0.5] -@test round(entropy_normalized(x, est), digits = 4) == 0.3010 +@test round(entropy_normalized(est, x), digits = 4) == 0.3010 @test total_outcomes(est) == 10 diff --git a/test/estimators/naive_kernel.jl b/test/probabilities/naive_kernel.jl similarity index 86% rename from test/estimators/naive_kernel.jl rename to test/probabilities/naive_kernel.jl index c4a0ca34b..212aa3448 100644 --- a/test/estimators/naive_kernel.jl +++ b/test/probabilities/naive_kernel.jl @@ -16,8 +16,8 @@ p_tree = probabilities(pts, est_tree) p_direct = probabilities(pts, est_direct) @test all(p_tree .== p_direct) == true -@test entropy(Renyi(), pts, est_direct) isa Real -@test entropy(Renyi(), pts, est_tree) isa Real +@test entropy(Renyi(), est_direct, pts) isa Real +@test entropy(Renyi(), est_tree, pts) isa Real probs, z = probabilities_and_outcomes(pts, est_tree) @test z == pts diff --git a/test/estimators/permutation.jl b/test/probabilities/permutation.jl similarity index 85% rename from test/estimators/permutation.jl rename to test/probabilities/permutation.jl index ad0e27cc4..5cbd27cc8 100644 --- a/test/estimators/permutation.jl +++ b/test/probabilities/permutation.jl @@ -35,12 +35,12 @@ s_timeseries = zeros(Int, length(x_timeseries) - (m - 1)*τ) s_dataset = zeros(Int, length(x_dataset)) - @test entropy!(s_timeseries, Shannon(base = 2), x_timeseries, est) ≈ 1.0 - @test entropy!(s_dataset, Shannon(base = 2), x_dataset, est) ≈ 1.0 + @test entropy!(s_timeseries, Shannon(base = 2), est, x_timeseries) ≈ 1.0 + @test entropy!(s_dataset, Shannon(base = 2), est, x_dataset) ≈ 1.0 # Should default to Shannon base 2 - @test entropy!(s_timeseries, x_timeseries, est) ≈ 1.0 - @test entropy!(s_dataset, x_dataset, est) ≈ 1.0 + @test entropy!(s_timeseries, est, x_timeseries) ≈ 1.0 + @test entropy!(s_dataset, est, x_dataset) ≈ 1.0 end end @@ -57,8 +57,8 @@ end @test sum(p2) ≈ 1.0 # Entropy - @test entropy(Renyi(q = 1), x, est) ≈ 0 # Regular order-1 entropy - @test entropy(Renyi(q = 2), y, est) >= 0 # Higher-order entropy + @test entropy(Renyi(q = 1), est, x) ≈ 0 # Regular order-1 entropy + @test entropy(Renyi(q = 2), est, y) >= 0 # Higher-order entropy end @testset "Custom sorting" begin diff --git a/test/estimators/permutation_amplitude_aware.jl b/test/probabilities/permutation_amplitude_aware.jl similarity index 96% rename from test/estimators/permutation_amplitude_aware.jl rename to test/probabilities/permutation_amplitude_aware.jl index 4991a3653..ec412adb7 100644 --- a/test/estimators/permutation_amplitude_aware.jl +++ b/test/probabilities/permutation_amplitude_aware.jl @@ -19,8 +19,8 @@ p2 = probabilities(D, est) @test all(p1.p .≈ p2.p) # Entropy -e1 = entropy(Renyi(), D, est) -e2 = entropy(Renyi(), x, est) +e1 = entropy(Renyi(), est, D) +e2 = entropy(Renyi(), est, x) @test e1 ≈ e2 diff --git a/test/estimators/permutation_spatial.jl b/test/probabilities/permutation_spatial.jl similarity index 89% rename from 
test/estimators/permutation_spatial.jl rename to test/probabilities/permutation_spatial.jl index edd8aec83..431ec0a3b 100644 --- a/test/estimators/permutation_spatial.jl +++ b/test/probabilities/permutation_spatial.jl @@ -14,14 +14,14 @@ using Entropies, Test @test length(ps) == 3 @test sort(ps) == [0.25, 0.25, 0.5] - h = entropy(Renyi(base = 2), x, est) + h = entropy(Renyi(base = 2), est, x) @test h == 1.5 # In fact, doesn't matter how we order the stencil, # the symbols will always be equal in top-left and bottom-right stencil = CartesianIndex.([(0,0), (1,0), (1,1), (0,1)]) est = SpatialSymbolicPermutation(stencil, x, false) - @test entropy(Renyi(base = 2), x, est) == 1.5 + @test entropy(Renyi(base = 2), est, x) == 1.5 # But for sanity, let's ensure we get a different number # for a different stencil @@ -36,12 +36,12 @@ using Entropies, Test extent = (2, 2) lag = (1, 1) est = SpatialSymbolicPermutation((extent, lag), x, false) - @test entropy(Renyi(base = 2), x, est) == 1.5 + @test entropy(Renyi(base = 2), est, x) == 1.5 # and let's also test the matrix-way of specifying the stencil stencil = [1 1; 1 1] est = SpatialSymbolicPermutation(stencil, x, false) - @test entropy(Renyi(base = 2), x, est) == 1.5 + @test entropy(Renyi(base = 2), est, x) == 1.5 # Also test that it works for arbitrarily high-dimensional arrays stencil = CartesianIndex.([(0,0,0), (0,1,0), (0,0,1), (1,0,0)]) @@ -67,10 +67,10 @@ using Entropies, Test extent = (2, 2, 2) lag = (1, 1, 1) est2 = SpatialSymbolicPermutation((extent, lag), w, false) - @test entropy(Renyi(), w, est1) == entropy(Renyi(), w, est2) + @test entropy(Renyi(), est1, w) == entropy(Renyi(), est2, w) # and to this stencil written as a matrix stencil = [1; 1;; 1; 1;;; 1; 1;; 1; 1] est3 = SpatialSymbolicPermutation(stencil, w, false) - @test entropy(Renyi(), w, est1) == entropy(Renyi(), w, est3) + @test entropy(Renyi(), est1, w) == entropy(Renyi(), est3, w) end diff --git a/test/estimators/permutation_weighted.jl b/test/probabilities/permutation_weighted.jl similarity index 95% rename from test/estimators/permutation_weighted.jl rename to test/probabilities/permutation_weighted.jl index 4bf584a03..f8ce34aa7 100644 --- a/test/estimators/permutation_weighted.jl +++ b/test/probabilities/permutation_weighted.jl @@ -19,8 +19,8 @@ p2 = probabilities(D, est) @test all(p1.p .≈ p2.p) # Entropy -e1 = entropy(Renyi(), D, est) -e2 = entropy(Renyi(), x, est) +e1 = entropy(Renyi(), est, D) +e2 = entropy(Renyi(), est, x) @test e1 ≈ e2 diff --git a/test/estimators/timescales.jl b/test/probabilities/timescales.jl similarity index 88% rename from test/estimators/timescales.jl rename to test/probabilities/timescales.jl index 25f545254..7f65a4d51 100644 --- a/test/estimators/timescales.jl +++ b/test/probabilities/timescales.jl @@ -12,7 +12,7 @@ using Entropies, Test ps = probabilities(x, est) @test length(ps) == 8 @test ps isa Probabilities - @test entropy(Renyi( q = 1, base = 2), x, WaveletOverlap()) isa Real + @test entropy(Renyi( q = 1, base = 2), WaveletOverlap(), x) isa Real end @testset "Fourier Spectrum" begin @@ -22,7 +22,7 @@ using Entropies, Test y = @. 
sin(t) + sin(sqrt(3)*t) z = randn(N) est = PowerSpectrum() - ents = [entropy(Renyi(), w, est) for w in (x,y,z)] + ents = [entropy(Renyi(), est, w) for w in (x,y,z)] @test ents[1] < ents[2] < ents[3] # Test event stuff (analytically, using sine wave) probs, events = probabilities_and_outcomes(x, est) diff --git a/test/estimators/transfer_operator.jl b/test/probabilities/transfer_operator.jl similarity index 100% rename from test/estimators/transfer_operator.jl rename to test/probabilities/transfer_operator.jl diff --git a/test/estimators/visitation_frequency.jl b/test/probabilities/visitation_frequency.jl similarity index 100% rename from test/estimators/visitation_frequency.jl rename to test/probabilities/visitation_frequency.jl diff --git a/test/runtests.jl b/test/runtests.jl index 057263d86..61a6c0e32 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,17 +4,17 @@ defaultname(file) = uppercasefirst(replace(splitext(basename(file))[1], '_' => ' testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end @testset "Entropies.jl" begin # Probability estimators - testfile("estimators/count_occurrences.jl") - testfile("estimators/visitation_frequency.jl") - testfile("estimators/transfer_operator.jl") - testfile("estimators/naive_kernel.jl") - testfile("estimators/permutation.jl") - testfile("estimators/permutation_weighted.jl") - testfile("estimators/permutation_amplitude_aware.jl") - testfile("estimators/permutation_spatial.jl") - testfile("estimators/timescales.jl") - testfile("estimators/dispersion.jl") - testfile("estimators/diversity.jl") + testfile("probabilities/count_occurrences.jl") + testfile("probabilities/visitation_frequency.jl") + testfile("probabilities/transfer_operator.jl") + testfile("probabilities/naive_kernel.jl") + testfile("probabilities/permutation.jl") + testfile("probabilities/permutation_weighted.jl") + testfile("probabilities/permutation_amplitude_aware.jl") + testfile("probabilities/permutation_spatial.jl") + testfile("probabilities/timescales.jl") + testfile("probabilities/dispersion.jl") + testfile("probabilities/diversity.jl") # Different entropies testfile("entropies/renyi.jl") @@ -23,7 +23,8 @@ testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include testfile("entropies/curado.jl") testfile("entropies/stretched_exponential.jl") - testfile("entropies/nearest_neighbors_direct.jl") + # Entropy estimators + testfile("entropies/estimators/estimators.jl") # Various testfile("utils/utils.jl") From d4e8325d105d3faef234665022de5bacd4a569e7 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Thu, 17 Nov 2022 12:33:51 +0100 Subject: [PATCH 03/13] Fix diversity test --- test/probabilities/{ => estimators}/diversity.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename test/probabilities/{ => estimators}/diversity.jl (94%) diff --git a/test/probabilities/diversity.jl b/test/probabilities/estimators/diversity.jl similarity index 94% rename from test/probabilities/diversity.jl rename to test/probabilities/estimators/diversity.jl index 6cdc01acd..185be8552 100644 --- a/test/probabilities/diversity.jl +++ b/test/probabilities/estimators/diversity.jl @@ -14,7 +14,7 @@ binsize = (1-(-1))/10 probs, events = probabilities_and_outcomes(x, est) ds = [0.605, 0.698, 0.924, 0.930] # value from Wang et al. (2020) -# These distances should be in the following distance bins: [8, 8, 9, 9], +# These distances should be in the following distance bins: [8, 8, 9, 9]. 
# Which means that the probability distribution should be [0.5, 0.5] bins = floor.(Int, (ds .- (-1.0)) / binsize) @test sort(bins) == [8, 8, 9, 9] @@ -22,5 +22,5 @@ bins = floor.(Int, (ds .- (-1.0)) / binsize) # The coordinates corresponding to the left corners of these bins are as # follows, and should correspond with the events. -coords = -1.0 .+ (bins .* 0.2) +coords = ((bins .- 1) .* binsize) .+ (-1.0) @test all(round.(events, digits = 13) == round.(unique(coords), digits = 13)) From e1b34d13535e7fe80b099acfe4cefaa220f7942e Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Thu, 17 Nov 2022 12:41:40 +0100 Subject: [PATCH 04/13] Reorganize tests --- test/convenience.jl | 32 +++++++ test/entropies/convenience.jl | 15 ---- test/entropies/entropies.jl | 12 +++ test/entropies/{ => entropy_types}/curado.jl | 0 test/entropies/{ => entropy_types}/renyi.jl | 0 test/entropies/{ => entropy_types}/shannon.jl | 0 .../stretched_exponential.jl | 0 test/entropies/{ => entropy_types}/tsallis.jl | 0 test/entropies/interface.jl | 2 + .../{ => estimators}/count_occurrences.jl | 0 .../{ => estimators}/dispersion.jl | 4 +- .../{ => estimators}/naive_kernel.jl | 0 .../{ => estimators}/permutation.jl | 0 .../permutation_amplitude_aware.jl | 0 .../{ => estimators}/permutation_spatial.jl | 0 .../{ => estimators}/permutation_weighted.jl | 0 .../{ => estimators}/timescales.jl | 0 .../{ => estimators}/transfer_operator.jl | 0 .../estimators/value_histogram.jl} | 0 test/probabilities/interface.jl | 3 + test/probabilities/probabilities.jl | 15 ++++ test/probabilities/visitation_frequency.jl | 86 ------------------- test/runtests.jl | 27 +----- 23 files changed, 70 insertions(+), 126 deletions(-) create mode 100644 test/convenience.jl delete mode 100644 test/entropies/convenience.jl create mode 100644 test/entropies/entropies.jl rename test/entropies/{ => entropy_types}/curado.jl (100%) rename test/entropies/{ => entropy_types}/renyi.jl (100%) rename test/entropies/{ => entropy_types}/shannon.jl (100%) rename test/entropies/{ => entropy_types}/stretched_exponential.jl (100%) rename test/entropies/{ => entropy_types}/tsallis.jl (100%) create mode 100644 test/entropies/interface.jl rename test/probabilities/{ => estimators}/count_occurrences.jl (100%) rename test/probabilities/{ => estimators}/dispersion.jl (96%) rename test/probabilities/{ => estimators}/naive_kernel.jl (100%) rename test/probabilities/{ => estimators}/permutation.jl (100%) rename test/probabilities/{ => estimators}/permutation_amplitude_aware.jl (100%) rename test/probabilities/{ => estimators}/permutation_spatial.jl (100%) rename test/probabilities/{ => estimators}/permutation_weighted.jl (100%) rename test/probabilities/{ => estimators}/timescales.jl (100%) rename test/probabilities/{ => estimators}/transfer_operator.jl (100%) rename test/{estimators/histograms.jl => probabilities/estimators/value_histogram.jl} (100%) create mode 100644 test/probabilities/interface.jl create mode 100644 test/probabilities/probabilities.jl delete mode 100644 test/probabilities/visitation_frequency.jl diff --git a/test/convenience.jl b/test/convenience.jl new file mode 100644 index 000000000..dc64f43be --- /dev/null +++ b/test/convenience.jl @@ -0,0 +1,32 @@ +using Entropies +using Test +import Statistics + +@testset "Common literature names" begin + x = randn(1000) + σ = 0.2*Statistics.std(x) + + @testset "Permutation entropy" begin + @test entropy_permutation(x) == entropy(SymbolicPermutation(), x) + end + + @testset "Wavelet entropy" begin + @test 
entropy_wavelet(x) == entropy(WaveletOverlap(), x) + end + + @testset "Dispersion entropy" begin + @test entropy_dispersion(x) == entropy(Dispersion(), x) + end + + @testset "Spatial permutation entropy" begin + x = rand(50, 50) + stencil = CartesianIndex.([(0,1), (1,1), (1,0)]) + est = SpatialSymbolicPermutation(stencil, x) + @test entropy_spatial_permutation(x, stencil) == entropy(est, x) + end +end + +@testset "`probabilities` defaults to `CountOccurrences`" begin + x = [1, 1, 2, 2, 3, 3] + @test probabilities(x) == probabilities(x, CountOccurrences()) == [1/3, 1/3, 1/3] +end diff --git a/test/entropies/convenience.jl b/test/entropies/convenience.jl deleted file mode 100644 index ffa82c28a..000000000 --- a/test/entropies/convenience.jl +++ /dev/null @@ -1,15 +0,0 @@ -using Entropies -using Test -import Statistics - -x = randn(1000) -σ = 0.2*Statistics.std(x) -# All of the following are API tests. -@test entropy_permutation(x) == entropy(SymbolicPermutation(), x) -@test entropy_wavelet(x) == entropy(WaveletOverlap(), x) -@test entropy_dispersion(x) == entropy(Dispersion(), x) - -x = rand(50, 50) -stencil = CartesianIndex.([(0,1), (1,1), (1,0)]) -est = SpatialSymbolicPermutation(stencil, x) -@test entropy_spatial_permutation(x, stencil) == entropy(est, x) diff --git a/test/entropies/entropies.jl b/test/entropies/entropies.jl new file mode 100644 index 000000000..259606fbb --- /dev/null +++ b/test/entropies/entropies.jl @@ -0,0 +1,12 @@ +# Interface. +testfile("interface.jl") + +# Entropy types. +testfile("entropy_types/renyi.jl") +testfile("entropy_types/shannon.jl") +testfile("entropy_types/tsallis.jl") +testfile("entropy_types/curado.jl") +testfile("entropy_types/stretched_exponential.jl") + +# Estimators +testfile("estimators/estimators.jl") diff --git a/test/entropies/curado.jl b/test/entropies/entropy_types/curado.jl similarity index 100% rename from test/entropies/curado.jl rename to test/entropies/entropy_types/curado.jl diff --git a/test/entropies/renyi.jl b/test/entropies/entropy_types/renyi.jl similarity index 100% rename from test/entropies/renyi.jl rename to test/entropies/entropy_types/renyi.jl diff --git a/test/entropies/shannon.jl b/test/entropies/entropy_types/shannon.jl similarity index 100% rename from test/entropies/shannon.jl rename to test/entropies/entropy_types/shannon.jl diff --git a/test/entropies/stretched_exponential.jl b/test/entropies/entropy_types/stretched_exponential.jl similarity index 100% rename from test/entropies/stretched_exponential.jl rename to test/entropies/entropy_types/stretched_exponential.jl diff --git a/test/entropies/tsallis.jl b/test/entropies/entropy_types/tsallis.jl similarity index 100% rename from test/entropies/tsallis.jl rename to test/entropies/entropy_types/tsallis.jl diff --git a/test/entropies/interface.jl b/test/entropies/interface.jl new file mode 100644 index 000000000..d142a656f --- /dev/null +++ b/test/entropies/interface.jl @@ -0,0 +1,2 @@ +x = ones(3) +@test_throws MethodError entropy(x, 0.1) diff --git a/test/probabilities/count_occurrences.jl b/test/probabilities/estimators/count_occurrences.jl similarity index 100% rename from test/probabilities/count_occurrences.jl rename to test/probabilities/estimators/count_occurrences.jl diff --git a/test/probabilities/dispersion.jl b/test/probabilities/estimators/dispersion.jl similarity index 96% rename from test/probabilities/dispersion.jl rename to test/probabilities/estimators/dispersion.jl index 3e5f3db51..60758c2c1 100644 --- a/test/probabilities/dispersion.jl +++ 
b/test/probabilities/estimators/dispersion.jl @@ -39,14 +39,14 @@ using Entropies, Test # There is probably a typo in Rostaghi & Azami (2016). They state that the # non-normalized dispersion entropy is 1.8642. However, with identical probabilies, # we obtain non-normalized dispersion entropy of 1.8462. - res = entropy(Renyi(base = MathConstants.e, q = 1), x, est) + res = entropy(Renyi(base = MathConstants.e, q = 1), est, x) @test round(res, digits = 4) == 1.8462 # Again, probabilities are identical up to this point, but the values we get differ # slightly from the paper. They get normalized DE of 0.85, but we get 0.84. 0.85 is # the normalized DE you'd get by manually normalizing the (erroneous) value from # their previous step. - res_norm = entropy_normalized(Renyi(base = MathConstants.e, q = 1), x, est) + res_norm = entropy_normalized(Renyi(base = MathConstants.e, q = 1), est, x) @test round(res_norm, digits = 2) == 0.84 @testset "outcomes" begin diff --git a/test/probabilities/naive_kernel.jl b/test/probabilities/estimators/naive_kernel.jl similarity index 100% rename from test/probabilities/naive_kernel.jl rename to test/probabilities/estimators/naive_kernel.jl diff --git a/test/probabilities/permutation.jl b/test/probabilities/estimators/permutation.jl similarity index 100% rename from test/probabilities/permutation.jl rename to test/probabilities/estimators/permutation.jl diff --git a/test/probabilities/permutation_amplitude_aware.jl b/test/probabilities/estimators/permutation_amplitude_aware.jl similarity index 100% rename from test/probabilities/permutation_amplitude_aware.jl rename to test/probabilities/estimators/permutation_amplitude_aware.jl diff --git a/test/probabilities/permutation_spatial.jl b/test/probabilities/estimators/permutation_spatial.jl similarity index 100% rename from test/probabilities/permutation_spatial.jl rename to test/probabilities/estimators/permutation_spatial.jl diff --git a/test/probabilities/permutation_weighted.jl b/test/probabilities/estimators/permutation_weighted.jl similarity index 100% rename from test/probabilities/permutation_weighted.jl rename to test/probabilities/estimators/permutation_weighted.jl diff --git a/test/probabilities/timescales.jl b/test/probabilities/estimators/timescales.jl similarity index 100% rename from test/probabilities/timescales.jl rename to test/probabilities/estimators/timescales.jl diff --git a/test/probabilities/transfer_operator.jl b/test/probabilities/estimators/transfer_operator.jl similarity index 100% rename from test/probabilities/transfer_operator.jl rename to test/probabilities/estimators/transfer_operator.jl diff --git a/test/estimators/histograms.jl b/test/probabilities/estimators/value_histogram.jl similarity index 100% rename from test/estimators/histograms.jl rename to test/probabilities/estimators/value_histogram.jl diff --git a/test/probabilities/interface.jl b/test/probabilities/interface.jl new file mode 100644 index 000000000..7818fe352 --- /dev/null +++ b/test/probabilities/interface.jl @@ -0,0 +1,3 @@ +x = ones(3) +p = probabilities(x, ValueHistogram(0.1)) +@test p isa Probabilities diff --git a/test/probabilities/probabilities.jl b/test/probabilities/probabilities.jl new file mode 100644 index 000000000..639dfa5ed --- /dev/null +++ b/test/probabilities/probabilities.jl @@ -0,0 +1,15 @@ +# Interface. +testfile("interface.jl") + +# Probability estimators. 
+testfile("estimators/count_occurrences.jl") +testfile("estimators/value_histogram.jl") +testfile("estimators/transfer_operator.jl") +testfile("estimators/naive_kernel.jl") +testfile("estimators/permutation.jl") +testfile("estimators/permutation_weighted.jl") +testfile("estimators/permutation_amplitude_aware.jl") +testfile("estimators/permutation_spatial.jl") +testfile("estimators/timescales.jl") +testfile("estimators/dispersion.jl") +testfile("estimators/diversity.jl") diff --git a/test/probabilities/visitation_frequency.jl b/test/probabilities/visitation_frequency.jl deleted file mode 100644 index 7dd01c271..000000000 --- a/test/probabilities/visitation_frequency.jl +++ /dev/null @@ -1,86 +0,0 @@ -using Entropies -using Entropies.DelayEmbeddings, Test -using Random - -@testset "Rectangular binning" begin - x = Dataset(rand(Random.MersenneTwister(1234), 100_000, 2)) - push!(x, SVector(0, 0)) # ensure both 0 and 1 have values in, exactly. - push!(x, SVector(1, 1)) - # All these binnings should give approximately same probabilities - n = 10 # boxes cover 0 - 1 in steps of slightly more than 0.1 - ε = nextfloat(0.1) # this guarantees that we get the same as the `n` above! - - binnings = [ - RectangularBinning(n), - RectangularBinning(ε), - RectangularBinning([n, n]), - RectangularBinning([ε, ε]) - ] - - for bin in binnings - @testset "ϵ = $(bin.ϵ)" begin - est = ValueHistogram(bin) - p = probabilities(x, est) - @test length(p) == 100 - @test all(e -> 0.009 ≤ e ≤ 0.011, p) - end - end - - @testset "Check rogue 1s" begin - b = RectangularBinning(0.1) # no `nextfloat` here, so the rogue (1, 1) is in extra bin! - p = probabilities(x, ValueHistogram(b)) - @test length(p) == 100 + 1 - @test p[end] ≈ 1/100_000 atol = 1e-5 - end - - @testset "vector" begin - x = rand(Random.MersenneTwister(1234), 100_000) - push!(x, 0, 1) - n = 10 # boxes cover 0 - 1 in steps of slightly more than 0.1 - ε = nextfloat(0.1) # this guarantees that we get the same as the `n` above! - binnings = RectangularBinning.((n, ε)) - for bin in binnings - p = probabilities(x, ValueHistogram(bin)) - @test length(p) == 10 - @test all(e -> 0.09 ≤ e ≤ 0.11, p) - end - end - - # An extra bin might appear due to roundoff error after using nextfloat when - # constructing `RectangularBinEncoding`s. - # The following tests ensure with *some* certainty that this does not occur. - @testset "Rogue extra bins" begin - rng = MersenneTwister(1234) - xs1D = [rand(rng, 20) for i = 1:10000]; - xs2D = [rand(rng, 1000, 2) |> Dataset for i = 1:10000] # more points to fill all bins - est = ValueHistogram(RectangularBinning(10)) - ps1D = [probabilities(x, est) for x in xs1D]; - ps2D = [probabilities(x, est) for x in xs2D]; - n_rogue_extrabin_1D = count(length.(ps1D) .> est.binning.ϵ) - n_rogue_extrabin_2D = count(length.(ps2D) .> est.binning.ϵ^2) - @test n_rogue_extrabin_1D == 0 - @test n_rogue_extrabin_2D == 0 - - # Concrete examples where a rogue extra bin has appeared. 
- x1 = [0.5213236385155418, 0.03516318860292644, 0.5437726723245310, 0.52598710966469610, 0.34199879802511246, 0.6017129426606275, 0.6972844365031351, 0.89163995617220900, 0.39009862510518045, 0.06296038912844315, 0.9897176284081909, 0.7935001082966890, 0.890198448900077700, 0.11762640519877565, 0.7849413168095061, 0.13768932585886573, 0.50869900547793430, 0.18042178201388548, 0.28507312391861270, 0.96480406570924970] - x2 = [0.4125754262679051, 0.52844411982339560, 0.4535277505543355, 0.25502420827802674, 0.77862522996085940, 0.6081939026664078, 0.2628674795466387, 0.18846258495465185, 0.93320375283233840, 0.40093871561247874, 0.8032730760974603, 0.3531608285217499, 0.018436525139752136, 0.55541857934068420, 0.9907521337888632, 0.15382361136212420, 0.01774321666660561, 0.67569337507728300, 0.06130971689608822, 0.31417161558476836] - N = 10 - b = RectangularBinning(N) - rb1 = RectangularBinEncoding(x1, b, n_eps = 1) - rb2 = RectangularBinEncoding(x1, b, n_eps = 2) - @test Entropies.encode_as_bin(maximum(x1), rb1) == 10 # shouldn't occur, but does when tolerance is too low - @test Entropies.encode_as_bin(maximum(x1), rb2) == 9 - - rb1 = RectangularBinEncoding(x2, b, n_eps = 1) - rb2 = RectangularBinEncoding(x2, b, n_eps = 2) - @test Entropies.encode_as_bin(maximum(x2), rb1) == 10 # shouldn't occur, but does when tolerance is too low - @test Entropies.encode_as_bin(maximum(x2), rb2) == 9 - end - - @testset "interface" begin - x = ones(3) - p = probabilities(x, ValueHistogram(0.1)) - @test p isa Probabilities - @test_throws MethodError entropy(x, 0.1) - end -end diff --git a/test/runtests.jl b/test/runtests.jl index 3b6c70fed..153abe8e6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,33 +1,14 @@ using Test +using Entropies defaultname(file) = uppercasefirst(replace(splitext(basename(file))[1], '_' => ' ')) testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end @testset "Entropies.jl" begin - # Probability estimators - testfile("probabilities/count_occurrences.jl") - testfile("probabilities/visitation_frequency.jl") - testfile("probabilities/transfer_operator.jl") - testfile("probabilities/naive_kernel.jl") - testfile("probabilities/permutation.jl") - testfile("probabilities/permutation_weighted.jl") - testfile("probabilities/permutation_amplitude_aware.jl") - testfile("probabilities/permutation_spatial.jl") - testfile("probabilities/timescales.jl") - testfile("probabilities/dispersion.jl") - testfile("probabilities/diversity.jl") - - # Different entropies - testfile("entropies/renyi.jl") - testfile("entropies/shannon.jl") - testfile("entropies/tsallis.jl") - testfile("entropies/curado.jl") - testfile("entropies/stretched_exponential.jl") - - # Entropy estimators - testfile("entropies/estimators/estimators.jl") + include("probabilities/probabilities.jl") + include("entropies/entropies.jl") # Various testfile("utils/fasthist.jl") # testfile("utils/encoding.jl") - testfile("entropies/convenience.jl") + testfile("convenience.jl") end From 18e3b38e7bf25ed3ba445781accd497a760b36a0 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Thu, 17 Nov 2022 13:44:42 +0100 Subject: [PATCH 05/13] probabilities --- docs/src/examples.md | 2 +- src/entropy.jl | 4 +- src/probabilities.jl | 8 ++-- .../counting/count_occurences.jl | 4 +- .../dispersion/dispersion.jl | 4 +- .../diversity/diversity.jl | 14 +++---- .../histograms/rectangular_binning.jl | 2 +- .../histograms/value_histogram.jl | 4 +- .../kernel/kernel_density.jl | 2 +- .../SymbolicAmplitudeAware.jl | 9 
+++-- .../SymbolicPermutation.jl | 39 ++++++++++--------- .../SymbolicWeightedPermutation.jl | 8 ++-- .../spatial_permutation.jl | 6 +-- .../probabilities_estimators.jl | 2 +- .../timescales/power_spectrum.jl | 6 +-- .../timescales/wavelet_overlap.jl | 2 +- .../transfer_operator/transfer_operator.jl | 2 +- test/convenience.jl | 2 +- .../estimators/count_occurrences.jl | 6 +-- test/probabilities/estimators/dispersion.jl | 4 +- test/probabilities/estimators/diversity.jl | 4 +- test/probabilities/estimators/naive_kernel.jl | 10 ++--- test/probabilities/estimators/permutation.jl | 14 +++---- .../estimators/permutation_amplitude_aware.jl | 10 ++--- .../estimators/permutation_spatial.jl | 8 ++-- .../estimators/permutation_weighted.jl | 10 ++--- test/probabilities/estimators/timescales.jl | 4 +- .../estimators/transfer_operator.jl | 4 +- .../estimators/value_histogram.jl | 10 ++--- test/probabilities/interface.jl | 2 +- 30 files changed, 105 insertions(+), 101 deletions(-) diff --git a/docs/src/examples.md b/docs/src/examples.md index 13eb08cd8..0697c4e39 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -173,7 +173,7 @@ using DynamicalSystemsBase, CairoMakie, Distributions N = 500 D = Dataset(sort([rand(𝒩) for i = 1:N])) x, y = columns(D) -p = probabilities(D, NaiveKernel(1.5)) +p = probabilities(NaiveKernel(1.5), D) fig, ax = scatter(D[:, 1], D[:, 2], zeros(N); markersize=8, axis=(type = Axis3,) ) diff --git a/src/entropy.jl b/src/entropy.jl index fc41b59ab..3ca149c0e 100644 --- a/src/entropy.jl +++ b/src/entropy.jl @@ -100,7 +100,7 @@ h = entropy(Renyi(2.0), ps) # also gives 1, order `q` doesn't matter for coin to ``` """ function entropy(e::Entropy, est::ProbabilitiesEstimator, x) - ps = probabilities(x, est) + ps = probabilities(est, x) return entropy(e, ps) end @@ -119,7 +119,7 @@ The entropy (second argument) is optional: if not given, `Shannon()` is used ins Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). """ function entropy!(s::AbstractVector{Int}, e::Entropy, est::ProbabilitiesEstimator, x) - probs = probabilities!(s, x, est) + probs = probabilities!(s, est, x) entropy(e, probs) end diff --git a/src/probabilities.jl b/src/probabilities.jl index 5475140a9..f71f96359 100644 --- a/src/probabilities.jl +++ b/src/probabilities.jl @@ -85,7 +85,7 @@ abstract type ProbabilitiesEstimator end # probabilities and combo function ########################################################################################### """ - probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities + probabilities(est::ProbabilitiesEstimator, x::Array_or_Dataset) → p::Probabilities Compute a probability distribution over the set of possible outcomes defined by the probabilities estimator `est`, given input data `x`. @@ -104,8 +104,8 @@ This is mostly useful when `x` contains categorical or integer data. See also: [`Probabilities`](@ref), [`ProbabilitiesEstimator`](@ref). """ -function probabilities(x, est::ProbabilitiesEstimator) - return probabilities_and_outcomes(x, est)[1] +function probabilities(est::ProbabilitiesEstimator, x) + return probabilities_and_outcomes(est, x)[1] end """ @@ -117,7 +117,7 @@ The element type of `Ω` depends on the estimator. See also [`outcomes`](@ref), [`total_outcomes`](@ref), and [`outcome_space`](@ref). 
""" -function probabilities_and_outcomes(x, est::ProbabilitiesEstimator) +function probabilities_and_outcomes(est::ProbabilitiesEstimator, x) error("`probabilities_and_outcomes` not implemented for estimator $(typeof(est)).") end diff --git a/src/probabilities_estimators/counting/count_occurences.jl b/src/probabilities_estimators/counting/count_occurences.jl index 9e34b55d3..f145a16f1 100644 --- a/src/probabilities_estimators/counting/count_occurences.jl +++ b/src/probabilities_estimators/counting/count_occurences.jl @@ -12,7 +12,7 @@ The outcome space is the unique sorted values of the input. """ struct CountOccurrences <: ProbabilitiesEstimator end -function probabilities_and_outcomes(x::Array_or_Dataset, ::CountOccurrences) +function probabilities_and_outcomes(::CountOccurrences, x::Array_or_Dataset) z = copy(x) probs = Probabilities(fasthist!(z)) # notice that `z` is now sorted within `fasthist!` so we can skip sorting @@ -21,7 +21,7 @@ end outcome_space(x, ::CountOccurrences) = sort!(unique(x)) -probabilities(x::Array_or_Dataset, ::CountOccurrences) = probabilities(x) +probabilities(::CountOccurrences, x::Array_or_Dataset) = probabilities(x) function probabilities(x::Array_or_Dataset) # Fast histograms code is in the `histograms` folder return Probabilities(fasthist!(copy(x))) diff --git a/src/probabilities_estimators/dispersion/dispersion.jl b/src/probabilities_estimators/dispersion/dispersion.jl index 7840de1c1..1d9422203 100644 --- a/src/probabilities_estimators/dispersion/dispersion.jl +++ b/src/probabilities_estimators/dispersion/dispersion.jl @@ -107,7 +107,7 @@ function symbolize_for_dispersion(x, est::Dispersion) return symbols::Vector{Int} end -function probabilities_and_outcomes(x::AbstractVector{<:Real}, est::Dispersion) +function probabilities_and_outcomes(est::Dispersion, x::AbstractVector{<:Real}) N = length(x) symbols = symbolize_for_dispersion(x, est) # We must use genembed, not embed, to make sure the zero lag is included @@ -125,4 +125,4 @@ function outcome_space(est::Dispersion) cart = CartesianIndices(ntuple(i -> c, m)) V = SVector{m, Int} return map(i -> V(Tuple(i)), vec(cart)) -end \ No newline at end of file +end diff --git a/src/probabilities_estimators/diversity/diversity.jl b/src/probabilities_estimators/diversity/diversity.jl index a91997083..3ccde602a 100644 --- a/src/probabilities_estimators/diversity/diversity.jl +++ b/src/probabilities_estimators/diversity/diversity.jl @@ -44,21 +44,21 @@ Base.@kwdef struct Diversity <: ProbabilitiesEstimator nbins::Int = 5 end -function probabilities(x::AbstractVector{T}, est::Diversity) where T <: Real - ds, binning = similarities_and_binning(x, est) +function probabilities(est::Diversity, x::AbstractVector{T}) where T <: Real + ds, binning = similarities_and_binning(est, x) return fasthist(ds, binning)[1] end -function probabilities_and_outcomes(x::AbstractVector{T}, est::Diversity) where T <: Real - ds, binning = similarities_and_binning(x, est) - return probabilities_and_outcomes(ds, ValueHistogram(binning)) +function probabilities_and_outcomes(est::Diversity, x::AbstractVector{T}) where T <: Real + ds, binning = similarities_and_binning(est, x) + return probabilities_and_outcomes(ValueHistogram(binning), ds) end total_outcomes(est::Diversity) = est.nbins outcome_space(x, est::Diversity) = outcome_space(x, binning_for_diversity(est)) -function similarities_and_binning(x::AbstractVector{T}, est::Diversity) where T <: Real +function similarities_and_binning(est::Diversity, x::AbstractVector{T}) where T <: Real # embed 
and then calculate cosine similary for each consecutive pair of delay vectors τs = 0:est.τ:(est.m - 1)*est.τ Y = genembed(x, τs) @@ -73,4 +73,4 @@ end cosine_similarity(xᵢ, xⱼ) = sum(xᵢ .* xⱼ) / (sqrt(sum(xᵢ .^ 2)) * sqrt(sum(xⱼ .^ 2))) -binning_for_diversity(est::Diversity) = FixedRectangularBinning(-1.0, 1.0, est.nbins) \ No newline at end of file +binning_for_diversity(est::Diversity) = FixedRectangularBinning(-1.0, 1.0, est.nbins) diff --git a/src/probabilities_estimators/histograms/rectangular_binning.jl b/src/probabilities_estimators/histograms/rectangular_binning.jl index 37a8525f9..09c322cfa 100644 --- a/src/probabilities_estimators/histograms/rectangular_binning.jl +++ b/src/probabilities_estimators/histograms/rectangular_binning.jl @@ -238,7 +238,7 @@ end ################################################################## # low level histogram call ################################################################## -# This method is called by `probabilities(x::Array_or_Dataset, est::ValueHistogram)` +# This method is called by `probabilities(est::ValueHistogram, x::Array_or_Dataset)` """ fasthist(x::Vector_or_Dataset, ϵ::AbstractBinning) Create an encoding for binning, then map `x` to bins, then call `fasthist!` on the bins. diff --git a/src/probabilities_estimators/histograms/value_histogram.jl b/src/probabilities_estimators/histograms/value_histogram.jl index b0425fd6a..8ae57a87c 100644 --- a/src/probabilities_estimators/histograms/value_histogram.jl +++ b/src/probabilities_estimators/histograms/value_histogram.jl @@ -59,13 +59,13 @@ const VisitationFrequency = ValueHistogram # This method is only valid for rectangular binnings, as `fasthist` # is only valid for rectangular binnings. For more binnings, it needs to be extended. -function probabilities(x::Array_or_Dataset, est::ValueHistogram) +function probabilities(est::ValueHistogram, x::Array_or_Dataset) # and the `fasthist` actually just makes an encoding, # this function is in `rectangular_binning.jl` fasthist(x, est.binning)[1] end -function probabilities_and_outcomes(x::Array_or_Dataset, est::ValueHistogram) +function probabilities_and_outcomes(est::ValueHistogram, x::Array_or_Dataset) probs, bins, encoder = fasthist(x, est.binning) unique!(bins) # `bins` is already sorted from `fasthist!` # Here we transfor the cartesian coordinate based bins into data unit bins: diff --git a/src/probabilities_estimators/kernel/kernel_density.jl b/src/probabilities_estimators/kernel/kernel_density.jl index d91c5127b..b007574bc 100644 --- a/src/probabilities_estimators/kernel/kernel_density.jl +++ b/src/probabilities_estimators/kernel/kernel_density.jl @@ -46,7 +46,7 @@ function NaiveKernel(ϵ::Real, method = KDTree; w = 0, metric = Euclidean()) return NaiveKernel(ϵ, method, w, metric) end -function probabilities_and_outcomes(x::AbstractDataset, est::NaiveKernel) +function probabilities_and_outcomes(est::NaiveKernel, x::AbstractDataset) theiler = Theiler(est.w) ss = searchstructure(est.method, x.data, est.metric) idxs = bulkisearch(ss, x.data, WithinRange(est.ϵ), theiler) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl index fb53495b2..d38f560f7 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl @@ -72,8 +72,8 @@ function AAPE(x; A::Real = 0.5, m::Int = length(x)) (A/m)*sum(abs.(x)) + 
(1-A)/(m-1)*sum(abs.(diff(x))) end -function probabilities_and_outcomes(x::AbstractDataset{m, T}, - est::SymbolicAmplitudeAwarePermutation) where {m, T} +function probabilities_and_outcomes(est::SymbolicAmplitudeAwarePermutation, + x::AbstractDataset{m, T}) where {m, T} πs = outcomes(x, OrdinalPatternEncoding(m = m, lt = est.lt)) wts = AAPE.(x.data, A = est.A, m = est.m) probs = symprobs(πs, wts, normalize = true) @@ -87,8 +87,9 @@ function probabilities_and_outcomes(x::AbstractDataset{m, T}, return Probabilities(probs), observed_outcomes end -function probabilities_and_outcomes(x::AbstractVector{T}, - est::SymbolicAmplitudeAwarePermutation) where {T<:Real} +function probabilities_and_outcomes( + est::SymbolicAmplitudeAwarePermutation, + x::AbstractVector{T}) where {T<:Real} # We need to manually embed here instead of just calling the method above, # because the embedding vectors are needed to compute weights. τs = tuple([est.τ*i for i = 0:est.m-1]...) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl index c046b240b..492598ee6 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl @@ -40,15 +40,15 @@ est = SymbolicPermutation(; m, τ) # For a time series x_ts = rand(N) πs_ts = zeros(Int, N - (m - 1)*τ) -p = probabilities!(πs_ts, x_ts, est) -h = entropy!(πs_ts, Renyi(), x_ts, est) +p = probabilities!(πs_ts, est, x_ts) +h = entropy!(πs_ts, Renyi(), est, x_ts) # For a pre-discretized `Dataset` x_symb = outcomes(x_ts, OrdinalPatternEncoding(m = 2, τ = 1)) x_d = genembed(x_symb, (0, -1, -2)) πs_d = zeros(Int, length(x_d)) -p = probabilities!(πs_d, x_d, est) -h = entropy!(πs_d, Renyi(), x_d, est) +p = probabilities!(πs_d, est, x_d) +h = entropy!(πs_d, Renyi(), est, x_d) ``` See [`SymbolicWeightedPermutation`](@ref) and [`SymbolicAmplitudeAwarePermutation`](@ref) @@ -79,22 +79,21 @@ function SymbolicPermutation(; τ::Int = 1, m::Int = 3, lt::F=isless_rand) where SymbolicPermutation{F}(τ, m, lt) end -function probabilities!(πs::AbstractVector{Int}, x::AbstractDataset{m, T}, est::SymbolicPermutation) where {m, T} +function probabilities!(πs::AbstractVector{Int}, est::SymbolicPermutation, x::AbstractDataset{m, T}) where {m, T} length(πs) == length(x) || throw(ArgumentError("Need length(πs) == length(x), got `length(πs)=$(length(πs))` and `length(x)==$(length(x))`.")) m >= 2 || error("Data must be at least 2-dimensional to compute the permutation entropy. If data is a univariate time series embed it using `genembed` first.") encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) probabilities(outcomes!(πs, x, encoding)) end -function probabilities!(πs::AbstractVector{Int}, x::AbstractVector{T}, est::SymbolicPermutation) where {T<:Real} +function probabilities!(πs::AbstractVector{Int}, est::SymbolicPermutation, x::AbstractVector{T}) where {T<:Real} N = length(x) - (est.m - 1)*est.τ length(πs) == N || error("Pre-allocated symbol vector `πs` needs to have length `length(x) - (m-1)*τ` to match the number of state vectors after `x` has been embedded. 
Got length(πs)=$(length(πs)) and length(x)=$(L).") encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) probabilities(outcomes!(πs, x, encoding)) end -function probabilities_and_outcomes(x::AbstractDataset{m, T}, - est::SymbolicPermutation) where {m, T} +function probabilities_and_outcomes(est::SymbolicPermutation, x::AbstractDataset{m, T}) where {m, T} encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) πs = outcomes(x, encoding) probs = probabilities(πs) @@ -108,11 +107,10 @@ function probabilities_and_outcomes(x::AbstractDataset{m, T}, return probs, observed_outcomes end -function probabilities_and_outcomes(x::AbstractVector{T}, - est::SymbolicPermutation) where {T<:Real} +function probabilities_and_outcomes(est::SymbolicPermutation, x::AbstractVector{T}) where {T<:Real} encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) πs = outcomes(x, encoding) - probs = probabilities(πs, CountOccurrences()) + probs = probabilities(πs) # The observed integer encodings are in the set `{0, 1, ..., factorial(m)}`, and each # integer corresponds to a unique permutation. Decoding an integer gives the original @@ -123,31 +121,36 @@ function probabilities_and_outcomes(x::AbstractVector{T}, return probs, observed_outcomes end -function entropy!(e::Entropy, +function entropy!( s::AbstractVector{Int}, - x::AbstractDataset{m, T}, - est::SymbolicPermutation; + e::Entropy, + est::SymbolicPermutation, + x::AbstractDataset{m, T}; ) where {m, T} length(s) == length(x) || error("Pre-allocated symbol vector s need the same number of elements as x. Got length(πs)=$(length(πs)) and length(x)=$(L).") - ps = probabilities!(s, x, est) + ps = probabilities!(s, est, x) entropy(e, ps) end -function entropy!(e::Entropy, +function entropy!( s::AbstractVector{Int}, + e::Entropy, + est::SymbolicPermutation, x::AbstractVector{T}, - est::SymbolicPermutation ) where {T<:Real} L = length(x) N = L - (est.m-1)*est.τ length(s) == N || error("Pre-allocated symbol vector `s` needs to have length `length(x) - (m-1)*τ` to match the number of state vectors after `x` has been embedded. 
Got length(s)=$(length(s)) and length(x)=$(L).") - ps = probabilities!(s, x, est) + ps = probabilities!(s, est, x) entropy(e, ps) end +entropy!(s::AbstractVector{Int}, est::SymbolicPermutation, x) = + entropy!(s, Shannon(base = 2), est, x) + total_outcomes(est::SymbolicPermutation)::Int = factorial(est.m) outcome_space(est::SymbolicPermutation) = permutations(1:est.m) |> collect diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl index 34f2057da..8a860a161 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl @@ -85,8 +85,8 @@ function weights_from_variance(x, m::Int) end -function probabilities_and_outcomes(x::AbstractDataset{m, T}, - est::SymbolicWeightedPermutation) where {m, T} +function probabilities_and_outcomes(est::SymbolicWeightedPermutation, + x::AbstractDataset{m, T}) where {m, T} m >= 2 || error("Need m ≥ 2, otherwise no dynamical information is encoded in the symbols.") πs = outcomes(x, OrdinalPatternEncoding(m = m, lt = est.lt)) # motif length controlled by dimension of input data wts = weights_from_variance.(x.data, m) @@ -101,8 +101,8 @@ function probabilities_and_outcomes(x::AbstractDataset{m, T}, return Probabilities(probs), observed_outcomes end -function probabilities_and_outcomes(x::AbstractVector{T}, - est::SymbolicWeightedPermutation) where {T<:Real} +function probabilities_and_outcomes(est::SymbolicWeightedPermutation, + x::AbstractVector{T}) where {T<:Real} # We need to manually embed here instead of just calling the method above, # because the embedding vectors are needed to compute weights. τs = tuple([est.τ*i for i = 0:est.m-1]...) diff --git a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl index eeaf04557..a861e3b2e 100644 --- a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl @@ -136,14 +136,14 @@ end ########################################################################################### # probabilities implementation ########################################################################################### -function probabilities(x::AbstractArray, est::SpatialSymbolicPermutation) +function probabilities(est::SpatialSymbolicPermutation, x::AbstractArray) # TODO: This can be literally a call to `outcomes` and then # calling probabilities on it. Should do once the `outcomes` refactoring is done. 
s = zeros(Int, length(est.valid)) - probabilities!(s, x, est) + probabilities!(s, est, x) end -function probabilities!(s, x, est::SpatialSymbolicPermutation) +function probabilities!(s, est::SpatialSymbolicPermutation, x) m = length(est.stencil) for (i, pixel) in enumerate(est.valid) pixels = pixels_in_stencil(pixel, est) diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl index def2cd4e5..371af9c68 100644 --- a/src/probabilities_estimators/probabilities_estimators.jl +++ b/src/probabilities_estimators/probabilities_estimators.jl @@ -1,5 +1,5 @@ include("counting/count_occurences.jl") -include("histograms/visitation_frequency.jl") +include("histograms/value_histogram.jl") include("permutation_ordinal/symbolic.jl") include("kernel/kernel_density.jl") include("timescales/timescales.jl") diff --git a/src/probabilities_estimators/timescales/power_spectrum.jl b/src/probabilities_estimators/timescales/power_spectrum.jl index 346881b75..e159c1bdc 100644 --- a/src/probabilities_estimators/timescales/power_spectrum.jl +++ b/src/probabilities_estimators/timescales/power_spectrum.jl @@ -30,15 +30,15 @@ should be multiplied with the sampling rate of the signal, which is assumed to b """ struct PowerSpectrum <: ProbabilitiesEstimator end -function probabilities_and_outcomes(x, est::PowerSpectrum) - probs = probabilities(x, est) +function probabilities_and_outcomes(est::PowerSpectrum, x) + probs = probabilities(est, x) events = FFTW.rfftfreq(length(x)) return probs, events end outcome_space(x, ::PowerSpectrum) = FFTW.rfftfreq(length(x)) -function probabilities(x, ::PowerSpectrum) +function probabilities(::PowerSpectrum, x) @assert x isa AbstractVector{<:Real} "`PowerSpectrum` only works for timeseries input!" f = FFTW.rfft(x) Probabilities(abs2.(f)) diff --git a/src/probabilities_estimators/timescales/wavelet_overlap.jl b/src/probabilities_estimators/timescales/wavelet_overlap.jl index 46f1fce21..3dfa50aa4 100644 --- a/src/probabilities_estimators/timescales/wavelet_overlap.jl +++ b/src/probabilities_estimators/timescales/wavelet_overlap.jl @@ -30,7 +30,7 @@ struct WaveletOverlap{W<:Wavelets.WT.OrthoWaveletClass} <: ProbabilitiesEstimato end WaveletOverlap() = WaveletOverlap(Wavelets.WT.Daubechies{12}()) -function probabilities_and_outcomes(x, est::WaveletOverlap) +function probabilities_and_outcomes(est::WaveletOverlap, x) @assert x isa AbstractVector{<:Real} "`WaveletOverlap` only works for timeseries input!" 
p = Probabilities(time_scale_density(x, est.wl)) return p, 1:length(p) diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl index a41ac4e81..70161c1f7 100644 --- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl +++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl @@ -477,7 +477,7 @@ function transfermatrix(iv::InvariantMeasure) return iv.to.transfermatrix, iv.to.bins end -function probabilities_and_outcomes(x::Array_or_Dataset, est::TransferOperator) +function probabilities_and_outcomes(est::TransferOperator, x::Array_or_Dataset) to = transferoperator(x, est.binning) probs = invariantmeasure(to).ρ diff --git a/test/convenience.jl b/test/convenience.jl index dc64f43be..0f3a3023f 100644 --- a/test/convenience.jl +++ b/test/convenience.jl @@ -28,5 +28,5 @@ end @testset "`probabilities` defaults to `CountOccurrences`" begin x = [1, 1, 2, 2, 3, 3] - @test probabilities(x) == probabilities(x, CountOccurrences()) == [1/3, 1/3, 1/3] + @test probabilities(x) == probabilities(CountOccurrences(), x) == [1/3, 1/3, 1/3] end diff --git a/test/probabilities/estimators/count_occurrences.jl b/test/probabilities/estimators/count_occurrences.jl index 5edbfeca0..0a21defdc 100644 --- a/test/probabilities/estimators/count_occurrences.jl +++ b/test/probabilities/estimators/count_occurrences.jl @@ -6,8 +6,8 @@ rng = Random.MersenneTwister(1234) x = [rand(rng, Bool) for _ in 1:10000] probs1 = probabilities(x) -probs2 = probabilities(x, CountOccurrences()) -probs3, outs = probabilities_and_outcomes(x, CountOccurrences()) +probs2 = probabilities(CountOccurrences(), x) +probs3, outs = probabilities_and_outcomes(CountOccurrences(), x) for ps in (probs1, probs2, probs3) for p in ps; @test 0.49 < p < 0.51; end @@ -22,7 +22,7 @@ y = [rand(rng, Bool) for _ in 1:10000] D = Dataset(x, y) probs1 = probabilities(D) -probs2 = probabilities(D, CountOccurrences()) +probs2 = probabilities(CountOccurrences(), D) for ps in (probs1, probs2) for p in ps; @test 0.24 < p < 0.26; end end diff --git a/test/probabilities/estimators/dispersion.jl b/test/probabilities/estimators/dispersion.jl index 60758c2c1..e67aaee0c 100644 --- a/test/probabilities/estimators/dispersion.jl +++ b/test/probabilities/estimators/dispersion.jl @@ -33,7 +33,7 @@ using Entropies, Test ps_paper = [1/11, 0/11, 3/11, 2/11, 1/11, 0/11, 1/11, 2/11, 1/11] |> sort ps_paper = ps_paper[findall(ps_paper .> 0)] - ps = probabilities(x, est) + ps = probabilities(est, x) @test ps |> sort == ps_paper # There is probably a typo in Rostaghi & Azami (2016). They state that the @@ -54,7 +54,7 @@ using Entropies, Test Ω = outcome_space(est) @test length(Ω) == total_outcomes(est) == 4^3 - probs, out = probabilities_and_outcomes(rand(1000), est) + probs, out = probabilities_and_outcomes(est, rand(1000)) @test all(x -> x ∈ Ω, out) end end diff --git a/test/probabilities/estimators/diversity.jl b/test/probabilities/estimators/diversity.jl index 185be8552..943c3f93b 100644 --- a/test/probabilities/estimators/diversity.jl +++ b/test/probabilities/estimators/diversity.jl @@ -4,14 +4,14 @@ nbins = 10 m = 3 τ = 1 est = Diversity(; nbins, m, τ) -@test probabilities(x, est) == [0.5, 0.5] +@test probabilities(est, x) == [0.5, 0.5] @test round(entropy_normalized(est, x), digits = 4) == 0.3010 @test total_outcomes(est) == 10 # Diversity divides the interval [-1, 1] into nbins subintervals. 
binsize = (1-(-1))/10 -probs, events = probabilities_and_outcomes(x, est) +probs, events = probabilities_and_outcomes(est, x) ds = [0.605, 0.698, 0.924, 0.930] # value from Wang et al. (2020) # These distances should be in the following distance bins: [8, 8, 9, 9]. diff --git a/test/probabilities/estimators/naive_kernel.jl b/test/probabilities/estimators/naive_kernel.jl index b6e928461..60fa526ba 100644 --- a/test/probabilities/estimators/naive_kernel.jl +++ b/test/probabilities/estimators/naive_kernel.jl @@ -9,15 +9,15 @@ pts = Dataset([rand(2) for i = 1:N]); est_direct = NaiveKernel(ϵ, KDTree) est_tree = NaiveKernel(ϵ, BruteForce) -@test probabilities(pts, est_tree) isa Probabilities -@test probabilities(pts, est_direct) isa Probabilities -p_tree = probabilities(pts, est_tree) -p_direct = probabilities(pts, est_direct) +@test probabilities(est_tree, pts) isa Probabilities +@test probabilities(est_direct, pts) isa Probabilities +p_tree = probabilities(est_tree, pts) +p_direct = probabilities(est_direct, pts) @test all(p_tree .== p_direct) == true @test entropy(Renyi(), est_direct, pts) isa Real @test entropy(Renyi(), est_tree, pts) isa Real -probs, z = probabilities_and_outcomes(pts, est_tree) +probs, z = probabilities_and_outcomes(est_tree, pts) @test z == 1:length(pts) @test outcome_space(pts, est_tree) == 1:length(pts) diff --git a/test/probabilities/estimators/permutation.jl b/test/probabilities/estimators/permutation.jl index 5b0f9b34f..1cd76aac1 100644 --- a/test/probabilities/estimators/permutation.jl +++ b/test/probabilities/estimators/permutation.jl @@ -14,8 +14,8 @@ z = rand(N) # Probability distributions - p1 = probabilities!(s, x, est) - p2 = probabilities!(s, y, est) + p1 = probabilities!(s, est, x) + p2 = probabilities!(s, est, y) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 end @@ -51,8 +51,8 @@ end y = Dataset(rand(N, 5)) # Probability distributions - p1 = probabilities(x, est) - p2 = probabilities(y, est) + p1 = probabilities(est, x) + p2 = probabilities(est, y) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 @@ -70,8 +70,8 @@ end est_isless = SymbolicPermutation(m = 5, τ = 1, lt = Base.isless) est_isless_rand = SymbolicPermutation(m = 5, τ = 1, lt = Entropies.isless_rand) - @test probabilities(D, est_isless) isa Probabilities - @test probabilities(D, est_isless_rand) isa Probabilities + @test probabilities(est_isless, D) isa Probabilities + @test probabilities(est_isless_rand, D) isa Probabilities end # With m = 2, ordinal patterns and frequencies are: @@ -80,7 +80,7 @@ end x = [1, 2, 1, 2, 1, 2] # don't randomize in the case of equal values, so use Base.isless est = SymbolicPermutation(m = 2, lt = Base.isless) -probs, πs = probabilities_and_outcomes(x, est) +probs, πs = probabilities_and_outcomes(est, x) @test πs == SVector{2, Int}.([[1, 2], [2, 1]]) @test probs == [3/5, 2/5] diff --git a/test/probabilities/estimators/permutation_amplitude_aware.jl b/test/probabilities/estimators/permutation_amplitude_aware.jl index 343cced6e..844f16e92 100644 --- a/test/probabilities/estimators/permutation_amplitude_aware.jl +++ b/test/probabilities/estimators/permutation_amplitude_aware.jl @@ -12,8 +12,8 @@ D = genembed(x, τs) est = SymbolicAmplitudeAwarePermutation(m = m, τ = τ) # Probability distributions -p1 = probabilities(x, est) -p2 = probabilities(D, est) +p1 = probabilities(est, x) +p2 = probabilities(est, D) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 @test all(p1.p .≈ p2.p) @@ -33,8 +33,8 @@ e2 = entropy(Renyi(), est, x) est_isless = SymbolicAmplitudeAwarePermutation(m = 5, τ = 1, lt = 
Base.isless) est_isless_rand = SymbolicAmplitudeAwarePermutation(m = 5, τ = 1, lt = Entropies.isless_rand) - @test probabilities(ts, est_isless) isa Probabilities - @test probabilities(D, est_isless) isa Probabilities + @test probabilities(est_isless, ts) isa Probabilities + @test probabilities(est_isless, D) isa Probabilities end # With m = 2, ordinal patterns and frequencies are: @@ -43,7 +43,7 @@ end x = [1, 2, 1, 2, 1, 2] # don't randomize in the case of equal values, so use Base.isless est = SymbolicAmplitudeAwarePermutation(m = 2, lt = Base.isless) -probs, πs = probabilities_and_outcomes(x, est) +probs, πs = probabilities_and_outcomes(est, x) @test πs == SVector{2, Int}.([[1, 2], [2, 1]]) # TODO: probabilities should be explicitly tested too. diff --git a/test/probabilities/estimators/permutation_spatial.jl b/test/probabilities/estimators/permutation_spatial.jl index 431ec0a3b..6800eba92 100644 --- a/test/probabilities/estimators/permutation_spatial.jl +++ b/test/probabilities/estimators/permutation_spatial.jl @@ -9,7 +9,7 @@ using Entropies, Test est = SpatialSymbolicPermutation(stencil, x, false) # Generic tests - ps = probabilities(x, est) + ps = probabilities(est, x) @test ps isa Probabilities @test length(ps) == 3 @test sort(ps) == [0.25, 0.25, 0.5] @@ -27,7 +27,7 @@ using Entropies, Test # for a different stencil stencil = CartesianIndex.([(0,0), (1,0), (2,0)]) est = SpatialSymbolicPermutation(stencil, x, false) - ps = sort(probabilities(x, est)) + ps = sort(probabilities(est, x)) @test ps[1] == 1/3 @test ps[2] == 2/3 @@ -49,12 +49,12 @@ using Entropies, Test est = SpatialSymbolicPermutation(stencil, z, false) # Analytically the total stencils are of length 4*4*4 = 64 # but all of them given the same probabilities because of the layout - ps = probabilities(z, est) + ps = probabilities(est, z) @test ps == [1] # if we shuffle, we get random stuff using Random w = shuffle!(Random.MersenneTwister(42), collect(z)) - ps = probabilities(w, est) + ps = probabilities(est, w) @test length(ps) > 1 # check that the 3d-hyperrectangle version also works as expected diff --git a/test/probabilities/estimators/permutation_weighted.jl b/test/probabilities/estimators/permutation_weighted.jl index ca8d9ede2..ab120f206 100644 --- a/test/probabilities/estimators/permutation_weighted.jl +++ b/test/probabilities/estimators/permutation_weighted.jl @@ -12,8 +12,8 @@ D = genembed(x, τs) # Probability distributions est = SymbolicWeightedPermutation(m = m, τ = τ) -p1 = probabilities(x, est) -p2 = probabilities(D, est) +p1 = probabilities(est, x) +p2 = probabilities(est, D) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 @test all(p1.p .≈ p2.p) @@ -34,8 +34,8 @@ e2 = entropy(Renyi(), est, x) est_isless = SymbolicWeightedPermutation(m = 5, τ = 1, lt = Base.isless) est_isless_rand = SymbolicWeightedPermutation(m = 5, τ = 1, lt = Entropies.isless_rand) - @test probabilities(ts, est_isless) isa Probabilities - @test probabilities(D, est_isless) isa Probabilities + @test probabilities(est_isless, ts) isa Probabilities + @test probabilities(est_isless, D) isa Probabilities end # With m = 2, ordinal patterns and frequencies are: @@ -44,7 +44,7 @@ end x = [1, 2, 1, 2, 1, 2] # don't randomize in the case of equal values, so use Base.isless est = SymbolicWeightedPermutation(m = 2, lt = Base.isless) -probs, πs = probabilities_and_outcomes(x, est) +probs, πs = probabilities_and_outcomes(est, x) @test πs == SVector{2, Int}.([[1, 2], [2, 1]]) # TODO: probabilities should be explicitly tested too. 
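
The test-file hunks above and below are mechanical consequences of the unified signatures introduced earlier in this patch (src/entropy.jl and src/probabilities.jl): the estimator argument now comes before the input data. A minimal sketch of the call pattern the updated tests assume (illustrative only, not part of the patch; `SymbolicPermutation` and `Renyi` are chosen simply because they appear in the surrounding hunks):

    using Entropies

    x = randn(1000)
    est = SymbolicPermutation(m = 3, τ = 1)

    # Estimator first, data last: the argument order standardized in this patch.
    p = probabilities(est, x)            # previously probabilities(x, est)
    h = entropy(Renyi(q = 1), est, x)    # previously entropy(Renyi(q = 1), x, est)
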
diff --git a/test/probabilities/estimators/timescales.jl b/test/probabilities/estimators/timescales.jl index 7f65a4d51..6e36ea792 100644 --- a/test/probabilities/estimators/timescales.jl +++ b/test/probabilities/estimators/timescales.jl @@ -9,7 +9,7 @@ using Entropies, Test @testset "WaveletOverlap" begin wl = Entropies.Wavelets.WT.Daubechies{4}() est = WaveletOverlap(wl) - ps = probabilities(x, est) + ps = probabilities(est, x) @test length(ps) == 8 @test ps isa Probabilities @test entropy(Renyi( q = 1, base = 2), WaveletOverlap(), x) isa Real @@ -25,7 +25,7 @@ using Entropies, Test ents = [entropy(Renyi(), est, w) for w in (x,y,z)] @test ents[1] < ents[2] < ents[3] # Test event stuff (analytically, using sine wave) - probs, events = probabilities_and_outcomes(x, est) + probs, events = probabilities_and_outcomes(est, x) @test length(events) == length(probs) == 501 @test events[1] ≈ 0 atol=1e-16 # 0 frequency, i.e., mean value @test probs[1] ≈ 0 atol=1e-16 # sine wave has 0 mean value diff --git a/test/probabilities/estimators/transfer_operator.jl b/test/probabilities/estimators/transfer_operator.jl index f3c6f1de8..e4c0bac00 100644 --- a/test/probabilities/estimators/transfer_operator.jl +++ b/test/probabilities/estimators/transfer_operator.jl @@ -39,6 +39,6 @@ end @test bins isa Vector{<:SVector} est = TransferOperator(binnings[i]) - @test probabilities(D, est) isa Probabilities - @test probabilities_and_outcomes(D, est) isa Tuple{Probabilities, Vector{SVector{2, Float64}}} + @test probabilities(est, D) isa Probabilities + @test probabilities_and_outcomes(est, D) isa Tuple{Probabilities, Vector{SVector{2, Float64}}} end diff --git a/test/probabilities/estimators/value_histogram.jl b/test/probabilities/estimators/value_histogram.jl index 0a9cfe8b8..051c11232 100644 --- a/test/probabilities/estimators/value_histogram.jl +++ b/test/probabilities/estimators/value_histogram.jl @@ -21,7 +21,7 @@ using Random for bin in binnings @testset "ϵ = $(bin.ϵ)" begin est = ValueHistogram(bin) - p = probabilities(x, est) + p = probabilities(est, x) @test length(p) == 100 @test all(e -> 0.009 ≤ e ≤ 0.011, p) end @@ -29,7 +29,7 @@ using Random @testset "Check rogue 1s" begin b = RectangularBinning(0.1) # no `nextfloat` here, so the rogue (1, 1) is in extra bin! - p = probabilities(x, ValueHistogram(b)) + p = probabilities(ValueHistogram(b), x) @test length(p) == 100 + 1 @test p[end] ≈ 1/100_000 atol = 1e-5 end @@ -41,7 +41,7 @@ using Random ε = nextfloat(0.1) # this guarantees that we get the same as the `n` above! 
binnings = RectangularBinning.((n, ε)) for bin in binnings - p = probabilities(x, ValueHistogram(bin)) + p = probabilities(ValueHistogram(bin), x) @test length(p) == 10 @test all(e -> 0.09 ≤ e ≤ 0.11, p) end @@ -55,8 +55,8 @@ using Random xs1D = [rand(rng, 20) for i = 1:10000]; xs2D = [rand(rng, 1000, 2) |> Dataset for i = 1:10000] # more points to fill all bins est = ValueHistogram(RectangularBinning(10)) - ps1D = [probabilities(x, est) for x in xs1D]; - ps2D = [probabilities(x, est) for x in xs2D]; + ps1D = [probabilities(est, x) for x in xs1D]; + ps2D = [probabilities(est, x) for x in xs2D]; n_rogue_extrabin_1D = count(length.(ps1D) .> est.binning.ϵ) n_rogue_extrabin_2D = count(length.(ps2D) .> est.binning.ϵ^2) @test n_rogue_extrabin_1D == 0 diff --git a/test/probabilities/interface.jl b/test/probabilities/interface.jl index 7818fe352..f187b0b12 100644 --- a/test/probabilities/interface.jl +++ b/test/probabilities/interface.jl @@ -1,3 +1,3 @@ x = ones(3) -p = probabilities(x, ValueHistogram(0.1)) +p = probabilities(ValueHistogram(0.1), x) @test p isa Probabilities From 60f7774103660ea6f78508a471d70c02c9174052 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Thu, 17 Nov 2022 13:44:56 +0100 Subject: [PATCH 06/13] Add deprecation --- src/deprecations.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/deprecations.jl b/src/deprecations.jl index 40dd1bdd6..a60af35ca 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -8,6 +8,14 @@ function probabilities(x::Vector_or_Dataset, ε::Union{Real, Vector{<:Real}}) probabilities(x, ValueHistogram(ε)) end +function probabilities(x, est::ProbabilitiesEstimator) + @warn """ + `probabilities(x, est::ProbabilitiesEstimator)` + is deprecated, use `probabilities(est::ProbabilitiesEstimator, x) instead`. + """ + return probabilities(est, x) +end + export genentropy, permentropy function permentropy(x; τ = 1, m = 3, base = MathConstants.e) From 6167a041b27091a0b85006ff8e45060d29d8eb03 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Fri, 18 Nov 2022 10:01:18 +0100 Subject: [PATCH 07/13] Default to Shannon entropy --- .../nearest_neighbors/KozachenkoLeonenko.jl | 8 ++++++-- .../estimators/nearest_neighbors/Kraskov.jl | 4 +--- src/entropies/estimators/nearest_neighbors/Zhu.jl | 8 ++++++-- .../estimators/nearest_neighbors/ZhuSingh.jl | 7 +++++-- .../estimators/order_statistics/AlizadehArghami.jl | 8 ++++++-- src/entropies/estimators/order_statistics/Correa.jl | 7 +++++-- .../estimators/order_statistics/Ebrahimi.jl | 7 +++++-- .../estimators/order_statistics/Vasicek.jl | 7 +++++-- src/entropy.jl | 13 +++++++++---- test/entropies/estimators/alizadeharghami.jl | 3 +++ test/entropies/estimators/correa.jl | 3 +++ test/entropies/estimators/ebrahimi.jl | 4 ++++ test/entropies/estimators/kozachenkoleonenko.jl | 2 ++ test/entropies/estimators/kraskov.jl | 2 ++ test/entropies/estimators/vasicek.jl | 3 +++ test/entropies/estimators/zhu.jl | 2 ++ test/entropies/estimators/zhusingh.jl | 2 ++ test/entropies/interface.jl | 6 ++++++ 18 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl b/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl index d11db9993..f3f3fe288 100644 --- a/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl +++ b/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl @@ -27,8 +27,12 @@ See also: [`entropy`](@ref). 
base::B = 2 end -function entropy(e::KozachenkoLeonenko, x::AbstractDataset{D, T}) where {D, T} - (; w, base) = e +function entropy(e::Renyi, est::KozachenkoLeonenko, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; w, base) = est + N = length(x) ρs = maximum_neighbor_distances(x, w, 1) # The estimated entropy has "unit" [nats] diff --git a/src/entropies/estimators/nearest_neighbors/Kraskov.jl b/src/entropies/estimators/nearest_neighbors/Kraskov.jl index 693c5dd05..d09c9fd27 100644 --- a/src/entropies/estimators/nearest_neighbors/Kraskov.jl +++ b/src/entropies/estimators/nearest_neighbors/Kraskov.jl @@ -26,7 +26,7 @@ end function entropy(e::Renyi, est::Kraskov, x::AbstractDataset{D, T}) where {D, T} e.q == 1 || throw(ArgumentError( - "Renyi entropy with q = $(e.q) not implemented for $typeof(est) estimators" + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" )) (; k, w, base) = est N = length(x) @@ -37,5 +37,3 @@ function entropy(e::Renyi, est::Kraskov, x::AbstractDataset{D, T}) where {D, T} D/N*sum(log.(MathConstants.e, ρs)) return h / log(base, MathConstants.e) # Convert to target unit end - -entropy(est::Kraskov, x) = entropy(Shannon(), est, x) diff --git a/src/entropies/estimators/nearest_neighbors/Zhu.jl b/src/entropies/estimators/nearest_neighbors/Zhu.jl index abf74a005..73a12793d 100644 --- a/src/entropies/estimators/nearest_neighbors/Zhu.jl +++ b/src/entropies/estimators/nearest_neighbors/Zhu.jl @@ -28,8 +28,12 @@ Base.@kwdef struct Zhu{B} <: EntropyEstimator base::B = MathConstants.e end -function entropy(e::Zhu, x::AbstractDataset{D, T}) where {D, T} - (; k, w, base) = e +function entropy(e::Renyi, est::Zhu, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; k, w, base) = est + N = length(x) tree = KDTree(x, Euclidean()) nn_idxs = bulkisearch(tree, x, NeighborNumber(k), Theiler(w)) diff --git a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl index 730ec0f3a..392e25737 100644 --- a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl +++ b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl @@ -42,8 +42,11 @@ Base.@kwdef struct ZhuSingh{B} <: EntropyEstimator end end -function entropy(e::ZhuSingh, x::AbstractDataset{D, T}) where {D, T} - (; k, w, base) = e +function entropy(e::Renyi, est::ZhuSingh, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; k, w, base) = est N = length(x) tree = KDTree(x, Euclidean()) nn_idxs = bulkisearch(tree, x, NeighborNumber(k), Theiler(w)) diff --git a/src/entropies/estimators/order_statistics/AlizadehArghami.jl b/src/entropies/estimators/order_statistics/AlizadehArghami.jl index df3ff2a7e..df8c44d45 100644 --- a/src/entropies/estimators/order_statistics/AlizadehArghami.jl +++ b/src/entropies/estimators/order_statistics/AlizadehArghami.jl @@ -31,8 +31,12 @@ See also: [`entropy`](@ref). 
base::B = 2 end -function entropy(e::AlizadehArghami, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::AlizadehArghami, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) return entropy(Vasicek(; m, base), x) + (2 / n)*(m * log(base, 2)) diff --git a/src/entropies/estimators/order_statistics/Correa.jl b/src/entropies/estimators/order_statistics/Correa.jl index 45c0c9dbf..12036b22d 100644 --- a/src/entropies/estimators/order_statistics/Correa.jl +++ b/src/entropies/estimators/order_statistics/Correa.jl @@ -43,8 +43,11 @@ function local_scaled_mean(ex, i::Int, m::Int, n::Int = length(x)) return x̄ / (2m + 1) end -function entropy(e::Correa, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::Correa, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) diff --git a/src/entropies/estimators/order_statistics/Ebrahimi.jl b/src/entropies/estimators/order_statistics/Ebrahimi.jl index de944fb9c..bd2c088d0 100644 --- a/src/entropies/estimators/order_statistics/Ebrahimi.jl +++ b/src/entropies/estimators/order_statistics/Ebrahimi.jl @@ -50,8 +50,11 @@ function ebrahimi_scaling_factor(i, m, n) end end -function entropy(e::Ebrahimi, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::Ebrahimi, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) diff --git a/src/entropies/estimators/order_statistics/Vasicek.jl b/src/entropies/estimators/order_statistics/Vasicek.jl index fffcc9c73..1cc52d4be 100644 --- a/src/entropies/estimators/order_statistics/Vasicek.jl +++ b/src/entropies/estimators/order_statistics/Vasicek.jl @@ -36,8 +36,11 @@ written for this package). base::B = 2 end -function entropy(e::Vasicek, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::Vasicek, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) diff --git a/src/entropy.jl b/src/entropy.jl index 3ca149c0e..5feb60651 100644 --- a/src/entropy.jl +++ b/src/entropy.jl @@ -131,15 +131,20 @@ entropy!(s::AbstractVector{Int}, est::ProbabilitiesEstimator, x) = ########################################################################################### # Dispatch for these functions is implemented in individual estimator files in # `entropies/estimators/`. 
-function entropy(e::Entropy, est::EntropyEstimator, x) end +function entropy(e::Entropy, est::EntropyEstimator, x) + t = string(typeof(e).name.name) + throw(ArgumentError("$t entropy not implemented for $(typeof(est)) estimator")) +end + entropy(est::EntropyEstimator, ::Probabilities) = error("Entropy estimators like $(nameof(typeof(est))) are not called with probabilities.") entropy(e::Entropy, est::EntropyEstimator, ::Probabilities) = error("Entropy estimators like $(nameof(typeof(est))) are not called with probabilities.") - -# -entropy(e::Entropy, est::EntropyEstimator, x::AbstractVector{<:Real}) = +entropy(e::Entropy, est::EntropyEstimator, x::AbstractVector) = entropy(e, est, Dataset(x)) +# Always default to Shannon with base-2 logs. Individual estimators may override this. +entropy(est::EntropyEstimator, x; base = 2) = entropy(Shannon(; base), est, x) + ########################################################################################### # Normalize API diff --git a/test/entropies/estimators/alizadeharghami.jl b/test/entropies/estimators/alizadeharghami.jl index 522c32048..d57f6bb05 100644 --- a/test/entropies/estimators/alizadeharghami.jl +++ b/test/entropies/estimators/alizadeharghami.jl @@ -13,3 +13,6 @@ ea_n = AlizadehArghami(m = 100, base = MathConstants.e) n = 1000000 @test round(entropy(ea, rand(rng, n)), digits = 2) == U @test round(entropy(ea_n, randn(rng, n)), digits = 2) == N + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), AlizadehArghami(), x) diff --git a/test/entropies/estimators/correa.jl b/test/entropies/estimators/correa.jl index 291a95436..6c9398fd4 100644 --- a/test/entropies/estimators/correa.jl +++ b/test/entropies/estimators/correa.jl @@ -13,3 +13,6 @@ ec_n = Correa(m = 100, base = MathConstants.e) n = 1000000 @test round(entropy(ec, rand(rng, n)), digits = 2) == U @test round(entropy(ec_n, randn(rng, n)), digits = 2) == N + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), Correa(), x) diff --git a/test/entropies/estimators/ebrahimi.jl b/test/entropies/estimators/ebrahimi.jl index d2bcaaffa..9d41c36b3 100644 --- a/test/entropies/estimators/ebrahimi.jl +++ b/test/entropies/estimators/ebrahimi.jl @@ -13,3 +13,7 @@ ee_n = Ebrahimi(m = 100, base = MathConstants.e) n = 1000000 @test round(entropy(ee, rand(rng, n)), digits = 2) == U @test round(entropy(ee_n, randn(rng, n)), digits = 2) == N + + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), Ebrahimi(), x) diff --git a/test/entropies/estimators/kozachenkoleonenko.jl b/test/entropies/estimators/kozachenkoleonenko.jl index bece82241..4c23a63be 100644 --- a/test/entropies/estimators/kozachenkoleonenko.jl +++ b/test/entropies/estimators/kozachenkoleonenko.jl @@ -24,3 +24,5 @@ tol_e = h_XN_base_e * 0.03 tol_2 = h_XN_base_2 * 0.03 @test h_XN_base_e - tol_e ≤ h_XN_kr_base_e ≤ h_XN_base_e + tol_e @test h_XN_base_2 - tol_2 ≤ h_XN_kr_base_2 ≤ h_XN_base_2 + tol_2 + +@test_throws ArgumentError entropy(Renyi(q = 2), KozachenkoLeonenko(), XN) diff --git a/test/entropies/estimators/kraskov.jl b/test/entropies/estimators/kraskov.jl index ce24a2946..6f0dd5d41 100644 --- a/test/entropies/estimators/kraskov.jl +++ b/test/entropies/estimators/kraskov.jl @@ -25,3 +25,5 @@ h_XN_kr_base_e = entropy(Kraskov(k = 3, base = MathConstants.e), XN) h_XN_kr_base_2 = entropy(Kraskov(k = 3, base = 2), XN) @test round(h_XN_base_e, digits = 1) == round(h_XN_kr_base_e, digits = 1) @test round(h_XN_base_2, digits = 1) == round(h_XN_kr_base_2, digits = 1) + +@test_throws ArgumentError 
entropy(Renyi(q = 2), Kraskov(), XN) diff --git a/test/entropies/estimators/vasicek.jl b/test/entropies/estimators/vasicek.jl index e88116b42..bc012923b 100644 --- a/test/entropies/estimators/vasicek.jl +++ b/test/entropies/estimators/vasicek.jl @@ -14,3 +14,6 @@ ev_n = Vasicek(m = 100, base = MathConstants.e) n = 1000000 @test round(entropy(ev, rand(rng, n)), digits = 2) == U @test round(entropy(ev_n, randn(rng, n)), digits = 2) == N + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), Vasicek(), x) diff --git a/test/entropies/estimators/zhu.jl b/test/entropies/estimators/zhu.jl index 5ef85edc6..42aa249b2 100644 --- a/test/entropies/estimators/zhu.jl +++ b/test/entropies/estimators/zhu.jl @@ -17,3 +17,5 @@ e_base_2 = Zhu(k = 3, base = 2) @test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) @test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) + +@test_throws ArgumentError entropy(Renyi(q = 2), Zhu(), DN) diff --git a/test/entropies/estimators/zhusingh.jl b/test/entropies/estimators/zhusingh.jl index 6f89ce51e..cb20291c5 100644 --- a/test/entropies/estimators/zhusingh.jl +++ b/test/entropies/estimators/zhusingh.jl @@ -60,3 +60,5 @@ tol_ℯ = hZS_𝒩₂_base_ℯ * 0.01 tol_2 = hZS_𝒩₂_base_2 * 0.01 @test h_𝒩₂_base_ℯ - tol_ℯ ≤ hZS_𝒩₂_base_ℯ ≤ h_𝒩₂_base_ℯ + tol_ℯ @test h_𝒩₂_base_2 - tol_2 ≤ hZS_𝒩₂_base_2 ≤ h_𝒩₂_base_2 + tol_2 + +@test_throws ArgumentError entropy(Renyi(q = 2), ZhuSingh(), rand(100)) diff --git a/test/entropies/interface.jl b/test/entropies/interface.jl index d142a656f..80f73c9ca 100644 --- a/test/entropies/interface.jl +++ b/test/entropies/interface.jl @@ -1,2 +1,8 @@ x = ones(3) @test_throws MethodError entropy(x, 0.1) + +@testset "Non-default entropy types" begin + x = rand(1000) + est = AlizadehArghami() # the AlizadehArghami estimator only works for Shannon entropy + @test_throws ArgumentError entropy(Tsallis(), AlizadehArghami(), x) +end From ca99aecab81e7b375b219cb49d1dad17cc1f3ba7 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Fri, 18 Nov 2022 10:59:25 +0100 Subject: [PATCH 08/13] resolve ambiguous method signature by being less strict --- src/probabilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/probabilities.jl b/src/probabilities.jl index f71f96359..0fb6dc36a 100644 --- a/src/probabilities.jl +++ b/src/probabilities.jl @@ -175,7 +175,7 @@ julia> total_outcomes(rand(42), est) # same as `factorial(m)` for any `x` 24 ``` """ -function total_outcomes(x::Array_or_Dataset, est::ProbabilitiesEstimator) +function total_outcomes(x, est::ProbabilitiesEstimator) return length(outcome_space(x, est)) end From 1ec9c963fa7f585100e816732e173e6ff4e42e11 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Fri, 18 Nov 2022 11:09:43 +0100 Subject: [PATCH 09/13] Fix examples --- docs/src/examples.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/src/examples.md b/docs/src/examples.md index 0697c4e39..e8f113a4d 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -137,9 +137,9 @@ for (i, r) in enumerate(rs) lyaps[i] = lyapunov(ds, N_lyap) x = trajectory(ds, N_ent) # time series - hperm = entropy(x, SymbolicPermutation(; m, τ)) - hwtperm = entropy(x, SymbolicWeightedPermutation(; m, τ)) - hampperm = entropy(x, SymbolicAmplitudeAwarePermutation(; m, τ)) + hperm = entropy(SymbolicPermutation(; m, τ), x) + hwtperm = entropy(SymbolicWeightedPermutation(; m, τ), x) + hampperm = entropy(SymbolicAmplitudeAwarePermutation(; m, τ), x) hs_perm[i] = hperm; 
hs_wtperm[i] = hwtperm; hs_ampperm[i] = hampperm end @@ -301,9 +301,9 @@ des = zeros(length(windows)) pes = zeros(length(windows)) m, c = 2, 6 -est_de = Dispersion(encoding = GaussianCDFEncoding(c), m = m, τ = 1) +est_de = Dispersion(c = c, m = m, τ = 1) for (i, window) in enumerate(windows) - des[i] = entropy_normalized(Renyi(), y[window], est_de) + des[i] = entropy_normalized(Renyi(), est_de, y[window]) end fig = Figure() @@ -344,8 +344,8 @@ for N in (N1, N2) local w = trajectory(Systems.lorenz(), N÷10; Δt = 0.1, Ttr = 100)[:, 1] # chaotic for q in (x, y, z, w) - h = entropy(q, PowerSpectrum()) - n = entropy_normalized(q, PowerSpectrum()) + h = entropy(PowerSpectrum(), q) + n = entropy_normalized(PowerSpectrum(), q) println("entropy: $(h), normalized: $(n).") end end From bf85386b5bc1f8baa36a54084b2f08e46ba963a8 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Tue, 22 Nov 2022 13:52:24 +0100 Subject: [PATCH 10/13] `base` belongs in the entropy, not the estimator --- src/entropies/estimators/nearest_neighbors/Zhu.jl | 9 ++++----- .../estimators/nearest_neighbors/ZhuSingh.jl | 13 ++++++------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/entropies/estimators/nearest_neighbors/Zhu.jl b/src/entropies/estimators/nearest_neighbors/Zhu.jl index 73a12793d..8839fb405 100644 --- a/src/entropies/estimators/nearest_neighbors/Zhu.jl +++ b/src/entropies/estimators/nearest_neighbors/Zhu.jl @@ -2,10 +2,10 @@ export Zhu """ Zhu <: EntropyEstimator - Zhu(k = 1, w = 0, base = MathConstants.e) + Zhu(k = 1, w = 0) The `Zhu` estimator (Zhu et al., 2015)[^Zhu2015] computes the [`Shannon`](@ref) -[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`) to the given `base`, by +[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`), by approximating probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using using `k` nearest neighbor searches. @@ -22,10 +22,9 @@ See also: [`entropy`](@ref). transfer entropy estimation via the k-nearest-neighbors approach. Entropy, 17(6), 4173-4201. """ -Base.@kwdef struct Zhu{B} <: EntropyEstimator +Base.@kwdef struct Zhu <: EntropyEstimator k::Int = 1 w::Int = 0 - base::B = MathConstants.e end function entropy(e::Renyi, est::Zhu, x::AbstractDataset{D, T}) where {D, T} @@ -38,7 +37,7 @@ function entropy(e::Renyi, est::Zhu, x::AbstractDataset{D, T}) where {D, T} tree = KDTree(x, Euclidean()) nn_idxs = bulkisearch(tree, x, NeighborNumber(k), Theiler(w)) h = digamma(N) + mean_logvolumes(x, nn_idxs, N) - digamma(k) + (D - 1) / k - return h / log(base, MathConstants.e) + return h / log(e.base, MathConstants.e) end function mean_logvolumes(x, nn_idxs, N::Int) diff --git a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl index 392e25737..c0cdb7ced 100644 --- a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl +++ b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl @@ -7,10 +7,10 @@ export ZhuSingh """ ZhuSingh <: EntropyEstimator - ZhuSingh(k = 1, w = 0, base = MathConstants.e) + ZhuSingh(k = 1, w = 0) The `ZhuSingh` estimator (Zhu et al., 2015)[^Zhu2015] computes the [`Shannon`](@ref) -[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`) to the given `base`. +[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`). Like [`Zhu`](@ref), this estimator approximates probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using using `k` nearest neighbor searches. However, @@ -35,10 +35,9 @@ See also: [`entropy`](@ref). 
Base.@kwdef struct ZhuSingh{B} <: EntropyEstimator k::Int = 1 w::Int = 0 - base::B = MathConstants.e - function ZhuSingh(k::Int, w::Int, base::B) where B - new{B}(k, w, base) + function ZhuSingh(k::Int, w::Int) + new{B}(k, w) end end @@ -46,13 +45,13 @@ function entropy(e::Renyi, est::ZhuSingh, x::AbstractDataset{D, T}) where {D, T} e.q == 1 || throw(ArgumentError( "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" )) - (; k, w, base) = est + (; k, w) = est N = length(x) tree = KDTree(x, Euclidean()) nn_idxs = bulkisearch(tree, x, NeighborNumber(k), Theiler(w)) mean_logvol, mean_digammaξ = mean_logvolumes_and_digamma(x, nn_idxs, N, k) h = digamma(N) + mean_logvol - mean_digammaξ - return h / log(base, MathConstants.e) + return h / log(e.base, MathConstants.e) end function mean_logvolumes_and_digamma(x, nn_idxs, N::Int, k::Int) From 16c4f6bf5b2b8e68f4f4c6f111c4b298f68d774f Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Tue, 22 Nov 2022 13:58:06 +0100 Subject: [PATCH 11/13] Simplify constructor --- src/entropies/estimators/nearest_neighbors/ZhuSingh.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl index c0cdb7ced..b2acb3b80 100644 --- a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl +++ b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl @@ -35,10 +35,6 @@ See also: [`entropy`](@ref). Base.@kwdef struct ZhuSingh{B} <: EntropyEstimator k::Int = 1 w::Int = 0 - - function ZhuSingh(k::Int, w::Int) - new{B}(k, w) - end end function entropy(e::Renyi, est::ZhuSingh, x::AbstractDataset{D, T}) where {D, T} From b39f0072c0a14dcaf2bb5186ab60408ae18445c7 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Tue, 22 Nov 2022 14:03:51 +0100 Subject: [PATCH 12/13] Remove redundant type parameter --- src/entropies/estimators/nearest_neighbors/ZhuSingh.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl index b2acb3b80..62540c814 100644 --- a/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl +++ b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl @@ -32,7 +32,7 @@ See also: [`entropy`](@ref). neighbor estimates of entropy. American journal of mathematical and management sciences, 23(3-4), 301-321. 
""" -Base.@kwdef struct ZhuSingh{B} <: EntropyEstimator +Base.@kwdef struct ZhuSingh <: EntropyEstimator k::Int = 1 w::Int = 0 end From ce728ad89d7813a168acc9ffcded72e7e8920e67 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Tue, 22 Nov 2022 14:25:49 +0100 Subject: [PATCH 13/13] Fix tests --- src/entropies/estimators/nearest_neighbors/Zhu.jl | 2 +- test/entropies/estimators/zhu.jl | 11 +++++++---- test/entropies/estimators/zhusingh.jl | 15 +++++++++------ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/entropies/estimators/nearest_neighbors/Zhu.jl b/src/entropies/estimators/nearest_neighbors/Zhu.jl index 8839fb405..1a63a4470 100644 --- a/src/entropies/estimators/nearest_neighbors/Zhu.jl +++ b/src/entropies/estimators/nearest_neighbors/Zhu.jl @@ -31,7 +31,7 @@ function entropy(e::Renyi, est::Zhu, x::AbstractDataset{D, T}) where {D, T} e.q == 1 || throw(ArgumentError( "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" )) - (; k, w, base) = est + (; k, w) = est N = length(x) tree = KDTree(x, Euclidean()) diff --git a/test/entropies/estimators/zhu.jl b/test/entropies/estimators/zhu.jl index 42aa249b2..5c882766e 100644 --- a/test/entropies/estimators/zhu.jl +++ b/test/entropies/estimators/zhu.jl @@ -12,10 +12,13 @@ DN = Dataset(randn(200000, 1)) hN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 hN_base_2 = hN_base_e / log(2, MathConstants.e) -e_base_e = Zhu(k = 3, base = MathConstants.e) -e_base_2 = Zhu(k = 3, base = 2) +est = Zhu(k = 3) -@test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) -@test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) +@test round(entropy(Shannon(; base = ℯ), est, DN), digits = 1) == round(hN_base_e, digits = 1) +@test round(entropy(Shannon(; base = 2), est, DN), digits = 1) == round(hN_base_2, digits = 1) @test_throws ArgumentError entropy(Renyi(q = 2), Zhu(), DN) + +# Shannon entropy is default. +@test entropy(Shannon(; base = 2), est, DN) == entropy(est, DN, base = 2) +@test entropy(Shannon(; base = ℯ), est, DN) == entropy(est, DN, base = ℯ) diff --git a/test/entropies/estimators/zhusingh.jl b/test/entropies/estimators/zhusingh.jl index cb20291c5..b7cf360d7 100644 --- a/test/entropies/estimators/zhusingh.jl +++ b/test/entropies/estimators/zhusingh.jl @@ -35,11 +35,10 @@ DN = Dataset(randn(100000, 1)) hN_base_e = 0.5 * log(MathConstants.e, 2π * σ^2) + 0.5 hN_base_2 = hN_base_e / log(2, MathConstants.e) -e_base_e = ZhuSingh(k = 3, base = MathConstants.e) -e_base_2 = ZhuSingh(k = 3, base = 2) +est = ZhuSingh(k = 3) -@test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) -@test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) +@test round(entropy(est, DN, base = ℯ), digits = 1) == round(hN_base_e, digits = 1) +@test round(entropy(est, DN, base = 2), digits = 1) == round(hN_base_2, digits = 1) # Analytical test: 3D normal distribution σs = ones(3) @@ -51,8 +50,8 @@ h_𝒩₂_base_ℯ = 0.5n * log(ℯ, 2π) + 0.5*log(ℯ, det(Σ)) + 0.5n h_𝒩₂_base_2 = h_𝒩₂_base_ℯ / log(2, ℯ) sample = Dataset(transpose(rand(𝒩₂, 50000))) -hZS_𝒩₂_base_ℯ = entropy(e_base_e, sample) -hZS_𝒩₂_base_2 = entropy(e_base_2, sample) +hZS_𝒩₂_base_ℯ = entropy(Shannon(; base = ℯ), est, sample) +hZS_𝒩₂_base_2 = entropy(Shannon(; base = 2), est, sample) # Estimation accuracy decreases for fixed N with increasing edimension, so exact comparison # isn't useful. Just check that values are within 1% of the target. 
@@ -62,3 +61,7 @@ tol_2 = hZS_𝒩₂_base_2 * 0.01 @test h_𝒩₂_base_2 - tol_2 ≤ hZS_𝒩₂_base_2 ≤ h_𝒩₂_base_2 + tol_2 @test_throws ArgumentError entropy(Renyi(q = 2), ZhuSingh(), rand(100)) + +# Shannon entropy is default. +@test entropy(Shannon(; base = 2), est, sample) == entropy(est, sample, base = 2) +@test entropy(Shannon(; base = ℯ), est, sample) == entropy(est, sample, base = ℯ)
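
A minimal usage sketch of the API after this patch series, for reference. The data, variable names, and parameter values below are illustrative assumptions, not taken from the diffs; only the call signatures (estimator-first argument order, Shannon default with a `base` keyword, and `ArgumentError` for unsupported entropy types) come from the patches above.

    using Entropies

    x = randn(10_000)                 # illustrative time series
    D = Dataset(randn(10_000, 2))     # illustrative 2D dataset

    # Probabilities estimators take the estimator as the first argument;
    # the old order `probabilities(x, est)` is deprecated but still forwards.
    p = probabilities(SymbolicPermutation(m = 3, τ = 1), x)
    probs, outcomes = probabilities_and_outcomes(SymbolicPermutation(m = 3, τ = 1), x)

    # Entropy estimators default to Shannon entropy; `base` is a keyword of
    # the convenience method.
    h1 = entropy(Kraskov(k = 3), D)
    h2 = entropy(Zhu(k = 3), D, base = ℯ)

    # The entropy type can also be given explicitly ...
    h3 = entropy(Shannon(; base = ℯ), Kraskov(k = 3), D)

    # ... but estimators that only implement Shannon entropy throw an
    # ArgumentError for anything else, e.g.
    # entropy(Renyi(q = 2), Kraskov(k = 3), D).

Passing the estimator first mirrors the unified `entropy(e, est, x)` signature, and, per the later patches, `base` now lives on the entropy type rather than on estimators such as `Zhu` and `ZhuSingh`.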