diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 000000000..9ef012718 Binary files /dev/null and b/.DS_Store differ diff --git a/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md b/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md index aa9d4611c..20d3d6c4d 100644 --- a/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md +++ b/.github/PULL_REQUEST_TEMPLATE/new_entropy_template.md @@ -13,7 +13,7 @@ project [devdocs](https://juliadynamics.github.io/Entropies.jl/dev/devdocs/). Ticking the boxes below will help us provide good feedback and speed up the review process. Partial PRs are welcome too, and we're happy to help if you're stuck on something. -- [ ] The new entropy subtypes `Entropy` or `IndirectEntropy`. +- [ ] The new entropy (estimator) subtypes `Entropy` (`EntropyEstimator`). - [ ] The new entropy has an informative docstring, which is referenced in `docs/src/entropies.md`. - [ ] Relevant sources are cited in the docstring. diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 000000000..eee7acbe1 Binary files /dev/null and b/docs/.DS_Store differ diff --git a/docs/src/examples.md b/docs/src/examples.md index 13eb08cd8..e8f113a4d 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -137,9 +137,9 @@ for (i, r) in enumerate(rs) lyaps[i] = lyapunov(ds, N_lyap) x = trajectory(ds, N_ent) # time series - hperm = entropy(x, SymbolicPermutation(; m, τ)) - hwtperm = entropy(x, SymbolicWeightedPermutation(; m, τ)) - hampperm = entropy(x, SymbolicAmplitudeAwarePermutation(; m, τ)) + hperm = entropy(SymbolicPermutation(; m, τ), x) + hwtperm = entropy(SymbolicWeightedPermutation(; m, τ), x) + hampperm = entropy(SymbolicAmplitudeAwarePermutation(; m, τ), x) hs_perm[i] = hperm; hs_wtperm[i] = hwtperm; hs_ampperm[i] = hampperm end @@ -173,7 +173,7 @@ using DynamicalSystemsBase, CairoMakie, Distributions N = 500 D = Dataset(sort([rand(𝒩) for i = 1:N])) x, y = columns(D) -p = probabilities(D, NaiveKernel(1.5)) +p = probabilities(NaiveKernel(1.5), D) fig, ax = scatter(D[:, 1], D[:, 2], zeros(N); markersize=8, axis=(type = Axis3,) ) @@ -301,9 +301,9 @@ des = zeros(length(windows)) pes = zeros(length(windows)) m, c = 2, 6 -est_de = Dispersion(encoding = GaussianCDFEncoding(c), m = m, τ = 1) +est_de = Dispersion(c = c, m = m, τ = 1) for (i, window) in enumerate(windows) - des[i] = entropy_normalized(Renyi(), y[window], est_de) + des[i] = entropy_normalized(Renyi(), est_de, y[window]) end fig = Figure() @@ -344,8 +344,8 @@ for N in (N1, N2) local w = trajectory(Systems.lorenz(), N÷10; Δt = 0.1, Ttr = 100)[:, 1] # chaotic for q in (x, y, z, w) - h = entropy(q, PowerSpectrum()) - n = entropy_normalized(q, PowerSpectrum()) + h = entropy(PowerSpectrum(), q) + n = entropy_normalized(PowerSpectrum(), q) println("entropy: $(h), normalized: $(n).") end end diff --git a/docs/src/index.md b/docs/src/index.md index d7a945deb..501fb1ba6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -40,7 +40,11 @@ Thus, any of the implemented [probabilities estimators](@ref probabilities_estim These names are common place, and so in Entropies.jl we provide convenience functions like [`entropy_wavelet`](@ref). However, it should be noted that these functions really aren't anything more than 2-lines-of-code wrappers that call [`entropy`](@ref) with the appropriate [`ProbabilitiesEstimator`](@ref). 
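The argument-order change applied throughout the examples above, and the statement that the convenience functions are thin wrappers, can be sketched as follows. This is a minimal illustration (not part of the diff), assuming the defaults documented in `convenience_definitions.jl` further down (Daubechies-12 wavelet, base-2 logarithm):

```julia
using Entropies

x = randn(1000)

# `entropy_wavelet` is a thin convenience wrapper...
h1 = entropy_wavelet(x)

# ...around an explicit entropy-type + estimator call, written
# estimator-first after this PR (defaults: Daubechies-12 wavelet, base 2).
h2 = entropy(Shannon(base = 2), WaveletOverlap(), x)

h1 == h2  # true
```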
- There are only a few exceptions to this rule, which are quantities that are able to compute Shannon entropies via alternate means, without explicitly computing some probability distributions. These are `IndirectEntropy` instances, such as [`Kraskov`](@ref). + In addition to `ProbabilitiesEstimators`, we also provide [`EntropyEstimator`](@ref)s, + which compute entropies via alternate means, without explicitly computing some + probability distribution. For example, the [`Kraskov`](@ref) estimator computes Shannon + entropy via a nearest neighbor algorithm, while the [`Vasicek`](@ref) estimator computes + Shannon entropy using order statistics. ### Other complexity measures diff --git a/src/deprecations.jl b/src/deprecations.jl index 40dd1bdd6..a60af35ca 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -8,6 +8,14 @@ function probabilities(x::Vector_or_Dataset, ε::Union{Real, Vector{<:Real}}) probabilities(x, ValueHistogram(ε)) end +function probabilities(x, est::ProbabilitiesEstimator) + @warn """ + `probabilities(x, est::ProbabilitiesEstimator)` + is deprecated, use `probabilities(est::ProbabilitiesEstimator, x)` instead. + """ + return probabilities(est, x) +end + export genentropy, permentropy function permentropy(x; τ = 1, m = 3, base = MathConstants.e) diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl index d114454a7..415150139 100644 --- a/src/entropies/convenience_definitions.jl +++ b/src/entropies/convenience_definitions.jl @@ -23,11 +23,11 @@ for the weighted/amplitude-aware versions. """ function entropy_permutation(x; base = 2, kwargs...) est = SymbolicPermutation(; kwargs...) - entropy(Shannon(; base), x, est) + entropy(Shannon(; base), est, x) end """ - entropy_spatial_permutation(x, stencil, periodic = true; kwargs...) + entropy_spatial_permutation(x, stencil; periodic = true, kwargs...) Compute the spatial permutation entropy of `x` given the `stencil`. Here `x` must be a matrix or higher dimensional `Array` containing spatial data. @@ -35,14 +35,14 @@ This function is just a convenience call to: ```julia est = SpatialSymbolicPermutation(stencil, x, periodic) -entropy(Renyi(;kwargs...), x, est) +entropy(Renyi(;kwargs...), est, x) ``` See [`SpatialSymbolicPermutation`](@ref) for more info, or how to encode stencils. """ -function entropy_spatial_permutation(x, stencil, periodic = true; kwargs...) +function entropy_spatial_permutation(x, stencil; periodic = true, kwargs...) est = SpatialSymbolicPermutation(stencil, x, periodic) - entropy(Renyi(;kwargs...), x, est) + entropy(Renyi(;kwargs...), est, x) end """ @@ -52,14 +52,14 @@ Compute the wavelet entropy. This function is just a convenience call to: ```julia est = WaveletOverlap(wavelet) -entropy(Shannon(base), x, est) +entropy(Shannon(base), est, x) ``` See [`WaveletOverlap`](@ref) for more info. """ function entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = 2) est = WaveletOverlap(wavelet) - entropy(Shannon(; base), x, est) + entropy(Shannon(; base), est, x) end """ @@ -69,12 +69,12 @@ Compute the dispersion entropy. This function is just a convenience call to: ```julia est = Dispersion(kwargs...) -entropy(Shannon(base), x, est) +entropy(Shannon(base), est, x) ``` See [`Dispersion`](@ref) for more info. """ function entropy_dispersion(x; base = 2, kwargs...) est = Dispersion(kwargs...)
- entropy(Shannon(; base), x, est) + entropy(Shannon(; base), est, x) end diff --git a/src/entropies/entropies.jl b/src/entropies/entropies.jl index 31dde2a4d..967c0e3e8 100644 --- a/src/entropies/entropies.jl +++ b/src/entropies/entropies.jl @@ -3,4 +3,4 @@ include("tsallis.jl") include("curado.jl") include("streched_exponential.jl") include("convenience_definitions.jl") -include("direct_entropies/direct_entropies.jl") +include("estimators/estimators.jl") diff --git a/src/entropies/direct_entropies/direct_entropies.jl b/src/entropies/estimators/estimators.jl similarity index 100% rename from src/entropies/direct_entropies/direct_entropies.jl rename to src/entropies/estimators/estimators.jl diff --git a/src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl b/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl similarity index 61% rename from src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl rename to src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl index 363c88392..f3f3fe288 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl +++ b/src/entropies/estimators/nearest_neighbors/KozachenkoLeonenko.jl @@ -1,13 +1,13 @@ export KozachenkoLeonenko """ - KozachenkoLeonenko <: IndirectEntropy + KozachenkoLeonenko <: EntropyEstimator KozachenkoLeonenko(; k::Int = 1, w::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(KozachenkoLeonenko(), x)` to -estimate the Shannon entropy of `x` (a multi-dimensional `Dataset`) to the given -`base` using nearest neighbor searches using the method from Kozachenko & -Leonenko[^KozachenkoLeonenko1987], as described in Charzyńska and Gambin[^Charzyńska2016]. +The `KozachenkoLeonenko` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base`, based on nearest neighbor searches +using the method from Kozachenko & Leonenko (1987)[^KozachenkoLeonenko1987], as described in +Charzyńska and Gambin[^Charzyńska2016]. `w` is the Theiler window, which determines if temporal neighbors are excluded during neighbor searches (defaults to `0`, meaning that only the point itself is excluded @@ -15,18 +15,24 @@ when searching for neighbours). In contrast to [`Kraskov`](@ref), this estimator uses only the *closest* neighbor. +See also: [`entropy`](@ref). + [^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. [^KozachenkoLeonenko1987]: Kozachenko, L. F., & Leonenko, N. N. (1987). Sample estimate of the entropy of a random vector. Problemy Peredachi Informatsii, 23(2), 9-16. 
""" -@Base.kwdef struct KozachenkoLeonenko{B} <: IndirectEntropy +@Base.kwdef struct KozachenkoLeonenko{B} <: EntropyEstimator w::Int = 1 base::B = 2 end -function entropy(e::KozachenkoLeonenko, x::AbstractDataset{D, T}) where {D, T} - (; w, base) = e +function entropy(e::Renyi, est::KozachenkoLeonenko, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; w, base) = est + N = length(x) ρs = maximum_neighbor_distances(x, w, 1) # The estimated entropy has "unit" [nats] diff --git a/src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl b/src/entropies/estimators/nearest_neighbors/Kraskov.jl similarity index 57% rename from src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl rename to src/entropies/estimators/nearest_neighbors/Kraskov.jl index eaa961222..d09c9fd27 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl +++ b/src/entropies/estimators/nearest_neighbors/Kraskov.jl @@ -1,31 +1,34 @@ export Kraskov """ - Kraskov <: IndirectEntropy + Kraskov <: EntropyEstimator Kraskov(; k::Int = 1, w::Int = 1, base = 2) -An indirect entropy used in [`entropy`](@ref)`(Kraskov(), x)` to estimate the Shannon -entropy of `x` (a multi-dimensional `Dataset`) to the given -`base` using `k`-th nearest neighbor searches as in [^Kraskov2004]. +The `Kraskov` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base`, using the `k`-th nearest neighbor +searches method from [^Kraskov2004]. `w` is the Theiler window, which determines if temporal neighbors are excluded during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). -See also: [`KozachenkoLeonenko`](@ref). +See also: [`entropy`](@ref), [`KozachenkoLeonenko`](@ref). [^Kraskov2004]: Kraskov, A., Stögbauer, H., & Grassberger, P. (2004). Estimating mutual information. Physical review E, 69(6), 066138. 
""" -Base.@kwdef struct Kraskov{B} <: IndirectEntropy +Base.@kwdef struct Kraskov{B} <: EntropyEstimator k::Int = 1 w::Int = 1 base::B = 2 end -function entropy(e::Kraskov, x::AbstractDataset{D, T}) where {D, T} - (; k, w, base) = e +function entropy(e::Renyi, est::Kraskov, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; k, w, base) = est N = length(x) ρs = maximum_neighbor_distances(x, w, k) # The estimated entropy has "unit" [nats] diff --git a/src/entropies/direct_entropies/nearest_neighbors/Zhu.jl b/src/entropies/estimators/nearest_neighbors/Zhu.jl similarity index 75% rename from src/entropies/direct_entropies/nearest_neighbors/Zhu.jl rename to src/entropies/estimators/nearest_neighbors/Zhu.jl index 1f004236e..1a63a4470 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/Zhu.jl +++ b/src/entropies/estimators/nearest_neighbors/Zhu.jl @@ -1,38 +1,43 @@ export Zhu """ - Zhu <: IndirectEntropy - Zhu(k = 1, w = 0, base = MathConstants.e) + Zhu <: EntropyEstimator + Zhu(k = 1, w = 0) -The `Zhu` indirect entropy estimator (Zhu et al., 2015)[^Zhu2015] estimates the Shannon -entropy of `x` (a multi-dimensional `Dataset`) to the given `base`, by approximating -probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using +The `Zhu` estimator (Zhu et al., 2015)[^Zhu2015] computes the [`Shannon`](@ref) +[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`), by +approximating probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using using `k` nearest neighbor searches. -This estimator is an extension to [`KozachenkoLeonenko`](@ref). - `w` is the Theiler window, which determines if temporal neighbors are excluded during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). +This estimator is an extension to [`KozachenkoLeonenko`](@ref). + +See also: [`entropy`](@ref). + [^Zhu2015]: Zhu, J., Bellanger, J. J., Shu, H., & Le Bouquin Jeannès, R. (2015). Contribution to transfer entropy estimation via the k-nearest-neighbors approach. Entropy, 17(6), 4173-4201. 
""" -Base.@kwdef struct Zhu{B} <: IndirectEntropy +Base.@kwdef struct Zhu <: EntropyEstimator k::Int = 1 w::Int = 0 - base::B = MathConstants.e end -function entropy(e::Zhu, x::AbstractDataset{D, T}) where {D, T} - (; k, w, base) = e +function entropy(e::Renyi, est::Zhu, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; k, w) = est + N = length(x) tree = KDTree(x, Euclidean()) nn_idxs = bulkisearch(tree, x, NeighborNumber(k), Theiler(w)) h = digamma(N) + mean_logvolumes(x, nn_idxs, N) - digamma(k) + (D - 1) / k - return h / log(base, MathConstants.e) + return h / log(e.base, MathConstants.e) end function mean_logvolumes(x, nn_idxs, N::Int) diff --git a/src/entropies/direct_entropies/nearest_neighbors/ZhuSingh.jl b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl similarity index 81% rename from src/entropies/direct_entropies/nearest_neighbors/ZhuSingh.jl rename to src/entropies/estimators/nearest_neighbors/ZhuSingh.jl index 88cb02b1c..62540c814 100644 --- a/src/entropies/direct_entropies/nearest_neighbors/ZhuSingh.jl +++ b/src/entropies/estimators/nearest_neighbors/ZhuSingh.jl @@ -1,16 +1,16 @@ using DelayEmbeddings: minmaxima using SpecialFunctions: digamma -using Entropies: Entropy, IndirectEntropy +using Entropies: Entropy, EntropyEstimator using Neighborhood: KDTree, Chebyshev, bulkisearch, Theiler, NeighborNumber export ZhuSingh """ - ZhuSingh <: IndirectEntropy - ZhuSingh(k = 1, w = 0, base = MathConstants.e) + ZhuSingh <: EntropyEstimator + ZhuSingh(k = 1, w = 0) -The `ZhuSingh` indirect entropy estimator (Zhu et al., 2015)[^Zhu2015] estimates the Shannon -entropy of `x` (a multi-dimensional `Dataset`) to the given `base`. +The `ZhuSingh` estimator (Zhu et al., 2015)[^Zhu2015] computes the [`Shannon`](@ref) +[`entropy`](@ref) of `x` (a multi-dimensional `Dataset`). Like [`Zhu`](@ref), this estimator approximates probabilities within hyperrectangles surrounding each point `xᵢ ∈ x` using using `k` nearest neighbor searches. However, @@ -21,6 +21,8 @@ This estimator is an extension to the entropy estimator in Singh et al. (2003). during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). +See also: [`entropy`](@ref). + [^Zhu2015]: Zhu, J., Bellanger, J. J., Shu, H., & Le Bouquin Jeannès, R. (2015). Contribution to transfer entropy estimation via the k-nearest-neighbors approach. Entropy, 17(6), @@ -30,24 +32,22 @@ when searching for neighbours). neighbor estimates of entropy. American journal of mathematical and management sciences, 23(3-4), 301-321. 
""" -Base.@kwdef struct ZhuSingh{B} <: IndirectEntropy +Base.@kwdef struct ZhuSingh <: EntropyEstimator k::Int = 1 w::Int = 0 - base::B = MathConstants.e - - function ZhuSingh(k::Int, w::Int, base::B) where B - new{B}(k, w, base) - end end -function entropy(e::ZhuSingh, x::AbstractDataset{D, T}) where {D, T} - (; k, w, base) = e +function entropy(e::Renyi, est::ZhuSingh, x::AbstractDataset{D, T}) where {D, T} + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; k, w) = est N = length(x) tree = KDTree(x, Euclidean()) nn_idxs = bulkisearch(tree, x, NeighborNumber(k), Theiler(w)) mean_logvol, mean_digammaξ = mean_logvolumes_and_digamma(x, nn_idxs, N, k) h = digamma(N) + mean_logvol - mean_digammaξ - return h / log(base, MathConstants.e) + return h / log(e.base, MathConstants.e) end function mean_logvolumes_and_digamma(x, nn_idxs, N::Int, k::Int) diff --git a/src/entropies/direct_entropies/nearest_neighbors/nearest_neighbors.jl b/src/entropies/estimators/nearest_neighbors/nearest_neighbors.jl similarity index 100% rename from src/entropies/direct_entropies/nearest_neighbors/nearest_neighbors.jl rename to src/entropies/estimators/nearest_neighbors/nearest_neighbors.jl diff --git a/src/entropies/direct_entropies/order_statistics/AlizadehArghami.jl b/src/entropies/estimators/order_statistics/AlizadehArghami.jl similarity index 53% rename from src/entropies/direct_entropies/order_statistics/AlizadehArghami.jl rename to src/entropies/estimators/order_statistics/AlizadehArghami.jl index db14ee022..df8c44d45 100644 --- a/src/entropies/direct_entropies/order_statistics/AlizadehArghami.jl +++ b/src/entropies/estimators/order_statistics/AlizadehArghami.jl @@ -1,16 +1,17 @@ export AlizadehArghami """ -AlizadehArghami <: IndirectEntropy + AlizadehArghami <: EntropyEstimator AlizadehArghami(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Alizadeh(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Alizadeh & Arghami (2010)[^Alizadeh2010]. +The `AlizadehArghami`estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from Alizadeh & Arghami +(2010)[^Alizadeh2010]. ## Description -The Alizadeh entropy estimator first computes the order statistics +The estimator first computes the +[order statistics](https://en.wikipedia.org/wiki/Order_statistic) ``X_{(1)} \\leq X_{(2)} \\leq \\cdots \\leq X_{(n)}`` for a random sample of length ``n``, i.e. the input timeseries. The entropy for the length-`n` sample is then estimated as the [`Vasicek`](@ref) entropy estimate, plus a correction factor @@ -19,17 +20,23 @@ the [`Vasicek`](@ref) entropy estimate, plus a correction factor H_{A}(m, n) = H_{V}(m, n) + \\dfrac{2}{n}\\left(m \\log_{base}(2) \\right). ``` +See also: [`entropy`](@ref). + [^Alizadeh2010]: Alizadeh, N. H., & Arghami, N. R. (2010). A new estimator of entropy. Journal of the Iranian Statistical Society (JIRSS). 
""" -@Base.kwdef struct AlizadehArghami{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct AlizadehArghami{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end -function entropy(e::AlizadehArghami, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::AlizadehArghami, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) return entropy(Vasicek(; m, base), x) + (2 / n)*(m * log(base, 2)) diff --git a/src/entropies/direct_entropies/order_statistics/Correa.jl b/src/entropies/estimators/order_statistics/Correa.jl similarity index 74% rename from src/entropies/direct_entropies/order_statistics/Correa.jl rename to src/entropies/estimators/order_statistics/Correa.jl index a60908dd3..12036b22d 100644 --- a/src/entropies/direct_entropies/order_statistics/Correa.jl +++ b/src/entropies/estimators/order_statistics/Correa.jl @@ -1,12 +1,12 @@ export Correa """ - Correa <: IndirectEntropy + Correa <: EntropyEstimator Correa(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Correa(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Correa (1995)[^Correa1995]. +The `Correa` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from +Correa (1995)[^Correa1995]. ## Description @@ -29,7 +29,7 @@ where Correa, J. C. (1995). A new estimator of entropy. Communications in Statistics-Theory and Methods, 24(10), 2439-2449. """ -@Base.kwdef struct Correa{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct Correa{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end @@ -43,8 +43,11 @@ function local_scaled_mean(ex, i::Int, m::Int, n::Int = length(x)) return x̄ / (2m + 1) end -function entropy(e::Correa, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::Correa, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) diff --git a/src/entropies/direct_entropies/order_statistics/Ebrahimi.jl b/src/entropies/estimators/order_statistics/Ebrahimi.jl similarity index 74% rename from src/entropies/direct_entropies/order_statistics/Ebrahimi.jl rename to src/entropies/estimators/order_statistics/Ebrahimi.jl index bd552ca14..bd2c088d0 100644 --- a/src/entropies/direct_entropies/order_statistics/Ebrahimi.jl +++ b/src/entropies/estimators/order_statistics/Ebrahimi.jl @@ -1,12 +1,13 @@ export Ebrahimi """ - Ebrahimi <: IndirectEntropy + Ebrahimi <: EntropyEstimator Ebrahimi(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Ebrahimi(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Ebrahimi (1994)[^Ebrahimi1994]. +The `Ebrahimi` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from +Ebrahimi (1994)[^Ebrahimi1994]. + ## Description @@ -34,7 +35,7 @@ c_i = Ebrahimi, N., Pflughoeft, K., & Soofi, E. S. (1994). Two measures of sample entropy. Statistics & Probability Letters, 20(3), 225-234. 
""" -@Base.kwdef struct Ebrahimi{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct Ebrahimi{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end @@ -49,8 +50,11 @@ function ebrahimi_scaling_factor(i, m, n) end end -function entropy(e::Ebrahimi, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::Ebrahimi, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) diff --git a/src/entropies/direct_entropies/order_statistics/Vasicek.jl b/src/entropies/estimators/order_statistics/Vasicek.jl similarity index 72% rename from src/entropies/direct_entropies/order_statistics/Vasicek.jl rename to src/entropies/estimators/order_statistics/Vasicek.jl index 7a4454f19..1cc52d4be 100644 --- a/src/entropies/direct_entropies/order_statistics/Vasicek.jl +++ b/src/entropies/estimators/order_statistics/Vasicek.jl @@ -1,12 +1,12 @@ export Vasicek """ - Vasicek <: IndirectEntropy + Vasicek <: EntropyEstimator Vasicek(; m::Int = 1, base = 2) -An indirect entropy estimator used in [`entropy`](@ref)`(Vasicek(), x)` to -estimate the Shannon entropy of the timeseries `x` to the given -`base` using the method from Vasicek (1976)[^Vasicek1976]. +The `Vasicek` estimator computes the [`Shannon`](@ref) [`entropy`](@ref) of `x` +(a multi-dimensional `Dataset`) to the given `base` using the method from +Vasicek (1976)[^Vasicek1976]. ## Description @@ -31,13 +31,16 @@ written for this package). Vasicek, O. (1976). A test for normality based on sample entropy. Journal of the Royal Statistical Society: Series B (Methodological), 38(1), 54-59. """ -@Base.kwdef struct Vasicek{I<:Integer, B} <: IndirectEntropy +@Base.kwdef struct Vasicek{I<:Integer, B} <: EntropyEstimator m::I = 1 base::B = 2 end -function entropy(e::Vasicek, x::AbstractVector{T}) where T - (; m, base) = e +function entropy(e::Renyi, est::Vasicek, x::AbstractVector{T}) where T + e.q == 1 || throw(ArgumentError( + "Renyi entropy with q = $(e.q) not implemented for $(typeof(est)) estimator" + )) + (; m, base) = est n = length(x) m < floor(Int, n / 2) || throw(ArgumentError("Need m < length(x)/2.")) diff --git a/src/entropies/direct_entropies/order_statistics/order_statistics.jl b/src/entropies/estimators/order_statistics/order_statistics.jl similarity index 100% rename from src/entropies/direct_entropies/order_statistics/order_statistics.jl rename to src/entropies/estimators/order_statistics/order_statistics.jl diff --git a/src/entropy.jl b/src/entropy.jl index fc3fbb0bb..5feb60651 100644 --- a/src/entropy.jl +++ b/src/entropy.jl @@ -1,67 +1,111 @@ -export Entropy, IndirectEntropy +export Entropy, EntropyEstimator export entropy, entropy_maximum, entropy_normalized, entropy! abstract type AbstractEntropy end -"Supertype of direct entropies. See [`entropy`](@ref)." +""" + Entropy + +`Entropy` is the supertype of all (generalized) entropies, and currently implemented +entropy types are: + +- [`Renyi`](@ref). +- [`Tsallis`](@ref). +- [`Shannon`](@ref), which is a subcase of the above two in the limit `q → 1`. +- [`Curado`](@ref). +- [`StretchedExponential`](@ref). + +These entropy types are given as inputs to [`entropy`](@ref) and [`entropy_normalized`]. + +## Description + +Mathematically speaking, generalized entropies are just nonnegative functions of +probability distributions that verify certain (entropy-type-dependent) axioms. 
+Amigó et al., 2018's +[summary paper](https://www.mdpi.com/1099-4300/20/11/813) gives a nice overview. + +[Amigó2018]: + Amigó, J. M., Balogh, S. G., & Hernández, S. (2018). A brief review of + generalized entropies. Entropy, 20(11), 813. +""" abstract type Entropy <: AbstractEntropy end -"Supertype of inderect entropies. See [`entropy`](@ref)." -abstract type IndirectEntropy <: AbstractEntropy end +""" + EntropyEstimator + +The supertype of all entropy estimators. + +These estimators compute some [`Entropy`](@ref) in various ways that doesn't involve +explicitly estimating a probability distribution. Currently implemented estimators are: + +- [`KozachenkoLeonenko`](@ref) +- [`Kraskov`](@ref) +- [`Zhu`](@ref) +- [`ZhuSingh`](@ref) +- [`Vasicek`](@ref) +- [`Ebrahimi`](@ref) +- [`Correa`](@ref) +- [`AlizadehArghami`](@ref) + +For example, [`entropy`](@ref)`(Shannon(), Kraskov(), x)` computes the Shannon entropy +of the input data `x` using the [`Kraskov`](@ref) `k`-th nearest neighbor estimator. +""" +abstract type EntropyEstimator <: AbstractEntropy end ########################################################################################### -# Direct API +# API: entropy from probabilities ########################################################################################### # Notice that StatsBase.jl exports `entropy` and Wavelets.jl exports `Entropy`. """ - entropy([e::Entropy,] x, est::ProbabilitiesEstimator) → h::Real - entropy([e::Entropy,] probs::Probabilities) → h::Real + entropy([e::Entropy,] probs::Probabilities) → h::Real ∈ [0, ∞) + entropy([e::Entropy,] est::ProbabilitiesEstimator, x) → h::Real ∈ [0, ∞) + entropy([e::Entropy,] est::EntropyEstimator, x) → h::Real ∈ [0, ∞) -Compute a (generalized) entropy `h` from `x` according to the specified -entropy type `e` and the given probability estimator `est`. -Alternatively compute the entropy directly from the existing probabilities `probs`. -In fact, the first method is a 2-lines-of-code wrapper that calls [`probabilities`](@ref) -and gives the result to the second method. +Compute `h`, a (generalized) [`Entropy`](@ref) of type `e`, in one of three ways: -`x` is typically an `Array` or a `Dataset`, see [Input data for Entropies.jl](@ref). +1. Directly from existing [`Probabilities`](@ref) `probs`. +2. From input data `x`, by first estimating a probability distribution using the provided + [`ProbabilitiesEstimator`](@ref), then computing entropy from that distribution. + In fact, the second method is just a 2-lines-of-code wrapper that calls + [`probabilities`](@ref) and gives the result to the first method. +3. From input data `x`, by using a dedicated [`EntropyEstimator`](@ref) that computes + entropy in a way that doesn't involve explicitly computing probabilities first. -The entropy types that support this interface are "direct" entropies. They always -yield an entropy value given a probability distribution. -Such entropies are theoretically well-founded and are typically called "generalized -entropies". Currently implemented types are: +The entropy (first argument) is optional. When `est` is a probability estimator, +`Shannon()` is used by default. When `est` is a dedicated entropy estimator, +the default entropy type is inferred from the estimator (e.g. [`Kraskov`](@ref) +estimates the [`Shannon`](@ref) entropy). -- [`Renyi`](@ref). -- [`Tsallis`](@ref). -- [`Shannon`](@ref), which is a subcase of the above two in the limit `q → 1`. -- [`Curado`](@ref). -- [`StretchedExponential`](@ref). 
+## Input data + +`x` is typically an `Array` or a `Dataset`, see [Input data for Entropies.jl](@ref). -The entropy (first argument) is optional: if not given, `Shannon()` is used instead. +## Maximum entropy and normalized entropy -These entropies also have a well defined maximum value for a given probability estimator. +All entropies `e` have a well defined maximum value for a given probability estimator. To obtain this value one only needs to call the [`entropy_maximum`](@ref) function with the chosen entropy type and probability estimator. Or, one can use [`entropy_normalized`](@ref) to obtain the normalized form of the entropy (divided by the maximum). ## Examples + ```julia x = [rand(Bool) for _ in 1:10000] # coin toss ps = probabilities(x) # gives about [0.5, 0.5] by definition h = entropy(ps) # gives 1, about 1 bit by definition h = entropy(Shannon(), ps) # syntactically equivalent to above -h = entropy(Shannon(), x, CountOccurrences()) # syntactically equivalent to above -h = entropy(x, SymbolicPermutation(;m=3)) # gives about 2, again by definition +h = entropy(Shannon(), CountOccurrences(), x) # syntactically equivalent to above +h = entropy(SymbolicPermutation(;m=3), x) # gives about 2, again by definition h = entropy(Renyi(2.0), ps) # also gives 1, order `q` doesn't matter for coin toss ``` """ -function entropy(e::Entropy, x, est::ProbabilitiesEstimator) - ps = probabilities(x, est) +function entropy(e::Entropy, est::ProbabilitiesEstimator, x) + ps = probabilities(est, x) return entropy(e, ps) end + # Convenience -function entropy(x::Array_or_Dataset, est::ProbabilitiesEstimator) - entropy(Shannon(), x, est) -end +entropy(est::ProbabilitiesEstimator, x::Array_or_Dataset) = entropy(Shannon(), est, x) entropy(probs::Probabilities) = entropy(Shannon(), probs) """ @@ -74,19 +118,39 @@ The entropy (second argument) is optional: if not given, `Shannon()` is used ins Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). """ -function entropy!(s::AbstractVector{Int}, e::Entropy, x, est::ProbabilitiesEstimator) - probs = probabilities!(s, x, est) +function entropy!(s::AbstractVector{Int}, e::Entropy, est::ProbabilitiesEstimator, x) + probs = probabilities!(s, est, x) entropy(e, probs) end -entropy!(s::AbstractVector{Int}, x, est::ProbabilitiesEstimator) = - entropy!(s, Shannon(), x, est) +entropy!(s::AbstractVector{Int}, est::ProbabilitiesEstimator, x) = + entropy!(s, Shannon(), est, x) + +########################################################################################### +# API: entropy from entropy estimators +########################################################################################### +# Dispatch for these functions is implemented in individual estimator files in +# `entropies/estimators/`. +function entropy(e::Entropy, est::EntropyEstimator, x) + t = string(typeof(e).name.name) + throw(ArgumentError("$t entropy not implemented for $(typeof(est)) estimator")) +end + +entropy(est::EntropyEstimator, ::Probabilities) = + error("Entropy estimators like $(nameof(typeof(est))) are not called with probabilities.") +entropy(e::Entropy, est::EntropyEstimator, ::Probabilities) = + error("Entropy estimators like $(nameof(typeof(est))) are not called with probabilities.") +entropy(e::Entropy, est::EntropyEstimator, x::AbstractVector) = + entropy(e, est, Dataset(x)) +# Always default to Shannon with base-2 logs. Individual estimators may override this. 
+entropy(est::EntropyEstimator, x; base = 2) = entropy(Shannon(; base), est, x) + ########################################################################################### # Normalize API ########################################################################################### """ - entropy_maximum(e::Entropy, x, est::ProbabilitiesEstimator) → m::Real + entropy_maximum(e::Entropy, est::ProbabilitiesEstimator, x) → m::Real Return the maximum value `m` of the given entropy type based on the given estimator and the given input `x` (whose values are not important, but layout and type are). @@ -96,9 +160,9 @@ when the estimator has a known [`total_outcomes`](@ref). entropy_maximum(e::Entropy, L::Int) → m::Real -Alternatively, compute the maximum entropy from the total outcomes `L` directly. +Alternatively, compute the maximum entropy from the number of total outcomes `L` directly. """ -function entropy_maximum(e::Entropy, x, est::ProbabilitiesEstimator) +function entropy_maximum(e::Entropy, est::ProbabilitiesEstimator, x) L = total_outcomes(x, est) return entropy_maximum(e, L) end @@ -107,46 +171,21 @@ function entropy_maximum(e::Entropy, ::Int) end """ - entropy_normalized([e::Entropy,] x, est::ProbabilitiesEstimator) → h̃ ∈ [0, 1] + entropy_normalized([e::Entropy,] est::ProbabilitiesEstimator, x) → h̃ ∈ [0, 1] -Return the normalized entropy of `x`, i.e., the value of [`entropy`](@ref) divided -by the maximum value for `e`, according to the given probability estimator. +Return h̃, the normalized entropy of `x`, i.e. the value of [`entropy`](@ref) divided +by the maximum value for `e`, according to the given probabilities estimator. If `e` is not given, it defaults to `Shannon()`. Notice that unlike for [`entropy`](@ref), here there is no method `entropy_normalized(e::Entropy, probs::Probabilities)` because there is no way to know the amount of _possible_ events (i.e., the [`total_outcomes`](@ref)) from `probs`. """ -function entropy_normalized(e::Entropy, x, est::ProbabilitiesEstimator) - return entropy(e, x, est)/entropy_maximum(e, x, est) -end -function entropy_normalized(x::Array_or_Dataset, est::ProbabilitiesEstimator) - return entropy_normalized(Shannon(), x, est) -end - -########################################################################################### -# Indirect API -########################################################################################### -""" - entropy(e::IndirectEntropy, x) → h::Real - -Compute the entropy of `x`, here named `h`, according to the specified indirect entropy -estimator `e`. - -In contrast to the "typical" way one obtains entropies in the above methods, indirect -entropy estimators compute Shannon entropies via alternate means, without explicitly -computing probability distributions. The available indirect entropies are: - -- [`Kraskov`](@ref). -- [`KozachenkoLeonenko`](@ref). -- [`Vasicek`](@ref). 
-""" -function entropy(::IndirectEntropy, ::Array_or_Dataset) end -function entropy(e::IndirectEntropy, ::Array_or_Dataset, ::ProbabilitiesEstimator) - error("Indirect entropies like $(nameof(typeof(e))) are not called with probabilities.") +function entropy_normalized(e::Entropy, est::ProbabilitiesEstimator, x) + return entropy(e, est, x) / entropy_maximum(e, est, x) end -function entropy(e::IndirectEntropy, ::Probabilities) - error("Indirect entropies $(nameof(typeof(e))) are not called with probabilities.") +function entropy_normalized(est::ProbabilitiesEstimator, x::Array_or_Dataset) + return entropy_normalized(Shannon(), est, x) end ########################################################################################### diff --git a/src/probabilities.jl b/src/probabilities.jl index 5475140a9..0fb6dc36a 100644 --- a/src/probabilities.jl +++ b/src/probabilities.jl @@ -85,7 +85,7 @@ abstract type ProbabilitiesEstimator end # probabilities and combo function ########################################################################################### """ - probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities + probabilities(est::ProbabilitiesEstimator, x::Array_or_Dataset) → p::Probabilities Compute a probability distribution over the set of possible outcomes defined by the probabilities estimator `est`, given input data `x`. @@ -104,8 +104,8 @@ This is mostly useful when `x` contains categorical or integer data. See also: [`Probabilities`](@ref), [`ProbabilitiesEstimator`](@ref). """ -function probabilities(x, est::ProbabilitiesEstimator) - return probabilities_and_outcomes(x, est)[1] +function probabilities(est::ProbabilitiesEstimator, x) + return probabilities_and_outcomes(est, x)[1] end """ @@ -117,7 +117,7 @@ The element type of `Ω` depends on the estimator. See also [`outcomes`](@ref), [`total_outcomes`](@ref), and [`outcome_space`](@ref). """ -function probabilities_and_outcomes(x, est::ProbabilitiesEstimator) +function probabilities_and_outcomes(est::ProbabilitiesEstimator, x) error("`probabilities_and_outcomes` not implemented for estimator $(typeof(est)).") end @@ -175,7 +175,7 @@ julia> total_outcomes(rand(42), est) # same as `factorial(m)` for any `x` 24 ``` """ -function total_outcomes(x::Array_or_Dataset, est::ProbabilitiesEstimator) +function total_outcomes(x, est::ProbabilitiesEstimator) return length(outcome_space(x, est)) end diff --git a/src/probabilities_estimators/count_occurences.jl b/src/probabilities_estimators/counting/count_occurences.jl similarity index 84% rename from src/probabilities_estimators/count_occurences.jl rename to src/probabilities_estimators/counting/count_occurences.jl index 9e34b55d3..f145a16f1 100644 --- a/src/probabilities_estimators/count_occurences.jl +++ b/src/probabilities_estimators/counting/count_occurences.jl @@ -12,7 +12,7 @@ The outcome space is the unique sorted values of the input. 
""" struct CountOccurrences <: ProbabilitiesEstimator end -function probabilities_and_outcomes(x::Array_or_Dataset, ::CountOccurrences) +function probabilities_and_outcomes(::CountOccurrences, x::Array_or_Dataset) z = copy(x) probs = Probabilities(fasthist!(z)) # notice that `z` is now sorted within `fasthist!` so we can skip sorting @@ -21,7 +21,7 @@ end outcome_space(x, ::CountOccurrences) = sort!(unique(x)) -probabilities(x::Array_or_Dataset, ::CountOccurrences) = probabilities(x) +probabilities(::CountOccurrences, x::Array_or_Dataset) = probabilities(x) function probabilities(x::Array_or_Dataset) # Fast histograms code is in the `histograms` folder return Probabilities(fasthist!(copy(x))) diff --git a/src/probabilities_estimators/dispersion/dispersion.jl b/src/probabilities_estimators/dispersion/dispersion.jl index 7840de1c1..1d9422203 100644 --- a/src/probabilities_estimators/dispersion/dispersion.jl +++ b/src/probabilities_estimators/dispersion/dispersion.jl @@ -107,7 +107,7 @@ function symbolize_for_dispersion(x, est::Dispersion) return symbols::Vector{Int} end -function probabilities_and_outcomes(x::AbstractVector{<:Real}, est::Dispersion) +function probabilities_and_outcomes(est::Dispersion, x::AbstractVector{<:Real}) N = length(x) symbols = symbolize_for_dispersion(x, est) # We must use genembed, not embed, to make sure the zero lag is included @@ -125,4 +125,4 @@ function outcome_space(est::Dispersion) cart = CartesianIndices(ntuple(i -> c, m)) V = SVector{m, Int} return map(i -> V(Tuple(i)), vec(cart)) -end \ No newline at end of file +end diff --git a/src/probabilities_estimators/diversity/diversity.jl b/src/probabilities_estimators/diversity/diversity.jl index a91997083..3ccde602a 100644 --- a/src/probabilities_estimators/diversity/diversity.jl +++ b/src/probabilities_estimators/diversity/diversity.jl @@ -44,21 +44,21 @@ Base.@kwdef struct Diversity <: ProbabilitiesEstimator nbins::Int = 5 end -function probabilities(x::AbstractVector{T}, est::Diversity) where T <: Real - ds, binning = similarities_and_binning(x, est) +function probabilities(est::Diversity, x::AbstractVector{T}) where T <: Real + ds, binning = similarities_and_binning(est, x) return fasthist(ds, binning)[1] end -function probabilities_and_outcomes(x::AbstractVector{T}, est::Diversity) where T <: Real - ds, binning = similarities_and_binning(x, est) - return probabilities_and_outcomes(ds, ValueHistogram(binning)) +function probabilities_and_outcomes(est::Diversity, x::AbstractVector{T}) where T <: Real + ds, binning = similarities_and_binning(est, x) + return probabilities_and_outcomes(ValueHistogram(binning), ds) end total_outcomes(est::Diversity) = est.nbins outcome_space(x, est::Diversity) = outcome_space(x, binning_for_diversity(est)) -function similarities_and_binning(x::AbstractVector{T}, est::Diversity) where T <: Real +function similarities_and_binning(est::Diversity, x::AbstractVector{T}) where T <: Real # embed and then calculate cosine similary for each consecutive pair of delay vectors τs = 0:est.τ:(est.m - 1)*est.τ Y = genembed(x, τs) @@ -73,4 +73,4 @@ end cosine_similarity(xᵢ, xⱼ) = sum(xᵢ .* xⱼ) / (sqrt(sum(xᵢ .^ 2)) * sqrt(sum(xⱼ .^ 2))) -binning_for_diversity(est::Diversity) = FixedRectangularBinning(-1.0, 1.0, est.nbins) \ No newline at end of file +binning_for_diversity(est::Diversity) = FixedRectangularBinning(-1.0, 1.0, est.nbins) diff --git a/src/probabilities_estimators/histograms/rectangular_binning.jl b/src/probabilities_estimators/histograms/rectangular_binning.jl index 
37a8525f9..09c322cfa 100644 --- a/src/probabilities_estimators/histograms/rectangular_binning.jl +++ b/src/probabilities_estimators/histograms/rectangular_binning.jl @@ -238,7 +238,7 @@ end ################################################################## # low level histogram call ################################################################## -# This method is called by `probabilities(x::Array_or_Dataset, est::ValueHistogram)` +# This method is called by `probabilities(est::ValueHistogram, x::Array_or_Dataset)` """ fasthist(x::Vector_or_Dataset, ϵ::AbstractBinning) Create an encoding for binning, then map `x` to bins, then call `fasthist!` on the bins. diff --git a/src/probabilities_estimators/histograms/value_histogram.jl b/src/probabilities_estimators/histograms/value_histogram.jl index b0425fd6a..8ae57a87c 100644 --- a/src/probabilities_estimators/histograms/value_histogram.jl +++ b/src/probabilities_estimators/histograms/value_histogram.jl @@ -59,13 +59,13 @@ const VisitationFrequency = ValueHistogram # This method is only valid for rectangular binnings, as `fasthist` # is only valid for rectangular binnings. For more binnings, it needs to be extended. -function probabilities(x::Array_or_Dataset, est::ValueHistogram) +function probabilities(est::ValueHistogram, x::Array_or_Dataset) # and the `fasthist` actually just makes an encoding, # this function is in `rectangular_binning.jl` fasthist(x, est.binning)[1] end -function probabilities_and_outcomes(x::Array_or_Dataset, est::ValueHistogram) +function probabilities_and_outcomes(est::ValueHistogram, x::Array_or_Dataset) probs, bins, encoder = fasthist(x, est.binning) unique!(bins) # `bins` is already sorted from `fasthist!` # Here we transfor the cartesian coordinate based bins into data unit bins: diff --git a/src/probabilities_estimators/kernel_density.jl b/src/probabilities_estimators/kernel/kernel_density.jl similarity index 96% rename from src/probabilities_estimators/kernel_density.jl rename to src/probabilities_estimators/kernel/kernel_density.jl index d91c5127b..b007574bc 100644 --- a/src/probabilities_estimators/kernel_density.jl +++ b/src/probabilities_estimators/kernel/kernel_density.jl @@ -46,7 +46,7 @@ function NaiveKernel(ϵ::Real, method = KDTree; w = 0, metric = Euclidean()) return NaiveKernel(ϵ, method, w, metric) end -function probabilities_and_outcomes(x::AbstractDataset, est::NaiveKernel) +function probabilities_and_outcomes(est::NaiveKernel, x::AbstractDataset) theiler = Theiler(est.w) ss = searchstructure(est.method, x.data, est.metric) idxs = bulkisearch(ss, x.data, WithinRange(est.ϵ), theiler) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl index fb53495b2..d38f560f7 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl @@ -72,8 +72,8 @@ function AAPE(x; A::Real = 0.5, m::Int = length(x)) (A/m)*sum(abs.(x)) + (1-A)/(m-1)*sum(abs.(diff(x))) end -function probabilities_and_outcomes(x::AbstractDataset{m, T}, - est::SymbolicAmplitudeAwarePermutation) where {m, T} +function probabilities_and_outcomes(est::SymbolicAmplitudeAwarePermutation, + x::AbstractDataset{m, T}) where {m, T} πs = outcomes(x, OrdinalPatternEncoding(m = m, lt = est.lt)) wts = AAPE.(x.data, A = est.A, m = est.m) probs = symprobs(πs, wts, normalize = true) @@ -87,8 +87,9 @@ function 
probabilities_and_outcomes(x::AbstractDataset{m, T}, return Probabilities(probs), observed_outcomes end -function probabilities_and_outcomes(x::AbstractVector{T}, - est::SymbolicAmplitudeAwarePermutation) where {T<:Real} +function probabilities_and_outcomes( + est::SymbolicAmplitudeAwarePermutation, + x::AbstractVector{T}) where {T<:Real} # We need to manually embed here instead of just calling the method above, # because the embedding vectors are needed to compute weights. τs = tuple([est.τ*i for i = 0:est.m-1]...) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl index c046b240b..492598ee6 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl @@ -40,15 +40,15 @@ est = SymbolicPermutation(; m, τ) # For a time series x_ts = rand(N) πs_ts = zeros(Int, N - (m - 1)*τ) -p = probabilities!(πs_ts, x_ts, est) -h = entropy!(πs_ts, Renyi(), x_ts, est) +p = probabilities!(πs_ts, est, x_ts) +h = entropy!(πs_ts, Renyi(), est, x_ts) # For a pre-discretized `Dataset` x_symb = outcomes(x_ts, OrdinalPatternEncoding(m = 2, τ = 1)) x_d = genembed(x_symb, (0, -1, -2)) πs_d = zeros(Int, length(x_d)) -p = probabilities!(πs_d, x_d, est) -h = entropy!(πs_d, Renyi(), x_d, est) +p = probabilities!(πs_d, est, x_d) +h = entropy!(πs_d, Renyi(), est, x_d) ``` See [`SymbolicWeightedPermutation`](@ref) and [`SymbolicAmplitudeAwarePermutation`](@ref) @@ -79,22 +79,21 @@ function SymbolicPermutation(; τ::Int = 1, m::Int = 3, lt::F=isless_rand) where SymbolicPermutation{F}(τ, m, lt) end -function probabilities!(πs::AbstractVector{Int}, x::AbstractDataset{m, T}, est::SymbolicPermutation) where {m, T} +function probabilities!(πs::AbstractVector{Int}, est::SymbolicPermutation, x::AbstractDataset{m, T}) where {m, T} length(πs) == length(x) || throw(ArgumentError("Need length(πs) == length(x), got `length(πs)=$(length(πs))` and `length(x)==$(length(x))`.")) m >= 2 || error("Data must be at least 2-dimensional to compute the permutation entropy. If data is a univariate time series embed it using `genembed` first.") encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) probabilities(outcomes!(πs, x, encoding)) end -function probabilities!(πs::AbstractVector{Int}, x::AbstractVector{T}, est::SymbolicPermutation) where {T<:Real} +function probabilities!(πs::AbstractVector{Int}, est::SymbolicPermutation, x::AbstractVector{T}) where {T<:Real} N = length(x) - (est.m - 1)*est.τ length(πs) == N || error("Pre-allocated symbol vector `πs` needs to have length `length(x) - (m-1)*τ` to match the number of state vectors after `x` has been embedded. 
Got length(πs)=$(length(πs)) and length(x)=$(L).") encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) probabilities(outcomes!(πs, x, encoding)) end -function probabilities_and_outcomes(x::AbstractDataset{m, T}, - est::SymbolicPermutation) where {m, T} +function probabilities_and_outcomes(est::SymbolicPermutation, x::AbstractDataset{m, T}) where {m, T} encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) πs = outcomes(x, encoding) probs = probabilities(πs) @@ -108,11 +107,10 @@ function probabilities_and_outcomes(x::AbstractDataset{m, T}, return probs, observed_outcomes end -function probabilities_and_outcomes(x::AbstractVector{T}, - est::SymbolicPermutation) where {T<:Real} +function probabilities_and_outcomes(est::SymbolicPermutation, x::AbstractVector{T}) where {T<:Real} encoding = OrdinalPatternEncoding(m = est.m, τ = est.τ, lt = est.lt) πs = outcomes(x, encoding) - probs = probabilities(πs, CountOccurrences()) + probs = probabilities(πs) # The observed integer encodings are in the set `{0, 1, ..., factorial(m)}`, and each # integer corresponds to a unique permutation. Decoding an integer gives the original @@ -123,31 +121,36 @@ function probabilities_and_outcomes(x::AbstractVector{T}, return probs, observed_outcomes end -function entropy!(e::Entropy, +function entropy!( s::AbstractVector{Int}, - x::AbstractDataset{m, T}, - est::SymbolicPermutation; + e::Entropy, + est::SymbolicPermutation, + x::AbstractDataset{m, T}; ) where {m, T} length(s) == length(x) || error("Pre-allocated symbol vector s need the same number of elements as x. Got length(πs)=$(length(πs)) and length(x)=$(L).") - ps = probabilities!(s, x, est) + ps = probabilities!(s, est, x) entropy(e, ps) end -function entropy!(e::Entropy, +function entropy!( s::AbstractVector{Int}, + e::Entropy, + est::SymbolicPermutation, x::AbstractVector{T}, - est::SymbolicPermutation ) where {T<:Real} L = length(x) N = L - (est.m-1)*est.τ length(s) == N || error("Pre-allocated symbol vector `s` needs to have length `length(x) - (m-1)*τ` to match the number of state vectors after `x` has been embedded. 
Got length(s)=$(length(s)) and length(x)=$(L).") - ps = probabilities!(s, x, est) + ps = probabilities!(s, est, x) entropy(e, ps) end +entropy!(s::AbstractVector{Int}, est::SymbolicPermutation, x) = + entropy!(s, Shannon(base = 2), est, x) + total_outcomes(est::SymbolicPermutation)::Int = factorial(est.m) outcome_space(est::SymbolicPermutation) = permutations(1:est.m) |> collect diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl index 34f2057da..8a860a161 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl @@ -85,8 +85,8 @@ function weights_from_variance(x, m::Int) end -function probabilities_and_outcomes(x::AbstractDataset{m, T}, - est::SymbolicWeightedPermutation) where {m, T} +function probabilities_and_outcomes(est::SymbolicWeightedPermutation, + x::AbstractDataset{m, T}) where {m, T} m >= 2 || error("Need m ≥ 2, otherwise no dynamical information is encoded in the symbols.") πs = outcomes(x, OrdinalPatternEncoding(m = m, lt = est.lt)) # motif length controlled by dimension of input data wts = weights_from_variance.(x.data, m) @@ -101,8 +101,8 @@ function probabilities_and_outcomes(x::AbstractDataset{m, T}, return Probabilities(probs), observed_outcomes end -function probabilities_and_outcomes(x::AbstractVector{T}, - est::SymbolicWeightedPermutation) where {T<:Real} +function probabilities_and_outcomes(est::SymbolicWeightedPermutation, + x::AbstractVector{T}) where {T<:Real} # We need to manually embed here instead of just calling the method above, # because the embedding vectors are needed to compute weights. τs = tuple([est.τ*i for i = 0:est.m-1]...) diff --git a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl index 0c955a832..a861e3b2e 100644 --- a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl @@ -124,7 +124,7 @@ function stencil_to_offsets(stencil::NTuple{2, NTuple{D, T}}) where {D, T} return stencil, D end -function stencil_to_offsets(stencil::Array{Int, D}) where D +function stencil_to_offsets(stencil::AbstractArray{Int, D}) where D # translate D-dim array into stencil of cartesian indices (of dimension D) stencil = [idx - CartesianIndex(Tuple(ones(Int, D))) for idx in findall(Bool.(stencil))] # subtract first coordinate from everything to get a stencil that contains (0,0) @@ -136,14 +136,14 @@ end ########################################################################################### # probabilities implementation ########################################################################################### -function probabilities(x::AbstractArray, est::SpatialSymbolicPermutation) +function probabilities(est::SpatialSymbolicPermutation, x::AbstractArray) # TODO: This can be literally a call to `outcomes` and then # calling probabilities on it. Should do once the `outcomes` refactoring is done. 
s = zeros(Int, length(est.valid)) - probabilities!(s, x, est) + probabilities!(s, est, x) end -function probabilities!(s, x, est::SpatialSymbolicPermutation) +function probabilities!(s, est::SpatialSymbolicPermutation, x) m = length(est.stencil) for (i, pixel) in enumerate(est.valid) pixels = pixels_in_stencil(pixel, est) diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl index 371cc1aa7..371af9c68 100644 --- a/src/probabilities_estimators/probabilities_estimators.jl +++ b/src/probabilities_estimators/probabilities_estimators.jl @@ -1,7 +1,7 @@ -include("count_occurences.jl") +include("counting/count_occurences.jl") include("histograms/value_histogram.jl") include("permutation_ordinal/symbolic.jl") -include("kernel_density.jl") +include("kernel/kernel_density.jl") include("timescales/timescales.jl") include("transfer_operator/transfer_operator.jl") include("dispersion/dispersion.jl") diff --git a/src/probabilities_estimators/timescales/power_spectrum.jl b/src/probabilities_estimators/timescales/power_spectrum.jl index 346881b75..e159c1bdc 100644 --- a/src/probabilities_estimators/timescales/power_spectrum.jl +++ b/src/probabilities_estimators/timescales/power_spectrum.jl @@ -30,15 +30,15 @@ should be multiplied with the sampling rate of the signal, which is assumed to b """ struct PowerSpectrum <: ProbabilitiesEstimator end -function probabilities_and_outcomes(x, est::PowerSpectrum) - probs = probabilities(x, est) +function probabilities_and_outcomes(est::PowerSpectrum, x) + probs = probabilities(est, x) events = FFTW.rfftfreq(length(x)) return probs, events end outcome_space(x, ::PowerSpectrum) = FFTW.rfftfreq(length(x)) -function probabilities(x, ::PowerSpectrum) +function probabilities(::PowerSpectrum, x) @assert x isa AbstractVector{<:Real} "`PowerSpectrum` only works for timeseries input!" f = FFTW.rfft(x) Probabilities(abs2.(f)) diff --git a/src/probabilities_estimators/timescales/wavelet_overlap.jl b/src/probabilities_estimators/timescales/wavelet_overlap.jl index 46f1fce21..3dfa50aa4 100644 --- a/src/probabilities_estimators/timescales/wavelet_overlap.jl +++ b/src/probabilities_estimators/timescales/wavelet_overlap.jl @@ -30,7 +30,7 @@ struct WaveletOverlap{W<:Wavelets.WT.OrthoWaveletClass} <: ProbabilitiesEstimato end WaveletOverlap() = WaveletOverlap(Wavelets.WT.Daubechies{12}()) -function probabilities_and_outcomes(x, est::WaveletOverlap) +function probabilities_and_outcomes(est::WaveletOverlap, x) @assert x isa AbstractVector{<:Real} "`WaveletOverlap` only works for timeseries input!" 
p = Probabilities(time_scale_density(x, est.wl)) return p, 1:length(p) diff --git a/src/probabilities_estimators/transfer_operator/transfer_operator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl index a41ac4e81..70161c1f7 100644 --- a/src/probabilities_estimators/transfer_operator/transfer_operator.jl +++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl @@ -477,7 +477,7 @@ function transfermatrix(iv::InvariantMeasure) return iv.to.transfermatrix, iv.to.bins end -function probabilities_and_outcomes(x::Array_or_Dataset, est::TransferOperator) +function probabilities_and_outcomes(est::TransferOperator, x::Array_or_Dataset) to = transferoperator(x, est.binning) probs = invariantmeasure(to).ρ diff --git a/test/convenience.jl b/test/convenience.jl new file mode 100644 index 000000000..0f3a3023f --- /dev/null +++ b/test/convenience.jl @@ -0,0 +1,32 @@ +using Entropies +using Test +import Statistics + +@testset "Common literature names" begin + x = randn(1000) + σ = 0.2*Statistics.std(x) + + @testset "Permutation entropy" begin + @test entropy_permutation(x) == entropy(SymbolicPermutation(), x) + end + + @testset "Wavelet entropy" begin + @test entropy_wavelet(x) == entropy(WaveletOverlap(), x) + end + + @testset "Dispersion entropy" begin + @test entropy_dispersion(x) == entropy(Dispersion(), x) + end + + @testset "Spatial permutation entropy" begin + x = rand(50, 50) + stencil = CartesianIndex.([(0,1), (1,1), (1,0)]) + est = SpatialSymbolicPermutation(stencil, x) + @test entropy_spatial_permutation(x, stencil) == entropy(est, x) + end +end + +@testset "`probabilities` defaults to `CountOccurrences`" begin + x = [1, 1, 2, 2, 3, 3] + @test probabilities(x) == probabilities(CountOccurrences(), x) == [1/3, 1/3, 1/3] +end diff --git a/test/entropies/convenience.jl b/test/entropies/convenience.jl deleted file mode 100644 index d6ebb3391..000000000 --- a/test/entropies/convenience.jl +++ /dev/null @@ -1,15 +0,0 @@ -using Entropies -using Test -import Statistics - -x = randn(1000) -σ = 0.2*Statistics.std(x) -# All of the following are API tests. -@test entropy_permutation(x) == entropy(x, SymbolicPermutation()) -@test entropy_wavelet(x) == entropy(x, WaveletOverlap()) -@test entropy_dispersion(x) == entropy(x, Dispersion()) - -m = rand(50, 50) -stencil = CartesianIndex.([(0,1), (1,1), (1,0)]) -est = SpatialSymbolicPermutation(stencil, m) -@test entropy_spatial_permutation(m, stencil) == entropy(m, est) \ No newline at end of file diff --git a/test/entropies/entropies.jl b/test/entropies/entropies.jl new file mode 100644 index 000000000..259606fbb --- /dev/null +++ b/test/entropies/entropies.jl @@ -0,0 +1,12 @@ +# Interface. +testfile("interface.jl") + +# Entropy types. 
+testfile("entropy_types/renyi.jl") +testfile("entropy_types/shannon.jl") +testfile("entropy_types/tsallis.jl") +testfile("entropy_types/curado.jl") +testfile("entropy_types/stretched_exponential.jl") + +# Estimators +testfile("estimators/estimators.jl") diff --git a/test/entropies/curado.jl b/test/entropies/entropy_types/curado.jl similarity index 100% rename from test/entropies/curado.jl rename to test/entropies/entropy_types/curado.jl diff --git a/test/entropies/renyi.jl b/test/entropies/entropy_types/renyi.jl similarity index 100% rename from test/entropies/renyi.jl rename to test/entropies/entropy_types/renyi.jl diff --git a/test/entropies/entropy_types/shannon.jl b/test/entropies/entropy_types/shannon.jl new file mode 100644 index 000000000..f32cdc591 --- /dev/null +++ b/test/entropies/entropy_types/shannon.jl @@ -0,0 +1,17 @@ +using Random +rng = MersenneTwister(1234) + +x = rand(1000) +xp = Probabilities(x) +@test_throws MethodError entropy(Shannon(), x) isa Real +@test entropy(Shannon(base = 10), xp) isa Real +@test entropy(Shannon(base = 2), xp) isa Real +@test entropy_maximum(Shannon(), 2) == 1 + +# Analytical tests +# ----------------------------------- +# Minimal and equal to zero when probability distribution has only one element... +@test entropy(Shannon(), Probabilities([1.0])) ≈ 0.0 + +# or minimal when only one probability is nonzero and equal to 1.0 +@test entropy(Shannon(), Probabilities([1.0, 0.0, 0.0, 0.0])) ≈ 0.0 diff --git a/test/entropies/stretched_exponential.jl b/test/entropies/entropy_types/stretched_exponential.jl similarity index 92% rename from test/entropies/stretched_exponential.jl rename to test/entropies/entropy_types/stretched_exponential.jl index d7c411f00..0d19a8e28 100644 --- a/test/entropies/stretched_exponential.jl +++ b/test/entropies/entropy_types/stretched_exponential.jl @@ -19,5 +19,5 @@ b = 2 # probability distribution. x = [repeat([0, 1], 5); 0] est = SymbolicPermutation(m = 2) -@test entropy(StretchedExponential(η = η, base = b), x, est) ≈ +@test entropy(StretchedExponential(η = η, base = b), est, x) ≈ entropy_maximum(StretchedExponential(η = η, base = b), total_outcomes(est)) diff --git a/test/entropies/tsallis.jl b/test/entropies/entropy_types/tsallis.jl similarity index 100% rename from test/entropies/tsallis.jl rename to test/entropies/entropy_types/tsallis.jl diff --git a/test/entropies/estimators/alizadeharghami.jl b/test/entropies/estimators/alizadeharghami.jl new file mode 100644 index 000000000..d57f6bb05 --- /dev/null +++ b/test/entropies/estimators/alizadeharghami.jl @@ -0,0 +1,18 @@ +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. +# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. 
+N = round(0.5*log(2π) + 0.5, digits = 2) + +ea = AlizadehArghami(m = 100, base = 2) +ea_n = AlizadehArghami(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ea, rand(rng, n)), digits = 2) == U +@test round(entropy(ea_n, randn(rng, n)), digits = 2) == N + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), AlizadehArghami(), x) diff --git a/test/entropies/estimators/correa.jl b/test/entropies/estimators/correa.jl new file mode 100644 index 000000000..6c9398fd4 --- /dev/null +++ b/test/entropies/estimators/correa.jl @@ -0,0 +1,18 @@ +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. +# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. +N = round(0.5*log(2π) + 0.5, digits = 2) + +ec = Correa(m = 100, base = 2) +ec_n = Correa(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ec, rand(rng, n)), digits = 2) == U +@test round(entropy(ec_n, randn(rng, n)), digits = 2) == N + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), Correa(), x) diff --git a/test/entropies/estimators/ebrahimi.jl b/test/entropies/estimators/ebrahimi.jl new file mode 100644 index 000000000..9d41c36b3 --- /dev/null +++ b/test/entropies/estimators/ebrahimi.jl @@ -0,0 +1,19 @@ +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. +# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. +N = round(0.5*log(2π) + 0.5, digits = 2) + +ee = Ebrahimi(m = 100, base = 2) +ee_n = Ebrahimi(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ee, rand(rng, n)), digits = 2) == U +@test round(entropy(ee_n, randn(rng, n)), digits = 2) == N + + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), Ebrahimi(), x) diff --git a/test/entropies/estimators/estimators.jl b/test/entropies/estimators/estimators.jl new file mode 100644 index 000000000..fadb99481 --- /dev/null +++ b/test/entropies/estimators/estimators.jl @@ -0,0 +1,8 @@ +testfile("kozachenkoleonenko.jl") +testfile("kraskov.jl") +testfile("zhu.jl") +testfile("zhusingh.jl") +testfile("alizadeharghami.jl") +testfile("correa.jl") +testfile("ebrahimi.jl") +testfile("vasicek.jl") diff --git a/test/entropies/estimators/kozachenkoleonenko.jl b/test/entropies/estimators/kozachenkoleonenko.jl new file mode 100644 index 000000000..4c23a63be --- /dev/null +++ b/test/entropies/estimators/kozachenkoleonenko.jl @@ -0,0 +1,28 @@ +using DelayEmbeddings: genembed +using DelayEmbeddings: Dataset + +m = 4 +τ = 1 +τs = tuple([τ*i for i = 0:m-1]...) +x = rand(250) +D = genembed(x, τs) +est = KozachenkoLeonenko(w = 1) + +@test entropy(est, D) isa Real + +# Analytical test. 
+XN = Dataset(randn(100000, 1)); +# For normal distribution with mean 0 and std 1, the entropy is +h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats +h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits + +h_XN_kr_base_e = entropy(KozachenkoLeonenko(base = MathConstants.e), XN) +h_XN_kr_base_2 = entropy(KozachenkoLeonenko(base = 2), XN) +# The KozachenkoLeonenko estimator is not as precise as Kraskov, so check that we're +# within +- 3% of the target value +tol_e = h_XN_base_e * 0.03 +tol_2 = h_XN_base_2 * 0.03 +@test h_XN_base_e - tol_e ≤ h_XN_kr_base_e ≤ h_XN_base_e + tol_e +@test h_XN_base_2 - tol_2 ≤ h_XN_kr_base_2 ≤ h_XN_base_2 + tol_2 + +@test_throws ArgumentError entropy(Renyi(q = 2), KozachenkoLeonenko(), XN) diff --git a/test/entropies/estimators/kraskov.jl b/test/entropies/estimators/kraskov.jl new file mode 100644 index 000000000..6f0dd5d41 --- /dev/null +++ b/test/entropies/estimators/kraskov.jl @@ -0,0 +1,29 @@ +using DelayEmbeddings: genembed +using DelayEmbeddings: Dataset + +m = 4 +τ = 1 +τs = tuple([τ*i for i = 0:m-1]...) +x = rand(250) +D = genembed(x, τs) +est = Kraskov(k = 3, w = 1) +e = Shannon() +er = Renyi(q = 1.5) +@test_throws ArgumentError entropy(er, est, D) + + +@test entropy(est, D) isa Real +@test entropy(e, est, D) isa Real + +# Analytical test. +XN = Dataset(randn(100000, 1)); +# For normal distribution with mean 0 and std 1, the entropy is +h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats +h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits + +h_XN_kr_base_e = entropy(Kraskov(k = 3, base = MathConstants.e), XN) +h_XN_kr_base_2 = entropy(Kraskov(k = 3, base = 2), XN) +@test round(h_XN_base_e, digits = 1) == round(h_XN_kr_base_e, digits = 1) +@test round(h_XN_base_2, digits = 1) == round(h_XN_kr_base_2, digits = 1) + +@test_throws ArgumentError entropy(Renyi(q = 2), Kraskov(), XN) diff --git a/test/entropies/estimators/vasicek.jl b/test/entropies/estimators/vasicek.jl new file mode 100644 index 000000000..bc012923b --- /dev/null +++ b/test/entropies/estimators/vasicek.jl @@ -0,0 +1,19 @@ + +# ------------------------------------------------------------------------------------- +# Check if the estimator converge to true values for some distributions with +# analytically derivable entropy. +# ------------------------------------------------------------------------------------- +# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 +U = 0.00 +# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. +N = round(0.5*log(2π) + 0.5, digits = 2) + +ev = Vasicek(m = 100, base = 2) +ev_n = Vasicek(m = 100, base = MathConstants.e) + +n = 1000000 +@test round(entropy(ev, rand(rng, n)), digits = 2) == U +@test round(entropy(ev_n, randn(rng, n)), digits = 2) == N + +x = rand(1000) +@test_throws ArgumentError entropy(Renyi(q = 2), Vasicek(), x) diff --git a/test/entropies/estimators/zhu.jl b/test/entropies/estimators/zhu.jl new file mode 100644 index 000000000..5c882766e --- /dev/null +++ b/test/entropies/estimators/zhu.jl @@ -0,0 +1,24 @@ +using DelayEmbeddings: Dataset + +# To ensure minimal rectangle volumes are correct, we also test internals directly here. +# It's not feasible to construct an end-product test due to the neighbor searches. 
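+# (For reference: the expected volumes below are consistent with a box spanning
+# ±max|coordinate| along each axis around the query point [0, 0]:
+# (2*3)*(2*2) = 24 for `x` and (2*5)*(2*2) = 40 for `y`.)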
+x = Dataset([[-1, -2], [0, -2], [3, 2]])
+y = Dataset([[3, 1], [-5, 1], [3, -2]])
+@test Entropies.volume_minimal_rect([0, 0], x) == 24
+@test Entropies.volume_minimal_rect([0, 0], y) == 40
+
+# Analytical tests for the estimated entropy
+DN = Dataset(randn(200000, 1))
+hN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5
+hN_base_2 = hN_base_e / log(2, MathConstants.e)
+
+est = Zhu(k = 3)
+
+@test round(entropy(Shannon(; base = ℯ), est, DN), digits = 1) == round(hN_base_e, digits = 1)
+@test round(entropy(Shannon(; base = 2), est, DN), digits = 1) == round(hN_base_2, digits = 1)
+
+@test_throws ArgumentError entropy(Renyi(q = 2), Zhu(), DN)
+
+# Shannon entropy is default.
+@test entropy(Shannon(; base = 2), est, DN) == entropy(est, DN, base = 2)
+@test entropy(Shannon(; base = ℯ), est, DN) == entropy(est, DN, base = ℯ)
diff --git a/test/entropies/estimators/zhusingh.jl b/test/entropies/estimators/zhusingh.jl
new file mode 100644
index 000000000..b7cf360d7
--- /dev/null
+++ b/test/entropies/estimators/zhusingh.jl
@@ -0,0 +1,67 @@
+using Test, Distributions, LinearAlgebra
+using DelayEmbeddings: Dataset
+using StaticArrays: @SVector
+
+# Test internals in addition to end-product, because designing an exact end-product
+# test is a mess due to the neighbor searches. If these top-level tests fail, then
+# the issue is probably related to these functions.
+nns = Dataset([[-1, -1], [0, -2], [3, 2.0]])
+x = @SVector [0.0, 0.0]
+dists = Entropies.maxdists(x, nns)
+vol = Entropies.volume_minimal_rect(dists)
+ξ = Entropies.n_borderpoints(x, nns, dists)
+@test vol == 24.0
+@test ξ == 2.0
+
+nns = Dataset([[3, 1], [3, -2], [-5, 1.0]])
+x = @SVector [0.0, 0.0]
+dists = Entropies.maxdists(x, nns)
+vol = Entropies.volume_minimal_rect(dists)
+ξ = Entropies.n_borderpoints(x, nns, dists)
+@test vol == 40.0
+@test ξ == 2.0
+
+nns = Dataset([[-3, 1], [3, 1], [5, -2.0]])
+x = @SVector [0.0, 0.0]
+dists = Entropies.maxdists(x, nns)
+vol = Entropies.volume_minimal_rect(dists)
+ξ = Entropies.n_borderpoints(x, nns, dists)
+@test vol == 40.0
+@test ξ == 1.0
+
+# Analytical tests: 1D normal distribution
+DN = Dataset(randn(100000, 1))
+σ = 1.0
+hN_base_e = 0.5 * log(MathConstants.e, 2π * σ^2) + 0.5
+hN_base_2 = hN_base_e / log(2, MathConstants.e)
+
+est = ZhuSingh(k = 3)
+
+@test round(entropy(est, DN, base = ℯ), digits = 1) == round(hN_base_e, digits = 1)
+@test round(entropy(est, DN, base = 2), digits = 1) == round(hN_base_2, digits = 1)
+
+# Analytical test: 3D normal distribution
+σs = ones(3)
+μs = zeros(3)
+𝒩₂ = MvNormal(μs, Diagonal(σs))
+Σ = diagm(σs)
+n = length(μs)
+h_𝒩₂_base_ℯ = 0.5n * log(ℯ, 2π) + 0.5*log(ℯ, det(Σ)) + 0.5n
+h_𝒩₂_base_2 = h_𝒩₂_base_ℯ / log(2, ℯ)
+
+sample = Dataset(transpose(rand(𝒩₂, 50000)))
+hZS_𝒩₂_base_ℯ = entropy(Shannon(; base = ℯ), est, sample)
+hZS_𝒩₂_base_2 = entropy(Shannon(; base = 2), est, sample)
+
+# Estimation accuracy decreases for fixed N with increasing dimension, so exact comparison
+# isn't useful. Just check that values are within 1% of the target.
+tol_ℯ = hZS_𝒩₂_base_ℯ * 0.01
+tol_2 = hZS_𝒩₂_base_2 * 0.01
+@test h_𝒩₂_base_ℯ - tol_ℯ ≤ hZS_𝒩₂_base_ℯ ≤ h_𝒩₂_base_ℯ + tol_ℯ
+@test h_𝒩₂_base_2 - tol_2 ≤ hZS_𝒩₂_base_2 ≤ h_𝒩₂_base_2 + tol_2
+
+@test_throws ArgumentError entropy(Renyi(q = 2), ZhuSingh(), rand(100))
+
+# Shannon entropy is default.
+@test entropy(Shannon(; base = 2), est, sample) == entropy(est, sample, base = 2) +@test entropy(Shannon(; base = ℯ), est, sample) == entropy(est, sample, base = ℯ) diff --git a/test/entropies/interface.jl b/test/entropies/interface.jl new file mode 100644 index 000000000..80f73c9ca --- /dev/null +++ b/test/entropies/interface.jl @@ -0,0 +1,8 @@ +x = ones(3) +@test_throws MethodError entropy(x, 0.1) + +@testset "Non-default entropy types" begin + x = rand(1000) + est = AlizadehArghami() # the AlizadehArghami estimator only works for Shannon entropy + @test_throws ArgumentError entropy(Tsallis(), AlizadehArghami(), x) +end diff --git a/test/entropies/nearest_neighbors_direct.jl b/test/entropies/nearest_neighbors_direct.jl deleted file mode 100644 index 596f271c5..000000000 --- a/test/entropies/nearest_neighbors_direct.jl +++ /dev/null @@ -1,154 +0,0 @@ -using Test, StaticArrays, LinearAlgebra - -@testset "NN - Kraskov" begin - m = 4 - τ = 1 - τs = tuple([τ*i for i = 0:m-1]...) - x = rand(250) - D = genembed(x, τs) - est = Kraskov(k = 3, w = 1) - @test entropy(est, D) isa Real - - # Analytical test. - XN = Dataset(randn(100000, 1)); - # For normal distribution with mean 0 and std 1, the entropy is - h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats - h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits - - h_XN_kr_base_e = entropy(Kraskov(k = 3, base = MathConstants.e), XN) - h_XN_kr_base_2 = entropy(Kraskov(k = 3, base = 2), XN) - @test round(h_XN_base_e, digits = 1) == round(h_XN_kr_base_e, digits = 1) - @test round(h_XN_base_2, digits = 1) == round(h_XN_kr_base_2, digits = 1) -end - -@testset "NN - KozachenkoLeonenko" begin - m = 4 - τ = 1 - τs = tuple([τ*i for i = 0:m-1]...) - x = rand(250) - D = genembed(x, τs) - est = KozachenkoLeonenko(w = 1) - - @test entropy(est, D) isa Real - - # Analytical test. - XN = Dataset(randn(100000, 1)); - # For normal distribution with mean 0 and std 1, the entropy is - h_XN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 # nats - h_XN_base_2 = h_XN_base_e / log(2, MathConstants.e) # bits - - h_XN_kr_base_e = entropy(KozachenkoLeonenko(base = MathConstants.e), XN) - h_XN_kr_base_2 = entropy(KozachenkoLeonenko(base = 2), XN) - # The KozachenkoLeonenko estimator is not as precise as Kraskov, so check that we're - # within +- 3% of the target value - tol_e = h_XN_base_e * 0.03 - tol_2 = h_XN_base_2 * 0.03 - @test h_XN_base_e - tol_e ≤ h_XN_kr_base_e ≤ h_XN_base_e + tol_e - @test h_XN_base_2 - tol_2 ≤ h_XN_kr_base_2 ≤ h_XN_base_2 + tol_2 -end - -@testset "NN - Zhu" begin - - # To ensure minimal rectangle volumes are correct, we also test internals directly here. - # It's not feasible to construct an end-product test due to the neighbor searches. - x = Dataset([[-1, -2], [0, -2], [3, 2]]) - y = Dataset([[3, 1], [-5, 1], [3, -2]]) - @test Entropies.volume_minimal_rect([0, 0], x) == 24 - @test Entropies.volume_minimal_rect([0, 0], y) == 40 - - # Analytical tests for the estimated entropy - DN = Dataset(randn(200000, 1)) - hN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 - hN_base_2 = hN_base_e / log(2, MathConstants.e) - - e_base_e = Zhu(k = 3, base = MathConstants.e) - e_base_2 = Zhu(k = 3, base = 2) - - @test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) - @test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) -end - -@testset "NN - Zhu" begin - - # To ensure minimal rectangle volumes are correct, we also test internals directly here. 
- # It's not feasible to construct an end-product test due to the neighbor searches. - x = Dataset([[-1, -2], [0, -2], [3, 2]]) - y = Dataset([[3, 1], [-5, 1], [3, -2]]) - @test Entropies.volume_minimal_rect([0, 0], x) == 24 - @test Entropies.volume_minimal_rect([0, 0], y) == 40 - - # Analytical tests for the estimated entropy - DN = Dataset(randn(200000, 1)) - hN_base_e = 0.5 * log(MathConstants.e, 2π) + 0.5 - hN_base_2 = hN_base_e / log(2, MathConstants.e) - - e_base_e = Zhu(k = 3, base = MathConstants.e) - e_base_2 = Zhu(k = 3, base = 2) - - @test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) - @test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) -end - - -@testset "NN - ZhuSingh" begin - using Distributions: MvNormal - - # Test internals in addition to end-product, because designing an exact end-product - # test is a mess due to the neighbor searches. If these top-level tests fail, then - # the issue is probability related to these functions. - nns = Dataset([[-1, -1], [0, -2], [3, 2.0]]) - x = @SVector [0.0, 0.0] - dists = Entropies.maxdists(x, nns) - vol = Entropies.volume_minimal_rect(dists) - ξ = Entropies.n_borderpoints(x, nns, dists) - @test vol == 24.0 - @test ξ == 2.0 - - nns = Dataset([[3, 1], [3, -2], [-5, 1.0]]) - x = @SVector [0.0, 0.0] - dists = Entropies.maxdists(x, nns) - vol = Entropies.volume_minimal_rect(dists) - ξ = Entropies.n_borderpoints(x, nns, dists) - @test vol == 40.0 - @test ξ == 2.0 - - nns = Dataset([[-3, 1], [3, 1], [5, -2.0]]) - x = @SVector [0.0, 0.0] - dists = Entropies.maxdists(x, nns) - vol = Entropies.volume_minimal_rect(dists) - ξ = Entropies.n_borderpoints(x, nns, dists) - @test vol == 40.0 - @test ξ == 1.0 - - # Analytical tests: 1D normal distribution - DN = Dataset(randn(100000, 1)) - σ = 1.0 - hN_base_e = 0.5 * log(MathConstants.e, 2π * σ^2) + 0.5 - hN_base_2 = hN_base_e / log(2, MathConstants.e) - - e_base_e = ZhuSingh(k = 3, base = MathConstants.e) - e_base_2 = ZhuSingh(k = 3, base = 2) - - @test round(entropy(e_base_e, DN), digits = 1) == round(hN_base_e, digits = 1) - @test round(entropy(e_base_2, DN), digits = 1) == round(hN_base_2, digits = 1) - - # Analytical test: 3D normal distribution - σs = ones(3) - μs = zeros(3) - 𝒩₂ = MvNormal(μs, Diagonal(σs)) - Σ = diagm(σs) - n = length(μs) - h_𝒩₂_base_ℯ = 0.5n * log(ℯ, 2π) + 0.5*log(ℯ, det(Σ)) + 0.5n - h_𝒩₂_base_2 = h_𝒩₂_base_ℯ / log(2, ℯ) - - sample = Dataset(transpose(rand(𝒩₂, 50000))) - hZS_𝒩₂_base_ℯ = entropy(e_base_e, sample) - hZS_𝒩₂_base_2 = entropy(e_base_2, sample) - - # Estimation accuracy decreases for fixed N with increasing edimension, so exact comparison - # isn't useful. Just check that values are within 1% of the target. - tol_ℯ = hZS_𝒩₂_base_ℯ * 0.01 - tol_2 = hZS_𝒩₂_base_2 * 0.01 - @test h_𝒩₂_base_ℯ - tol_ℯ ≤ hZS_𝒩₂_base_ℯ ≤ h_𝒩₂_base_ℯ + tol_ℯ - @test h_𝒩₂_base_2 - tol_2 ≤ hZS_𝒩₂_base_2 ≤ h_𝒩₂_base_2 + tol_2 -end diff --git a/test/entropies/shannon.jl b/test/entropies/shannon.jl deleted file mode 100644 index b6141dc80..000000000 --- a/test/entropies/shannon.jl +++ /dev/null @@ -1,46 +0,0 @@ -using Random -rng = MersenneTwister(1234) - -x = rand(1000) -xp = Probabilities(x) -@test_throws MethodError entropy(Shannon(), x) isa Real -@test entropy(Shannon(base = 10), xp) isa Real -@test entropy(Shannon(base = 2), xp) isa Real -@test entropy_maximum(Shannon(), 2) == 1 - -# Analytical tests -# ----------------------------------- -# Minimal and equal to zero when probability distribution has only one element... 
-@test entropy(Shannon(), Probabilities([1.0])) ≈ 0.0 - -# or minimal when only one probability is nonzero and equal to 1.0 -@test entropy(Shannon(), Probabilities([1.0, 0.0, 0.0, 0.0])) ≈ 0.0 - -# ----------------- -# Direct estimators -# ----------------- -# We just check if the estimator converge to true values for some distributions with -# analytically derivable entropy. -# Entropy to log with base b of a uniform distribution on [0, 1] = ln(1 - 0)/(ln(b)) = 0 -U = 0.00 -# Entropy with natural log of 𝒩(0, 1) is 0.5*ln(2π) + 0.5. -N = round(0.5*log(2π) + 0.5, digits = 2) - -ev = Vasicek(m = 100, base = 2) -ee = Ebrahimi(m = 100, base = 2) -ec = Correa(m = 100, base = 2) -ea = AlizadehArghami(m = 100, base = 2) -ev_n = Vasicek(m = 100, base = MathConstants.e) -ee_n = Ebrahimi(m = 100, base = MathConstants.e) -ec_n = Correa(m = 100, base = MathConstants.e) -ea_n = AlizadehArghami(m = 100, base = MathConstants.e) - -n = 1000000 -@test round(entropy(ev, rand(rng, n)), digits = 2) == U -@test round(entropy(ee, rand(rng, n)), digits = 2) == U -@test round(entropy(ec, rand(rng, n)), digits = 2) == U -@test round(entropy(ea, rand(rng, n)), digits = 2) == U -@test round(entropy(ee_n, randn(rng, n)), digits = 2) == N -@test round(entropy(ev_n, randn(rng, n)), digits = 2) == N -@test round(entropy(ec_n, randn(rng, n)), digits = 2) == N -@test round(entropy(ea_n, randn(rng, n)), digits = 2) == N diff --git a/test/estimators/count_occurrences.jl b/test/probabilities/estimators/count_occurrences.jl similarity index 77% rename from test/estimators/count_occurrences.jl rename to test/probabilities/estimators/count_occurrences.jl index fb6896006..0a21defdc 100644 --- a/test/estimators/count_occurrences.jl +++ b/test/probabilities/estimators/count_occurrences.jl @@ -6,8 +6,8 @@ rng = Random.MersenneTwister(1234) x = [rand(rng, Bool) for _ in 1:10000] probs1 = probabilities(x) -probs2 = probabilities(x, CountOccurrences()) -probs3, outs = probabilities_and_outcomes(x, CountOccurrences()) +probs2 = probabilities(CountOccurrences(), x) +probs3, outs = probabilities_and_outcomes(CountOccurrences(), x) for ps in (probs1, probs2, probs3) for p in ps; @test 0.49 < p < 0.51; end @@ -22,7 +22,7 @@ y = [rand(rng, Bool) for _ in 1:10000] D = Dataset(x, y) probs1 = probabilities(D) -probs2 = probabilities(D, CountOccurrences()) +probs2 = probabilities(CountOccurrences(), D) for ps in (probs1, probs2) for p in ps; @test 0.24 < p < 0.26; end end @@ -30,8 +30,8 @@ end # Renyi of coin toss is 1 bit, and for two coin tosses is two bits # Result doesn't depend on `q` due to uniformity of the PDF. for q in (0.5, 1.0, 2.0) - h = entropy(Renyi(q), x, CountOccurrences()) + h = entropy(Renyi(q), CountOccurrences(), x) @test 0.99 < h < 1.01 - h = entropy(Renyi(q), D, CountOccurrences()) + h = entropy(Renyi(q), CountOccurrences(), D) @test 1.99 < h < 2.01 end diff --git a/test/estimators/dispersion.jl b/test/probabilities/estimators/dispersion.jl similarity index 92% rename from test/estimators/dispersion.jl rename to test/probabilities/estimators/dispersion.jl index 3e5f3db51..e67aaee0c 100644 --- a/test/estimators/dispersion.jl +++ b/test/probabilities/estimators/dispersion.jl @@ -33,20 +33,20 @@ using Entropies, Test ps_paper = [1/11, 0/11, 3/11, 2/11, 1/11, 0/11, 1/11, 2/11, 1/11] |> sort ps_paper = ps_paper[findall(ps_paper .> 0)] - ps = probabilities(x, est) + ps = probabilities(est, x) @test ps |> sort == ps_paper # There is probably a typo in Rostaghi & Azami (2016). 
They state that the # non-normalized dispersion entropy is 1.8642. However, with identical probabilies, # we obtain non-normalized dispersion entropy of 1.8462. - res = entropy(Renyi(base = MathConstants.e, q = 1), x, est) + res = entropy(Renyi(base = MathConstants.e, q = 1), est, x) @test round(res, digits = 4) == 1.8462 # Again, probabilities are identical up to this point, but the values we get differ # slightly from the paper. They get normalized DE of 0.85, but we get 0.84. 0.85 is # the normalized DE you'd get by manually normalizing the (erroneous) value from # their previous step. - res_norm = entropy_normalized(Renyi(base = MathConstants.e, q = 1), x, est) + res_norm = entropy_normalized(Renyi(base = MathConstants.e, q = 1), est, x) @test round(res_norm, digits = 2) == 0.84 @testset "outcomes" begin @@ -54,7 +54,7 @@ using Entropies, Test Ω = outcome_space(est) @test length(Ω) == total_outcomes(est) == 4^3 - probs, out = probabilities_and_outcomes(rand(1000), est) + probs, out = probabilities_and_outcomes(est, rand(1000)) @test all(x -> x ∈ Ω, out) end end diff --git a/test/estimators/diversity.jl b/test/probabilities/estimators/diversity.jl similarity index 55% rename from test/estimators/diversity.jl rename to test/probabilities/estimators/diversity.jl index 199ba1292..943c3f93b 100644 --- a/test/estimators/diversity.jl +++ b/test/probabilities/estimators/diversity.jl @@ -1,26 +1,26 @@ -using Test, Entropies - # Analytical example from Wang et al. (2020) x = [1, 2, 13, 7, 9, 5, 4] nbins = 10 m = 3 τ = 1 est = Diversity(; nbins, m, τ) -@test probabilities(x, est) == [0.5, 0.5] -@test total_outcomes(est) == 10 - -Ω = outcome_space(x, est) -@test length(Ω) == 10 -@test round(entropy_normalized(x, est), digits = 4) == 0.3010 +@test probabilities(est, x) == [0.5, 0.5] +@test round(entropy_normalized(est, x), digits = 4) == 0.3010 +@test total_outcomes(est) == 10 # Diversity divides the interval [-1, 1] into nbins subintervals. -binsize = nextfloat((1-(-1))/10, 2) -probs, events = probabilities_and_outcomes(x, est) +binsize = (1-(-1))/10 +probs, events = probabilities_and_outcomes(est, x) ds = [0.605, 0.698, 0.924, 0.930] # value from Wang et al. (2020) -# These distances should be in the following distance bins: [8, 8, 9, 9], +# These distances should be in the following distance bins: [8, 8, 9, 9]. # Which means that the probability distribution should be [0.5, 0.5] bins = floor.(Int, (ds .- (-1.0)) / binsize) @test sort(bins) == [8, 8, 9, 9] @test probs == [0.5, 0.5] + +# The coordinates corresponding to the left corners of these bins are as +# follows, and should correspond with the events. 
+coords = ((bins .- 1) .* binsize) .+ (-1.0) +@test all(round.(events, digits = 13) == round.(unique(coords), digits = 13)) diff --git a/test/estimators/naive_kernel.jl b/test/probabilities/estimators/naive_kernel.jl similarity index 53% rename from test/estimators/naive_kernel.jl rename to test/probabilities/estimators/naive_kernel.jl index e8fffa4f8..60fa526ba 100644 --- a/test/estimators/naive_kernel.jl +++ b/test/probabilities/estimators/naive_kernel.jl @@ -9,15 +9,15 @@ pts = Dataset([rand(2) for i = 1:N]); est_direct = NaiveKernel(ϵ, KDTree) est_tree = NaiveKernel(ϵ, BruteForce) -@test probabilities(pts, est_tree) isa Probabilities -@test probabilities(pts, est_direct) isa Probabilities -p_tree = probabilities(pts, est_tree) -p_direct = probabilities(pts, est_direct) +@test probabilities(est_tree, pts) isa Probabilities +@test probabilities(est_direct, pts) isa Probabilities +p_tree = probabilities(est_tree, pts) +p_direct = probabilities(est_direct, pts) @test all(p_tree .== p_direct) == true -@test entropy(Renyi(), pts, est_direct) isa Real -@test entropy(Renyi(), pts, est_tree) isa Real +@test entropy(Renyi(), est_direct, pts) isa Real +@test entropy(Renyi(), est_tree, pts) isa Real -probs, z = probabilities_and_outcomes(pts, est_tree) +probs, z = probabilities_and_outcomes(est_tree, pts) @test z == 1:length(pts) @test outcome_space(pts, est_tree) == 1:length(pts) diff --git a/test/estimators/permutation.jl b/test/probabilities/estimators/permutation.jl similarity index 76% rename from test/estimators/permutation.jl rename to test/probabilities/estimators/permutation.jl index b1f933855..1cd76aac1 100644 --- a/test/estimators/permutation.jl +++ b/test/probabilities/estimators/permutation.jl @@ -14,8 +14,8 @@ z = rand(N) # Probability distributions - p1 = probabilities!(s, x, est) - p2 = probabilities!(s, y, est) + p1 = probabilities!(s, est, x) + p2 = probabilities!(s, est, y) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 end @@ -35,12 +35,12 @@ s_timeseries = zeros(Int, length(x_timeseries) - (m - 1)*τ) s_dataset = zeros(Int, length(x_dataset)) - @test entropy!(s_timeseries, Shannon(base = 2), x_timeseries, est) ≈ 1.0 - @test entropy!(s_dataset, Shannon(base = 2), x_dataset, est) ≈ 1.0 + @test entropy!(s_timeseries, Shannon(base = 2), est, x_timeseries) ≈ 1.0 + @test entropy!(s_dataset, Shannon(base = 2), est, x_dataset) ≈ 1.0 # Should default to Shannon base 2 - @test entropy!(s_timeseries, x_timeseries, est) ≈ 1.0 - @test entropy!(s_dataset, x_dataset, est) ≈ 1.0 + @test entropy!(s_timeseries, est, x_timeseries) ≈ 1.0 + @test entropy!(s_dataset, est, x_dataset) ≈ 1.0 end end @@ -51,14 +51,14 @@ end y = Dataset(rand(N, 5)) # Probability distributions - p1 = probabilities(x, est) - p2 = probabilities(y, est) + p1 = probabilities(est, x) + p2 = probabilities(est, y) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 # Entropy - @test entropy(Renyi(q = 1), x, est) ≈ 0 # Regular order-1 entropy - @test entropy(Renyi(q = 2), y, est) >= 0 # Higher-order entropy + @test entropy(Renyi(q = 1), est, x) ≈ 0 # Regular order-1 entropy + @test entropy(Renyi(q = 2), est, y) >= 0 # Higher-order entropy end @testset "Custom sorting" begin @@ -70,8 +70,8 @@ end est_isless = SymbolicPermutation(m = 5, τ = 1, lt = Base.isless) est_isless_rand = SymbolicPermutation(m = 5, τ = 1, lt = Entropies.isless_rand) - @test probabilities(D, est_isless) isa Probabilities - @test probabilities(D, est_isless_rand) isa Probabilities + @test probabilities(est_isless, D) isa Probabilities + @test probabilities(est_isless_rand, D) 
isa Probabilities end # With m = 2, ordinal patterns and frequencies are: @@ -80,7 +80,7 @@ end x = [1, 2, 1, 2, 1, 2] # don't randomize in the case of equal values, so use Base.isless est = SymbolicPermutation(m = 2, lt = Base.isless) -probs, πs = probabilities_and_outcomes(x, est) +probs, πs = probabilities_and_outcomes(est, x) @test πs == SVector{2, Int}.([[1, 2], [2, 1]]) @test probs == [3/5, 2/5] diff --git a/test/estimators/permutation_amplitude_aware.jl b/test/probabilities/estimators/permutation_amplitude_aware.jl similarity index 84% rename from test/estimators/permutation_amplitude_aware.jl rename to test/probabilities/estimators/permutation_amplitude_aware.jl index 74937bca8..844f16e92 100644 --- a/test/estimators/permutation_amplitude_aware.jl +++ b/test/probabilities/estimators/permutation_amplitude_aware.jl @@ -12,15 +12,15 @@ D = genembed(x, τs) est = SymbolicAmplitudeAwarePermutation(m = m, τ = τ) # Probability distributions -p1 = probabilities(x, est) -p2 = probabilities(D, est) +p1 = probabilities(est, x) +p2 = probabilities(est, D) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 @test all(p1.p .≈ p2.p) # Entropy -e1 = entropy(Renyi(), D, est) -e2 = entropy(Renyi(), x, est) +e1 = entropy(Renyi(), est, D) +e2 = entropy(Renyi(), est, x) @test e1 ≈ e2 @@ -33,8 +33,8 @@ e2 = entropy(Renyi(), x, est) est_isless = SymbolicAmplitudeAwarePermutation(m = 5, τ = 1, lt = Base.isless) est_isless_rand = SymbolicAmplitudeAwarePermutation(m = 5, τ = 1, lt = Entropies.isless_rand) - @test probabilities(ts, est_isless) isa Probabilities - @test probabilities(D, est_isless) isa Probabilities + @test probabilities(est_isless, ts) isa Probabilities + @test probabilities(est_isless, D) isa Probabilities end # With m = 2, ordinal patterns and frequencies are: @@ -43,7 +43,7 @@ end x = [1, 2, 1, 2, 1, 2] # don't randomize in the case of equal values, so use Base.isless est = SymbolicAmplitudeAwarePermutation(m = 2, lt = Base.isless) -probs, πs = probabilities_and_outcomes(x, est) +probs, πs = probabilities_and_outcomes(est, x) @test πs == SVector{2, Int}.([[1, 2], [2, 1]]) # TODO: probabilities should be explicitly tested too. 
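The TODO above can be closed with an explicit probability check using the estimator-first call order adopted throughout this PR. A minimal sketch, assuming only generic properties of the returned distribution (the exact amplitude-aware weights for this input are not asserted anywhere in the diff):

```julia
using Entropies, Test

x = [1, 2, 1, 2, 1, 2]
est = SymbolicAmplitudeAwarePermutation(m = 2, lt = Base.isless)
probs, πs = probabilities_and_outcomes(est, x)  # estimator first, data second

# Generic sanity checks until the exact weighted probabilities are derived by hand.
@test sum(probs) ≈ 1.0
@test all(p -> 0 < p ≤ 1, probs)
```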
diff --git a/test/estimators/permutation_spatial.jl b/test/probabilities/estimators/permutation_spatial.jl similarity index 85% rename from test/estimators/permutation_spatial.jl rename to test/probabilities/estimators/permutation_spatial.jl index edd8aec83..6800eba92 100644 --- a/test/estimators/permutation_spatial.jl +++ b/test/probabilities/estimators/permutation_spatial.jl @@ -9,25 +9,25 @@ using Entropies, Test est = SpatialSymbolicPermutation(stencil, x, false) # Generic tests - ps = probabilities(x, est) + ps = probabilities(est, x) @test ps isa Probabilities @test length(ps) == 3 @test sort(ps) == [0.25, 0.25, 0.5] - h = entropy(Renyi(base = 2), x, est) + h = entropy(Renyi(base = 2), est, x) @test h == 1.5 # In fact, doesn't matter how we order the stencil, # the symbols will always be equal in top-left and bottom-right stencil = CartesianIndex.([(0,0), (1,0), (1,1), (0,1)]) est = SpatialSymbolicPermutation(stencil, x, false) - @test entropy(Renyi(base = 2), x, est) == 1.5 + @test entropy(Renyi(base = 2), est, x) == 1.5 # But for sanity, let's ensure we get a different number # for a different stencil stencil = CartesianIndex.([(0,0), (1,0), (2,0)]) est = SpatialSymbolicPermutation(stencil, x, false) - ps = sort(probabilities(x, est)) + ps = sort(probabilities(est, x)) @test ps[1] == 1/3 @test ps[2] == 2/3 @@ -36,12 +36,12 @@ using Entropies, Test extent = (2, 2) lag = (1, 1) est = SpatialSymbolicPermutation((extent, lag), x, false) - @test entropy(Renyi(base = 2), x, est) == 1.5 + @test entropy(Renyi(base = 2), est, x) == 1.5 # and let's also test the matrix-way of specifying the stencil stencil = [1 1; 1 1] est = SpatialSymbolicPermutation(stencil, x, false) - @test entropy(Renyi(base = 2), x, est) == 1.5 + @test entropy(Renyi(base = 2), est, x) == 1.5 # Also test that it works for arbitrarily high-dimensional arrays stencil = CartesianIndex.([(0,0,0), (0,1,0), (0,0,1), (1,0,0)]) @@ -49,12 +49,12 @@ using Entropies, Test est = SpatialSymbolicPermutation(stencil, z, false) # Analytically the total stencils are of length 4*4*4 = 64 # but all of them given the same probabilities because of the layout - ps = probabilities(z, est) + ps = probabilities(est, z) @test ps == [1] # if we shuffle, we get random stuff using Random w = shuffle!(Random.MersenneTwister(42), collect(z)) - ps = probabilities(w, est) + ps = probabilities(est, w) @test length(ps) > 1 # check that the 3d-hyperrectangle version also works as expected @@ -67,10 +67,10 @@ using Entropies, Test extent = (2, 2, 2) lag = (1, 1, 1) est2 = SpatialSymbolicPermutation((extent, lag), w, false) - @test entropy(Renyi(), w, est1) == entropy(Renyi(), w, est2) + @test entropy(Renyi(), est1, w) == entropy(Renyi(), est2, w) # and to this stencil written as a matrix stencil = [1; 1;; 1; 1;;; 1; 1;; 1; 1] est3 = SpatialSymbolicPermutation(stencil, w, false) - @test entropy(Renyi(), w, est1) == entropy(Renyi(), w, est3) + @test entropy(Renyi(), est1, w) == entropy(Renyi(), est3, w) end diff --git a/test/estimators/permutation_weighted.jl b/test/probabilities/estimators/permutation_weighted.jl similarity index 83% rename from test/estimators/permutation_weighted.jl rename to test/probabilities/estimators/permutation_weighted.jl index 73e28cea9..ab120f206 100644 --- a/test/estimators/permutation_weighted.jl +++ b/test/probabilities/estimators/permutation_weighted.jl @@ -12,15 +12,15 @@ D = genembed(x, τs) # Probability distributions est = SymbolicWeightedPermutation(m = m, τ = τ) -p1 = probabilities(x, est) -p2 = probabilities(D, est) +p1 
= probabilities(est, x) +p2 = probabilities(est, D) @test sum(p1) ≈ 1.0 @test sum(p2) ≈ 1.0 @test all(p1.p .≈ p2.p) # Entropy -e1 = entropy(Renyi(), D, est) -e2 = entropy(Renyi(), x, est) +e1 = entropy(Renyi(), est, D) +e2 = entropy(Renyi(), est, x) @test e1 ≈ e2 @@ -34,8 +34,8 @@ e2 = entropy(Renyi(), x, est) est_isless = SymbolicWeightedPermutation(m = 5, τ = 1, lt = Base.isless) est_isless_rand = SymbolicWeightedPermutation(m = 5, τ = 1, lt = Entropies.isless_rand) - @test probabilities(ts, est_isless) isa Probabilities - @test probabilities(D, est_isless) isa Probabilities + @test probabilities(est_isless, ts) isa Probabilities + @test probabilities(est_isless, D) isa Probabilities end # With m = 2, ordinal patterns and frequencies are: @@ -44,7 +44,7 @@ end x = [1, 2, 1, 2, 1, 2] # don't randomize in the case of equal values, so use Base.isless est = SymbolicWeightedPermutation(m = 2, lt = Base.isless) -probs, πs = probabilities_and_outcomes(x, est) +probs, πs = probabilities_and_outcomes(est, x) @test πs == SVector{2, Int}.([[1, 2], [2, 1]]) # TODO: probabilities should be explicitly tested too. diff --git a/test/estimators/timescales.jl b/test/probabilities/estimators/timescales.jl similarity index 80% rename from test/estimators/timescales.jl rename to test/probabilities/estimators/timescales.jl index 25f545254..6e36ea792 100644 --- a/test/estimators/timescales.jl +++ b/test/probabilities/estimators/timescales.jl @@ -9,10 +9,10 @@ using Entropies, Test @testset "WaveletOverlap" begin wl = Entropies.Wavelets.WT.Daubechies{4}() est = WaveletOverlap(wl) - ps = probabilities(x, est) + ps = probabilities(est, x) @test length(ps) == 8 @test ps isa Probabilities - @test entropy(Renyi( q = 1, base = 2), x, WaveletOverlap()) isa Real + @test entropy(Renyi( q = 1, base = 2), WaveletOverlap(), x) isa Real end @testset "Fourier Spectrum" begin @@ -22,10 +22,10 @@ using Entropies, Test y = @. 
sin(t) + sin(sqrt(3)*t) z = randn(N) est = PowerSpectrum() - ents = [entropy(Renyi(), w, est) for w in (x,y,z)] + ents = [entropy(Renyi(), est, w) for w in (x,y,z)] @test ents[1] < ents[2] < ents[3] # Test event stuff (analytically, using sine wave) - probs, events = probabilities_and_outcomes(x, est) + probs, events = probabilities_and_outcomes(est, x) @test length(events) == length(probs) == 501 @test events[1] ≈ 0 atol=1e-16 # 0 frequency, i.e., mean value @test probs[1] ≈ 0 atol=1e-16 # sine wave has 0 mean value diff --git a/test/estimators/transfer_operator.jl b/test/probabilities/estimators/transfer_operator.jl similarity index 92% rename from test/estimators/transfer_operator.jl rename to test/probabilities/estimators/transfer_operator.jl index f3c6f1de8..e4c0bac00 100644 --- a/test/estimators/transfer_operator.jl +++ b/test/probabilities/estimators/transfer_operator.jl @@ -39,6 +39,6 @@ end @test bins isa Vector{<:SVector} est = TransferOperator(binnings[i]) - @test probabilities(D, est) isa Probabilities - @test probabilities_and_outcomes(D, est) isa Tuple{Probabilities, Vector{SVector{2, Float64}}} + @test probabilities(est, D) isa Probabilities + @test probabilities_and_outcomes(est, D) isa Tuple{Probabilities, Vector{SVector{2, Float64}}} end diff --git a/test/estimators/histograms.jl b/test/probabilities/estimators/value_histogram.jl similarity index 93% rename from test/estimators/histograms.jl rename to test/probabilities/estimators/value_histogram.jl index 0a9cfe8b8..051c11232 100644 --- a/test/estimators/histograms.jl +++ b/test/probabilities/estimators/value_histogram.jl @@ -21,7 +21,7 @@ using Random for bin in binnings @testset "ϵ = $(bin.ϵ)" begin est = ValueHistogram(bin) - p = probabilities(x, est) + p = probabilities(est, x) @test length(p) == 100 @test all(e -> 0.009 ≤ e ≤ 0.011, p) end @@ -29,7 +29,7 @@ using Random @testset "Check rogue 1s" begin b = RectangularBinning(0.1) # no `nextfloat` here, so the rogue (1, 1) is in extra bin! - p = probabilities(x, ValueHistogram(b)) + p = probabilities(ValueHistogram(b), x) @test length(p) == 100 + 1 @test p[end] ≈ 1/100_000 atol = 1e-5 end @@ -41,7 +41,7 @@ using Random ε = nextfloat(0.1) # this guarantees that we get the same as the `n` above! binnings = RectangularBinning.((n, ε)) for bin in binnings - p = probabilities(x, ValueHistogram(bin)) + p = probabilities(ValueHistogram(bin), x) @test length(p) == 10 @test all(e -> 0.09 ≤ e ≤ 0.11, p) end @@ -55,8 +55,8 @@ using Random xs1D = [rand(rng, 20) for i = 1:10000]; xs2D = [rand(rng, 1000, 2) |> Dataset for i = 1:10000] # more points to fill all bins est = ValueHistogram(RectangularBinning(10)) - ps1D = [probabilities(x, est) for x in xs1D]; - ps2D = [probabilities(x, est) for x in xs2D]; + ps1D = [probabilities(est, x) for x in xs1D]; + ps2D = [probabilities(est, x) for x in xs2D]; n_rogue_extrabin_1D = count(length.(ps1D) .> est.binning.ϵ) n_rogue_extrabin_2D = count(length.(ps2D) .> est.binning.ϵ^2) @test n_rogue_extrabin_1D == 0 diff --git a/test/probabilities/interface.jl b/test/probabilities/interface.jl new file mode 100644 index 000000000..f187b0b12 --- /dev/null +++ b/test/probabilities/interface.jl @@ -0,0 +1,3 @@ +x = ones(3) +p = probabilities(ValueHistogram(0.1), x) +@test p isa Probabilities diff --git a/test/probabilities/probabilities.jl b/test/probabilities/probabilities.jl new file mode 100644 index 000000000..639dfa5ed --- /dev/null +++ b/test/probabilities/probabilities.jl @@ -0,0 +1,15 @@ +# Interface. 
+testfile("interface.jl") + +# Probability estimators. +testfile("estimators/count_occurrences.jl") +testfile("estimators/value_histogram.jl") +testfile("estimators/transfer_operator.jl") +testfile("estimators/naive_kernel.jl") +testfile("estimators/permutation.jl") +testfile("estimators/permutation_weighted.jl") +testfile("estimators/permutation_amplitude_aware.jl") +testfile("estimators/permutation_spatial.jl") +testfile("estimators/timescales.jl") +testfile("estimators/dispersion.jl") +testfile("estimators/diversity.jl") diff --git a/test/runtests.jl b/test/runtests.jl index 4731128d4..153abe8e6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,32 +1,14 @@ using Test +using Entropies defaultname(file) = uppercasefirst(replace(splitext(basename(file))[1], '_' => ' ')) testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end @testset "Entropies.jl" begin - # Probability estimators - testfile("estimators/count_occurrences.jl") - testfile("estimators/histograms.jl") - testfile("estimators/transfer_operator.jl") - testfile("estimators/naive_kernel.jl") - testfile("estimators/permutation.jl") - testfile("estimators/permutation_weighted.jl") - testfile("estimators/permutation_amplitude_aware.jl") - testfile("estimators/permutation_spatial.jl") - testfile("estimators/timescales.jl") - testfile("estimators/dispersion.jl") - testfile("estimators/diversity.jl") - - # Different entropies - testfile("entropies/renyi.jl") - testfile("entropies/shannon.jl") - testfile("entropies/tsallis.jl") - testfile("entropies/curado.jl") - testfile("entropies/stretched_exponential.jl") - - testfile("entropies/nearest_neighbors_direct.jl") + include("probabilities/probabilities.jl") + include("entropies/entropies.jl") # Various testfile("utils/fasthist.jl") # testfile("utils/encoding.jl") - testfile("entropies/convenience.jl") + testfile("convenience.jl") end