diff --git a/docs/src/complexity_measures.md b/docs/src/complexity_measures.md index dfc292032..e0fa6032e 100644 --- a/docs/src/complexity_measures.md +++ b/docs/src/complexity_measures.md @@ -8,6 +8,7 @@ ```@docs reverse_dispersion +distance_to_whitenoise ``` ## Disequilibrium diff --git a/docs/src/entropies.md b/docs/src/entropies.md index d0ee69586..3d72bfcb6 100644 --- a/docs/src/entropies.md +++ b/docs/src/entropies.md @@ -13,10 +13,21 @@ entropy_tsallis ``` ## Shannon entropy (convenience) + ```@docs entropy_shannon ``` +## Normalization + +The generic [`entropy_normalized`](@ref) normalizes any entropy value to the entropy of a +uniform distribution. We also provide [maximum entropy](@ref maximum_entropy) functions +that are useful for manual normalization. + +```@docs +entropy_normalized +``` + ## Indirect entropies Here we list functions which compute Shannon entropies via alternate means, without explicitly computing some probability distributions and then using the Shannon formulat. diff --git a/docs/src/examples.md b/docs/src/examples.md index 613ce6d4b..acae371f7 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -171,13 +171,11 @@ des = zeros(length(windows)) pes = zeros(length(windows)) m, c = 2, 6 -scheme = GaussianSymbolization(c) -est_de = Dispersion(s = scheme, m = m, τ = 1, normalize = true) +est_de = Dispersion(symbolization = GaussianSymbolization(c), m = m, τ = 1) for (i, window) in enumerate(windows) - rdes[i] = reverse_dispersion(y[window]; - s = scheme, m = m, τ = 1, normalize = true) - des[i] = entropy_renyi(y[window], est_de) + rdes[i] = reverse_dispersion(y[window], est_de; normalize = true) + des[i] = entropy_renyi_norm(y[window], est_de) end fig = Figure() diff --git a/docs/src/utils.md b/docs/src/utils.md index d79865b8d..a48ef3dc7 100644 --- a/docs/src/utils.md +++ b/docs/src/utils.md @@ -26,3 +26,17 @@ OrdinalPattern ```@docs Entropies.encode_motif ``` + +### Normalization + +```@docs +alphabet_length +``` + +#### [Maximum entropy](@id maximum_entropy) + +```@docs +maxentropy_tsallis +maxentropy_renyi +maxentropy_shannon +``` diff --git a/src/complexity_measures/reverse_dispersion_entropy.jl b/src/complexity_measures/reverse_dispersion_entropy.jl index 14ed4d703..7b66b39ae 100644 --- a/src/complexity_measures/reverse_dispersion_entropy.jl +++ b/src/complexity_measures/reverse_dispersion_entropy.jl @@ -1,91 +1,76 @@ export reverse_dispersion +export distance_to_whitenoise -function distance_to_whitenoise(p::Probabilities, n_classes, m; normalize = false) +# Note: this is not an entropy estimator, so we don't use the entropy_xxx_norm interface +# for normalization, even though we rely on `alphabet_length`. +""" + distance_to_whitenoise(p::Probabilities, estimator::Dispersion; normalize = false) + +Compute the distance of the probability distribution `p` from a uniform distribution, +given the parameters of `estimator` (which must be known beforehand). + +If `normalize == true`, then normalize the value to the interval `[0, 1]` by using the +parameters of `estimator`. + +Used to compute reverse dispersion entropy([`reverse_dispersion`](@ref); +Li et al., 2019[^Li2019]). + +[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new + complexity measure for sensor signal. Sensors, 19(23), 5203. +""" +function distance_to_whitenoise(p::Probabilities, est::Dispersion; normalize = false) # We can safely skip non-occurring symbols, because they don't contribute # to the sum in eq. 3 in Li et al. 
(2019) - Hrde = sum(abs2, p) - 1/(n_classes^m) + Hrde = sum(abs2, p) - (1 / alphabet_length(est)) if normalize - # The factor `f` considers *all* possible symbols (also non-occurring) - f = n_classes^m - return Hrde / (1 - (1/f)) + return Hrde / (1 - (1 / alphabet_length(est))) else return Hrde end end +# Note again: this is a *complexity measure*, not an entropy estimator, so we don't use +# the entropy_xxx_norm interface for normalization, even though we rely on `alphabet_length`. """ - reverse_dispersion(x::AbstractVector{T}, s = GaussianSymbolization(c = 5), m = 2, τ = 1, - normalize = true) - + reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion(); + normalize = true) where T <: Real Compute the reverse dispersion entropy complexity measure (Li et al., 2019)[^Li2019]. -## Algorithm +## Description Li et al. (2021)[^Li2019] defines the reverse dispersion entropy as ```math -H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2. +H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2 = +\\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\dfrac{1}{c^{m}} ``` - where the probabilities ``p_i`` are obtained precisely as for the [`Dispersion`](@ref) probability estimator. Relative frequencies of dispersion patterns are computed using the -symbolization scheme `s`, which defaults to symbolization using the normal cumulative +given `symbolization` scheme , which defaults to symbolization using the normal cumulative distribution function (NCDF), as implemented by [`GaussianSymbolization`](@ref), using embedding dimension `m` and embedding delay `τ`. Recommended parameter values[^Li2018] are `m ∈ [2, 3]`, `τ = 1` for the embedding, and -`c ∈ [3, 4, …, 8]` categories for the Gaussian mapping. If `normalize == true`, then -the reverse dispersion entropy is normalized to `[0, 1]`. +`c ∈ [3, 4, …, 8]` categories for the Gaussian mapping. + +If `normalize == true`, then the reverse dispersion entropy is normalized to `[0, 1]`. The minimum value of ``H_{rde}`` is zero and occurs precisely when the dispersion pattern distribution is flat, which occurs when all ``p_i``s are equal to ``1/c^m``. Because ``H_{rde} \\geq 0``, ``H_{rde}`` can therefore be said to be a measure of how far the dispersion pattern probability distribution is from white noise. -## Example - -```jldoctest reverse_dispersion_example; setup = :(using Entropies) -julia> x = repeat([0.5, 0.7, 0.1, -1.0, 1.11, 2.22, 4.4, 0.2, 0.2, 0.1], 10); - -julia> c, m = 3, 5; - -julia> reverse_dispersion(x, s = GaussianSymbolization(c = c), m = m, normalize = false) -0.11372331532921814 -``` - -!!! note - #### A clarification on notation - - With ambiguous notation, Li et al. claim that - - ``H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2 = \\sum_{i = 1}^{c^m} p_i^2 - \\frac{1}{c^m}.`` - - But on the right-hand side of the equality, does the constant term appear within or - outside the sum? 
Using that ``P`` is a probability distribution by - construction (in step 4) , we see that the constant must appear *outside* the sum: - - ```math - \\begin{aligned} - H_{rde} &= \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2 - = \\sum_{i=1}^{c^m} p_i^2 - \\frac{2p_i}{c^m} + \\frac{1}{c^{2m}} \\\\ - &= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\left(\\sum_i^{c^m} \\frac{2p_i}{c^m}\\right) + \\left( \\sum_{i=1}^{c^m} \\dfrac{1}{{c^{2m}}} \\right) \\\\ - &= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\left(\\frac{2}{c^m} \\sum_{i=1}^{c^m} p_i \\right) + \\dfrac{c^m}{c^{2m}} \\\\ - &= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\frac{2}{c^m} (1) + \\dfrac{1}{c^{m}} \\\\ - &= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\dfrac{1}{c^{m}}. - \\end{aligned} - ``` - [^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new complexity measure for sensor signal. Sensors, 19(23), 5203. """ -function reverse_dispersion(x::AbstractVector{T}; s = GaussianSymbolization(5), - m = 2, τ = 1, normalize = true) where T <: Real - est = Dispersion(τ = τ, m = m, s = s) +function reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion(); + normalize = true) where T <: Real + p = probabilities(x, est) # The following step combines distance information with the probabilities, so # from here on, it is not possible to use `renyi_entropy` or similar methods, because # we're not dealing with probabilities anymore. - Hrde = distance_to_whitenoise(p, s.c, m) + Hrde = distance_to_whitenoise(p, est, normalize = normalize) end diff --git a/src/entropies/entropies.jl b/src/entropies/entropies.jl index 3e655ebf2..94858b26a 100644 --- a/src/entropies/entropies.jl +++ b/src/entropies/entropies.jl @@ -3,5 +3,6 @@ include("tsallis.jl") include("shannon.jl") include("convenience_definitions.jl") include("direct_entropies/nearest_neighbors/nearest_neighbors.jl") - # TODO: What else is included here from direct entropies? + +include("normalized_entropy.jl") diff --git a/src/entropies/normalized_entropy.jl b/src/entropies/normalized_entropy.jl new file mode 100644 index 000000000..848c1c365 --- /dev/null +++ b/src/entropies/normalized_entropy.jl @@ -0,0 +1,80 @@ +export entropy_normalized + +""" + entropy_normalized(f::Function, x, est::ProbabilitiesEstimator, args...; kwargs...) + +Convenience syntax for normalizing to the entropy of uniform probability distribution. +First estimates probabilities as `p::Probabilities = f(x, est, args...; kwargs...`), +then calls `entropy_normalized(f, p, args...; kwargs...)`. + +Normalization is only defined for estimators for which [`alphabet_length`](@ref) is defined, +meaning that the total number of states or symbols is known beforehand. + + entropy_normalized(f::Function, p::Probabilities, est::ProbabilitiesEstimator, args...; + kwargs...) + +Normalize the entropy, as returned by the entropy function `f` called with the given +arguments (i.e. `f(p, args...; kwargs...)`), to the entropy of a uniform distribution, +inferring [`alphabet_length`](@ref) from `est`. + + entropy_normalized(f::Function, p::Probabilities, args...; kwargs...) + +The same as above, but infers alphabet length from counting how many elements +are in `p` (zero probabilities are counted). 
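Concretely, normalization just divides by the entropy of a uniform distribution over `alphabet_length(est)` states. A minimal sketch of that equivalence, using only functions defined in this patch and the package's existing estimators (the estimator choice and parameter values below are illustrative assumptions, not prescribed defaults):

```julia
using Entropies

est = SymbolicPermutation(m = 3)    # alphabet_length(est) == factorial(3) == 6
p = probabilities(rand(1000), est)  # ordinal-pattern probabilities of a random signal

# Manual normalization: divide by the maximum (uniform-distribution) entropy...
h = entropy_renyi(p, q = 1, base = 2)
h_manual = h / maxentropy_renyi(alphabet_length(est), base = 2)

# ...which should agree with the generic convenience function.
h_norm = entropy_normalized(entropy_renyi, p, est, q = 1, base = 2)
```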
+
+## Examples
+
+Computing normalized entropy from scratch:
+
+```julia
+x = rand(100)
+entropy_normalized(entropy_renyi, x, Dispersion())
+```
+
+Computing normalized entropy from pre-computed probabilities with known parameters:
+
+```julia
+x = rand(100)
+est = Dispersion(m = 3, symbolization = GaussianSymbolization(c = 4))
+p = probabilities(x, est)
+entropy_normalized(entropy_renyi, p, est)
+```
+
+Computing normalized entropy, assuming there are `N = 10` total states:
+
+```julia
+N = 10
+p = Probabilities(rand(N))
+entropy_normalized(entropy_renyi, p)
+```
+
+!!! note "Normalized output range"
+    For Rényi entropy (e.g. Kumar et al., 1986[^Kumar1986]), and for Tsallis entropy
+    (Tsallis, 1988[^Tsallis1998]), normalizing to the uniform distribution ensures that the
+    entropy lies in the interval `[0, 1]`. For other entropies and parameter choices, the
+    resulting entropy is not guaranteed to lie in `[0, 1]`. It is up to the user to decide
+    whether normalizing to a uniform distribution makes sense for their use case.
+
+
+[^Kumar1986]: Kumar, U., Kumar, V., & Kapur, J. N. (1986). Normalized measures of entropy.
+    International Journal Of General System, 12(1), 55-69.
+[^Tsallis1998]: Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics.
+    Journal of statistical physics, 52(1), 479-487.
+"""
+function entropy_normalized(f::Function, x, est::ProbEst, args...; kwargs...)
+    N = alphabet_length(est)
+    p_uniform = Probabilities(repeat([1/N], N))
+    return f(x, est, args...; kwargs...) / f(p_uniform, args...; kwargs...)
+end
+
+function entropy_normalized(f::Function, x::Probabilities, est::ProbEst, args...; kwargs...)
+    N = alphabet_length(est)
+    p_uniform = Probabilities(repeat([1/N], N))
+    return f(x, args...; kwargs...) / f(p_uniform; kwargs...)
+end
+
+function entropy_normalized(f::Function, x::Probabilities, args...; kwargs...)
+    N = length(x)
+    p_uniform = Probabilities(repeat([1/N], N))
+    return f(x, args...; kwargs...) / f(p_uniform; kwargs...)
+end
diff --git a/src/entropies/renyi.jl b/src/entropies/renyi.jl
index a12491e5f..fda65b130 100644
--- a/src/entropies/renyi.jl
+++ b/src/entropies/renyi.jl
@@ -1,4 +1,5 @@
 export entropy_renyi
+export maxentropy_renyi
 
 """
     entropy_renyi(p::Probabilities; q = 1.0, base = MathConstants.e)
@@ -30,7 +31,11 @@ also known as Hartley entropy), or the correlation entropy
 [^Rényi1960]: A. Rényi, _Proceedings of the fourth Berkeley Symposium on Mathematics, Statistics and Probability_, pp 547 (1960)
+[^Kumar1986]: Kumar, U., Kumar, V., & Kapur, J. N. (1986). Normalized measures of entropy.
+    International Journal Of General System, 12(1), 55-69.
 [^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948)
+
+See also: [`maxentropy_renyi`](@ref).
 """
 function entropy_renyi end
@@ -89,3 +94,14 @@ function entropy_renyi!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e
     probabilities!(p, x, est)
     entropy_renyi(p; q = q, base = base)
 end
+
+"""
+    maxentropy_renyi(N::Int; base = MathConstants.e)
+
+Convenience function that computes the maximum value of the order-`q` generalized
+Rényi entropy for an `N`-element probability distribution, i.e.
+``\\log_{base}(N)``, which is useful for normalization when `N` is known.
+
+See also [`entropy_renyi`](@ref), [`entropy_normalized`](@ref).
+
+"""
+maxentropy_renyi(N::Int; base = MathConstants.e) = log(base, N)
diff --git a/src/entropies/shannon.jl b/src/entropies/shannon.jl
index ee2a50350..a9604c567 100644
--- a/src/entropies/shannon.jl
+++ b/src/entropies/shannon.jl
@@ -1,11 +1,26 @@
 export entropy_shannon
+export maxentropy_shannon
 
 """
     entropy_shannon(args...; base = MathConstants.e)
-Equivalent to `entropy_renyi(args...; base, q = 1)` and provided solely for convenience.
-Compute the Shannon entropy, given by
+
+Equivalent to `entropy_renyi(args...; base = base, q = 1)` and provided solely for convenience.
+Computes the Shannon entropy, given by
 ```math
 H(p) = - \\sum_i p[i] \\log(p[i])
 ```
+
+See also: [`maxentropy_shannon`](@ref).
+"""
+entropy_shannon(args...; base = MathConstants.e) =
+    entropy_renyi(args...; base = base, q = 1)
+
+"""
+    maxentropy_shannon(N::Int; base = MathConstants.e)
+
+Convenience function that computes the maximum value of the Shannon entropy, i.e.
+``\\log_{base}(N)``, which is useful for normalization when `N` is known.
+
+See also [`entropy_shannon`](@ref), [`entropy_normalized`](@ref).
 """
-entropy_shannon(args...; base = MathConstants.e) = entropy_renyi(args...; base, q = 1)
+maxentropy_shannon(N::Int; base = MathConstants.e) = maxentropy_renyi(N, base = base)
diff --git a/src/entropies/tsallis.jl b/src/entropies/tsallis.jl
index 51c7765cd..cc6178528 100644
--- a/src/entropies/tsallis.jl
+++ b/src/entropies/tsallis.jl
@@ -1,20 +1,25 @@
 export entropy_tsallis
+export maxentropy_tsallis
 
 """
-    entropy_tsallis(p::Probabilities; k = 1, q = 0)
+    entropy_tsallis(p::Probabilities; k = 1, q = 0, base = MathConstants.e)
+Compute the Tsallis entropy of `p` (Tsallis, 1988)[^Tsallis1988].
 
-Compute the Tsallis entropy of `x` (Tsallis, 1998)[^Tsallis1988].
+`base` only applies in the limiting case `q == 1`, in which the Tsallis entropy reduces
+to Shannon entropy.
 
-    entropy_tsallis(x::Array_or_Dataset, est; k = 1, q = 0)
+    entropy_tsallis(x::Array_or_Dataset, est; k = 1, q = 0, base = MathConstants.e)
 
 A convenience syntax, which calls first `probabilities(x, est)`
-and then calculates the entropy of the result (and thus `est` can be anything
+and then calculates the Tsallis entropy of the result (and thus `est` can be anything
 the [`probabilities`](@ref) function accepts).
 
 ## Description
-The Tsallis entropy is a generalization of the Boltzmann–Gibbs entropy,
+
+The Tsallis entropy is a generalization of the Boltzmann-Gibbs entropy,
 with `k` standing for the Boltzmann constant. It is defined as
+
 ```math
 S_q(p) = \\frac{k}{q - 1}\\left(1 - \\sum_{i} p[i]^q\\right)
 ```
@@ -23,14 +28,40 @@ S_q(p) = \\frac{k}{q - 1}\\left(1 - \\sum_{i} p[i]^q\\right)
     Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics.
     Journal of statistical physics, 52(1), 479-487.
 """
-function entropy_tsallis(prob::Probabilities; k = 1, q = 0)
+function entropy_tsallis(prob::Probabilities; k = 1, q = 0, base = MathConstants.e)
     # As for entropy_renyi, we copy because if someone initialized Probabilities with 0s,
     # they'd probably want to index the zeros as well.
     probs = Iterators.filter(!iszero, prob.p)
-    return k/(q-1)*(1 - sum(p^q for p in probs))
+    if q ≈ 1
+        return -sum(p * log(base, p) for p in probs) # Shannon entropy
+    else
+        return k/(q-1)*(1 - sum(p^q for p in probs))
+    end
 end
 
-function entropy_tsallis(x, est; kwargs...)
+function entropy_tsallis(x::Array_or_Dataset, est; kwargs...)
     p = probabilities(x, est)
     entropy_tsallis(p; kwargs...)
-end
\ No newline at end of file
+end
+
+
+"""
+    maxentropy_tsallis(N::Int, q; k = 1, base = MathConstants.e)
+
+Convenience function that computes the maximum value of the generalized Tsallis
+entropy with parameters `q` and `k` for an `N`-element probability distribution, i.e.
+``k \\dfrac{N^{1 - q} - 1}{1 - q}``, which is useful for normalization when `N` and `q`
+are known.
+
+If `q == 1`, then `log(base, N)` is returned.
+
+See also [`entropy_tsallis`](@ref), [`entropy_normalized`](@ref),
+[`maxentropy_renyi`](@ref).
+"""
+function maxentropy_tsallis(N::Int, q; k = 1, base = MathConstants.e)
+    if q ≈ 1.0
+        return log(base, N)
+    else
+        return k*(N^(1 - q) - 1) / (1 - q)
+    end
+end
diff --git a/src/probabilities_estimators/dispersion/dispersion.jl b/src/probabilities_estimators/dispersion/dispersion.jl
index 9a07a812d..45d652584 100644
--- a/src/probabilities_estimators/dispersion/dispersion.jl
+++ b/src/probabilities_estimators/dispersion/dispersion.jl
@@ -4,8 +4,8 @@ using StatsBase
 export Dispersion
 
 """
-    Dispersion(; s = GaussianSymbolization(c = 5), m = 2, τ = 1, check_unique = true,
-        normalize = true)
+    Dispersion(; symbolization = GaussianSymbolization(c = 5), m = 2, τ = 1,
+        check_unique = true)
 
 A probability estimator based on dispersion patterns, originally used by
 Rostaghi & Azami, 2016[^Rostaghi2016] to compute the "dispersion entropy", which
@@ -14,17 +14,17 @@ characterizes the complexity and irregularity of a time series.
 Relative frequencies of dispersion patterns are computed using the symbolization scheme
 `s` with embedding dimension `m` and embedding delay `τ`. Recommended parameter
 values[^Li2018] are `m ∈ [2, 3]`, `τ = 1` for the embedding, and `c ∈ [3, 4, …, 8]`
-categories for the Gaussian symbol mapping. See below for an in-depth description of
-dispersion pattern construction and usage.
+categories for the Gaussian symbol mapping.
 
-# Algorithm
+## Description
 
 Assume we have a univariate time series ``X = \\{x_i\\}_{i=1}^N``. First, this time series
-is symbolized using `s`, which default to [`GaussianSymbolization`](@ref), which uses the
-normal cumulative distribution function (CDF) for symbolization. Other choices of CDFs are
-also possible, but Entropies.jl currently only implements [`GaussianSymbolization`](@ref),
-which was used in Rostaghi & Azami (2016). This step results in an integer-valued symbol
-time series ``S = \\{ s_i \\}_{i=1}^N``, where ``s_i \\in [1, 2, \\ldots, c]``.
+is symbolized using `symbolization`, which defaults to [`GaussianSymbolization`](@ref),
+which uses the normal cumulative distribution function (CDF) for symbolization.
+Other choices of CDFs are also possible, but Entropies.jl currently only implements
+[`GaussianSymbolization`](@ref), which was used in Rostaghi & Azami (2016). This step
+results in an integer-valued symbol time series ``S = \\{ s_i \\}_{i=1}^N``, where
+``s_i \\in [1, 2, \\ldots, c]``.
 
 Next, the symbol time series ``S`` is embedded into an ``m``-dimensional time series,
 using an embedding lag of ``\\tau = 1``, which yields a
@@ -33,47 +33,21 @@ take on ``c`` different values, and each embedding point has ``m`` values, there
 are ``c^m`` possible dispersion patterns. This number is used for normalization when
 computing dispersion entropy.
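As a quick sanity check of that count, [`alphabet_length`](@ref) returns exactly ``c^m`` for a `Dispersion` estimator; a small sketch with illustrative (not recommended-default) parameters:

```julia
using Entropies

# c = 3 symbols and embedding dimension m = 2 give c^m = 9 possible patterns.
est = Dispersion(symbolization = GaussianSymbolization(c = 3), m = 2, τ = 1)
alphabet_length(est) == 3^2  # true
```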
-## Computing dispersion probabilities and entropy +### Computing dispersion probabilities and entropy A probability distribution ``P = \\{p_i \\}_{i=1}^{c^m}``, where ``\\sum_i^{c^m} p_i = 1``, can then be estimated by counting and sum-normalising the distribution of dispersion patterns among the embedding vectors. - -```jldoctest dispersion_example; setup = :(using Entropies) -julia> x = repeat([0.5, 0.7, 0.1, -1.0, 1.11, 2.22, 4.4, 0.2, 0.2, 0.1], 10); - -julia> c, m = 3, 5; - -julia> est = Dispersion(s = GaussianSymbolization(c = c), m = m); - -julia> probs = probabilities(x, est) -9-element Probabilities{Float64}: - 0.09375000000000001 - 0.10416666666666669 - 0.18750000000000003 - 0.09375000000000001 - 0.10416666666666669 - 0.10416666666666669 - 0.10416666666666669 - 0.10416666666666669 - 0.10416666666666669 -``` - Note that dispersion patterns that are not present are not counted. Therefore, you'll always get non-zero probabilities using the `Dispersion` probability estimator. -To compute (normalized) dispersion entropy of order `q` to a given `base` on the +To compute dispersion entropy of order `q` to a given `base` on the univariate input time series `x`, do: -```jldoctest dispersion_example -julia> entropy_renyi(x, Dispersion(normalize = true), base = 2, q = 1) -0.6716889280681666 +```julia +entropy_renyi(x, Dispersion(), base = 2, q = 1) ``` -If `normalize == true`, then when used in combination with [`entropy_renyi`](@ref), -the dispersion entropy is normalized to `[0, 1]`. Normalization is only -defined when `q == 1`. - ## Data requirements and parameters The input must have more than one unique element for the Gaussian mapping to be @@ -85,9 +59,7 @@ unique element, then a `InexactError` is thrown when trying to compute probabili See also: [`entropy_dispersion`](@ref), [`GaussianSymbolization`](@ref). - -!!! note - ## Why "dispersion patterns"? +!!! note "Why 'dispersion patterns'?" Each embedding vector is called a "dispersion pattern". Why? Let's consider the case when ``m = 5`` and ``c = 3``, and use some very imprecise terminology for illustration: @@ -103,18 +75,17 @@ See also: [`entropy_dispersion`](@ref), [`GaussianSymbolization`](@ref). [^Rostaghi2016]: Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series analysis. IEEE Signal Processing Letters, 23(5), 610-614. [^Li2018]: Li, G., Guan, Q., & Yang, H. (2018). Noise reduction method of underwater acoustic signals based on CEEMDAN, effort-to-compress complexity, refined composite multiscale dispersion entropy and wavelet threshold denoising. Entropy, 21(1), 11. 
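A plain usage sketch of the updated keyword interface (not a doctest — no outputs are claimed, and the parameter values are illustrative):

```julia
using Entropies

x = repeat([0.5, 0.7, 0.1, -1.0, 1.11, 2.22, 4.4, 0.2, 0.2, 0.1], 10)
est = Dispersion(symbolization = GaussianSymbolization(c = 3), m = 2, τ = 1)

# Probabilities of the dispersion patterns that actually occur in `x`.
p = probabilities(x, est)

# Dispersion entropy, and its normalized counterpart (see `entropy_normalized`).
h = entropy_renyi(x, est, base = 2, q = 1)
h_norm = entropy_normalized(entropy_renyi, x, est, base = 2, q = 1)

# The related reverse dispersion entropy complexity measure.
rde = reverse_dispersion(x, est, normalize = true)
```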
""" -Base.@kwdef struct Dispersion <: ProbabilitiesEstimator - s = GaussianSymbolization(c = 5) - m = 2 - τ = 1 - check_unique = false - normalize = true +Base.@kwdef struct Dispersion{S <: SymbolizationScheme} <: ProbabilitiesEstimator + symbolization::S = GaussianSymbolization(c = 5) + m::Int = 2 + τ::Int = 1 + check_unique::Bool = false end export entropy_dispersion """ - embed_symbols(s::AbstractVector{T}, m, τ) {where T} → Dataset{m, T} + embed_symbols(symbols::AbstractVector{T}, m, τ) {where T} → Dataset{m, T} From the symbols `sᵢ ∈ s`, create the embedding vectors (with dimension `m` and lag `τ`): @@ -137,10 +108,10 @@ function probabilities(x::AbstractVector, est::Dispersion) if length(unique(x)) == 1 symbols = repeat([1], length(x)) else - symbols = symbolize(x, est.s) + symbols = symbolize(x, est.symbolization) end else - symbols = symbolize(x, est.s) + symbols = symbolize(x, est.symbolization) end N = length(x) @@ -152,21 +123,4 @@ function probabilities(x::AbstractVector, est::Dispersion) p = Probabilities(hist) end -function entropy_renyi(x::AbstractVector, est::Dispersion; q = 1, base = MathConstants.e) - p = probabilities(x, est) - dh = entropy_renyi(p, q = q, base = base) - - n, m = est.s.c, est.m - - if est.normalize - # TODO: is is possible to normalize for general order `q`? Need to have a literature - # dive or figure it out manually. - if q == 1 - return dh / log(base, n^m) - else - throw(ArgumentError("Normalization is not well defined when q != 1.")) - end - else - return dh - end -end +alphabet_length(est::Dispersion)::Int = est.symbolization.c ^ est.m diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl index fc5987c3e..9bee968fa 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl @@ -45,3 +45,5 @@ function probabilities(x::AbstractVector{T}, est::SymbolicAmplitudeAwarePermutat p = symprobs(πs, wts, normalize = true) Probabilities(p) end + +alphabet_length(est::SymbolicAmplitudeAwarePermutation)::Int = factorial(est.m) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl index b77956d46..9caa7f21e 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl @@ -285,3 +285,5 @@ function entropy_renyi!( ps = probabilities!(s, x, est) entropy_renyi(ps, α = α, base = base) end + +alphabet_length(est::SymbolicPermutation)::Int = factorial(est.m) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl index 4694fcee7..d7fcd4f73 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl @@ -39,3 +39,5 @@ function probabilities(x::AbstractVector{T}, est::SymbolicWeightedPermutation) w Probabilities(symprobs(πs, wts, normalize = true)) end + +alphabet_length(est::SymbolicWeightedPermutation)::Int = factorial(est.m) diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl index 215d5980b..4f3cc8879 100644 --- 
a/src/probabilities_estimators/probabilities_estimators.jl +++ b/src/probabilities_estimators/probabilities_estimators.jl @@ -5,3 +5,5 @@ include("kernel_density.jl") include("timescales/timescales.jl") include("transfer_operator/transfer_operator.jl") include("dispersion/dispersion.jl") + +include("utils.jl") diff --git a/src/probabilities_estimators/utils.jl b/src/probabilities_estimators/utils.jl new file mode 100644 index 000000000..b0927a2d5 --- /dev/null +++ b/src/probabilities_estimators/utils.jl @@ -0,0 +1,21 @@ +export alphabet_length + +""" + alphabet_length(estimator) → Int + +Returns the total number of possible symbols/states implied by `estimator`. +If the total number of states cannot be known a priori, an error is thrown. +Primarily used for normalization of entropy values computed using symbolic estimators. + +## Examples + +```jldoctest setup = :(using Entropies) +julia> est = SymbolicPermutation(m = 4) +SymbolicPermutation{typeof(Entropies.isless_rand)}(1, 4, Entropies.isless_rand) + +julia> alphabet_length(est) +24 +``` +""" +alphabet_length(est) = + throw(error("alphabet_length not implemented for estimator of type $(typeof(est))")) diff --git a/src/symbolization/GaussianSymbolization.jl b/src/symbolization/GaussianSymbolization.jl index 416728634..6ceb4d11b 100644 --- a/src/symbolization/GaussianSymbolization.jl +++ b/src/symbolization/GaussianSymbolization.jl @@ -49,7 +49,7 @@ julia> Entropies.symbolize(x, GaussianSymbolization(c = 5)) See also: [`symbolize`](@ref). """ -Base.@kwdef struct GaussianSymbolization{I <: Integer} +Base.@kwdef struct GaussianSymbolization{I <: Integer} <: SymbolizationScheme c::I = 3 end diff --git a/test/dispersion.jl b/test/dispersion.jl index b99cc8b88..ed7f2d71d 100644 --- a/test/dispersion.jl +++ b/test/dispersion.jl @@ -29,8 +29,7 @@ using Entropies, Test # We here start from pre-computed symbols `s`. x = [0.82,0.75,0.21,0.94,0.52,0.05,0.241,0.75,0.35,0.43,0.11,0.87] m, n_classes = 2, 3 - est = Dispersion(m = m, s = GaussianSymbolization(n_classes), normalize = false) - est_norm = Dispersion(m = m, s = GaussianSymbolization(n_classes), normalize = true) + est = Dispersion(m = m, symbolization = GaussianSymbolization(c = n_classes)) # Take only the non-zero probabilities from the paper (in `dispersion_histogram`, # we don't count zero-probability bins, so eliminate zeros for comparison). @@ -50,34 +49,33 @@ using Entropies, Test # slightly from the paper. They get normalized DE of 0.85, but we get 0.84. 0.85 is # the normalized DE you'd get by manually normalizing the (erroneous) value from # their previous step. - res_norm = entropy_renyi(x, est_norm, base = MathConstants.e, q = 1) + res_norm = entropy_normalized(entropy_renyi, x, est, base = MathConstants.e, q = 1) @test round(res_norm, digits = 2) == 0.84 - - # Only defined for q = 1. There's potential in expanding the definition for q != 1, - # but that requires careful thinking about the normalization step. A small future - # paper on generalized dispersion entropy, perhaps? - @test_throws ArgumentError entropy_renyi(x, Dispersion(normalize = true), q = 2) end @testset "Reverse dispersion entropy" begin - # Reverse dispersion entropy is 0 when all probabilities are identical and equal - # to 1/(n_classes^m). 
- m, n_classes = 2, 2 - flat_dist = Probabilities(repeat([1/m^n_classes], m^n_classes)) - Hrde_minimal = Entropies.distance_to_whitenoise(flat_dist, n_classes, m, - normalize = false) - @test round(Hrde_minimal, digits = 7) ≈ 0.0 - # Reverse dispersion entropy is maximal when there is only one non-zero dispersal - # pattern. Then reverse dispersion entropy is - # 1 - 1/(n_classes^m). When normalizing to this value, the RDE should be 1.0. - m, n_classes = 2, 2 - single_element_dist = Probabilities([1.0, 0.0, 0.0, 0.0]) - Hrde_maximal = Entropies.distance_to_whitenoise(single_element_dist, n_classes, m, - normalize = false) - Hrde_maximal_norm = Entropies.distance_to_whitenoise(single_element_dist, n_classes, m, - normalize = true) - @test round(Hrde_maximal, digits = 7) ≈ 1 - 1/(n_classes^m) - @test round(Hrde_maximal_norm, digits = 7) ≈ 1.0 + @testset "Distance to whitenoise" begin + m, n_classes = 2, 2 + est = Dispersion(m = m, symbolization = GaussianSymbolization(c = n_classes)) + + # Reverse dispersion entropy is 0 when all probabilities are identical and equal + # to 1/(n_classes^m). + flat_dist = Probabilities(repeat([1/m^n_classes], m^n_classes)) + Hrde_minimal = distance_to_whitenoise(flat_dist, est, normalize = false) + @test round(Hrde_minimal, digits = 7) ≈ 0.0 + + # Reverse dispersion entropy is maximal when there is only one non-zero dispersal + # pattern. Then reverse dispersion entropy is + # 1 - 1/(n_classes^m). When normalizing to this value, the RDE should be 1.0. + m, n_classes = 2, 2 + single_element_dist = Probabilities([1.0, 0.0, 0.0, 0.0]) + Hrde_maximal = distance_to_whitenoise(single_element_dist, est, + normalize = false) + Hrde_maximal_norm = distance_to_whitenoise(single_element_dist, est, + normalize = true) + @test round(Hrde_maximal, digits = 7) ≈ 1 - 1/(n_classes^m) + @test round(Hrde_maximal_norm, digits = 7) ≈ 1.0 + end end end diff --git a/test/runtests.jl b/test/runtests.jl index 9e1fa2dbe..f43235f40 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -336,6 +336,34 @@ end @testset "Tsallis" begin p = Probabilities(repeat([1/5], 5)) @assert round(entropy_tsallis(p, q = -1/2, k = 1), digits = 2) ≈ 6.79 + + # Analytical tests from Tsallis (1998) + # ----------------------------------- + # Probability distribution has only one element + @test entropy_tsallis(Probabilities([1.0])) ≈ 0.0 + + # One and only one probability of 1.0 => entropy → 0. + @test entropy_tsallis(Probabilities([1.0, 0.0, 0.0, 0.0])) ≈ 0.0 + + # Uniform distribution maximizes Tsallis entropy, which then equals log(N), + # where N is the number of states. Then the entropy attains its maximum value + # (N^(1 - q) - 1) / (1 - q) + N = 4 + ps = Probabilities(repeat([1/N], N)) + + q_cases = [-2.0, -0.5, 0.5, 2.0] + t_entropies = [entropy_tsallis(ps, q = q) for q in q_cases] + maxvals = [maxentropy_tsallis(N, q) for q in q_cases] + @test all(t_entropies .≈ maxvals) + + q_cases = [-2.0, -0.5, 0.5, 2.0] + k = 2 + t_entropies = [entropy_tsallis(ps, q = q, k = 2) for q in q_cases] + maxvals = [maxentropy_tsallis(N, q, k = 2) for q in q_cases] + @test all(t_entropies .≈ maxvals) + + # Reduces to Shannon entropy for q → 1.0 + @test entropy_tsallis(ps, q = 1.0, base = 2) ≈ log(2, N) end end
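The relationship exercised by these tests can also be spelled out directly. The following sketch is not part of the test file; it only assumes the functions introduced in this diff, with arbitrary `N` and `q`:

```julia
using Entropies

N, q = 4, 2.0
ps = Probabilities(repeat([1/N], N))

# For a uniform distribution the Tsallis entropy equals its analytical maximum,
# so both the manual and the generic normalization should return 1.
h_norm_manual = entropy_tsallis(ps, q = q) / maxentropy_tsallis(N, q)
h_norm = entropy_normalized(entropy_tsallis, ps, q = q)
```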