Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 28 additions & 22 deletions Sources/StatKit/Descriptive Statistics/Quantile.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@ public extension Collection {
/// Since quantiles have no meaning on empty collections or probabilities outside of range [0, 1],
/// this method returns a NaN for calls under any such conditions.
///
/// - important: This method has undefined behavior on collections containing elements that are incomparable.
/// For example, an array containing NaN's will produce unpredictable results.
/// Collections containing NaN values will also produce a NaN result.
///
/// - complexity: O(n log n), where n is the length of the collection.
/// Infinite values are handled correctly as long as the chosen estimation method does not require
/// arithmetic between conflicting infinities (e.g. `+∞ − ∞`).
/// If such arithmetic is unavoidable for the requested probability and method, this method returns NaN.
///
/// - important: This method has undefined behavior on collections containing elements that are incomparable
/// for reasons other than NaN (e.g. custom types with a broken `Comparable` implementation).
func quantile<T: Comparable & ConvertibleToReal>(
probability: Double,
of variable: KeyPath<Element, T>,
Expand All @@ -23,50 +27,52 @@ public extension Collection {
guard
probability.isFinite,
0 <= probability,
probability <= 1
probability <= 1,
!self.isEmpty,
!self.contains(where: { $0[keyPath: variable].realValue.isNaN })
else { return .signalingNaN }

let ordered = self.sorted { lhs, rhs in
lhs[keyPath: variable] < rhs[keyPath: variable]
}

guard !ordered.isEmpty else { return .signalingNaN }

if probability == 1 {
guard let element = ordered.last else {
fatalError("Could not fetch last element of Sequence.")
fatalError("Could not fetch greatest element of collection.")
}
return element[keyPath: variable].realValue
}

if probability == 0 {
guard let element = ordered.first else {
fatalError("Could not fetch first element of Sequence.")
fatalError("Could not fetch smallest element of collection.")
}
return element[keyPath: variable].realValue
}

switch method {
case .inverseEmpiricalCDF:
let h = Double(ordered.count) * probability + 0.5
let index = Int((h - 0.5).rounded(.up)) - 1
let h = Double(ordered.count - 1) * probability
let index = Int(h.rounded(.up))
return ordered[index][keyPath: variable].realValue

case .averagedInverseEmpiricalCDF:
let h = Double(ordered.count) * probability + 0.5
let firstIndex = Int((h - 0.5).rounded(.up)) - 1
let secondIndex = Int((h + 0.5).rounded(.down)) - 1
return ordered[firstIndex...secondIndex].mean(variable: variable)
let h = Double(ordered.count - 1) * probability
let firstIndex = Int(h.rounded(.down))
let secondIndex = Int(h.rounded(.up))
let firstElement = ordered[firstIndex][keyPath: variable]
let secondElement = ordered[secondIndex][keyPath: variable]
return (firstElement.realValue + secondElement.realValue) / 2

case .closestOrOddIndexed:
let h = Double(ordered.count) * probability
let index = Int(h.rounded(.toNearestOrEven)) - 1
case .closest:
let h = Double(ordered.count - 1) * probability
let index = Int(h.rounded(.toNearestOrEven))
return ordered[index][keyPath: variable].realValue

case .lerpInverseEmpiricalCDF:
let h = Double(ordered.count) * probability
let firstIndex = Int(h.rounded(.down)) - 1
let secondIndex = Int(h.rounded(.up)) - 1
let h = Double(ordered.count - 1) * probability
let firstIndex = Int(h.rounded(.down))
let secondIndex = Int(h.rounded(.up))
let firstElement = ordered[firstIndex][keyPath: variable]
let secondElement = ordered[secondIndex][keyPath: variable]
let difference = secondElement - firstElement
Expand All @@ -84,8 +90,8 @@ public enum QuantileEstimationMethod: CaseIterable, Sendable {
case averagedInverseEmpiricalCDF
/// Computes the quantile by rounding to the closest observation.
///
/// In case of a tie, the odd index element will be chosen.
case closestOrOddIndexed
/// In case of a tie, the even index element will be chosen.
case closest
/// Computes the quantile usign the inverse empirical CDF, and linearly interpolates at discontinuities.
case lerpInverseEmpiricalCDF
}
126 changes: 109 additions & 17 deletions Tests/StatKitTests/Descriptive Statistics Tests/QuantileTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,66 @@ struct QuantileTests {
@Test(
"Valid data returns correct quantile",
arguments: [
([1, 4, 2, 6, 4, 3], 1, .inverseEmpiricalCDF, 6),
([1, 4, 2, 6, 4, 3], 0, .inverseEmpiricalCDF, 1),
([1, 4, 2, 6, 4, 3], 1, .averagedInverseEmpiricalCDF, 6),
([1, 4, 2, 6, 4, 3], 0, .averagedInverseEmpiricalCDF, 1),
([1, 4, 2, 6, 4, 3], 1, .closestOrOddIndexed, 6),
([1, 4, 2, 6, 4, 3], 0, .closestOrOddIndexed, 1),
([1, 4, 2, 6, 4, 3], 1, .lerpInverseEmpiricalCDF, 6),
([1, 4, 2, 6, 4, 3], 0, .lerpInverseEmpiricalCDF, 1),
([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .inverseEmpiricalCDF, 5),
([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .inverseEmpiricalCDF, 5),
([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .averagedInverseEmpiricalCDF, 5.5),
([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .averagedInverseEmpiricalCDF, 5),
([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .closestOrOddIndexed, 5),
([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .closestOrOddIndexed, 4),
([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .lerpInverseEmpiricalCDF, 5),
([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .lerpInverseEmpiricalCDF, 4.5),
([1, 4, 2, 6, 4, 3], 0, .inverseEmpiricalCDF, 1),
([1, 4, 2, 6, 4, 3], 0.25, .inverseEmpiricalCDF, 3),
([1, 4, 2, 6, 4, 3], 0.5, .inverseEmpiricalCDF, 4),
([1, 4, 2, 6, 4, 3], 0.75, .inverseEmpiricalCDF, 4),
([1, 4, 2, 6, 4, 3], 1, .inverseEmpiricalCDF, 6),
([1, 4, 2, 6, 4, 3], 0, .averagedInverseEmpiricalCDF, 1),
([1, 4, 2, 6, 4, 3], 0.25, .averagedInverseEmpiricalCDF, 2.5),
([1, 4, 2, 6, 4, 3], 0.5, .averagedInverseEmpiricalCDF, 3.5),
([1, 4, 2, 6, 4, 3], 0.75, .averagedInverseEmpiricalCDF, 4),
([1, 4, 2, 6, 4, 3], 1, .averagedInverseEmpiricalCDF, 6),
([1, 4, 2, 6, 4, 3], 0, .closest, 1),
([1, 4, 2, 6, 4, 3], 0.25, .closest, 2),
([1, 4, 2, 6, 4, 3], 0.5, .closest, 3),
([1, 4, 2, 6, 4, 3], 0.75, .closest, 4),
([1, 4, 2, 6, 4, 3], 1, .closest, 6),
([1, 4, 2, 6, 4, 3], 0, .lerpInverseEmpiricalCDF, 1),
([1, 4, 2, 6, 4, 3], 0.25, .lerpInverseEmpiricalCDF, 2.25),
([1, 4, 2, 6, 4, 3], 0.5, .lerpInverseEmpiricalCDF, 3.5),
([1, 4, 2, 6, 4, 3], 0.75, .lerpInverseEmpiricalCDF, 4),
([1, 4, 2, 6, 4, 3], 1, .lerpInverseEmpiricalCDF, 6),
([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .inverseEmpiricalCDF, 6),
([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .inverseEmpiricalCDF, 5),
([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .averagedInverseEmpiricalCDF, 5.5),
([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .averagedInverseEmpiricalCDF, 5),
([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .closest, 5),
([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .closest, 5),
([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .lerpInverseEmpiricalCDF, 5.5),
([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .lerpInverseEmpiricalCDF, 5),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .inverseEmpiricalCDF, 6),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .inverseEmpiricalCDF, 11),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .inverseEmpiricalCDF, 16),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .averagedInverseEmpiricalCDF, 5.5),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .averagedInverseEmpiricalCDF, 10.5),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .averagedInverseEmpiricalCDF, 15.5),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .closest, 6),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .closest, 11),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .closest, 15),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .lerpInverseEmpiricalCDF, 5.75),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .lerpInverseEmpiricalCDF, 10.5),
([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .lerpInverseEmpiricalCDF, 15.25),
([1, 2], 0, .inverseEmpiricalCDF, 1),
([1, 2], 1, .inverseEmpiricalCDF, 2),
([1, 2], 0.25, .inverseEmpiricalCDF, 2),
([1, 2], 0.75, .inverseEmpiricalCDF, 2),
([1, 2], 0, .averagedInverseEmpiricalCDF, 1),
([1, 2], 1, .averagedInverseEmpiricalCDF, 2),
([1, 2], 0.25, .averagedInverseEmpiricalCDF, 1.5),
([1, 2], 0.75, .averagedInverseEmpiricalCDF, 1.5),
([1, 2], 0, .closest, 1),
([1, 2], 1, .closest, 2),
([1, 2], 0.25, .closest, 1),
([1, 2], 0.75, .closest, 2),
([1, 2], 0, .lerpInverseEmpiricalCDF, 1),
([1, 2], 1, .lerpInverseEmpiricalCDF, 2),
([1, 2], 0.25, .lerpInverseEmpiricalCDF, 1.25),
([1, 2], 0.75, .lerpInverseEmpiricalCDF, 1.75),
([5], 0.5, .inverseEmpiricalCDF, 5),
([5], 0.5, .averagedInverseEmpiricalCDF, 5),
([5], 0.5, .closest, 5),
([5], 0.5, .lerpInverseEmpiricalCDF, 5),
] as [([Int], Double, QuantileEstimationMethod, Double)]
)
func validData(data: [Int], probability: Double, method: QuantileEstimationMethod, expectedQuantile: Double) async {
Expand All @@ -33,11 +77,59 @@ struct QuantileTests {
arguments: [
([], .inverseEmpiricalCDF),
([], .averagedInverseEmpiricalCDF),
([], .closestOrOddIndexed),
([], .closest),
([], .lerpInverseEmpiricalCDF),
] as [([Int], QuantileEstimationMethod)]
)
func invalidData(data: [Int], method: QuantileEstimationMethod) async {
#expect(data.quantile(probability: 1, of: \.self).isNaN)
}

@Test(
"Collection with NaN values returns NaN",
arguments: [
([.nan, 1.0, 2.0], 0.5, .inverseEmpiricalCDF),
([1.0, .nan, 2.0], 0.5, .inverseEmpiricalCDF),
([1.0, 2.0, .nan], 0.5, .inverseEmpiricalCDF),
([.nan, 1.0, 2.0], 0.5, .averagedInverseEmpiricalCDF),
([.nan, 1.0, 2.0], 0.5, .closest),
([.nan, 1.0, 2.0], 0.5, .lerpInverseEmpiricalCDF),
([.nan, 1.0, 2.0], 0.0, .inverseEmpiricalCDF),
([.nan, 1.0, 2.0], 1.0, .inverseEmpiricalCDF),
] as [([Double], Double, QuantileEstimationMethod)]
)
func nanInCollection(data: [Double], probability: Double, method: QuantileEstimationMethod) async {
#expect(data.quantile(probability: probability, of: \.self, method: method).isNaN)
}

@Test(
"Infinity values without conflicting arithmetic return the correct quantile",
arguments: [
([1.0, 2.0, .infinity], 0.75, .inverseEmpiricalCDF, Double.infinity),
([1.0, .infinity], 0.5, .inverseEmpiricalCDF, Double.infinity),
([-.infinity, 1.0, 2.0], 0.0, .inverseEmpiricalCDF, -.infinity),
([1.0, 2.0, .infinity], 1.0, .inverseEmpiricalCDF, Double.infinity),
([1.0, .infinity], 0.75, .closest, Double.infinity),
([-.infinity, 1.0], 0.25, .closest, -.infinity),
([1.0, .infinity], 0.5, .averagedInverseEmpiricalCDF, Double.infinity),
([-.infinity, 1.0], 0.5, .averagedInverseEmpiricalCDF, -.infinity),
([1.0, .infinity], 0.5, .lerpInverseEmpiricalCDF, Double.infinity),
] as [([Double], Double, QuantileEstimationMethod, Double)]
)
func infinityWellBehaved(data: [Double], probability: Double, method: QuantileEstimationMethod, expectedQuantile: Double) async {
let result = data.quantile(probability: probability, of: \.self, method: method)
#expect(result == expectedQuantile)
}

@Test(
"Conflicting infinities under arithmetic return NaN",
arguments: [
([-.infinity, .infinity], 0.5, .averagedInverseEmpiricalCDF),
([-.infinity, 1.0], 0.5, .lerpInverseEmpiricalCDF),
([-.infinity, .infinity], 0.5, .lerpInverseEmpiricalCDF),
] as [([Double], Double, QuantileEstimationMethod)]
)
func conflictingInfinitiesReturnNaN(data: [Double], probability: Double, method: QuantileEstimationMethod) async {
#expect(data.quantile(probability: probability, of: \.self, method: method).isNaN)
}
}
Loading