diff --git a/Sources/StatKit/Descriptive Statistics/Quantile.swift b/Sources/StatKit/Descriptive Statistics/Quantile.swift index e508f92..7f16b1e 100644 --- a/Sources/StatKit/Descriptive Statistics/Quantile.swift +++ b/Sources/StatKit/Descriptive Statistics/Quantile.swift @@ -10,10 +10,14 @@ public extension Collection { /// Since quantiles have no meaning on empty collections or probabilities outside of range [0, 1], /// this method returns a NaN for calls under any such conditions. /// - /// - important: This method has undefined behavior on collections containing elements that are incomparable. - /// For example, an array containing NaN's will produce unpredictable results. + /// Collections containing NaN values will also produce a NaN result. /// - /// - complexity: O(n log n), where n is the length of the collection. + /// Infinite values are handled correctly as long as the chosen estimation method does not require + /// arithmetic between conflicting infinities (e.g. `+∞ − ∞`). + /// If such arithmetic is unavoidable for the requested probability and method, this method returns NaN. + /// + /// - important: This method has undefined behavior on collections containing elements that are incomparable + /// for reasons other than NaN (e.g. custom types with a broken `Comparable` implementation). func quantile( probability: Double, of variable: KeyPath, @@ -23,50 +27,52 @@ public extension Collection { guard probability.isFinite, 0 <= probability, - probability <= 1 + probability <= 1, + !self.isEmpty, + !self.contains(where: { $0[keyPath: variable].realValue.isNaN }) else { return .signalingNaN } let ordered = self.sorted { lhs, rhs in lhs[keyPath: variable] < rhs[keyPath: variable] } - guard !ordered.isEmpty else { return .signalingNaN } - if probability == 1 { guard let element = ordered.last else { - fatalError("Could not fetch last element of Sequence.") + fatalError("Could not fetch greatest element of collection.") } return element[keyPath: variable].realValue } if probability == 0 { guard let element = ordered.first else { - fatalError("Could not fetch first element of Sequence.") + fatalError("Could not fetch smallest element of collection.") } return element[keyPath: variable].realValue } switch method { case .inverseEmpiricalCDF: - let h = Double(ordered.count) * probability + 0.5 - let index = Int((h - 0.5).rounded(.up)) - 1 + let h = Double(ordered.count - 1) * probability + let index = Int(h.rounded(.up)) return ordered[index][keyPath: variable].realValue case .averagedInverseEmpiricalCDF: - let h = Double(ordered.count) * probability + 0.5 - let firstIndex = Int((h - 0.5).rounded(.up)) - 1 - let secondIndex = Int((h + 0.5).rounded(.down)) - 1 - return ordered[firstIndex...secondIndex].mean(variable: variable) + let h = Double(ordered.count - 1) * probability + let firstIndex = Int(h.rounded(.down)) + let secondIndex = Int(h.rounded(.up)) + let firstElement = ordered[firstIndex][keyPath: variable] + let secondElement = ordered[secondIndex][keyPath: variable] + return (firstElement.realValue + secondElement.realValue) / 2 - case .closestOrOddIndexed: - let h = Double(ordered.count) * probability - let index = Int(h.rounded(.toNearestOrEven)) - 1 + case .closest: + let h = Double(ordered.count - 1) * probability + let index = Int(h.rounded(.toNearestOrEven)) return ordered[index][keyPath: variable].realValue case .lerpInverseEmpiricalCDF: - let h = Double(ordered.count) * probability - let firstIndex = Int(h.rounded(.down)) - 1 - let secondIndex = Int(h.rounded(.up)) - 1 + let h = Double(ordered.count - 1) * probability + let firstIndex = Int(h.rounded(.down)) + let secondIndex = Int(h.rounded(.up)) let firstElement = ordered[firstIndex][keyPath: variable] let secondElement = ordered[secondIndex][keyPath: variable] let difference = secondElement - firstElement @@ -84,8 +90,8 @@ public enum QuantileEstimationMethod: CaseIterable, Sendable { case averagedInverseEmpiricalCDF /// Computes the quantile by rounding to the closest observation. /// - /// In case of a tie, the odd index element will be chosen. - case closestOrOddIndexed + /// In case of a tie, the even index element will be chosen. + case closest /// Computes the quantile usign the inverse empirical CDF, and linearly interpolates at discontinuities. case lerpInverseEmpiricalCDF } diff --git a/Tests/StatKitTests/Descriptive Statistics Tests/QuantileTests.swift b/Tests/StatKitTests/Descriptive Statistics Tests/QuantileTests.swift index 0832916..9009bc6 100644 --- a/Tests/StatKitTests/Descriptive Statistics Tests/QuantileTests.swift +++ b/Tests/StatKitTests/Descriptive Statistics Tests/QuantileTests.swift @@ -6,22 +6,66 @@ struct QuantileTests { @Test( "Valid data returns correct quantile", arguments: [ - ([1, 4, 2, 6, 4, 3], 1, .inverseEmpiricalCDF, 6), - ([1, 4, 2, 6, 4, 3], 0, .inverseEmpiricalCDF, 1), - ([1, 4, 2, 6, 4, 3], 1, .averagedInverseEmpiricalCDF, 6), - ([1, 4, 2, 6, 4, 3], 0, .averagedInverseEmpiricalCDF, 1), - ([1, 4, 2, 6, 4, 3], 1, .closestOrOddIndexed, 6), - ([1, 4, 2, 6, 4, 3], 0, .closestOrOddIndexed, 1), - ([1, 4, 2, 6, 4, 3], 1, .lerpInverseEmpiricalCDF, 6), - ([1, 4, 2, 6, 4, 3], 0, .lerpInverseEmpiricalCDF, 1), - ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .inverseEmpiricalCDF, 5), - ([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .inverseEmpiricalCDF, 5), - ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .averagedInverseEmpiricalCDF, 5.5), - ([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .averagedInverseEmpiricalCDF, 5), - ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .closestOrOddIndexed, 5), - ([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .closestOrOddIndexed, 4), - ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.5, .lerpInverseEmpiricalCDF, 5), - ([1, 2, 3, 4, 5, 6, 7, 8, 9], 0.5, .lerpInverseEmpiricalCDF, 4.5), + ([1, 4, 2, 6, 4, 3], 0, .inverseEmpiricalCDF, 1), + ([1, 4, 2, 6, 4, 3], 0.25, .inverseEmpiricalCDF, 3), + ([1, 4, 2, 6, 4, 3], 0.5, .inverseEmpiricalCDF, 4), + ([1, 4, 2, 6, 4, 3], 0.75, .inverseEmpiricalCDF, 4), + ([1, 4, 2, 6, 4, 3], 1, .inverseEmpiricalCDF, 6), + ([1, 4, 2, 6, 4, 3], 0, .averagedInverseEmpiricalCDF, 1), + ([1, 4, 2, 6, 4, 3], 0.25, .averagedInverseEmpiricalCDF, 2.5), + ([1, 4, 2, 6, 4, 3], 0.5, .averagedInverseEmpiricalCDF, 3.5), + ([1, 4, 2, 6, 4, 3], 0.75, .averagedInverseEmpiricalCDF, 4), + ([1, 4, 2, 6, 4, 3], 1, .averagedInverseEmpiricalCDF, 6), + ([1, 4, 2, 6, 4, 3], 0, .closest, 1), + ([1, 4, 2, 6, 4, 3], 0.25, .closest, 2), + ([1, 4, 2, 6, 4, 3], 0.5, .closest, 3), + ([1, 4, 2, 6, 4, 3], 0.75, .closest, 4), + ([1, 4, 2, 6, 4, 3], 1, .closest, 6), + ([1, 4, 2, 6, 4, 3], 0, .lerpInverseEmpiricalCDF, 1), + ([1, 4, 2, 6, 4, 3], 0.25, .lerpInverseEmpiricalCDF, 2.25), + ([1, 4, 2, 6, 4, 3], 0.5, .lerpInverseEmpiricalCDF, 3.5), + ([1, 4, 2, 6, 4, 3], 0.75, .lerpInverseEmpiricalCDF, 4), + ([1, 4, 2, 6, 4, 3], 1, .lerpInverseEmpiricalCDF, 6), + ([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .inverseEmpiricalCDF, 6), + ([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .inverseEmpiricalCDF, 5), + ([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .averagedInverseEmpiricalCDF, 5.5), + ([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .averagedInverseEmpiricalCDF, 5), + ([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .closest, 5), + ([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .closest, 5), + ([7, 3, 10, 1, 8, 4, 9, 2, 5, 6], 0.5, .lerpInverseEmpiricalCDF, 5.5), + ([5, 9, 2, 7, 1, 8, 3, 6, 4], 0.5, .lerpInverseEmpiricalCDF, 5), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .inverseEmpiricalCDF, 6), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .inverseEmpiricalCDF, 11), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .inverseEmpiricalCDF, 16), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .averagedInverseEmpiricalCDF, 5.5), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .averagedInverseEmpiricalCDF, 10.5), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .averagedInverseEmpiricalCDF, 15.5), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .closest, 6), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .closest, 11), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .closest, 15), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.25, .lerpInverseEmpiricalCDF, 5.75), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.5, .lerpInverseEmpiricalCDF, 10.5), + ([15, 3, 8, 20, 1, 12, 7, 18, 5, 10, 14, 2, 9, 16, 4, 11, 19, 6, 13, 17], 0.75, .lerpInverseEmpiricalCDF, 15.25), + ([1, 2], 0, .inverseEmpiricalCDF, 1), + ([1, 2], 1, .inverseEmpiricalCDF, 2), + ([1, 2], 0.25, .inverseEmpiricalCDF, 2), + ([1, 2], 0.75, .inverseEmpiricalCDF, 2), + ([1, 2], 0, .averagedInverseEmpiricalCDF, 1), + ([1, 2], 1, .averagedInverseEmpiricalCDF, 2), + ([1, 2], 0.25, .averagedInverseEmpiricalCDF, 1.5), + ([1, 2], 0.75, .averagedInverseEmpiricalCDF, 1.5), + ([1, 2], 0, .closest, 1), + ([1, 2], 1, .closest, 2), + ([1, 2], 0.25, .closest, 1), + ([1, 2], 0.75, .closest, 2), + ([1, 2], 0, .lerpInverseEmpiricalCDF, 1), + ([1, 2], 1, .lerpInverseEmpiricalCDF, 2), + ([1, 2], 0.25, .lerpInverseEmpiricalCDF, 1.25), + ([1, 2], 0.75, .lerpInverseEmpiricalCDF, 1.75), + ([5], 0.5, .inverseEmpiricalCDF, 5), + ([5], 0.5, .averagedInverseEmpiricalCDF, 5), + ([5], 0.5, .closest, 5), + ([5], 0.5, .lerpInverseEmpiricalCDF, 5), ] as [([Int], Double, QuantileEstimationMethod, Double)] ) func validData(data: [Int], probability: Double, method: QuantileEstimationMethod, expectedQuantile: Double) async { @@ -33,11 +77,59 @@ struct QuantileTests { arguments: [ ([], .inverseEmpiricalCDF), ([], .averagedInverseEmpiricalCDF), - ([], .closestOrOddIndexed), + ([], .closest), ([], .lerpInverseEmpiricalCDF), ] as [([Int], QuantileEstimationMethod)] ) func invalidData(data: [Int], method: QuantileEstimationMethod) async { #expect(data.quantile(probability: 1, of: \.self).isNaN) } + + @Test( + "Collection with NaN values returns NaN", + arguments: [ + ([.nan, 1.0, 2.0], 0.5, .inverseEmpiricalCDF), + ([1.0, .nan, 2.0], 0.5, .inverseEmpiricalCDF), + ([1.0, 2.0, .nan], 0.5, .inverseEmpiricalCDF), + ([.nan, 1.0, 2.0], 0.5, .averagedInverseEmpiricalCDF), + ([.nan, 1.0, 2.0], 0.5, .closest), + ([.nan, 1.0, 2.0], 0.5, .lerpInverseEmpiricalCDF), + ([.nan, 1.0, 2.0], 0.0, .inverseEmpiricalCDF), + ([.nan, 1.0, 2.0], 1.0, .inverseEmpiricalCDF), + ] as [([Double], Double, QuantileEstimationMethod)] + ) + func nanInCollection(data: [Double], probability: Double, method: QuantileEstimationMethod) async { + #expect(data.quantile(probability: probability, of: \.self, method: method).isNaN) + } + + @Test( + "Infinity values without conflicting arithmetic return the correct quantile", + arguments: [ + ([1.0, 2.0, .infinity], 0.75, .inverseEmpiricalCDF, Double.infinity), + ([1.0, .infinity], 0.5, .inverseEmpiricalCDF, Double.infinity), + ([-.infinity, 1.0, 2.0], 0.0, .inverseEmpiricalCDF, -.infinity), + ([1.0, 2.0, .infinity], 1.0, .inverseEmpiricalCDF, Double.infinity), + ([1.0, .infinity], 0.75, .closest, Double.infinity), + ([-.infinity, 1.0], 0.25, .closest, -.infinity), + ([1.0, .infinity], 0.5, .averagedInverseEmpiricalCDF, Double.infinity), + ([-.infinity, 1.0], 0.5, .averagedInverseEmpiricalCDF, -.infinity), + ([1.0, .infinity], 0.5, .lerpInverseEmpiricalCDF, Double.infinity), + ] as [([Double], Double, QuantileEstimationMethod, Double)] + ) + func infinityWellBehaved(data: [Double], probability: Double, method: QuantileEstimationMethod, expectedQuantile: Double) async { + let result = data.quantile(probability: probability, of: \.self, method: method) + #expect(result == expectedQuantile) + } + + @Test( + "Conflicting infinities under arithmetic return NaN", + arguments: [ + ([-.infinity, .infinity], 0.5, .averagedInverseEmpiricalCDF), + ([-.infinity, 1.0], 0.5, .lerpInverseEmpiricalCDF), + ([-.infinity, .infinity], 0.5, .lerpInverseEmpiricalCDF), + ] as [([Double], Double, QuantileEstimationMethod)] + ) + func conflictingInfinitiesReturnNaN(data: [Double], probability: Double, method: QuantileEstimationMethod) async { + #expect(data.quantile(probability: probability, of: \.self, method: method).isNaN) + } }