From 82f36cc19c36553c53b6a18746cc05a0356c6141 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Tue, 3 Nov 2020 22:40:47 -0500 Subject: [PATCH 1/8] Add methods to merge sorted sequences Add a type describing the subsets that can be taken of the merger of two sets. Add a type that is a sequence that takes two sequences, a predicate that both sources are sorted along, and a merger-subset state, and vends the selected subset of the merger which is also sorted. Add an extension method to sequences that takes another sequence, ordering predicate, and subset selection and returns an array with the merged sequence. Add lazy variants of this method. Add overloads that default the ordering predicate to the standard less-than operator. --- Sources/Algorithms/MergeSorted.swift | 576 ++++++++++++++++++ .../MergeSortedTests.swift | 166 +++++ 2 files changed, 742 insertions(+) create mode 100644 Sources/Algorithms/MergeSorted.swift create mode 100644 Tests/SwiftAlgorithmsTests/MergeSortedTests.swift diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift new file mode 100644 index 00000000..3ced7808 --- /dev/null +++ b/Sources/Algorithms/MergeSorted.swift @@ -0,0 +1,576 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SetCombination +//===----------------------------------------------------------------------===// + +/// The manners two (multi)sets may be combined. +public enum SetCombination: CaseIterable { + /// Retain no elements.
+ case nothing + /// Retain the elements from the first source that do not have a counterpart + /// in the second. + case firstMinusSecond + /// Retain the elements from the second source that do not have a counterpart + /// in the first. + case secondMinusFirst + /// Retain the elements from both sources that do not have counterparts in the other. + case symmetricDifference + /// Retain one copy of each element that appears in both sources. + case intersection + /// Retain only the elements from the first source. + case first + /// Retain only the elements from the second source. + case second + /// Retain all the elements, collapsing shared elements to one copy. + case union + /// Retain all the elements, but keep both copies of each shared element. + case sum +} + +fileprivate extension SetCombination { + /// Determines which parts of the operands' merger need to be retained. + var vendsOut: (unshared1: Bool, unshared2: Bool, shared1: Bool, shared2: Bool) + { + switch self { + case .nothing: + return (false, false, false, false) + case .firstMinusSecond: + return (true, false, false, false) + case .secondMinusFirst: + return (false, true, false, false) + case .symmetricDifference: + return (true, true, false, false) + case .intersection: + return (false, false, true, false) + case .first: + return (true, false, true, false) + case .second: + return (false, true, false, true) + case .union: + return (true, true, true, false) + case .sum: + return (true, true, true, true) + } + } + + /// Determines if an operand needs to be read at all. 
+ var readsFrom: (first: Bool, second: Bool) { + switch self { + case .nothing: + return (false, false) + case .first: + return (true, false) + case .second: + return (false, true) + default: + return (true, true) + } + } +} + +//===----------------------------------------------------------------------===// +// MergedSequence, MergedIterator +//===----------------------------------------------------------------------===// + +/// A lazy sorted sequence of a set combination of two sorted source sequences. +public struct MergedSequence +where Base1.Element == Base2.Element { + /// The base sequence for the first operand. + public let firstBase: Base1 + /// The base sequence for the second operand. + public let secondBase: Base2 + /// The blend of the merger to vend. + public let selection: SetCombination + /// The element-ordering predicate. + @usableFromInline + let areInIncreasingOrder: (Element, Element) throws -> Bool + + /// Creates a sorted sequence that merges the two given sorted sequences, all + /// using the given predicate to determine order, but keeping only the elements + /// indicated by the given status. + @usableFromInline + internal init( + _ base1: Base1, + _ base2: Base2, + keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (Element, Element) throws -> Bool + ) { + firstBase = base1 + secondBase = base2 + self.selection = selection + self.areInIncreasingOrder = areInIncreasingOrder + } +} + +/// An iterator vending the sorted selection from a merger of two sorted source +/// virtual sequences. +public struct MergedIterator +where Base1.Element == Base2.Element { + /// The base iterator for the first operand. + var firstBase: Base1 + /// The base iterator for the second operand. + var secondBase: Base2 + /// The element-ordering predicate. + let areInIncreasingOrder: (Element, Element) throws -> Bool + + /// Whether elements from `firstBase` that do not have a counterpart from + /// `secondBase` will be vended. 
+ let exclusivesFromFirst: Bool + /// Whether elements from `secondBase` that do not have a counterpart from + /// `firstBase` will be vended. + let exclusivesFromSecond: Bool + /// Whether elements from `firstBase` that have a counterpart in `secondBase` + /// will be vended. + let sharedFromFirst: Bool + /// Whether elements from `secondBase` that have a counterpart in `firstBase` + /// will be vended. + let sharedFromSecond: Bool + + /// Whether to read from `firstBase` each round. + let extractFromFirst: Bool + /// Whether to read from `secondBase` each round. + let extractFromSecond: Bool + + /// The last elements extracted. + var cache: (Element?, Element?) + /// Whether there is still a need to read elements. + var isDone: Bool + + /// Creates an iterator merging the elements from the given iterators, keeping + /// only the ones for the given selection, using the given predicate for the + /// sort order. + @usableFromInline + internal init( + _ base1: Base1, _ base2: Base2, keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (Element, Element) throws -> Bool + ) { + firstBase = base1 + secondBase = base2 + self.areInIncreasingOrder = areInIncreasingOrder + (exclusivesFromFirst, exclusivesFromSecond, sharedFromFirst, + sharedFromSecond) = selection.vendsOut + (extractFromFirst, extractFromSecond) = selection.readsFrom + isDone = selection == .nothing + } +} + +private extension MergedIterator { + /// Advances the cache to the next state and indicates which component should + /// be used. + mutating func advanceCache() throws -> (useFirst: Bool, useSecond: Bool) { + while !isDone { + if extractFromFirst { + cache.0 = cache.0 ?? firstBase.next() + } + if extractFromSecond { + cache.1 = cache.1 ?? 
secondBase.next() + } + switch cache { + case (nil, nil): + isDone = true + case (_?, nil): + if exclusivesFromFirst { + return (true, false) + } + isDone = true + case (nil, _?): + if exclusivesFromSecond { + return (false, true) + } + isDone = true + case let (first?, second?): + if try areInIncreasingOrder(first, second) { + if exclusivesFromFirst { + return (true, false) + } + cache.0 = nil + } else if try areInIncreasingOrder(second, first) { + if exclusivesFromSecond { + return (false, true) + } + cache.1 = nil + } else { + if sharedFromFirst || sharedFromSecond { + return (true, true) + } + cache = (nil, nil) + } + } + } + return (false, false) + } + + /// Examines the cache's state and generates the next return value, or `nil` + /// if the state flags the end of the iteration. + mutating func generateNext(usingFirst: Bool, usingSecond: Bool) -> Element? { + switch (usingFirst, usingSecond) { + case (false, false): + assert(isDone) + return nil + case (true, false): + defer { cache.0 = nil } + return cache.0 + case (false, true): + defer { cache.1 = nil } + return cache.1 + case (true, true): + defer { + cache.0 = nil + if !(sharedFromFirst && sharedFromSecond) { + // When this isn't triggered, the shared value from the second source + // is retained until all of the equivalent values from the first + // source are exhausted, then the second source's versions will go. + // (They would go as exclusives-to-second, but that's OK because the + // only selection combination with both shared-from-first and -second + // also has exclusives-to-second.) + cache.1 = nil + } + } + return cache.0 + } + } +} + +internal extension MergedIterator { + /// Advances to the next element and returns it, or `nil` if no next element + /// exists; possibly throwing during the attempt. + @usableFromInline + mutating func throwingNext() throws -> Base2.Element?
{ + let (useFirstCache, useSecondCache) = try advanceCache() + return generateNext(usingFirst: useFirstCache, usingSecond: useSecondCache) + } +} + +extension MergedIterator: IteratorProtocol { + @inlinable + public mutating func next() -> Base1.Element? { return try! throwingNext() } +} + +extension MergedSequence: Sequence, LazySequenceProtocol { + public typealias Iterator = MergedIterator + public typealias Element = Iterator.Element + + @inlinable + public func makeIterator() -> Iterator { + return MergedIterator(firstBase.makeIterator(), secondBase.makeIterator(), + keeping: selection, by: areInIncreasingOrder) + } +} + +//===----------------------------------------------------------------------===// +// mergeSorted(with: keeping: into: by:) +//===----------------------------------------------------------------------===// + +internal extension Sequence { + /// Returns an instance of the given type whose elements are the merger of + /// this sequence and the given sequence, but keeping only the selected subset + /// of elements, assuming both sources are sorted according to the given + /// predicate that can compare elements. + /// + /// The predicate must be a *strict weak ordering* over the elements. That + /// is, for any elements `a`, `b`, and `c`, the following conditions must + /// hold: + /// + /// - `areInIncreasingOrder(a, a)` is always `false`. (Irreflexivity) + /// - If `areInIncreasingOrder(a, b)` and `areInIncreasingOrder(b, c)` are + /// both `true`, then `areInIncreasingOrder(a, c)` is also + /// `true`. (Transitive comparability) + /// - Two elements are *incomparable* if neither is ordered before the other + /// according to the predicate. If `a` and `b` are incomparable, and `b` + /// and `c` are incomparable, then `a` and `c` are also incomparable. + /// (Transitive incomparability) + /// + /// - Precondition: + /// - Both the receiver and `second` must be sorted according to + /// `areInIncreasingOrder`. 
+ /// - If `selection` is neither `.nothing` nor `.second`, the receiver must + /// be finite. + /// - If `selection` is neither `.nothing` nor `.first`, `second` must be + /// finite. + /// + /// - Parameters: + /// - second: A sequence to merge with this sequence, as the second operand. + /// - selection: The subset of the merged multiset to return. + /// - type: A specifier for the returned instance's type. + /// - areInIncreasingOrder: A predicate that returns `true` if its first + /// argument should be ordered before its second argument; otherwise, + /// `false`. + /// - Returns: A collection with the elements of both this sequence and + /// `second`, still sorted, but instances banned by `selection` filtered + /// out. If the selection allows a given value from both sequences, the + /// instances from the receiver will precede the instances from `second`. + /// + /// - Complexity: O(*n* + *m*), where *n* and *m* are the lengths of this + /// sequence and `second`, respectively. + @usableFromInline + func mergeSorted( + with second: S, + keeping selection: SetCombination, + into type: T.Type, + by areInIncreasingOrder: (Element, Element) throws -> Bool + ) rethrows -> T where S.Element == Element, T.Element == Element { + var destination = T() + try withoutActuallyEscaping(areInIncreasingOrder) { + let source = MergedSequence(self, second, keeping: selection, by: $0) + var iterator = source.makeIterator() + destination.reserveCapacity(source.underestimatedCount) + while let element = try iterator.throwingNext() { + // `iterator` above would flag an error if the call wasn't wrapped in a + // closure, due to SR-680. 
+ destination.append(element) + } + } + return destination + } +} + +//===----------------------------------------------------------------------===// +// mergeSorted(with: keeping: by:) +//===----------------------------------------------------------------------===// + +extension Sequence { + /// Returns an array listing the merger of this sequence and the given + /// sequence, but keeping only the selected subset, assuming both sources are + /// sorted according to the given predicate that can compare elements. + /// + /// The predicate must be a *strict weak ordering* over the elements. That + /// is, for any elements `a`, `b`, and `c`, the following conditions must + /// hold: + /// + /// - `areInIncreasingOrder(a, a)` is always `false`. (Irreflexivity) + /// - If `areInIncreasingOrder(a, b)` and `areInIncreasingOrder(b, c)` are + /// both `true`, then `areInIncreasingOrder(a, c)` is also + /// `true`. (Transitive comparability) + /// - Two elements are *incomparable* if neither is ordered before the other + /// according to the predicate. If `a` and `b` are incomparable, and `b` + /// and `c` are incomparable, then `a` and `c` are also incomparable. + /// (Transitive incomparability) + /// + /// - Precondition: + /// - Both the receiver and `second` must be sorted according to + /// `areInIncreasingOrder`. + /// - If `selection` is neither `.nothing` nor `.second`, the receiver must + /// be finite. + /// - If `selection` is neither `.nothing` nor `.first`, `second` must be + /// finite. + /// + /// - Parameters: + /// - second: A sequence to merge with this sequence, as the second operand. + /// - selection: The subset of the merged multiset to return. + /// - areInIncreasingOrder: A predicate that returns `true` if its first + /// argument should be ordered before its second argument; otherwise, + /// `false`. + /// - Returns: An array with the elements of both this sequence and `second`, + /// still sorted, but instances banned by `selection` filtered out. 
If the + /// selection allows a given value from both sequences, the instances from + /// the receiver will precede the instances from `second`. + /// + /// - Complexity: O(*n* + *m*), where *n* and *m* are the lengths of this + /// sequence and `second`, respectively. + @inlinable + public func mergeSorted( + with second: S, + keeping selection: SetCombination, + by areInIncreasingOrder: (Element, Element) throws -> Bool + ) rethrows -> [Element] where S.Element == Element { + return try mergeSorted(with: second, keeping: selection, into: Array.self, + by: areInIncreasingOrder) + } +} + +extension LazySequenceProtocol { + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given lazy sequence, but keeping only the selected subset, assuming both + /// sources are sorted according to the given predicate that can compare + /// elements. + /// + /// The predicate must be a *strict weak ordering* over the elements. That + /// is, for any elements `a`, `b`, and `c`, the following conditions must + /// hold: + /// + /// - `areInIncreasingOrder(a, a)` is always `false`. (Irreflexivity) + /// - If `areInIncreasingOrder(a, b)` and `areInIncreasingOrder(b, c)` are + /// both `true`, then `areInIncreasingOrder(a, c)` is also + /// `true`. (Transitive comparability) + /// - Two elements are *incomparable* if neither is ordered before the other + /// according to the predicate. If `a` and `b` are incomparable, and `b` + /// and `c` are incomparable, then `a` and `c` are also incomparable. + /// (Transitive incomparability) + /// + /// The result sequence may be finite, even with a non-finite operand, if the + /// `selection` indicates that the operand won't be used. If a non-finite + /// operand is used, an element extraction may soft-lock if the operand never + /// emits a filtered-in value. + /// + /// - Precondition: Both the receiver and `second` must be sorted according to + /// `areInIncreasingOrder`. 
+ /// + /// - Parameters: + /// - second: A lazy sequence to merge with this sequence, as the second + /// operand. + /// - selection: The subset of the merged multiset to return. + /// - areInIncreasingOrder: A predicate that returns `true` if its first + /// argument should be ordered before its second argument; otherwise, + /// `false`. + /// - Returns: A lazy sequence with the elements of both this sequence and + /// `second`, still sorted, but instances banned by `selection` filtered + /// out. If the selection allows a given value from both sequences, the + /// instances from the receiver will precede the instances from `second`. + @inlinable + public func mergeSorted( + with second: S, + keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (Element, Element) -> Bool + ) -> MergedSequence where S.Element == Element { + return MergedSequence(elements, second.elements, keeping: selection, + by: areInIncreasingOrder) + } + + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given sequence, but keeping only the selected subset, assuming both + /// sources are sorted according to the given predicate that can compare + /// elements. + /// + /// The predicate must be a *strict weak ordering* over the elements. That + /// is, for any elements `a`, `b`, and `c`, the following conditions must + /// hold: + /// + /// - `areInIncreasingOrder(a, a)` is always `false`. (Irreflexivity) + /// - If `areInIncreasingOrder(a, b)` and `areInIncreasingOrder(b, c)` are + /// both `true`, then `areInIncreasingOrder(a, c)` is also + /// `true`. (Transitive comparability) + /// - Two elements are *incomparable* if neither is ordered before the other + /// according to the predicate. If `a` and `b` are incomparable, and `b` + /// and `c` are incomparable, then `a` and `c` are also incomparable. 
+ /// (Transitive incomparability) + /// + /// The result sequence may be finite, even with a non-finite operand, if the + /// `selection` indicates that the operand won't be used. If a non-finite + /// operand is used, an element extraction may soft-lock if the operand never + /// emits a filtered-in value. + /// + /// - Precondition: Both the receiver and `second` must be sorted according to + /// `areInIncreasingOrder`. + /// + /// - Parameters: + /// - second: A sequence to merge with this sequence, as the second operand. + /// - selection: The subset of the merged multiset to return. + /// - areInIncreasingOrder: A predicate that returns `true` if its first + /// argument should be ordered before its second argument; otherwise, + /// `false`. + /// - Returns: A lazily-generated sequence using the elements of both this + /// sequence and `second`, still sorted, but instances banned by `selection` + /// are filtered out. If the selection allows a given value from both + /// sequences, the instances from the receiver will precede the instances + /// from `second`. + @inlinable + public func mergeSorted( + with second: S, + keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (Element, Element) -> Bool + ) -> MergedSequence where S.Element == Element { + return mergeSorted(with: second.lazy, keeping: selection, + by: areInIncreasingOrder) + } +} + +//===----------------------------------------------------------------------===// +// mergeSorted(with: keeping:) +//===----------------------------------------------------------------------===// + +extension Sequence where Element: Comparable { + /// Returns an array listing the merger of this sequence and the given + /// sequence, but keeping only the selected subset, and assuming both sources + /// are sorted. + /// + /// - Precondition: + /// - Both the receiver and `second` must be sorted. + /// - If `selection` is neither `.nothing` nor `.second`, the receiver must + /// be finite. 
+ /// - If `selection` is neither `.nothing` nor `.first`, `second` must be + /// finite. + /// + /// - Parameters: + /// - second: A sequence to merge with this sequence, as the second operand. + /// - selection: The subset of the merged multiset to return. + /// - Returns: An array with the elements of both this sequence and `second`, + /// still sorted, but instances banned by `selection` filtered out. If the + /// selection allows a given value from both sequences, the instances from + /// the receiver will precede the instances from `second`. + /// + /// - Complexity: O(*n* + *m*), where *n* and *m* are the lengths of this + /// sequence and `second`, respectively. + @inlinable + public func mergeSorted( + with second: S, + keeping selection: SetCombination + ) -> [Element] where S.Element == Element { + return mergeSorted(with: second, keeping: selection, by: <) + } +} + +extension LazySequenceProtocol where Element: Comparable { + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given lazy sequence, but keeping only the selected subset, and assuming + /// both sources are sorted. + /// + /// The result sequence may be finite, even with a non-finite operand, if the + /// `selection` indicates that the operand won't be used. If a non-finite + /// operand is used, an element extraction may soft-lock if the operand never + /// emits a filtered-in value. + /// + /// - Precondition: Both the receiver and `second` must be sorted. + /// + /// - Parameters: + /// - second: A lazy sequence to merge with this sequence, as the second + /// operand. + /// - selection: The subset of the merged multiset to return. + /// - Returns: A lazy sequence with the elements of both this sequence and + /// `second`, still sorted, but instances banned by `selection` filtered + /// out. If the selection allows a given value from both sequences, the + /// instances from the receiver will precede the instances from `second`. 
+ @inlinable + public func mergeSorted( + with second: S, + keeping selection: SetCombination + ) -> MergedSequence where S.Element == Element { + return mergeSorted(with: second, keeping: selection, by: <) + } + + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given sequence, but keeping only the selected subset, and assuming both + /// sources are sorted. + /// + /// The result sequence may be finite, even with a non-finite operand, if the + /// `selection` indicates that the operand won't be used. If a non-finite + /// operand is used, an element extraction may soft-lock if the operand never + /// emits a filtered-in value. + /// + /// - Precondition: Both the receiver and `second` must be sorted. + /// + /// - Parameters: + /// - second: A sequence to merge with this sequence, as the second operand. + /// - selection: The subset of the merged multiset to return. + /// - Returns: A lazily-generated sequence using the elements of both this + /// sequence and `second`, still sorted, but instances banned by `selection` + /// are filtered out. If the selection allows a given value from both + /// sequences, the instances from the receiver will precede the instances + /// from `second`. + @inlinable + public func mergeSorted( + with second: S, + keeping selection: SetCombination + ) -> MergedSequence where S.Element == Element { + return mergeSorted(with: second, keeping: selection, by: <) + } +} diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift new file mode 100644 index 00000000..5b2dcb72 --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -0,0 +1,166 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +@testable import Algorithms + +/// Unit tests for `SetCombination`, `MergedSequence`, and `mergeSorted`. +final class MergeSortedTests: XCTestCase { + /// Check the values and properties of `SetCombination`. + func testSelectionType() { + XCTAssertEqualSequences(SetCombination.allCases, [.nothing, + .firstMinusSecond, .secondMinusFirst, .symmetricDifference, .intersection, + .first, .second, .union, .sum + ]) + + // Use a merged-sequence's iterator to spy on the properties. + // (The properties only depend on the case, not the source types nor the + // predicate.) + let iterators = SetCombination.allCases.map { + MergedSequence(EmptyCollection(), EmptyCollection(), + keeping: $0, by: <).makeIterator() + } + XCTAssertEqualSequences(iterators.map(\.exclusivesFromFirst), + [false, true, false, true, false, true, false, true, true]) + XCTAssertEqualSequences(iterators.map(\.exclusivesFromSecond), + [false, false, true, true, false, false, true, true, true]) + XCTAssertEqualSequences(iterators.map(\.sharedFromFirst), + [false, false, false, false, true, true, false, true, true]) + XCTAssertEqualSequences(iterators.map(\.sharedFromSecond), + [false, false, false, false, false, false, true, false, true]) + XCTAssertEqualSequences(iterators.map(\.extractFromFirst), + [false, true, true, true, true, true, false, true, true]) + XCTAssertEqualSequences(iterators.map(\.extractFromSecond), + [false, true, true, true, true, false, true, true, true]) + } + + /// Check results from using empty operands, and using the generating methods.
+ func testEmpty() { + let empty = EmptyCollection() + let emptyMergerArrays = SetCombination.allCases.map { + empty.mergeSorted(with: empty, keeping: $0) + } + let emptyResults = Array(repeating: [Double](), + count: SetCombination.allCases.count) + XCTAssertEqualSequences(emptyMergerArrays, emptyResults) + + // Call the lazy methods. + let emptyMergerSingleLazy = SetCombination.allCases.map { + empty.lazy.mergeSorted(with: empty, keeping: $0) + } + XCTAssertEqualSequences(emptyMergerSingleLazy.map(Array.init), emptyResults) + + let emptyMergerDoubleLazy = SetCombination.allCases.map { + empty.lazy.mergeSorted(with: empty.lazy, keeping: $0) + } + XCTAssertEqualSequences(emptyMergerDoubleLazy.map(Array.init), emptyResults) + } + + /// Check results from using one empty and one non-empty operand. + func testExactlyOneEmpty() { + let limit = Int.random(in: 1..<100), nonEmpty = 0..() + let nonEmptyVersusEmptyMergers = SetCombination.allCases.map { + MergedSequence(nonEmpty, empty, keeping: $0, by: <) + } + XCTAssertEqualSequences(nonEmptyVersusEmptyMergers.map(Array.init), [ + [], nonEmptyArray, [], nonEmptyArray, [], nonEmptyArray, [], + nonEmptyArray, nonEmptyArray + ]) + + let emptyVersusNonEmptyMergers = SetCombination.allCases.map { + MergedSequence(empty, nonEmpty, keeping: $0, by: <) + } + XCTAssertEqualSequences(emptyVersusNonEmptyMergers.map(Array.init), [ + [], [], nonEmptyArray, nonEmptyArray, [], [], nonEmptyArray, + nonEmptyArray, nonEmptyArray + ]) + } + + /// Check results on using the same nonempty sequence for both operands. + func testIdentical() { + let sample = Array(1..= 2 else { return false } + guard self >= 4 else { return true } + + for divisor in 2.. divisor else { break } + + // The guards above cover everything. 
+ } + return true + } +} From aef67feb91f14c0d8be6b330ffcd7ae6a49de10d Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Wed, 4 Nov 2020 10:26:59 -0500 Subject: [PATCH 2/8] Improve sorted merged sequences Implement the property that underreports the element count, built by combining the sources' counts. Implement the method that briefly gives access to the elements' buffer, non-NIL only when at most one source sequence needs to be used. Implement the secret method that optimizes the element-containment test, built by combining the sources' search results. --- Sources/Algorithms/MergeSorted.swift | 63 ++++++++ .../MergeSortedTests.swift | 146 ++++++++++++++++++ 2 files changed, 209 insertions(+) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 3ced7808..68662dad 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -265,6 +265,69 @@ extension MergedSequence: Sequence, LazySequenceProtocol { return MergedIterator(firstBase.makeIterator(), secondBase.makeIterator(), keeping: selection, by: areInIncreasingOrder) } + + @inlinable + public var underestimatedCount: Int { + switch selection { + case .firstMinusSecond, .secondMinusFirst, .symmetricDifference, + .intersection: + // Can't even guesstimate these without reading elements. + fallthrough + case .nothing: + return 0 + case .first: + return firstBase.underestimatedCount + case .second: + return secondBase.underestimatedCount + case .union: + return Swift.max(firstBase.underestimatedCount, + secondBase.underestimatedCount) + case .sum: + return firstBase.underestimatedCount + secondBase.underestimatedCount + } + } + + @inlinable + public func withContiguousStorageIfAvailable( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R? 
{ + switch selection { + case .nothing: + return try body(UnsafeBufferPointer(start: nil, count: 0)) + case .first: + return try firstBase.withContiguousStorageIfAvailable(body) + case .second: + return try secondBase.withContiguousStorageIfAvailable(body) + default: + return nil + } + } + + @inlinable + public func _customContainsEquatableElement(_ element: Element) -> Bool? { + switch (selection, firstBase._customContainsEquatableElement(element), + secondBase._customContainsEquatableElement(element)) { + case (.nothing, _, _): + return false + case let (.intersection, contains1?, contains2?): + return contains1 && contains2 + case let (.first, possiblyContains1, _): + return possiblyContains1 + case let (.second, _, possiblyContains2): + return possiblyContains2 + case let (.union, contains1?, contains2?), + let (.sum, contains1?, contains2?): + return contains1 || contains2 + case (.union, true, _), (.union, _, true), (.sum, true, _), (.sum, _, true): + return true + default: + // - .intersection can't work if at least one is NIL. + // - .union and .sum can't work with dual NIL or one NIL and one FALSE. + // - .firstMinusSecond, .secondMinusFirst, and .symmetricDifference can't + // work with just existence; they need the full counts. + return nil + } + } } //===----------------------------------------------------------------------===// diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index 5b2dcb72..c055e660 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -42,6 +42,74 @@ final class MergeSortedTests: XCTestCase { [false, true, true, true, true, false, true, true, true]) } + /// Check the rough underestimated counts. 
+ func testUnderestimatedCount() { + let empty = EmptyCollection(), single = CollectionOfOne(1.1), + repeated = repeatElement(5.5, count: 5) + let emptySelfMergers = SetCombination.allCases.map { + MergedSequence(empty, empty, keeping: $0, by: <) + } + XCTAssertEqualSequences(emptySelfMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 0, 0, 0, 0 + ]) + + let emptySingleMergers = SetCombination.allCases.map { + MergedSequence(empty, single, keeping: $0, by: <) + } + XCTAssertEqualSequences(emptySingleMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 0, 1, 1, 1 + ]) + + let emptyRepeatedMergers = SetCombination.allCases.map { + MergedSequence(empty, repeated, keeping: $0, by: <) + } + XCTAssertEqualSequences(emptyRepeatedMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 0, 5, 5, 5 + ]) + + let singleEmptyMergers = SetCombination.allCases.map { + MergedSequence(single, empty, keeping: $0, by: <) + } + XCTAssertEqualSequences(singleEmptyMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 1, 0, 1, 1 + ]) + + let singleSelfMergers = SetCombination.allCases.map { + MergedSequence(single, single, keeping: $0, by: <) + } + XCTAssertEqualSequences(singleSelfMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 1, 1, 1, 2 + ]) + + let singleRepeatedMergers = SetCombination.allCases.map { + MergedSequence(single, repeated, keeping: $0, by: <) + } + XCTAssertEqualSequences(singleRepeatedMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 1, 5, 5, 6 + ]) + + let repeatedEmptyMergers = SetCombination.allCases.map { + MergedSequence(repeated, empty, keeping: $0, by: <) + } + XCTAssertEqualSequences(repeatedEmptyMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 5, 0, 5, 5 + ]) + + let repeatedSingleMergers = SetCombination.allCases.map { + MergedSequence(repeated, single, keeping: $0, by: <) + } + XCTAssertEqualSequences(repeatedSingleMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 5, 1, 5, 6 + ]) + + let repeatedSelfMergers = 
SetCombination.allCases.map { + MergedSequence(repeated, repeated, keeping: $0, by: <) + } + XCTAssertEqualSequences(repeatedSelfMergers.map(\.underestimatedCount), [ + 0, 0, 0, 0, 0, 5, 5, 5, 10 + ]) + } + /// Check results from using empty operands, and using the generating methods. func testEmpty() { let empty = EmptyCollection() @@ -144,6 +212,84 @@ final class MergeSortedTests: XCTestCase { [], s2, s1, all, [], s2, s1, all, all ]) } + + /// Check direct buffer access. + func testBufferAccess() { + let sample1 = [2, 2], sample2 = [3, 3, 3] + let sample1to2Mergers = SetCombination.allCases.map { + MergedSequence(sample1, sample2, keeping: $0, by: <) + } + XCTAssertEqualSequences(sample1to2Mergers.map { merger in + merger.withContiguousStorageIfAvailable { buffer in + buffer.reduce(0, +) + } + }, [0, nil, nil, nil, nil, 4, 9, nil, nil]) + + let sample2to1Mergers = SetCombination.allCases.map { + MergedSequence(sample2, sample1, keeping: $0, by: <) + } + XCTAssertEqualSequences(sample2to1Mergers.map { merger in + merger.withContiguousStorageIfAvailable { buffer in + buffer.reduce(0, +) + } + }, [0, nil, nil, nil, nil, 9, 4, nil, nil]) + } + + /// Check the containment implementation method, indirectly. + func testOptimizedContainment() { + // Both operands' type supports optimized containment tests. 
+ let range1 = 0..<2, range2 = 1..<3 + let range1to2Mergers = SetCombination.allCases.map { + MergedSequence(range1, range2, keeping: $0, by: <) + } + XCTAssertEqualSequences(range1to2Mergers.map { $0.contains(0) }, [ + false, true, false, true, false, true, false, true, true + ]) + XCTAssertEqualSequences(range1to2Mergers.map { $0.contains(1) }, [ + false, false, false, false, true, true, true, true, true + ]) + XCTAssertEqualSequences(range1to2Mergers.map { $0.contains(2) }, [ + false, false, true, true, false, false, true, true, true + ]) + XCTAssertEqualSequences(range1to2Mergers.map { $0.contains(3) }, [ + false, false, false, false, false, false, false, false, false + ]) + + // Exactly one operand's type supports optimized containment tests. + let sample1 = Array(range1) + let sampleRangeMergers = SetCombination.allCases.map { + MergedSequence(sample1, range2, keeping: $0, by: <) + } + XCTAssertEqualSequences(sampleRangeMergers.map { $0.contains(0) }, [ + false, true, false, true, false, true, false, true, true + ]) + XCTAssertEqualSequences(sampleRangeMergers.map { $0.contains(1) }, [ + false, false, false, false, true, true, true, true, true + ]) + XCTAssertEqualSequences(sampleRangeMergers.map { $0.contains(2) }, [ + false, false, true, true, false, false, true, true, true + ]) + XCTAssertEqualSequences(sampleRangeMergers.map { $0.contains(3) }, [ + false, false, false, false, false, false, false, false, false + ]) + + // Need to check both sides for the NIL-containment operand type. 
+ let rangeSampleMergers = SetCombination.allCases.map { + MergedSequence(range2, sample1, keeping: $0, by: <) + } + XCTAssertEqualSequences(rangeSampleMergers.map { $0.contains(0) }, [ + false, false, true, true, false, false, true, true, true + ]) + XCTAssertEqualSequences(rangeSampleMergers.map { $0.contains(1) }, [ + false, false, false, false, true, true, true, true, true + ]) + XCTAssertEqualSequences(rangeSampleMergers.map { $0.contains(2) }, [ + false, true, false, true, false, true, false, true, true + ]) + XCTAssertEqualSequences(rangeSampleMergers.map { $0.contains(3) }, [ + false, false, false, false, false, false, false, false, false + ]) + } } //-----------------------------------------------------------------------------/ From f6e0f32231af804538c1914041d58b5c583c958b Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 5 Nov 2020 09:55:48 -0500 Subject: [PATCH 3/8] Add test to confirm code paths Add a test that will go through all of the code paths of the eager method to merge sorted sequences. --- .../MergeSortedTests.swift | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index c055e660..7c04fc17 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -290,6 +290,25 @@ final class MergeSortedTests: XCTestCase { false, false, false, false, false, false, false, false, false ]) } + + /// Check every combination for the array-returning merger method. 
+ func testMoreNonlazyMerging() { + let range1 = 0..<10, range2 = 5..<15 + let range1to2Mergers = SetCombination.allCases.map { + range1.mergeSorted(with: range2, keeping: $0) + } + XCTAssertEqualSequences(range1to2Mergers, [ + [], + [0, 1, 2, 3, 4], + [10, 11, 12, 13, 14], + [0, 1, 2, 3, 4, 10, 11, 12, 13, 14], + [5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + [0, 1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11, 12, 13, 14] + ]) + } } //-----------------------------------------------------------------------------/ From 1229245cdfee7c8c9679228dc73ff2b9cb8e95c3 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 5 Nov 2020 18:50:39 -0500 Subject: [PATCH 4/8] Change implementation of merging Replace the two private methods used to implement the throwing version of element iteration (for the iterator type) with a single private method that returns both sources' elements for a corresponding pair, using a custom return type. It's not only simpler, but should help in implementing merged sorted collections. --- Sources/Algorithms/MergeSorted.swift | 115 +++++++++++++++------------ 1 file changed, 66 insertions(+), 49 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 68662dad..88d6e4e4 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -164,80 +164,90 @@ where Base1.Element == Base2.Element { } } +/// Storage for a matching pair of elements, acknowledging the smaller. +fileprivate enum MergedIteratorMarker { + /// Only the first source has any more elements. + case first(Element) + /// Only the second source has any more elements. + case second(Element) + /// A matching pair of elements, but not equivalent. + case nonMatching(Element, Element, firstIsLower: Bool) + /// A matching pair of equivalent elements. 
+ case matching(Element, Element, useFirst: Bool) +} + private extension MergedIterator { - /// Advances the cache to the next state and indicates which component should - /// be used. - mutating func advanceCache() throws -> (useFirst: Bool, useSecond: Bool) { + /// Advances and returns the next corresponding pair of elements, or `nil` if + /// no more exist. + mutating func dualNext() throws -> MergedIteratorMarker? { while !isDone { + // Read in the next element(s), when required. if extractFromFirst { cache.0 = cache.0 ?? firstBase.next() } if extractFromSecond { cache.1 = cache.1 ?? secondBase.next() } + + // The marker depends on the cached values' relative ranking. switch cache { case (nil, nil): + // No more elements to read. isDone = true - case (_?, nil): + case (let first?, nil): + // Only the first source has any more elements. if exclusivesFromFirst { - return (true, false) + cache.0 = nil + return .first(first) + } else { + // Don't unnecessarily read from a now-unused source. + isDone = true } - isDone = true - case (nil, _?): + case (nil, let second?): + // Only the second source has any more elements. if exclusivesFromSecond { - return (false, true) + cache.1 = nil + return .second(second) + } else { + // Don't unnecessarily read from a now-unused source. + isDone = true } - isDone = true case let (first?, second?): + // Return the smaller element, if allowed by the exclusive/shared flags. 
if try areInIncreasingOrder(first, second) { + cache.0 = nil if exclusivesFromFirst { - return (true, false) + return .nonMatching(first, second, firstIsLower: true) } - cache.0 = nil } else if try areInIncreasingOrder(second, first) { + cache.1 = nil if exclusivesFromSecond { - return (false, true) + return .nonMatching(first, second, firstIsLower: false) } - cache.1 = nil } else { - if sharedFromFirst || sharedFromSecond { - return (true, true) - } cache = (nil, nil) + switch (sharedFromFirst, sharedFromSecond) { + case (true, true): + // Keep the second source's element in the cache, so it can be + // retrieved during `.sum`. At that point, it would look like an + // exclusive-to-second, but that's OK because the only selection + // with both shared statuses as `true` also has exclusive-to-second + // as `true`. + cache.1 = second + fallthrough + case (true, false): + return .matching(first, second, useFirst: true) + case (false, true): + // Never reached because the selections that would trigger it make + // the sequence look like an exclusive-to-second instead. + return .matching(first, second, useFirst: false) + case (false, false): + break + } } } } - return (false, false) - } - - /// Examines the cache's state and generates the next return value, or `nil` - /// if the state flags the end of the iteration. - mutating func generateNext(usingFirst: Bool, usingSecond: Bool) -> Element? { - switch (usingFirst, usingSecond) { - case (false, false): - assert(isDone) - return nil - case (true, false): - defer { cache.0 = nil } - return cache.0 - case (false, true): - defer { cache.1 = nil } - return cache.1 - case (true, true): - defer { - cache.0 = nil - if !(sharedFromFirst && sharedFromSecond) { - // When this isn't triggered, the shared value from the second source - // is retained until all of the equivalent values from the first - // source are exhausted, then the second source's versions will go. 
- // (They would go as exclusives-to-second, but that's OK becuase the - // only selection combination with both shared-from-first and -second - // also has exclusives-to-second.) - cache.1 = nil - } - } - return cache.0 - } + return nil } } @@ -246,8 +256,15 @@ internal extension MergedIterator { /// exists; possibly throwing during the attempt. @usableFromInline mutating func throwingNext() throws -> Base2.Element? { - let (useFirstCache, useSecondCache) = try advanceCache() - return generateNext(usingFirst: useFirstCache, usingSecond: useSecondCache) + switch try dualNext() { + case .first(let element), .second(let element): + return element + case let .nonMatching(first, second, useFirst), + let .matching(first, second, useFirst): + return useFirst ? first : second + case .none: + return nil + } } } From 59a7e7ba542c7bf8f4c37cc96365b5de4e66ae36 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 6 Nov 2020 07:29:14 -0500 Subject: [PATCH 5/8] Remove unnecessary payload Remove a "which-one" flag from a case of an internal accounting enumeration type. It was never actually used. --- Sources/Algorithms/MergeSorted.swift | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 88d6e4e4..78798136 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -173,7 +173,7 @@ fileprivate enum MergedIteratorMarker { /// A matching pair of elements, but not equivalent. case nonMatching(Element, Element, firstIsLower: Bool) /// A matching pair of equivalent elements. - case matching(Element, Element, useFirst: Bool) + case matching(Element, Element) } private extension MergedIterator { @@ -235,12 +235,10 @@ private extension MergedIterator { // as `true`. 
cache.1 = second fallthrough - case (true, false): - return .matching(first, second, useFirst: true) - case (false, true): - // Never reached because the selections that would trigger it make - // the sequence look like an exclusive-to-second instead. - return .matching(first, second, useFirst: false) + case (true, false), (false, true): + // The second case above is never actually reached, because it'll + // look like an exclusive-to-second by access time. + return .matching(first, second) case (false, false): break } @@ -259,9 +257,10 @@ internal extension MergedIterator { switch try dualNext() { case .first(let element), .second(let element): return element - case let .nonMatching(first, second, useFirst), - let .matching(first, second, useFirst): + case let .nonMatching(first, second, useFirst): return useFirst ? first : second + case let .matching(first, _): + return first case .none: return nil } From e0ac76a9d2e876424dbc51102c32ce52f0c60ec4 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 12 Nov 2020 15:26:08 -0500 Subject: [PATCH 6/8] Conform sorted merged sequences to Collection Augment the type used to present the sorted merger of two sorted sequences to conditionally conform to Collection when both sources are also collections. This involves new types to represent Index, Indices, and Indices.Iterator. Like the eager merger algorithm, it reuses the lazy iteration code. 
--- Sources/Algorithms/MergeSorted.swift | 304 ++++++++++++++++++ .../MergeSortedTests.swift | 53 +++ 2 files changed, 357 insertions(+) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 78798136..147c4b38 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -346,6 +346,310 @@ extension MergedSequence: Sequence, LazySequenceProtocol { } } +//===----------------------------------------------------------------------===// +// MergedCollectionSteps, MergedCollectionStepIterator, +// MergedCollectionIterationStep +//===----------------------------------------------------------------------===// + +/// An iteration step within a `MergedSequence` when both sorted sources are +/// collections. +public struct MergedCollectionIterationStep { + /// Index to the current or upcoming element in the first source. + public let first: Base1 + /// Index to the current or upcoming element in the second source. + public let second: Base2 + /// Whether `first` points to a current element. + public let useFirst: Bool + /// Whether `second` points to a current element. + public let useSecond: Bool + + /// Creates an index from the given source indices, and caching their + /// dereferencing statuses. + @usableFromInline + init(_ base1: Base1, _ base2: Base2, useFirst: Bool, useSecond: Bool) { + first = base1 + second = base2 + self.useFirst = useFirst + self.useSecond = useSecond + } +} + +extension MergedCollectionIterationStep: Equatable {} + +extension MergedCollectionIterationStep: Hashable +where Base1: Hashable, Base2: Hashable {} + +extension MergedCollectionIterationStep: Comparable { + public static func < (lhs: Self, rhs: Self) -> Bool { + // The expression assumes that a failed comparison represents that its + // terms are equal. If we get one less-than and one greater-than, then this + // setup is inconsistent. That shouldn't happen and we won't waste time + // looking for it. 
+ return lhs.first < rhs.first || lhs.second < rhs.second + } +} + +/// Enables instances of one of two types to be considered under a single type. +fileprivate enum Either { + /// Stores an instance of the first type. + case first(First) + /// Stores an instance of the second type. + case second(Second) +} + +/// An iterator over the steps taken to generate a `MergedSequence`, but only +/// when both sorted sources are collections. +public struct MergedCollectionStepIterator +where Base1.Element == Base2.Element { + /// The element type for each operand. + @usableFromInline + typealias InnerElement = Base1.Element + /// The first operand expressed as elements and indices. + typealias FirstIndexed = Indexed + /// The second operand expressed as elements and indices. + typealias SecondIndexed = Indexed + /// The consolidation of operands' elements and indices. + fileprivate typealias EitherElement = Either + /// The converter for the first operand. + fileprivate typealias FirstAsEither = LazyMapCollection + /// The converter for the second operand. + fileprivate typealias SecondAsEither = LazyMapCollection + /// An iterator for generating the raw stepping data. + fileprivate typealias Core = MergedIterator + + /// The iterator generating the raw stepping data. + fileprivate var core: Core + /// The past-the-end index for the first operand, to be used when the current + /// marker exhausted that collection. + let firstEndIndex: Base1.Index + /// The past-the-end index for the second operand, to be used when the current + /// marker exhausted that collection. + let secondEndIndex: Base2.Index + + /// Creates an iterator showing how two sorted collections' elements are + /// visited to generate the sorted merger of those collections, all using the + /// given predicate to determine order, but keeping only the elements + /// indicated by the given status.
+ @usableFromInline + init(_ base1: Base1, _ base2: Base2, keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (InnerElement, InnerElement) -> Bool + ) { + let firstMarkers = base1.indexed().lazy.map(EitherElement.first) + let secondMarkers = base2.indexed().lazy.map(EitherElement.second) + let eitherCompare: (EitherElement, EitherElement) -> Bool = { + switch ($0, $1) { + case let (.first(indexed1), .first(indexed2)): + return areInIncreasingOrder(indexed1.element, indexed2.element) + case let (.first(indexed1), .second(indexed2)): + return areInIncreasingOrder(indexed1.element, indexed2.element) + case let (.second(indexed1), .first(indexed2)): + return areInIncreasingOrder(indexed1.element, indexed2.element) + case let (.second(indexed1), .second(indexed2)): + return areInIncreasingOrder(indexed1.element, indexed2.element) + } + } + core = Core(firstMarkers.makeIterator(), secondMarkers.makeIterator(), + keeping: selection, by: eitherCompare) + firstEndIndex = base1.endIndex + secondEndIndex = base2.endIndex + } +} + +extension MergedCollectionStepIterator: IteratorProtocol { + public mutating func next() -> MergedCollectionIterationStep? { + switch try! 
core.dualNext() { + case let .first(.first((idx1, _))): + return Element(idx1, secondEndIndex, useFirst: true, useSecond: false) + case let .second(.second((idx2, _))): + return Element(firstEndIndex, idx2, useFirst: false, useSecond: true) + case let .nonMatching(.first((idx1, _)), .second((idx2, _)), firstIsLower): + return Element(idx1, idx2, useFirst: firstIsLower, + useSecond: !firstIsLower) + case let .matching(.first((idx1, _)), .second((idx2, _))): + return Element(idx1, idx2, useFirst: true, useSecond: true) + case .none: + return nil + case .first(.second), .second(.first), .nonMatching(.first, .first, _), + .nonMatching(.second, _, _), .matching(.first, .first), + .matching(.second, _): + preconditionFailure("Illegal combination of inner indices") + } + } +} + +/// A sequence over the steps taken to generate a `MergedSequence`, but only +/// when both sorted sources are collections. +/// +/// - Warning: When an instance also conforms to `Collection`, generation of +/// `startIndex` will take O(*n* + *m*) time instead of O(1) time, where *n* +/// and *m* are the lengths of the source collections. This also affects +/// anything implemented with `startIndex`, like `isEmpty`. +public struct MergedCollectionSteps +where Base1.Element == Base2.Element { + /// The element type for each operand. + @usableFromInline + typealias InnerElement = Base2.Element + + /// The base collection for the first operand. + public let firstBase: Base1 + /// The base collection for the second operand. + public let secondBase: Base2 + /// The blend of the merger to vend. + public let selection: SetCombination + /// The element-ordering predicate. + @usableFromInline + let areInIncreasingOrder: (InnerElement, InnerElement) -> Bool + + /// Creates a sorted sequence that merges the two given sorted sequences, all + /// using the given predicate to determine order, but keeping only the elements + /// indicated by the given status. 
+ @usableFromInline + init(_ base1: Base1, _ base2: Base2, keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (InnerElement, InnerElement) -> Bool + ) { + firstBase = base1 + secondBase = base2 + self.selection = selection + self.areInIncreasingOrder = areInIncreasingOrder + } +} + +extension MergedCollectionSteps: Sequence { + @inlinable + public var underestimatedCount: Int { + // Reuse the code from MergedSequence.underestimatedCount. This requires + // somehow removing the Collection conformance from at least one of the + // operands. Otherwise, that sequence will also be a Collection, which may + // use a version of this generic type as its Indices, leading to an + // escalating infinite recursion. + MergedSequence(AnySequence(firstBase), AnySequence(secondBase), + keeping: selection, by: areInIncreasingOrder) + .underestimatedCount + } + + @inlinable + public func makeIterator() -> MergedCollectionStepIterator { + return Iterator(firstBase, secondBase, keeping: selection, + by: areInIncreasingOrder) + } +} + +extension MergedCollectionSteps: Collection +where Base1.SubSequence == Base1, Base2.SubSequence == Base2 { + public typealias Index = Element + public typealias SubSequence = Self + + @inlinable + public var startIndex: Index { + var iterator = makeIterator() + return iterator.next() ?? endIndex + } + public var endIndex: Index { + Index(firstBase.endIndex, secondBase.endIndex, useFirst: false, + useSecond: false) + } + + @inlinable public subscript(position: Index) -> Element { return position } + @inlinable + public subscript(bounds: Range) -> SubSequence { + let subBase1 = firstBase[bounds.lowerBound.first.. Index { + let suffix = self[i...] + var iterator = suffix.makeIterator() + if let firstSuffixElement = iterator.next() { + assert(firstSuffixElement == i) + } else { + preconditionFailure("Attempt to increment past endIndex") + } + return iterator.next() ?? 
endIndex + } +} + +//===----------------------------------------------------------------------===// +// MergedCollection +//===----------------------------------------------------------------------===// + +/// A lazy sorted collection of a set combination of two sorted source +/// collections. +/// +/// - Warning: Calculation of `startIndex` will take O(*n* + *m*) time instead +/// of O(1) time, where *n* and *m* are the lengths of the source collections. +/// This also affects anything that needs to work with `startIndex`, like +/// `isEmpty`. +public typealias MergedCollection = + MergedSequence where T.Element == U.Element + +extension MergedCollection { + /// A sequence of each iteration stop. + public typealias IterationSteps = MergedCollectionSteps + + /// Expresses the sequence the elements from both operand collections in + /// formation are visited. + @inlinable + public var iterationSteps: IterationSteps { + IterationSteps(firstBase, secondBase, keeping: selection) { + try! areInIncreasingOrder($0, $1) + } + } +} + +extension MergedSequence: Collection, LazyCollectionProtocol +where Base1: Collection, Base2: Collection { + public typealias Index = MergedCollectionIterationStep + public typealias SubSequence = MergedSequence + + /// Expresses a quick-and-dirty reference to the entire collection, without + /// risking infinite recursion. 
+ @usableFromInline + var subSelf: SubSequence { + SubSequence(firstBase[...], secondBase[...], keeping: selection, + by: areInIncreasingOrder) + } + + @inlinable + public var indices: SubSequence.IterationSteps { subSelf.iterationSteps } + @inlinable public var startIndex: Index { indices.startIndex } + @inlinable public var endIndex: Index { indices.endIndex } + + @inlinable + public subscript(position: Index) -> Element { + if position.useFirst { + return firstBase[position.first] + } else if position.useSecond { + return secondBase[position.second] + } else { + preconditionFailure("Attempt to dereference endIndex") + } + } + @inlinable + public subscript(bounds: Range) -> SubSequence { + let subBase1 = firstBase[bounds.lowerBound.first.. Index { + return self[i...].indices.index(after: i) + } +} + //===----------------------------------------------------------------------===// // mergeSorted(with: keeping: into: by:) //===----------------------------------------------------------------------===// diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index 7c04fc17..731bf839 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -130,6 +130,11 @@ final class MergeSortedTests: XCTestCase { empty.lazy.mergeSorted(with: empty.lazy, keeping: $0) } XCTAssertEqualSequences(emptyMergerDoubleLazy.map(Array.init), emptyResults) + + // Quick collection checks + XCTAssertEqualSequences(emptyMergerDoubleLazy.map(\.isEmpty), [ + true, true, true, true, true, true, true, true, true + ]) } /// Check results from using one empty and one non-empty operand. @@ -309,6 +314,54 @@ final class MergeSortedTests: XCTestCase { [0, 1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11, 12, 13, 14] ]) } + + /// Check index iteration and dereferencing. 
+ func testIndices() { + let range1 = 0..<10, range2 = 5...15 + let range1to2Mergers = SetCombination.allCases.map { + (MergedCollection(range1, range2, keeping: $0, by: <), + range1.mergeSorted(with: range2, keeping: $0)) + } + XCTAssertEqualSequences(range1to2Mergers.map(\.0.isEmpty), + range1to2Mergers.map(\.1.isEmpty)) + XCTAssertEqualSequences(range1to2Mergers.map { doubleMerger in + doubleMerger.0.indices.map { doubleMerger.0[$0] } + }, range1to2Mergers.map(\.1)) + XCTAssertEqualSequences(range1to2Mergers.map(\.0.iterationSteps + .underestimatedCount), [ + 0, 0, 0, 0, 0, 10, 11, 11, 21 + ]) + XCTAssertEqualSequences(range1to2Mergers.map { + var i = $0.0.startIndex, result = [Int]() + result.reserveCapacity($0.0.underestimatedCount) + while i < $0.0.endIndex { + result.append($0.0[i]) + $0.0.formIndex(after: &i) + } + return result + }, range1to2Mergers.map(\.1)) + + let range2to1Mergers = SetCombination.allCases.map { + (MergedCollection(range2, range1, keeping: $0, by: <), + range2.mergeSorted(with: range1, keeping: $0)) + } + XCTAssertEqualSequences(range2to1Mergers.map { doubleMerger in + doubleMerger.0.indices.map { doubleMerger.0[$0] } + }, range2to1Mergers.map(\.1)) + XCTAssertEqualSequences(range2to1Mergers.map(\.0.iterationSteps + .underestimatedCount), [ + 0, 0, 0, 0, 0, 11, 10, 11, 21 + ]) + XCTAssertEqualSequences(range2to1Mergers.map { + var i = $0.0.startIndex, result = [Int]() + result.reserveCapacity($0.0.underestimatedCount) + while i < $0.0.endIndex { + result.append($0.0[i]) + $0.0.formIndex(after: &i) + } + return result + }, range2to1Mergers.map(\.1)) + } } //-----------------------------------------------------------------------------/ From bcb7f2ce1b5549e55a600a21425979d50e25060e Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 12 Nov 2020 21:14:18 -0500 Subject: [PATCH 7/8] Add documentation for sorted merged sequences --- Guides/MergeSorted.md | 125 ++++++++++++++++++++++++++++++++++++++++++ README.md | 1 + 2 files changed, 126 
insertions(+) create mode 100644 Guides/MergeSorted.md diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md new file mode 100644 index 00000000..ed5fa7a4 --- /dev/null +++ b/Guides/MergeSorted.md @@ -0,0 +1,125 @@ +# Merge Sorted + +[[Source](../Sources/Algorithms/MergeSorted.swift) | + [Tests](../Tests/SwiftAlgorithmsTests/MergeSortedTests.swift)] + +A method that returns the merger of the sorted receiver and the sorted argument, +or a subset of that merger. The result is also sorted, with the same criteria. + +## Detailed Design + +The `mergeSorted(with:keeping:by:)` method is declared as a `Sequence` +extension, and returns a standard `Array` of the same element type. + +```swift +extension Sequence { + /// Returns an array listing the merger of this sequence and the given + /// sequence, but keeping only the selected subset, assuming both sources are + /// sorted according to the given predicate that can compare elements. + public func mergeSorted( + with second: S, + keeping selection: SetCombination, + by areInIncreasingOrder: (Element, Element) throws -> Bool + ) rethrows -> [Element] where S.Element == Element +} +``` + +Besides the sequence that will be combined with the receiver and the predicate +to be used as the sorting criteria, the following subsets of the merged sequence +can be selected: + +```swift +/// The manners two (multi)sets may be combined. +public enum SetCombination: CaseIterable { + case nothing, firstMinusSecond, secondMinusFirst, symmetricDifference, + intersection, first, second, union, sum +} +``` + +The `.sum` case is the usual merge sort. The `.nothing`, `.first`, `.second` +cases are somewhat degenerate and aren't generally used. The other cases are +the usual subsets. The difference between `.union` and `.sum` is that the +former generates mergers where common elements are included only once, while the +latter includes both copies of each shared value.
When `.sum` is in place, the +copies from the second sequence go after all the copies from the first. + +When the `Element` type is `Comparable`, the `mergeSorted(with:keeping:)` method +is added, which defaults the comparison predicate to the standard `<` operator: + +```swift +extension Sequence where Element: Comparable { + /// Returns an array listing the merger of this sequence and the given + /// sequence, but keeping only the selected subset, and assuming both sources + /// are sorted. + public func mergeSorted( + with second: S, + keeping selection: SetCombination + ) -> [Element] where S.Element == Element +} +``` + +If the ordering predicate does not throw, then the merged sequence may be +computed on-demand by making at least the receiver lazy: + +```swift +extension LazySequenceProtocol { + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given lazy sequence, but keeping only the selected subset, assuming both + /// sources are sorted according to the given predicate that can compare + /// elements. + public func mergeSorted( + with second: S, + keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (Element, Element) -> Bool + ) -> MergedSequence where S.Element == Element + + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given sequence, but keeping only the selected subset, assuming both + /// sources are sorted according to the given predicate that can compare + /// elements. + public func mergeSorted( + with second: S, + keeping selection: SetCombination, + by areInIncreasingOrder: @escaping (Element, Element) -> Bool + ) -> MergedSequence where S.Element == Element +} + +extension LazySequenceProtocol where Element: Comparable { + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given lazy sequence, but keeping only the selected subset, and assuming + /// both sources are sorted. 
+ public func mergeSorted( + with second: S, + keeping selection: SetCombination + ) -> MergedSequence where S.Element == Element + + /// Returns a lazy sequence listing the merger of this lazy sequence and the + /// given sequence, but keeping only the selected subset, and assuming both + /// sources are sorted. + public func mergeSorted( + with second: S, + keeping selection: SetCombination + ) -> MergedSequence where S.Element == Element +} +``` + +If both source sequences also conform to (at least) `Collection`, then the +returned sequence representing the merger is also a collection. + +### Complexity + +Calling `mergeSorted(with:keeping:by:)` or `mergeSorted(with:keeping:)` is an +O(*n* + *m*) operation, where *n* and *m* are the lengths of the operand +sequences. Creating an iterator and/or lazy sequence is O(1), while iterating +through all of the lazy sequence will be O(*n* + *m*). If the kept subset is one of +the degenerate cases, the complexity will be lower. + +### Comparison with other languages + +**C++:** The `<algorithm>` library defines the `set_difference`, +`set_intersection`, `set_symmetric_difference`, `set_union`, and `merge` +functions. They can all be distilled into one algorithm, which the +`mergeSorted(with:keeping:by:)` method and its overloads do for Swift. The +`.firstMinusSecond` and `.secondMinusFirst` subsets are equivalent to calls to +`set_difference`; `.intersection` to `set_intersection`; `.symmetricDifference` +to `set_symmetric_difference`; `.union` to `set_union`; and `.sum` to `merge`. diff --git a/README.md b/README.md index 6bc15bc8..8791c65f 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Read more about the package, and the intent behind it, in the [announcement on s - [`chunked(by:)`, `chunked(on:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Chunked.md): Eager and lazy operations that break a collection into chunks based on either a binary predicate or when the result of a projection changes.
- [`indexed()`](https://github.com/apple/swift-algorithms/blob/main/Guides/Indexed.md): Iterate over tuples of a collection's indices and elements. +- [`mergeSorted(with:keeping:by:)`, `mergeSorted(with:keeping:)`](./Guides/MergeSorted.md): Eager and lazy operations that take another sequence, assume that both the given sequence and the receiver are sorted according to the given predicate (defaults to `<`), and return the given subset of the sequences' merger (also sorted). ## Adding Swift Algorithms as a Dependency From 4899331feacee3e7c7cde0c72b9192457e5cbc29 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 13 Nov 2020 00:26:04 -0500 Subject: [PATCH 8/8] Add test with custom predicate Add test to merge sorted sequences with a custom predicate that is not the standard less-than operator. This predicate does not use all of the operands' members, so the source of a particular element can be tested. --- .../MergeSortedTests.swift | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index 731bf839..348b6f69 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -362,6 +362,66 @@ final class MergeSortedTests: XCTestCase { return result }, range2to1Mergers.map(\.1)) } + + /// Check the differences when elements from the first sequence are filtered + /// out versus the second.
+  func testPropertyOrdering() {  // Merge two tagged samples under every `SetCombination` case; check the kept values and which operand supplied each one.
+    let sample1 = [0, 2, 4, 6, 8, 10, 12].map { ($0, true) }  // every element tagged `true`
+    let sample2 = [0, 3, 6, 9, 12].map { ($0, false) }  // every element tagged `false`; 0, 6, and 12 also occur in `sample1`
+    let sample1to2Mergers = SetCombination.allCases.map {  // one merger per case, with `sample1` as the first operand
+      MergedSequence(sample1, sample2, keeping: $0, by: { x, y in x.0 < y.0 })  // ordering compares the values only, never the tags
+    }
+    XCTAssertEqualSequences(sample1to2Mergers.map { $0.map(\.0) }, [  // expected values; rows follow `allCases` order
+      [],  // .nothing
+      [2, 4, 8, 10],  // .firstMinusSecond
+      [3, 9],  // .secondMinusFirst
+      [2, 3, 4, 8, 9, 10],  // .symmetricDifference
+      [0, 6, 12],  // .intersection
+      [0, 2, 4, 6, 8, 10, 12],  // .first
+      [0, 3, 6, 9, 12],  // .second
+      [0, 2, 3, 4, 6, 8, 9, 10, 12],  // .union
+      [0, 0, 2, 3, 4, 6, 6, 8, 9, 10, 12, 12]  // .sum
+    ])
+    XCTAssertEqualSequences(sample1to2Mergers.map { $0.map(\.1) }, [  // expected source tags for the same rows
+      [],
+      [true, true, true, true],
+      [false, false],
+      [true, false, true, true, false, true],
+      [true, true, true],  // .intersection: each shared value is taken from the first operand
+      [true, true, true, true, true, true, true],
+      [false, false, false, false, false],
+      [true, true, false, true, true, true, false, true, true],  // .union: each shared value is taken from the first operand
+      [true, false, true, false, true, true, false, true, false, true, true,  // .sum: for a shared value, the first operand's copy comes first
+       false]
+    ])
+    // Repeat with the operands swapped, so `false` now marks the first operand.
+    let sample2to1Mergers = SetCombination.allCases.map {  // one merger per case, with `sample2` as the first operand
+      MergedSequence(sample2, sample1, keeping: $0, by: { x, y in x.0 < y.0 })  // same value-only ordering as above
+    }
+    XCTAssertEqualSequences(sample2to1Mergers.map { $0.map(\.0) }, [  // expected values; rows follow `allCases` order
+      [],  // .nothing
+      [3, 9],  // .firstMinusSecond
+      [2, 4, 8, 10],  // .secondMinusFirst
+      [2, 3, 4, 8, 9, 10],  // .symmetricDifference
+      [0, 6, 12],  // .intersection
+      [0, 3, 6, 9, 12],  // .first
+      [0, 2, 4, 6, 8, 10, 12],  // .second
+      [0, 2, 3, 4, 6, 8, 9, 10, 12],  // .union
+      [0, 0, 2, 3, 4, 6, 6, 8, 9, 10, 12, 12]  // .sum
+    ])
+    XCTAssertEqualSequences(sample2to1Mergers.map { $0.map(\.1) }, [  // expected source tags for the same rows
+      [],
+      [false, false],
+      [true, true, true, true],
+      [true, false, true, true, false, true],
+      [false, false, false],  // .intersection: each shared value is taken from the first operand (now `sample2`)
+      [false, false, false, false, false],
+      [true, true, true, true, true, true, true],
+      [false, true, false, true, false, true, false, true, false],  // .union: each shared value is taken from the first operand (now `sample2`)
+      [false, true, true, false, true, false, true, true, false, true, false,  // .sum: for a shared value, the first operand's copy comes first
+       true]
+    ])
+  }
 }

 //-----------------------------------------------------------------------------/