-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Improve performance of spsc_queue and stream. #44963
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+192
−107
Merged
Changes from 1 commit
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT | ||
// file at the top-level directory of this distribution and at | ||
// http://rust-lang.org/COPYRIGHT. | ||
// | ||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | ||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | ||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | ||
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
use ops::{Deref, DerefMut}; | ||
|
||
/// Field-less marker type that exists only to carry a 64-byte alignment | ||
/// requirement (see `#[repr(align(64))]` below). Embedding it as a field of | ||
/// another struct raises that struct's alignment to at least 64 bytes. | ||
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
#[repr(align(64))] | ||
pub(super) struct Aligner; | ||
|
||
/// Wrapper that stores a `T` next to an `Aligner`, so the wrapper as a whole | ||
/// is aligned to at least 64 bytes. Field `0` is the wrapped value; field `1` | ||
/// is the alignment marker and holds no data. | ||
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
pub(super) struct CacheAligned<T>(pub T, pub Aligner); | ||
|
||
// Lets a `CacheAligned<T>` be read through as if it were the wrapped `T` by | ||
// borrowing field `0`. | ||
impl<T> Deref for CacheAligned<T> { | ||
type Target = T; | ||
fn deref(&self) -> &Self::Target { | ||
&self.0 | ||
} | ||
} | ||
|
||
// Mutable counterpart of the `Deref` impl: hands out `&mut` access to the | ||
// wrapped value (field `0`). | ||
impl<T> DerefMut for CacheAligned<T> { | ||
fn deref_mut(&mut self) -> &mut Self::Target { | ||
&mut self.0 | ||
} | ||
} | ||
|
||
impl<T> CacheAligned<T> { | ||
/// Wraps `t` in a `CacheAligned`, supplying the `Aligner` marker field. | ||
pub(super) fn new(t: T) -> Self { | ||
CacheAligned(t, Aligner) | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,50 +22,61 @@ use core::cell::UnsafeCell; | |
|
||
use sync::atomic::{AtomicPtr, AtomicUsize, Ordering}; | ||
|
||
use super::cache_aligned::CacheAligned; | ||
|
||
// Node within the linked list queue of messages to send. Nodes are created | ||
// by `Node::new` as raw heap pointers and linked through `next`. | ||
struct Node<T> { | ||
// FIXME: this could be an uninitialized T if we're careful enough, and | ||
// that would reduce memory usage (and be a bit faster). | ||
// is it worth it? | ||
value: Option<T>, // nullable for re-use of nodes | ||
cached: bool, // This node goes into the node cache (kept for reuse instead of being freed) | ||
next: AtomicPtr<Node<T>>, // next node in the queue | ||
} | ||
|
||
/// The single-producer single-consumer queue. This structure is not cloneable, | ||
/// but it can be safely shared in an Arc if it is guaranteed that there | ||
/// is only one popper and one pusher touching the queue at any one point in | ||
/// time. | ||
pub struct Queue<T> { | ||
pub struct Queue<T, ProducerAddition=(), ConsumerAddition=()> { | ||
// consumer fields | ||
consumer: CacheAligned<Consumer<T, ConsumerAddition>>, | ||
|
||
// producer fields | ||
producer: CacheAligned<Producer<T, ProducerAddition>>, | ||
} | ||
|
||
// Consumer-side state of the queue, kept together in one `CacheAligned` | ||
// wrapper separate from the producer-side state. | ||
struct Consumer<T, Addition> { | ||
tail: UnsafeCell<*mut Node<T>>, // where to pop from | ||
tail_prev: AtomicPtr<Node<T>>, // node linked just before `tail`; loaded by the producer in `alloc` | ||
cache_bound: usize, // maximum cache size | ||
cached_nodes: AtomicUsize, // number of nodes marked as cacheable | ||
addition: Addition, // caller-supplied extra data, exposed via `consumer_addition` | ||
} | ||
|
||
// producer fields | ||
struct Producer<T, Addition> { | ||
head: UnsafeCell<*mut Node<T>>, // where to push to | ||
first: UnsafeCell<*mut Node<T>>, // where to get new nodes from | ||
tail_copy: UnsafeCell<*mut Node<T>>, // between first/tail | ||
|
||
// Cache maintenance fields. Additions and subtractions are stored | ||
// separately in order to allow them to use nonatomic addition/subtraction. | ||
cache_bound: usize, | ||
cache_additions: AtomicUsize, | ||
cache_subtractions: AtomicUsize, | ||
addition: Addition, | ||
} | ||
|
||
unsafe impl<T: Send> Send for Queue<T> { } | ||
unsafe impl<T: Send, P: Send + Sync, C: Send + Sync> Send for Queue<T, P, C> { } | ||
|
||
unsafe impl<T: Send> Sync for Queue<T> { } | ||
unsafe impl<T: Send, P: Send + Sync, C: Send + Sync> Sync for Queue<T, P, C> { } | ||
|
||
impl<T> Node<T> { | ||
// Heap-allocates an empty node (no value, not cached, null `next`) and | ||
// returns it as a raw pointer via `Box::into_raw`. The caller takes over | ||
// ownership; elsewhere in this file nodes are freed with `Box::from_raw`. | ||
fn new() -> *mut Node<T> { | ||
Box::into_raw(box Node { | ||
value: None, | ||
cached: false, | ||
next: AtomicPtr::new(ptr::null_mut::<Node<T>>()), | ||
}) | ||
} | ||
} | ||
|
||
impl<T> Queue<T> { | ||
#[cfg(test)] | ||
/// Creates a new queue. | ||
/// | ||
/// This is unsafe as the type system doesn't enforce a single | ||
|
@@ -84,18 +95,60 @@ impl<T> Queue<T> { | |
/// no bound. Otherwise, the cache will never grow larger than | ||
/// `bound` (although the queue itself could be much larger). | ||
pub unsafe fn new(bound: usize) -> Queue<T> { | ||
Self::with_additions(bound, (), ()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method is never used on asm.js (Emscripten), causing an unused warning when testing [02:11:04] Testing libstd stage2 (x86_64-unknown-linux-gnu -> asmjs-unknown-emscripten)
[02:11:04] Compiling rand v0.0.0 (file:///checkout/src/librand)
[02:11:04] Compiling std_unicode v0.0.0 (file:///checkout/src/libstd_unicode)
[02:11:04] Compiling alloc v0.0.0 (file:///checkout/src/liballoc)
[02:11:04] Compiling core v0.0.0 (file:///checkout/src/libcore)
[02:11:12] Compiling collections v0.0.0 (file:///checkout/src/libcollections)
[02:11:21] Compiling std v0.0.0 (file:///checkout/src/libstd)
[02:11:50] error: method is never used: `new`
[02:11:50] --> /checkout/src/libstd/sync/mpsc/spsc_queue.rs:97:5
[02:11:50] |
[02:11:50] 97 | pub unsafe fn new(bound: usize) -> Queue<T> {
[02:11:50] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[02:11:50] |
[02:11:50] note: lint level defined here
[02:11:50] --> /checkout/src/libstd/lib.rs:232:9
[02:11:50] |
[02:11:50] 232| #![deny(warnings)]
[02:11:50] | ^^^^^^^^
[02:11:50] = note: #[deny(dead_code)] implied by #[deny(warnings)]
[02:11:50]
[02:11:51] error: aborting due to previous error
[02:11:51]
[02:11:52] error: Could not compile `std`.
[02:11:52] warning: build failed, waiting for other jobs to finish...
[02:12:49] error: build failed
[02:12:49]
[02:12:49]
[02:12:49] command did not execute successfully: "/checkout/obj/build/x86_64-unknown-linux-gnu/stage0/bin/cargo" "test" "--target" "asmjs-unknown-emscripten" "-j" "4" "--release" "--locked" "--color" "always" "--features" "panic-unwind jemalloc backtrace" "--manifest-path" "/checkout/src/libstd/Cargo.toml" "-p" "std:0.0.0" "-p" "std_unicode:0.0.0" "-p" "alloc:0.0.0" "-p" "panic_abort:0.0.0" "-p" "rand:0.0.0" "-p" "compiler_builtins:0.0.0" "-p" "unwind:0.0.0" "-p" "core:0.0.0" "-p" "libc:0.0.0" "-p" "collections:0.0.0" "-p" "alloc_system:0.0.0" "--"
[02:12:49] expected success, got: exit code: 101
[02:12:49]
[02:12:49]
[02:12:49] failed to run: /checkout/obj/build/bootstrap/debug/bootstrap test --target asmjs-unknown-emscripten
[02:12:49] Build completed unsuccessfully in 2:10:23 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've cfg'd it out for emscripten in 41320fa |
||
} | ||
} | ||
|
||
impl<T, ProducerAddition, ConsumerAddition> Queue<T, ProducerAddition, ConsumerAddition> { | ||
|
||
/// Creates a new queue with the given additional elements in the producer and | ||
/// consumer portions of the queue. | ||
/// | ||
/// Due to the performance implications of cache-contention, | ||
/// we wish to keep fields used mainly by the producer on a separate cache | ||
/// line than those used by the consumer. | ||
/// Since cache lines are usually 64 bytes, it is unreasonably expensive to | ||
/// allocate one for small fields, so we allow users to insert additional | ||
/// fields into the cache lines already allocated by this for the producer | ||
/// and consumer. | ||
/// | ||
/// This is unsafe as the type system doesn't enforce a single | ||
/// consumer-producer relationship. It also allows the consumer to `pop` | ||
/// items while there is a `peek` active due to all methods having a | ||
/// non-mutable receiver. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `bound` - This queue implementation is implemented with a linked | ||
/// list, and this means that a push is always a malloc. In | ||
/// order to amortize this cost, an internal cache of nodes is | ||
/// maintained to prevent a malloc from always being | ||
/// necessary. This bound is the limit on the size of the | ||
/// cache (if desired). If the value is 0, then the cache has | ||
/// no bound. Otherwise, the cache will never grow larger than | ||
/// `bound` (although the queue itself could be much larger). | ||
pub unsafe fn with_additions( | ||
bound: usize, | ||
producer_addition: ProducerAddition, | ||
consumer_addition: ConsumerAddition, | ||
) -> Self { | ||
let n1 = Node::new(); | ||
let n2 = Node::new(); | ||
(*n1).next.store(n2, Ordering::Relaxed); | ||
Queue { | ||
tail: UnsafeCell::new(n2), | ||
tail_prev: AtomicPtr::new(n1), | ||
head: UnsafeCell::new(n2), | ||
first: UnsafeCell::new(n1), | ||
tail_copy: UnsafeCell::new(n1), | ||
cache_bound: bound, | ||
cache_additions: AtomicUsize::new(0), | ||
cache_subtractions: AtomicUsize::new(0), | ||
consumer: CacheAligned::new(Consumer { | ||
tail: UnsafeCell::new(n2), | ||
tail_prev: AtomicPtr::new(n1), | ||
cache_bound: bound, | ||
cached_nodes: AtomicUsize::new(0), | ||
addition: consumer_addition | ||
}), | ||
producer: CacheAligned::new(Producer { | ||
head: UnsafeCell::new(n2), | ||
first: UnsafeCell::new(n1), | ||
tail_copy: UnsafeCell::new(n1), | ||
addition: producer_addition | ||
}), | ||
} | ||
} | ||
|
||
|
@@ -109,35 +162,25 @@ impl<T> Queue<T> { | |
assert!((*n).value.is_none()); | ||
(*n).value = Some(t); | ||
(*n).next.store(ptr::null_mut(), Ordering::Relaxed); | ||
(**self.head.get()).next.store(n, Ordering::Release); | ||
*self.head.get() = n; | ||
(**self.producer.head.get()).next.store(n, Ordering::Release); | ||
*(&self.producer.head).get() = n; | ||
} | ||
} | ||
|
||
unsafe fn alloc(&self) -> *mut Node<T> { | ||
// First try to see if we can consume the 'first' node for our uses. | ||
// We try to avoid as many atomic instructions as possible here, so | ||
// the addition to cache_subtractions is not atomic (plus we're the | ||
// only one subtracting from the cache). | ||
if *self.first.get() != *self.tail_copy.get() { | ||
if self.cache_bound > 0 { | ||
let b = self.cache_subtractions.load(Ordering::Relaxed); | ||
self.cache_subtractions.store(b + 1, Ordering::Relaxed); | ||
} | ||
let ret = *self.first.get(); | ||
*self.first.get() = (*ret).next.load(Ordering::Relaxed); | ||
if *self.producer.first.get() != *self.producer.tail_copy.get() { | ||
let ret = *self.producer.first.get(); | ||
*self.producer.0.first.get() = (*ret).next.load(Ordering::Relaxed); | ||
return ret; | ||
} | ||
// If the above fails, then update our copy of the tail and try | ||
// again. | ||
*self.tail_copy.get() = self.tail_prev.load(Ordering::Acquire); | ||
if *self.first.get() != *self.tail_copy.get() { | ||
if self.cache_bound > 0 { | ||
let b = self.cache_subtractions.load(Ordering::Relaxed); | ||
self.cache_subtractions.store(b + 1, Ordering::Relaxed); | ||
} | ||
let ret = *self.first.get(); | ||
*self.first.get() = (*ret).next.load(Ordering::Relaxed); | ||
*self.producer.0.tail_copy.get() = | ||
self.consumer.tail_prev.load(Ordering::Acquire); | ||
if *self.producer.first.get() != *self.producer.tail_copy.get() { | ||
let ret = *self.producer.first.get(); | ||
*self.producer.0.first.get() = (*ret).next.load(Ordering::Relaxed); | ||
return ret; | ||
} | ||
// If all of that fails, then we have to allocate a new node | ||
|
@@ -153,27 +196,27 @@ impl<T> Queue<T> { | |
// sentinel from where we should start popping from. Hence, look at | ||
// tail's next field and see if we can use it. If we do a pop, then | ||
// the current tail node is a candidate for going into the cache. | ||
let tail = *self.tail.get(); | ||
let tail = *self.consumer.tail.get(); | ||
let next = (*tail).next.load(Ordering::Acquire); | ||
if next.is_null() { return None } | ||
assert!((*next).value.is_some()); | ||
let ret = (*next).value.take(); | ||
|
||
*self.tail.get() = next; | ||
if self.cache_bound == 0 { | ||
self.tail_prev.store(tail, Ordering::Release); | ||
*self.consumer.0.tail.get() = next; | ||
if self.consumer.cache_bound == 0 { | ||
self.consumer.tail_prev.store(tail, Ordering::Release); | ||
} else { | ||
// FIXME: this is dubious with overflow. | ||
let additions = self.cache_additions.load(Ordering::Relaxed); | ||
let subtractions = self.cache_subtractions.load(Ordering::Relaxed); | ||
let size = additions - subtractions; | ||
|
||
if size < self.cache_bound { | ||
self.tail_prev.store(tail, Ordering::Release); | ||
self.cache_additions.store(additions + 1, Ordering::Relaxed); | ||
let cached_nodes = self.consumer.cached_nodes.load(Ordering::Relaxed); | ||
if cached_nodes < self.consumer.cache_bound && !(*tail).cached { | ||
self.consumer.cached_nodes.store(cached_nodes, Ordering::Relaxed); | ||
(*tail).cached = true; | ||
} | ||
|
||
if (*tail).cached { | ||
self.consumer.tail_prev.store(tail, Ordering::Release); | ||
} else { | ||
(*self.tail_prev.load(Ordering::Relaxed)) | ||
.next.store(next, Ordering::Relaxed); | ||
(*self.consumer.tail_prev.load(Ordering::Relaxed)) | ||
.next.store(next, Ordering::Relaxed); | ||
// We have successfully erased all references to 'tail', so | ||
// now we can safely drop it. | ||
let _: Box<Node<T>> = Box::from_raw(tail); | ||
|
@@ -194,17 +237,25 @@ impl<T> Queue<T> { | |
// This is essentially the same as above with all the popping bits | ||
// stripped out. | ||
unsafe { | ||
let tail = *self.tail.get(); | ||
let tail = *self.consumer.tail.get(); | ||
let next = (*tail).next.load(Ordering::Acquire); | ||
if next.is_null() { None } else { (*next).value.as_mut() } | ||
} | ||
} | ||
|
||
/// Returns a shared reference to the extra producer-side value that was | ||
/// passed to `with_additions` as `producer_addition`. | ||
pub fn producer_addition(&self) -> &ProducerAddition { | ||
&self.producer.addition | ||
} | ||
|
||
/// Returns a shared reference to the extra consumer-side value that was | ||
/// passed to `with_additions` as `consumer_addition`. | ||
pub fn consumer_addition(&self) -> &ConsumerAddition { | ||
&self.consumer.addition | ||
} | ||
} | ||
|
||
impl<T> Drop for Queue<T> { | ||
impl<T, ProducerAddition, ConsumerAddition> Drop for Queue<T, ProducerAddition, ConsumerAddition> { | ||
fn drop(&mut self) { | ||
unsafe { | ||
let mut cur = *self.first.get(); | ||
let mut cur = *self.producer.first.get(); | ||
while !cur.is_null() { | ||
let next = (*cur).next.load(Ordering::Relaxed); | ||
let _n: Box<Node<T>> = Box::from_raw(cur); | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: I think this can be just