From 0a203ddb33ada4b36bd042209fd8f8cdb865ad0d Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Sun, 12 Jan 2014 21:35:12 +1100
Subject: [PATCH 01/21] std::trie: remove each_{key,value}_reverse internal
 iterators.

These are *trivial* to reimplement in terms of each_reverse if that
extra little bit of performance is needed.
---
 src/libstd/trie.rs | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/src/libstd/trie.rs b/src/libstd/trie.rs
index d8df84bbba8d1..b9a67129af92d 100644
--- a/src/libstd/trie.rs
+++ b/src/libstd/trie.rs
@@ -111,18 +111,6 @@ impl<T> TrieMap<T> {
         self.root.each_reverse(f)
     }
 
-    /// Visit all keys in reverse order
-    #[inline]
-    pub fn each_key_reverse(&self, f: |&uint| -> bool) -> bool {
-        self.each_reverse(|k, _| f(k))
-    }
-
-    /// Visit all values in reverse order
-    #[inline]
-    pub fn each_value_reverse(&self, f: |&T| -> bool) -> bool {
-        self.each_reverse(|_, v| f(v))
-    }
-
     /// Get an iterator over the key-value pairs in the map
     pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, T> {
         TrieMapIterator {
@@ -328,7 +316,7 @@ impl TrieSet {
     /// Visit all values in reverse order
     #[inline]
     pub fn each_reverse(&self, f: |&uint| -> bool) -> bool {
-        self.map.each_key_reverse(f)
+        self.map.each_reverse(|k, _| f(k))
     }
 
     /// Get an iterator over the values in the set

From 364f10103cf0e522bea4b6708d617895a7cd8c85 Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Tue, 14 Jan 2014 01:37:52 +1100
Subject: [PATCH 02/21] std::trie: use unsafe code to give a 3x speed up to
 the iterator.

This stores the stack of iterators inline (we have a maximum depth
with `uint` keys), and then uses direct pointer offsetting to
manipulate it, in a blazing fast way:

Before:
    bench_iter_large ... bench: 43187 ns/iter (+/- 3082)
    bench_iter_small ... bench: 618 ns/iter (+/- 288)

After:
    bench_iter_large ... bench: 13497 ns/iter (+/- 1575)
    bench_iter_small ... bench: 220 ns/iter (+/- 91)
---
 src/libstd/trie.rs | 157 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 118 insertions(+), 39 deletions(-)

diff --git a/src/libstd/trie.rs b/src/libstd/trie.rs
index b9a67129af92d..b48edc72871b3 100644
--- a/src/libstd/trie.rs
+++ b/src/libstd/trie.rs
@@ -11,14 +11,17 @@
 //! Ordered containers with integer keys, implemented as radix tries (`TrieSet` and `TrieMap` types)
 
 use prelude::*;
+use mem;
 use uint;
 use util::replace;
+use unstable::intrinsics::init;
 use vec;
 
 // FIXME: #5244: need to manually update the TrieNode constructor
 static SHIFT: uint = 4;
 static SIZE: uint = 1 << SHIFT;
 static MASK: uint = SIZE - 1;
+static NUM_CHUNKS: uint = uint::bits / SHIFT;
 
 enum Child<T> {
     Internal(~TrieNode<T>),
@@ -111,18 +114,25 @@ impl<T> TrieMap<T> {
     /// Get an iterator over the key-value pairs in the map
     pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, T> {
-        TrieMapIterator {
-            stack: ~[self.root.children.iter()],
-            remaining_min: self.length,
-            remaining_max: self.length
-        }
+        let mut iter = unsafe {TrieMapIterator::new()};
+        iter.stack[0] = self.root.children.iter();
+        iter.length = 1;
+        iter.remaining_min = self.length;
+        iter.remaining_max = self.length;
+
+        iter
     }
 
     /// Get an iterator over the key-value pairs in the map, with the
     /// ability to mutate the values.
pub fn mut_iter<'a>(&'a mut self) -> TrieMapMutIterator<'a, T> { - TrieMapMutIterator { - stack: ~[self.root.children.mut_iter()], - remaining_min: self.length, - remaining_max: self.length - } + let mut iter = unsafe {TrieMapMutIterator::new()}; + iter.stack[0] = self.root.children.mut_iter(); + iter.length = 1; + iter.remaining_min = self.length; + iter.remaining_max = self.length; + + iter } } @@ -176,16 +183,16 @@ macro_rules! bound { let key = $key; - let mut idx = 0; - let mut it = $iterator_name { - stack: ~[], - remaining_min: 0, - remaining_max: this.length - }; + let mut it = unsafe {$iterator_name::new()}; + // everything else is zero'd, as we want. + it.remaining_max = this.length; + // this addr is necessary for the `Internal` pattern. addr!(loop { let children = unsafe {addr!(& $($mut_)* (*node).children)}; - let child_id = chunk(key, idx); + // it.length is the current depth in the iterator and the + // current depth through the `uint` key we've traversed. + let child_id = chunk(key, it.length); let (slice_idx, ret) = match children[child_id] { Internal(ref $($mut_)* n) => { node = addr!(& $($mut_)* **n as * $($mut_)* TrieNode); @@ -202,9 +209,10 @@ macro_rules! bound { (child_id + 1, true) } }; - it.stack.push(children.$slice_from(slice_idx).$iter()); + // push to the stack. + it.stack[it.length] = children.$slice_from(slice_idx).$iter(); + it.length += 1; if ret { return it } - idx += 1; }) } } @@ -467,7 +475,8 @@ fn remove(count: &mut uint, child: &mut Child, key: uint, /// Forward iterator over a map pub struct TrieMapIterator<'a, T> { - priv stack: ~[vec::VecIterator<'a, Child>], + priv stack: [vec::VecIterator<'a, Child>, .. NUM_CHUNKS], + priv length: uint, priv remaining_min: uint, priv remaining_max: uint } @@ -475,7 +484,8 @@ pub struct TrieMapIterator<'a, T> { /// Forward iterator over the key-value pairs of a map, with the /// values being mutable. pub struct TrieMapMutIterator<'a, T> { - priv stack: ~[vec::VecMutIterator<'a, Child>], + priv stack: [vec::VecMutIterator<'a, Child>, .. NUM_CHUNKS], + priv length: uint, priv remaining_min: uint, priv remaining_max: uint } @@ -487,27 +497,96 @@ macro_rules! iterator_impl { ($name:ident, iter = $iter:ident, mutability = $($mut_:tt)*) => { + impl<'a, T> $name<'a, T> { + // Create new zero'd iterator. We have a thin gilding of safety by + // using init rather than uninit, so that the worst that can happen + // from failing to initialise correctly after calling these is a + // segfault. + #[cfg(target_word_size="32")] + unsafe fn new() -> $name<'a, T> { + $name { + remaining_min: 0, + remaining_max: 0, + length: 0, + // ick :( ... at least the compiler will tell us if we screwed up. + stack: [init(), init(), init(), init(), init(), init(), init(), init()] + } + } + + #[cfg(target_word_size="64")] + unsafe fn new() -> $name<'a, T> { + $name { + remaining_min: 0, + remaining_max: 0, + length: 0, + stack: [init(), init(), init(), init(), init(), init(), init(), init(), + init(), init(), init(), init(), init(), init(), init(), init()] + } + } + } + item!(impl<'a, T> Iterator<(uint, &'a $($mut_)* T)> for $name<'a, T> { + // you might wonder why we're not even trying to act within the + // rules, and are just manipulating raw pointers like there's no + // such thing as invalid pointers and memory unsafety. 
The
+            // reason is performance: without doing this we can get the
+            // bench_iter_large microbenchmark down to about 30000 ns/iter
+            // (using .unsafe_ref to index self.stack directly, 38000
+            // ns/iter with [] checked indexing), but this smashes that down
+            // to 13500 ns/iter.
+            //
+            // Fortunately, it's still safe...
+            //
+            // We have an invariant that every Internal node
+            // corresponds to one push to self.stack, and one pop,
+            // nested appropriately. self.stack has enough storage
+            // to store the maximum depth of Internal nodes in the
+            // trie (8 on 32-bit platforms, 16 on 64-bit).
             fn next(&mut self) -> Option<(uint, &'a $($mut_)* T)> {
-                while !self.stack.is_empty() {
-                    match self.stack[self.stack.len() - 1].next() {
-                        None => {
-                            self.stack.pop();
-                        }
-                        Some(child) => {
-                            addr!(match *child {
-                                Internal(ref $($mut_)* node) => {
-                                    self.stack.push(node.children.$iter());
-                                }
-                                External(key, ref $($mut_)* value) => {
-                                    self.remaining_max -= 1;
-                                    if self.remaining_min > 0 {
-                                        self.remaining_min -= 1;
+                let start_ptr = self.stack.as_mut_ptr();
+
+                unsafe {
+                    // write_ptr is the next place to write to the stack.
+                    // invariant: start_ptr <= write_ptr < end of the
+                    // vector.
+                    let mut write_ptr = start_ptr.offset(self.length as int);
+                    while write_ptr != start_ptr {
+                        // indexing back one is safe, since write_ptr >
+                        // start_ptr now.
+                        match (*write_ptr.offset(-1)).next() {
+                            // exhausted this iterator (i.e. finished this
+                            // Internal node), so pop from the stack.
+                            //
+                            // don't bother clearing the memory, because the
+                            // next time we use it we'll have written to it
+                            // first.
+                            None => write_ptr = write_ptr.offset(-1),
+                            Some(child) => {
+                                addr!(match *child {
+                                    Internal(ref $($mut_)* node) => {
+                                        // going down a level, so push
+                                        // to the stack (this is the
+                                        // write referenced above)
+                                        *write_ptr = node.children.$iter();
+                                        write_ptr = write_ptr.offset(1);
+                                    }
+                                    External(key, ref $($mut_)* value) => {
+                                        self.remaining_max -= 1;
+                                        if self.remaining_min > 0 {
+                                            self.remaining_min -= 1;
+                                        }
+                                        // store the new length of the
+                                        // stack, based on our current
+                                        // position.
+                                        self.length = (write_ptr as uint
+                                                       - start_ptr as uint) /
+                                                       mem::size_of_val(&*write_ptr);
+
+                                        return Some((key, value));
                                     }
-                                    return Some((key, value));
-                                }
-                                Nothing => {}
-                            })
+                                    Nothing => {}
+                                })
+                            }
                         }
                     }
                 }

From 38caca6bca1513f878c5468ca979e5ae77221cee Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Wed, 8 Jan 2014 00:22:01 +1100
Subject: [PATCH 03/21] rustc: Implement the #[managed] attribute and add an
 intrinsic to detect this.

Renames owns_managed -> owns_at_managed, and adds owns_new_managed.
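
As a rough sketch of the intended use (not part of this patch: `MyPtr`
and `needs_scan` are hypothetical names, while the `managed` feature
gate, the `#[managed]` attribute, and the intrinsic are the ones added
below):

    #[feature(managed)];

    use std::unstable::intrinsics::owns_new_managed;

    // opt a library smart pointer into the "new managed" tracking
    #[managed]
    pub struct MyPtr<T> { priv ptr: *T }

    // decide at compile time whether a buffer of `T`s can ever
    // contain such a pointer, and so whether it would need scanning
    fn needs_scan<T>() -> bool {
        unsafe { owns_new_managed::<T>() }
    }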
--- src/librustc/front/feature_gate.rs | 11 +++++++++++ src/librustc/middle/lint.rs | 1 + src/librustc/middle/trans/_match.rs | 1 - src/librustc/middle/trans/intrinsic.rs | 8 ++++++-- src/librustc/middle/trans/tvec.rs | 1 - src/librustc/middle/ty.rs | 24 +++++++++++++++--------- src/librustc/middle/typeck/check/mod.rs | 3 +-- src/libstd/unstable/intrinsics.rs | 9 +++++++++ 8 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/librustc/front/feature_gate.rs b/src/librustc/front/feature_gate.rs index c0732e93bee74..ec4e2c0747a59 100644 --- a/src/librustc/front/feature_gate.rs +++ b/src/librustc/front/feature_gate.rs @@ -40,6 +40,7 @@ static KNOWN_FEATURES: &'static [(&'static str, Status)] = &[ ("once_fns", Active), ("asm", Active), ("managed_boxes", Active), + ("managed", Active), ("non_ascii_idents", Active), ("thread_local", Active), ("link_args", Active), @@ -153,6 +154,16 @@ impl Visitor<()> for Context { _ => {} } + match i.node { + ast::ItemEnum(..) | ast::ItemStruct(..) => { + if attr::contains_name(i.attrs, "managed") { + self.gate_feature("managed", i.span, + "the `managed` attribute doesn't have safe support \ + for built-in pointers like ~ and ~[]") + } + } + _ => {} + } visit::walk_item(self, i, ()); } diff --git a/src/librustc/middle/lint.rs b/src/librustc/middle/lint.rs index 6da58c03b7247..10d98fbfcde0c 100644 --- a/src/librustc/middle/lint.rs +++ b/src/librustc/middle/lint.rs @@ -926,6 +926,7 @@ static other_attrs: &'static [&'static str] = &[ "crate_map", "cfg", "doc", "export_name", "link_section", "no_freeze", "no_mangle", "no_send", "static_assert", "unsafe_no_drop_flag", "packed", "simd", "repr", "deriving", "unsafe_destructor", "link", + "managed", //mod-level "path", "link_name", "link_args", "nolink", "macro_escape", "no_implicit_prelude", diff --git a/src/librustc/middle/trans/_match.rs b/src/librustc/middle/trans/_match.rs index a7924946ed185..1c61240621a1c 100644 --- a/src/librustc/middle/trans/_match.rs +++ b/src/librustc/middle/trans/_match.rs @@ -2253,4 +2253,3 @@ fn simple_identifier<'a>(pat: &'a ast::Pat) -> Option<&'a ast::Path> { } } } - diff --git a/src/librustc/middle/trans/intrinsic.rs b/src/librustc/middle/trans/intrinsic.rs index ff5c22e726a37..a44678838417c 100644 --- a/src/librustc/middle/trans/intrinsic.rs +++ b/src/librustc/middle/trans/intrinsic.rs @@ -414,9 +414,13 @@ pub fn trans_intrinsic(ccx: @CrateContext, let tp_ty = substs.tys[0]; Ret(bcx, C_bool(ty::type_needs_drop(ccx.tcx, tp_ty))); } - "owns_managed" => { + "owns_at_managed" => { let tp_ty = substs.tys[0]; - Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).owns_managed())); + Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).owns_at_managed())); + } + "owns_new_managed" => { + let tp_ty = substs.tys[0]; + Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).owns_new_managed())); } "visit_tydesc" => { let td = get_param(decl, first_real_arg); diff --git a/src/librustc/middle/trans/tvec.rs b/src/librustc/middle/trans/tvec.rs index 100f28af97dad..031bbd85dcf4e 100644 --- a/src/librustc/middle/trans/tvec.rs +++ b/src/librustc/middle/trans/tvec.rs @@ -720,4 +720,3 @@ pub fn iter_vec_unboxed<'r, let dataptr = get_dataptr(bcx, body_ptr); return iter_vec_raw(bcx, dataptr, vec_ty, fill, f); } - diff --git a/src/librustc/middle/ty.rs b/src/librustc/middle/ty.rs index fd3bf0deae799..1a4747444a3c6 100644 --- a/src/librustc/middle/ty.rs +++ b/src/librustc/middle/ty.rs @@ -1759,14 +1759,15 @@ def_type_content_sets!( // Things that are owned by the value (second and third nibbles): 
OwnsOwned = 0b0000__00000001__0000, OwnsDtor = 0b0000__00000010__0000, - OwnsManaged /* see [1] below */ = 0b0000__00000100__0000, + OwnsAtManaged /* see [1] below */ = 0b0000__00000100__0000, OwnsAffine = 0b0000__00001000__0000, + OwnsNewManaged = 0b0000__00010000__0000, OwnsAll = 0b0000__11111111__0000, // Things that are reachable by the value in any way (fourth nibble): ReachesNonsendAnnot = 0b0001__00000000__0000, ReachesBorrowed = 0b0010__00000000__0000, - // ReachesManaged /* see [1] below */ = 0b0100__00000000__0000, + // ReachesAtManaged /* see [1] below */ = 0b0100__00000000__0000, ReachesMutable = 0b1000__00000000__0000, ReachesAll = 0b1111__00000000__0000, @@ -1802,7 +1803,7 @@ def_type_content_sets!( // [1] Do not set the bits TC::OwnsManaged or // TC::ReachesManaged directly, instead reference // TC::Managed to set them both at once. - Managed = 0b0100__00000100__0000, + AtManaged = 0b0100__00000100__0000, // All bits All = 0b1111__11111111__1111 @@ -1840,8 +1841,12 @@ impl TypeContents { !self.intersects(TC::Nonsendable) } - pub fn owns_managed(&self) -> bool { - self.intersects(TC::OwnsManaged) + pub fn owns_at_managed(&self) -> bool { + self.intersects(TC::OwnsAtManaged) + } + + pub fn owns_new_managed(&self) -> bool { + self.intersects(TC::OwnsNewManaged) } pub fn is_freezable(&self, _: ctxt) -> bool { @@ -1887,7 +1892,7 @@ impl TypeContents { * Includes only those bits that still apply * when indirected through a managed pointer (`@`) */ - TC::Managed | ( + TC::AtManaged | ( *self & TC::ReachesAll) } @@ -2055,7 +2060,7 @@ pub fn type_contents(cx: ctxt, ty: t) -> TypeContents { } ty_str(vstore_box) => { - TC::Managed + TC::AtManaged } ty_str(vstore_slice(r)) => { @@ -2126,7 +2131,7 @@ pub fn type_contents(cx: ctxt, ty: t) -> TypeContents { ty_opaque_closure_ptr(sigil) => { match sigil { ast::BorrowedSigil => TC::ReachesBorrowed, - ast::ManagedSigil => TC::Managed, + ast::ManagedSigil => TC::AtManaged, ast::OwnedSigil => TC::OwnsOwned, } } @@ -2156,7 +2161,8 @@ pub fn type_contents(cx: ctxt, ty: t) -> TypeContents { -> TypeContents { tc | TC::ReachesMutable.when(has_attr(cx, did, "no_freeze")) | - TC::ReachesNonsendAnnot.when(has_attr(cx, did, "no_send")) + TC::ReachesNonsendAnnot.when(has_attr(cx, did, "no_send")) | + TC::OwnsNewManaged.when(has_attr(cx, did, "managed")) } fn borrowed_contents(region: ty::Region, diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs index 1f643cfc80cc3..86060e48dae8a 100644 --- a/src/librustc/middle/typeck/check/mod.rs +++ b/src/librustc/middle/typeck/check/mod.rs @@ -4112,7 +4112,7 @@ pub fn check_intrinsic_type(ccx: @CrateCtxt, it: &ast::ForeignItem) { ty::mk_nil()) } "needs_drop" => (1u, ~[], ty::mk_bool()), - "owns_managed" => (1u, ~[], ty::mk_bool()), + "owns_at_managed" | "owns_new_managed" => (1u, ~[], ty::mk_bool()), "atomic_xchg" | "atomic_xadd" | "atomic_xsub" | "atomic_xchg_acq" | "atomic_xadd_acq" | "atomic_xsub_acq" | "atomic_xchg_rel" | "atomic_xadd_rel" | "atomic_xsub_rel" => { @@ -4367,4 +4367,3 @@ pub fn check_intrinsic_type(ccx: @CrateCtxt, it: &ast::ForeignItem) { ppaux::ty_to_str(ccx.tcx, fty))); } } - diff --git a/src/libstd/unstable/intrinsics.rs b/src/libstd/unstable/intrinsics.rs index 18a1790cd9b61..ca28cbfccdb32 100644 --- a/src/libstd/unstable/intrinsics.rs +++ b/src/libstd/unstable/intrinsics.rs @@ -373,8 +373,17 @@ extern "rust-intrinsic" { pub fn needs_drop() -> bool; /// Returns `true` if a type is managed (will be allocated on the local heap) + #[cfg(stage0)] // SNAP 
a5fa1d9 pub fn owns_managed() -> bool; + /// Returns `true` if a type is @-managed (will be allocated on the local heap) + #[cfg(not(stage0))] + pub fn owns_at_managed() -> bool; + + /// Returns `true` if a type contains a type marked with #[managed] + #[cfg(not(stage0))] + pub fn owns_new_managed() -> bool; + pub fn visit_tydesc(td: *TyDesc, tv: &mut TyVisitor); /// Get the address of the `__morestack` stack growth function. From 7bf35b28a2358bc259739939f1ac564538af7a4f Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 8 Jan 2014 02:09:24 +1100 Subject: [PATCH 04/21] std::gc: add a true garbage collector and use it for `Gc`. The garbage collector is a naive mark-and-sweep collector that is conservative on the stack and mostly conservative on the heap. This currently will not scan through ~, @, ~[] or @[], nor in any of the global or task/thread local storages. Hence, `Gc` placed in the wrong spot may be deallocated while still live, and so all borrowing methods are marked unsafe. (And `Gc` is marked `#[experimental]`.) --- src/libstd/gc.rs | 140 ----------- src/libstd/gc/collector.rs | 230 +++++++++++++++++ src/libstd/gc/mod.rs | 262 ++++++++++++++++++++ src/libstd/gc/ptr_map.rs | 97 ++++++++ src/libstd/lib.rs | 4 +- src/libstd/prelude.rs | 4 - src/libstd/rt/task.rs | 15 +- src/test/compile-fail/new-box-syntax-bad.rs | 3 +- src/test/run-pass/new-box-syntax.rs | 9 +- 9 files changed, 609 insertions(+), 155 deletions(-) delete mode 100644 src/libstd/gc.rs create mode 100644 src/libstd/gc/collector.rs create mode 100644 src/libstd/gc/mod.rs create mode 100644 src/libstd/gc/ptr_map.rs diff --git a/src/libstd/gc.rs b/src/libstd/gc.rs deleted file mode 100644 index 4cbecc9b42f85..0000000000000 --- a/src/libstd/gc.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/*! Task-local garbage-collected boxes - -The `Gc` type provides shared ownership of an immutable value. Destruction is not deterministic, and -will occur some time between every `Gc` handle being gone and the end of the task. The garbage -collector is task-local so `Gc` is not sendable. - -*/ - -use kinds::Send; -use clone::{Clone, DeepClone}; -use managed; - -/// Immutable garbage-collected pointer type -#[lang="gc"] -#[cfg(not(test))] -#[no_send] -pub struct Gc { - priv ptr: @T -} - -#[cfg(test)] -#[no_send] -pub struct Gc { - priv ptr: @T -} - -impl Gc { - /// Construct a new garbage-collected box - #[inline] - pub fn new(value: T) -> Gc { - Gc { ptr: @value } - } - - /// Borrow the value contained in the garbage-collected box - #[inline] - pub fn borrow<'r>(&'r self) -> &'r T { - &*self.ptr - } - - /// Determine if two garbage-collected boxes point to the same object - #[inline] - pub fn ptr_eq(&self, other: &Gc) -> bool { - managed::ptr_eq(self.ptr, other.ptr) - } -} - -impl Clone for Gc { - /// Clone the pointer only - #[inline] - fn clone(&self) -> Gc { - Gc{ ptr: self.ptr } - } -} - -/// An value that represents the task-local managed heap. 
-/// -/// Use this like `let foo = box(GC) Bar::new(...);` -#[lang="managed_heap"] -#[cfg(not(test))] -pub static GC: () = (); - -#[cfg(test)] -pub static GC: () = (); - -/// The `Send` bound restricts this to acyclic graphs where it is well-defined. -/// -/// A `Freeze` bound would also work, but `Send` *or* `Freeze` cannot be expressed. -impl DeepClone for Gc { - #[inline] - fn deep_clone(&self) -> Gc { - Gc::new(self.borrow().deep_clone()) - } -} - -#[cfg(test)] -mod tests { - use prelude::*; - use super::*; - use cell::RefCell; - - #[test] - fn test_clone() { - let x = Gc::new(RefCell::new(5)); - let y = x.clone(); - x.borrow().with_mut(|inner| { - *inner = 20; - }); - assert_eq!(y.borrow().with(|x| *x), 20); - } - - #[test] - fn test_deep_clone() { - let x = Gc::new(RefCell::new(5)); - let y = x.deep_clone(); - x.borrow().with_mut(|inner| { - *inner = 20; - }); - assert_eq!(y.borrow().with(|x| *x), 5); - } - - #[test] - fn test_simple() { - let x = Gc::new(5); - assert_eq!(*x.borrow(), 5); - } - - #[test] - fn test_simple_clone() { - let x = Gc::new(5); - let y = x.clone(); - assert_eq!(*x.borrow(), 5); - assert_eq!(*y.borrow(), 5); - } - - #[test] - fn test_ptr_eq() { - let x = Gc::new(5); - let y = x.clone(); - let z = Gc::new(7); - assert!(x.ptr_eq(&x)); - assert!(x.ptr_eq(&y)); - assert!(!x.ptr_eq(&z)); - } - - #[test] - fn test_destructor() { - let x = Gc::new(~5); - assert_eq!(**x.borrow(), 5); - } -} diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs new file mode 100644 index 0000000000000..f5ff68c179f6e --- /dev/null +++ b/src/libstd/gc/collector.rs @@ -0,0 +1,230 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use container::Container; +use gc::collector::ptr_map::PtrMap; +use iter::Iterator; +use libc; +use option::{Some, None}; +use ops::Drop; +use ptr::RawPtr; +use vec::{MutableVector, OwnedVector, ImmutableVector}; +use unstable::intrinsics; + +mod ptr_map; + +static DEFAULT_ALLOCS_PER_COLLECTION_MASK: uint = (1 << 10) - 1; + +/// A thread local (almost) conservative garbage collector. +/// +/// This makes no effort to check global variables, or even +/// thread-local ones. +/// +/// # Design +/// +/// This is a very crude mark-and-sweep conservative[1] +/// non-generational garbage collector. It stores two sets of +/// pointers, the GC'd pointers themselves, and regions of memory that +/// are roots for GC'd objects (that is, the regions that could +/// possibly contain references to GC'd pointers). +/// +/// For a collection, it scans the roots and the stack to find any +/// bitpatterns that look like GC'd pointers that we know about, and +/// then scans each of these to record all the reachable +/// objects. After doing so, any unreachable objects are freed. +/// +/// Currently, this just calls `malloc` and `free` for every +/// allocation. It could (and should) be reusing allocations. +/// +/// Also, this only counts pointers to the start of GC'd memory +/// regions as valid. This is fine for a simple type like `Gc`, +/// since the only way to get a pointer actually pointing inside the +/// contents requires a `.borrow()`, which freezes the `Gc` +/// reference that was borrowed. 
This `Gc` reference is +/// presumably[2] in a root (or some other place that is scanned) and +/// points at the start of the allocation, so the subpointer will +/// always be valid. (Yay for lifetimes & borrowing.) +/// +/// [1]: it has some smarts, the user can indicate that an allocation +/// should not be scanned, so that allocations that can never +/// contain a GC pointer are ignored. +/// +/// [2]: If the `Gc` reference is reachable but not being scanned +/// then the user already has a problem. +pub struct GarbageCollector { + /// Non-garbage-collectable roots + priv roots: PtrMap, + /// Garbage-collectable pointers. + priv gc_ptrs: PtrMap, + /// number of GC-able allocations performed. + priv gc_allocs: uint, + /// the number of allocations to do before collection (in mask + /// form, i.e. we are detecting `gc_allocs % (1 << n) == 0` for + /// some n). + priv gc_allocs_per_collection_mask: uint +} + +unsafe fn alloc_inner(ptrs: &mut PtrMap, size: uint, scan: bool) -> *mut u8 { + let ptr = if scan { + libc::calloc(size as libc::size_t, 1) + } else { + // no need to clear if we're not going to be scanning it + // anyway. + libc::malloc(size as libc::size_t) + }; + + if ptr.is_null() { + intrinsics::abort(); + } + ptrs.insert_alloc(ptr as uint, size, scan); + ptr as *mut u8 +} + +impl GarbageCollector { + pub fn new() -> GarbageCollector { + GarbageCollector { + roots: PtrMap::new(), + gc_ptrs: PtrMap::new(), + gc_allocs: 0, + gc_allocs_per_collection_mask: DEFAULT_ALLOCS_PER_COLLECTION_MASK + } + } + + /// Run a garbage collection if we're due for one. + pub unsafe fn occasional_collection(&mut self, stack_top: uint) { + if self.gc_allocs & self.gc_allocs_per_collection_mask == 0 { + self.collect(stack_top) + } + } + + /// Allocate `size` bytes of memory such that they are scanned for + /// other GC'd pointers (for use by types like `Gc>`). + pub unsafe fn alloc_gc(&mut self, size: uint) -> *mut u8 { + self.gc_allocs += 1; + alloc_inner(&mut self.gc_ptrs, size, true) + } + + /// Allocate `size` bytes of memory such that they are not scanned + /// for other GC'd pointers; this should be used for types like + /// `Gc`, or (in theory) `Gc<~Gc>` (note the + /// indirection). + pub unsafe fn alloc_gc_no_scan(&mut self, size: uint) -> *mut u8 { + self.gc_allocs += 1; + alloc_inner(&mut self.gc_ptrs, size, false) + } + + /// Register the block of memory [`start`, `end`) for scanning for + /// GC'd pointers. + pub unsafe fn register_root(&mut self, start: *(), end: *()) { + self.roots.insert_alloc(start as uint, end as uint - start as uint, true) + } + /// Stop scanning the root starting at `start` for GC'd pointers. + pub unsafe fn unregister_root(&mut self, start: *()) { + self.roots.remove(start as uint); + } + + /// Collect garbage. An upper bound on the position of any GC'd + /// pointers on the stack should be passed as `stack_top`. + pub unsafe fn collect(&mut self, stack_top: uint) { + clear_registers(0, 0, 0, 0, 0, 0); + + let stack: uint = 1; + let stack_end = &stack as *uint; + + let GarbageCollector { ref mut roots, ref mut gc_ptrs, .. } = *self; + + // Step 1. + gc_ptrs.mark_all_unreachable(); + + // Step 2. mark any reachable pointers + + // the list of pointers that are reachable and scannable, but + // haven't actually been scanned yet. + let mut grey_list = ~[]; + + // Step 2.1: search for GC'd pointers in any registered roots. 
+ for (low, descr) in roots.iter() { + mark_words_between(gc_ptrs, &mut grey_list, + low as *uint, descr.high as *uint) + } + + // Step 2.2: search for them on the stack. + mark_words_between(gc_ptrs, &mut grey_list, stack_end, stack_top as *uint); + + // Step 2.3: search for GC references inside other reachable + // GC references. + let mut count = 0; + loop { + match grey_list.pop_opt() { + Some((low, high)) => { + count += 1; + mark_words_between(gc_ptrs, &mut grey_list, + low as *uint, high as *uint); + } + // everything scanned + None => break + } + } + + // Step 3. sweep all the unreachable ones for deallocation. + let unreachable = gc_ptrs.find_unreachable(); + for &(ptr, finaliser) in unreachable.iter() { + debug!("freeing {:x}", ptr); + + match finaliser { + Some(f) => f(ptr as *mut ()), + None => {} + } + gc_ptrs.remove(ptr); + libc::free(ptr as *libc::c_void); + } + + info!("GC scan: {} dead, {} live, {} scanned: took ms", + unreachable.len(), gc_ptrs.len(), count); + } +} + +impl Drop for GarbageCollector { + fn drop(&mut self) { + // free all the pointers we're controlling. + for (ptr, descr) in self.gc_ptrs.iter() { + match descr.finaliser { + Some(f) => f(ptr as *mut ()), + None => {} + } + unsafe {libc::free(ptr as *libc::c_void)} + } + } +} + +/// Scan the words from `low` to `high`, conservatively registering +/// any GC pointer bit patterns found. +unsafe fn mark_words_between(gc_ptrs: &mut PtrMap, grey_list: &mut ~[(uint, uint)], + mut low: *uint, high: *uint) { + debug!("scanning from {} to {}", low, high); + while low < high { + match gc_ptrs.mark_reachable_scan_info(*low) { + Some((top, scan)) => { + debug!("found {:x} at {:x}", *low, low as uint); + if scan { + grey_list.push((*low, top)); + } + } + None => {} + } + + low = low.offset(1); + } +} + +// cargo culted from Boehm. +#[inline(never)] +fn clear_registers(_: uint, _: uint, _: uint, + _: uint, _: uint, _: uint) {} diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs new file mode 100644 index 0000000000000..f9253d94caca8 --- /dev/null +++ b/src/libstd/gc/mod.rs @@ -0,0 +1,262 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! Task-local garbage-collected boxes + +The `Gc` type provides shared ownership of an immutable value. Destruction is not deterministic, and +will occur some time between every `Gc` handle being gone and the end of the task. The garbage +collector is task-local so `Gc` is not sendable. + +*/ + +#[allow(missing_doc)]; +#[allow(experimental)]; + +use kinds::Freeze; +use clone::{Clone, DeepClone}; +use mem; +use option::{Some, None}; +use rt::local; +use rt::task::{Task, GcUninit, GcExists, GcBorrowed}; +use util::replace; + +use unstable::intrinsics::{owns_new_managed, move_val_init}; + +pub mod collector; + +/// Immutable garbage-collected pointer type. +/// +/// # Warning +/// +/// This is highly experimental. Placing them in the wrong place can +/// cause live pointers to be deallocated or reused. 
Wrong places
+/// can include:
+///
+/// - global variables (including `#[thread_local]` ones)
+/// - task-local storage
+/// - both built-in allocating pointer types (`~` and `@`)
+/// - both built-in allocating vector types (`~[]` and `@[]`)
+/// - most library smart pointers, like `Rc`
+#[no_send]
+#[experimental]
+#[managed]
+pub struct Gc<T> {
+    priv ptr: *T
+}
+
+impl<T> Gc<T> {
+    /// Construct a new garbage-collected box
+    #[experimental="not rooted by built-in pointer and vector types"]
+    pub fn new(value: T) -> Gc<T> {
+        let stack_top;
+        let gc;
+        {
+            // we need the task-local GC, and some upper bound on the
+            // top of the stack. The borrow is scoped so that we can
+            // use task things like logging etc. inside .collect() and
+            // (as much as possible) inside finalisers.
+            let mut task_ = local::Local::borrow(None::<Task>);
+            let task = task_.get();
+
+            match task.stack_bounds() {
+                (_, t) => stack_top = t,
+            }
+
+            // mark the GC as borrowed: unfortunately this means that
+            // we can't use any GC functions in finalisers,
+            // e.g. Gc<RefCell<Gc<T>>> will fail/crash when collected.
+            gc = replace(&mut task.gc, GcBorrowed);
+        }
+
+        let mut gc = match gc {
+            // first GC allocation in this task, so create a new
+            // collector
+            GcUninit => ~collector::GarbageCollector::new(),
+            GcExists(gc) => gc,
+            GcBorrowed => fail!("Gc::new: Gc already borrowed.")
+        };
+
+        let size = mem::size_of::<T>();
+        let ptr;
+        unsafe {
+            gc.occasional_collection(stack_top);
+
+            // if we don't contain anything that has a #[managed]
+            // pointer unboxed, then we don't need to scan, because
+            // there can never be a GC reference inside.
+            // FIXME: we currently count ~Gc<T> as owning managed,
+            // but it shouldn't: ~ (or equivalent) should root the Gc
+            // itself.
+            ptr = if owns_new_managed::<T>() {
+                gc.alloc_gc(size)
+            } else {
+                gc.alloc_gc_no_scan(size)
+            } as *mut T;
+
+            move_val_init(&mut *ptr, value);
+        }
+
+        // restore the garbage collector to the task.
+        let mut task = local::Local::borrow(None::<Task>);
+        task.get().gc = GcExists(gc);
+
+        Gc { ptr: ptr as *T }
+    }
+}
+
+impl<T: Freeze> Gc<T> {
+    /// Borrow the value contained in the garbage-collected box.
+    ///
+    /// This is restricted to deeply immutable values, and so does not
+    /// require a write-barrier because no writes are possible.
+    ///
+    /// Currently `unsafe` because `~` and `~[]` do not root a `Gc`
+    /// box, and so, if that is the only reference to one, then that
+    /// `Gc` may be deallocated or the memory reused.
+    #[inline]
+    pub unsafe fn borrow<'r>(&'r self) -> &'r T {
+        &*self.ptr
+    }
+}
+
+impl<T> Gc<T> {
+    /// Borrow the value contained in the garbage-collected box, with
+    /// a write-barrier.
+    ///
+    /// See `.borrow()` for the reason for `unsafe`.
+    #[inline]
+    pub unsafe fn borrow_write_barrier<'r>(&'r self) -> &'r T {
+        // a completely conservative non-generational GC needs no
+        // write barriers.
+        &*self.ptr
+    }
+
+    /// Borrow the value contained in the garbage-collected box,
+    /// without a write-barrier.
+    ///
+    /// Because this has no write barrier, any writes to the value
+    /// must not write new references to other garbage-collected boxes.
+    #[inline]
+    pub unsafe fn borrow_no_write_barrier<'r>(&'r self) -> &'r T {
+        // a completely conservative non-generational GC needs no
+        // write barriers.
+        &*self.ptr
+    }
+}
+
+impl<T> Clone for Gc<T> {
+    fn clone(&self) -> Gc<T> { *self }
+}
+
+/// The `Freeze` bound restricts this to acyclic graphs where it is well-defined.
+///
+/// A `Send` bound would also work, but `Send` *or* `Freeze` cannot be expressed.
+impl DeepClone for Gc { + #[inline] + fn deep_clone(&self) -> Gc { + Gc::new(unsafe {self.borrow_write_barrier().deep_clone()}) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use cell::RefCell; + use iter::Iterator; + use option::{Some, None}; + use vec::{ImmutableVector, MutableVector}; + + #[test] + fn test_clone() { + let x = Gc::new(RefCell::new(5)); + let y = x.clone(); + unsafe { + x.borrow_write_barrier().with_mut(|inner| { + *inner = 20; + }); + assert_eq!(y.borrow_write_barrier().with(|x| *x), 20); + } + } + + #[test] + fn test_deep_clone() { + let x = Gc::new(RefCell::new(5)); + let y = x.deep_clone(); + unsafe { + x.borrow_write_barrier().with_mut(|inner| { + *inner = 20; + }); + assert_eq!(y.borrow_write_barrier().with(|x| *x), 5); + } + } + + #[test] + fn test_simple() { + let x = Gc::new(5); + unsafe { + assert_eq!(*x.borrow(), 5); + } + } + + #[test] + fn test_simple_clone() { + let x = Gc::new(5); + let y = x.clone(); + unsafe { + assert_eq!(*x.borrow(), 5); + assert_eq!(*y.borrow(), 5); + } + } + + #[test] + #[ignore] // no finalisation of GC'd objects + fn test_destructor() { + let x = Gc::new(~5); + unsafe { + assert_eq!(**x.borrow(), 5); + } + } + + #[test] + fn test_many_allocs() { + // on the stack. + let mut ptrs = [None::>, .. 10000]; + for (i, ptr) in ptrs.mut_iter().enumerate() { + *ptr = Some(Gc::new(i)) + } + + for (i, ptr) in ptrs.iter().enumerate() { + unsafe { + let p = ptr.unwrap(); + assert_eq!(*p.borrow(), i); + } + } + } +} + +#[cfg(test)] +mod bench { + use super::*; + use iter::Iterator; + use option::{Some, None}; + use vec::{ImmutableVector, MutableVector}; + use extra::test::BenchHarness; + + #[bench] + fn many_allocs(bh: &mut BenchHarness) { + bh.iter(|| { + let mut ptrs = [None::>, .. 1000]; + for (i, ptr) in ptrs.mut_iter().enumerate() { + *ptr = Some(Gc::new(i)) + } + }) + } +} diff --git a/src/libstd/gc/ptr_map.rs b/src/libstd/gc/ptr_map.rs new file mode 100644 index 0000000000000..400c4ceeb9b2a --- /dev/null +++ b/src/libstd/gc/ptr_map.rs @@ -0,0 +1,97 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use container::Container; +use container::MutableMap; +use iter::Iterator; +use option::{Option, Some, None}; +use trie::{TrieMap, TrieMapIterator}; +use vec::ImmutableVector; + +pub struct PtrMap { + // a map from the start of each allocation to a descriptor + // containing information about it. + priv map: TrieMap, +} + +/// This representation could be optimised. +pub struct PtrDescr { + // the top edge of the allocation. + high: uint, + // the finaliser to run + finaliser: Option, + // whether this allocation is reachable + reachable: bool, + // whether this allocation should be scanned (i.e. whether it + // contains rooted references to GC pointers) + scan: bool +} + +impl PtrMap { + /// Create a new PtrMap. + pub fn new() -> PtrMap { + PtrMap { + map: TrieMap::new() + } + } + + /// Register an allocation starting at `ptr` running for `length` bytes + pub fn insert_alloc(&mut self, ptr: uint, length: uint, scan: bool) { + let descr = PtrDescr { high: ptr + length, reachable: false, scan: scan, finaliser: None }; + self.map.insert(ptr, descr); + } + + /// Mark every registered allocation as unreachable. 
+    pub fn mark_all_unreachable(&mut self) {
+        for (_, d) in self.map.mut_iter() {
+            d.reachable = false;
+        }
+    }
+
+    /// Look up the allocation starting at `ptr` and, if it is
+    /// currently marked as unreachable, mark it as reachable and
+    /// retrieve the high end & whether it requires scanning;
+    /// otherwise, return None.
+    pub fn mark_reachable_scan_info(&mut self, ptr: uint) -> Option<(uint, bool)> {
+        match self.map.find_mut(&ptr) {
+            Some(descr) => {
+                if !descr.reachable {
+                    descr.reachable = true;
+                    Some((descr.high, descr.scan))
+                } else {
+                    None
+                }
+            }
+            None => None
+        }
+    }
+
+    /// Find the unreachable pointers in the map, returning `[(low,
+    /// finaliser)]`.
+    pub fn find_unreachable(&mut self) -> ~[(uint, Option<fn(*mut ())>)] {
+        self.map.iter()
+            .filter_map(|(low, descr)|
+                        if !descr.reachable {Some((low, descr.finaliser))} else {None})
+            .collect()
+    }
+
+    /// Deregister the allocation starting at `ptr`.
+    pub fn remove(&mut self, ptr: uint) {
+        self.map.remove(&ptr);
+    }
+
+    /// Iterate over `(low, &'a PtrDescr)`.
+    pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, PtrDescr> {
+        self.map.iter()
+    }
+
+    /// The number of pointers registered.
+    pub fn len(&self) -> uint { self.map.len() }
+}
diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs
index b99de70ea0735..91754a6d956ff 100644
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@@ -53,11 +53,12 @@
        html_favicon_url = "http://www.rust-lang.org/favicon.ico",
        html_root_url = "http://static.rust-lang.org/doc/master")];
 
-#[feature(macro_rules, globs, asm, managed_boxes, thread_local, link_args)];
+#[feature(macro_rules, globs, asm, managed_boxes, thread_local, link_args, managed)];
 
 // Don't link to std. We are std.
 #[no_std];
 
+#[warn(unknown_features)];
 #[deny(non_camel_case_types)];
 #[deny(missing_doc)];
 
@@ -122,6 +123,7 @@ pub mod owned;
 pub mod managed;
 pub mod borrow;
 pub mod rc;
+#[cfg(not(stage0))]
 pub mod gc;
 
 
diff --git a/src/libstd/prelude.rs b/src/libstd/prelude.rs
index 9045bafbe4591..70cf5ab05c9db 100644
--- a/src/libstd/prelude.rs
+++ b/src/libstd/prelude.rs
@@ -85,10 +85,6 @@ pub use vec::{Vector, VectorVector, CopyableVector, ImmutableVector};
 pub use comm::{Port, Chan, SharedChan};
 pub use task::spawn;
 
-// Reexported statics
-#[cfg(not(test))]
-pub use gc::GC;
-
 /// Disposes of a value.
 #[inline]
 pub fn drop<T>(_x: T) { }
diff --git a/src/libstd/rt/task.rs b/src/libstd/rt/task.rs
index b4ead4252ca41..2125c6ed20360 100644
--- a/src/libstd/rt/task.rs
+++ b/src/libstd/rt/task.rs
@@ -20,6 +20,8 @@ use cleanup;
 use clone::Clone;
 use io::Writer;
 use iter::{Iterator, Take};
+#[cfg(not(stage0))]
+use gc::collector::GarbageCollector;
 use local_data;
 use logging::Logger;
 use ops::Drop;
@@ -47,7 +49,7 @@
 
 pub struct Task {
     heap: LocalHeap,
-    gc: GarbageCollector,
+    gc: PossibleGc,
     storage: LocalStorage,
     unwinder: Unwinder,
     death: Death,
@@ -63,7 +65,14 @@ pub struct Task {
     priv imp: Option<~Runtime>,
 }
 
-pub struct GarbageCollector;
+pub enum PossibleGc {
+    GcUninit,
+    GcBorrowed,
+    GcExists(~GarbageCollector)
+}
+
+#[cfg(stage0)]
+struct GarbageCollector;
 pub struct LocalStorage(Option);
 
 /// A handle to a blocked task.
Usually this means having the ~Task pointer by @@ -88,7 +97,7 @@ impl Task { pub fn new() -> Task { Task { heap: LocalHeap::new(), - gc: GarbageCollector, + gc: GcUninit, storage: LocalStorage(None), unwinder: Unwinder::new(), death: Death::new(), diff --git a/src/test/compile-fail/new-box-syntax-bad.rs b/src/test/compile-fail/new-box-syntax-bad.rs index 942c862120792..52751382d9f53 100644 --- a/src/test/compile-fail/new-box-syntax-bad.rs +++ b/src/test/compile-fail/new-box-syntax-bad.rs @@ -9,6 +9,5 @@ use std::owned::HEAP; pub fn main() { let x: Gc = box(HEAP) 2; //~ ERROR mismatched types let y: Gc = box(HEAP)(1 + 2); //~ ERROR mismatched types - let z: ~int = box(GC)(4 + 5); //~ ERROR mismatched types + //let z: ~int = box(GC)(4 + 5); //~ ERROR mismatched types } - diff --git a/src/test/run-pass/new-box-syntax.rs b/src/test/run-pass/new-box-syntax.rs index d237c4d9a4e85..4a223332dcead 100644 --- a/src/test/run-pass/new-box-syntax.rs +++ b/src/test/run-pass/new-box-syntax.rs @@ -14,13 +14,12 @@ struct Structure { pub fn main() { let x: ~int = box(HEAP) 2; let y: ~int = box 2; - let z: Gc = box(GC) 2; - let a: Gc = box(GC) Structure { + //let z: Gc = box(GC) 2; + /*let a: Gc = box(GC) Structure { x: 10, y: 20, - }; + };*/ let b: ~int = box()(1 + 2); let c = box()(3 + 4); - let d = box(GC)(5 + 6); + //let d = box(GC)(5 + 6); } - From 879af764e61e969d920afee7b6ed2a5502fbb54a Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 8 Jan 2014 15:06:40 +1100 Subject: [PATCH 05/21] std::gc: support running destructors on values in Gc. --- src/libstd/gc/collector.rs | 29 ++++++++++++++++++++--------- src/libstd/gc/mod.rs | 20 ++++++++++++++++---- src/libstd/gc/ptr_map.rs | 14 +++++++++++--- src/libstd/rt/task.rs | 4 ++++ 4 files changed, 51 insertions(+), 16 deletions(-) diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs index f5ff68c179f6e..71afb7e950802 100644 --- a/src/libstd/gc/collector.rs +++ b/src/libstd/gc/collector.rs @@ -12,7 +12,7 @@ use container::Container; use gc::collector::ptr_map::PtrMap; use iter::Iterator; use libc; -use option::{Some, None}; +use option::{Some, None, Option}; use ops::Drop; use ptr::RawPtr; use vec::{MutableVector, OwnedVector, ImmutableVector}; @@ -38,7 +38,8 @@ static DEFAULT_ALLOCS_PER_COLLECTION_MASK: uint = (1 << 10) - 1; /// For a collection, it scans the roots and the stack to find any /// bitpatterns that look like GC'd pointers that we know about, and /// then scans each of these to record all the reachable -/// objects. After doing so, any unreachable objects are freed. +/// objects. After doing so, any unreachable objects have their +/// finalisers run and are freed. /// /// Currently, this just calls `malloc` and `free` for every /// allocation. It could (and should) be reusing allocations. 
@@ -71,7 +72,8 @@ pub struct GarbageCollector {
     priv gc_allocs_per_collection_mask: uint
 }
 
-unsafe fn alloc_inner(ptrs: &mut PtrMap, size: uint, scan: bool) -> *mut u8 {
+unsafe fn alloc_inner(ptrs: &mut PtrMap, size: uint, scan: bool,
+                      finaliser: Option<fn(*mut ())>) -> *mut u8 {
     let ptr = if scan {
         libc::calloc(size as libc::size_t, 1)
     } else {
@@ -83,7 +85,7 @@ unsafe fn alloc_inner(ptrs: &mut PtrMap, size: uint, scan: bool) -> *mut u8 {
     if ptr.is_null() {
         intrinsics::abort();
     }
-    ptrs.insert_alloc(ptr as uint, size, scan);
+    ptrs.insert_alloc(ptr as uint, size, scan, finaliser);
     ptr as *mut u8
 }
 
@@ -106,24 +108,33 @@ impl GarbageCollector {
 
     /// Allocate `size` bytes of memory such that they are scanned for
     /// other GC'd pointers (for use by types like `Gc<Gc<T>>`).
-    pub unsafe fn alloc_gc(&mut self, size: uint) -> *mut u8 {
+    ///
+    /// `finaliser` is passed the start of the allocation at some
+    /// unspecified point after the allocation has become
+    /// unreachable.
+    pub unsafe fn alloc_gc(&mut self, size: uint, finaliser: Option<fn(*mut ())>) -> *mut u8 {
         self.gc_allocs += 1;
-        alloc_inner(&mut self.gc_ptrs, size, true)
+        alloc_inner(&mut self.gc_ptrs, size, true, finaliser)
     }
 
     /// Allocate `size` bytes of memory such that they are not scanned
     /// for other GC'd pointers; this should be used for types like
    /// `Gc<int>`, or (in theory) `Gc<~Gc<T>>` (note the
     /// indirection).
-    pub unsafe fn alloc_gc_no_scan(&mut self, size: uint) -> *mut u8 {
+    ///
+    /// `finaliser` is passed the start of the allocation at some
+    /// unspecified point after the allocation has become
+    /// unreachable.
+    pub unsafe fn alloc_gc_no_scan(&mut self, size: uint,
+                                   finaliser: Option<fn(*mut ())>) -> *mut u8 {
         self.gc_allocs += 1;
-        alloc_inner(&mut self.gc_ptrs, size, false)
+        alloc_inner(&mut self.gc_ptrs, size, false, finaliser)
     }
 
     /// Register the block of memory [`start`, `end`) for scanning for
     /// GC'd pointers.
     pub unsafe fn register_root(&mut self, start: *(), end: *()) {
-        self.roots.insert_alloc(start as uint, end as uint - start as uint, true)
+        self.roots.insert_alloc(start as uint, end as uint - start as uint, true, None)
     }
     /// Stop scanning the root starting at `start` for GC'd pointers.
     pub unsafe fn unregister_root(&mut self, start: *()) {
diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs
index f9253d94caca8..af7a85ee8834c 100644
--- a/src/libstd/gc/mod.rs
+++ b/src/libstd/gc/mod.rs
@@ -23,14 +23,21 @@ use kinds::Freeze;
 use clone::{Clone, DeepClone};
 use mem;
 use option::{Some, None};
+use ptr;
 use rt::local;
 use rt::task::{Task, GcUninit, GcExists, GcBorrowed};
 use util::replace;
 
-use unstable::intrinsics::{owns_new_managed, move_val_init};
+use unstable::intrinsics::{owns_new_managed, move_val_init, needs_drop};
 
 pub mod collector;
 
+fn pointer_run_dtor<T>(p: *mut ()) {
+    unsafe {
+        ptr::read_ptr(p as *T);
+    }
+}
+
 /// Immutable garbage-collected pointer type.
 ///
 /// # Warning
@@ -89,6 +96,12 @@
         unsafe {
             gc.occasional_collection(stack_top);
 
+            let finaliser = if needs_drop::<T>() {
+                Some(pointer_run_dtor::<T>)
+            } else {
+                None
+            };
+
             // if we don't contain anything that has a #[managed]
             // pointer unboxed, then we don't need to scan, because
             // there can never be a GC reference inside.
             // FIXME: we currently count ~Gc<T> as owning managed,
             // but it shouldn't: ~ (or equivalent) should root the Gc
             // itself.
             ptr = if owns_new_managed::<T>() {
-                gc.alloc_gc(size)
+                gc.alloc_gc(size, finaliser)
             } else {
-                gc.alloc_gc_no_scan(size)
+                gc.alloc_gc_no_scan(size, finaliser)
             } as *mut T;
 
             move_val_init(&mut *ptr, value);
@@ -217,7 +230,6 @@ mod tests {
     }
 
     #[test]
-    #[ignore] // no finalisation of GC'd objects
     fn test_destructor() {
         let x = Gc::new(~5);
         unsafe {
diff --git a/src/libstd/gc/ptr_map.rs b/src/libstd/gc/ptr_map.rs
index 400c4ceeb9b2a..ba5824853840b 100644
--- a/src/libstd/gc/ptr_map.rs
+++ b/src/libstd/gc/ptr_map.rs
@@ -42,9 +42,17 @@ impl PtrMap {
         }
     }
 
-    /// Register an allocation starting at `ptr` running for `length` bytes
-    pub fn insert_alloc(&mut self, ptr: uint, length: uint, scan: bool) {
-        let descr = PtrDescr { high: ptr + length, reachable: false, scan: scan, finaliser: None };
+    /// Register an allocation starting at `ptr` running for `length`
+    /// bytes. `scan` indicates if the allocation should be scanned,
+    /// and `finaliser` is the "destructor" to run on the region.
+    pub fn insert_alloc(&mut self, ptr: uint, length: uint, scan: bool,
+                        finaliser: Option<fn(*mut ())>) {
+        let descr = PtrDescr {
+            high: ptr + length,
+            reachable: false,
+            scan: scan,
+            finaliser: finaliser
+        };
         self.map.insert(ptr, descr);
     }
 
diff --git a/src/libstd/rt/task.rs b/src/libstd/rt/task.rs
index 2125c6ed20360..1eab9c5970388 100644
--- a/src/libstd/rt/task.rs
+++ b/src/libstd/rt/task.rs
@@ -40,6 +40,7 @@ use sync::arc::UnsafeArc;
 use sync::atomics::{AtomicUint, SeqCst};
 use task::{TaskResult, TaskOpts};
 use unstable::finally::Finally;
+use util::replace;
 
 // The Task struct represents all state associated with a rust
 // task. There are at this point two primary "subtypes" of task,
@@ -175,11 +176,14 @@ impl Task {
             let LocalStorage(ref mut optmap) = task.storage;
             optmap.take()
         };
+        let gc = replace(&mut task.get().gc, GcUninit);
         drop(task);
         drop(storage_map);
 
         // Destroy remaining boxes. Also may run user dtors.
         unsafe { cleanup::annihilate(); }
+        // kill any remaining GC references.
+        drop(gc);
 
         // Finally, just in case user dtors printed/logged during TLS
         // cleanup and annihilation, re-destroy stdout and the logger.

From 50dbe647c61a44b347ec3b0f0e93136bf84a2dd7 Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Wed, 8 Jan 2014 15:28:35 +1100
Subject: [PATCH 06/21] std::gc: add root registration functions, using type
 information to shortcircuit when possible.

In theory, this design (using `owns_new_managed` to detect when a type
contains `Gc`) means a smart pointer/alternate vector can just
unconditionally call `register_root_changes` and not worry about
whether this is actually necessary (that is, whether the memory
regions actually need to be scanned for pointers) because those two
functions do it themselves.
---
 src/libstd/gc/mod.rs | 75 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs
index af7a85ee8834c..43339414b4cb1 100644
--- a/src/libstd/gc/mod.rs
+++ b/src/libstd/gc/mod.rs
@@ -20,13 +20,17 @@ collector is task-local so `Gc` is not sendable.
 #[allow(experimental)];
 
 use kinds::Freeze;
+use container::Container;
 use clone::{Clone, DeepClone};
+use iter::Iterator;
 use mem;
 use option::{Some, None};
 use ptr;
+use ptr::RawPtr;
 use rt::local;
 use rt::task::{Task, GcUninit, GcExists, GcBorrowed};
 use util::replace;
+use vec::ImmutableVector;
 
 use unstable::intrinsics::{owns_new_managed, move_val_init, needs_drop};
 
 pub mod collector;
 
+/// Possibly register the changes to the GC roots described by the
+/// arguments.
+///
+/// - `removals` contains the beginning of memory regions that were
+///   (possibly) previously registered as GC roots. These pointers do
+///   not have to have previously been registered nor do they even
+///   have to be valid pointers.
+/// - `additions` contains the beginning and length of memory regions
+///   to register as new GC roots (including ones that are already
+///   registered but now have a different length)
+///
+/// The elements of `removals` are removed before `additions` are
+/// added.
+///
+/// The registration only occurs if `T` actually does have the
+/// possibility to contain `Gc` (determined statically). Pointers
+/// passed in `additions` should be removed as roots just before they
+/// are deallocated or otherwise become invalid.
+#[inline]
+pub unsafe fn register_root_changes<T>(removals: &[*T],
+                                       additions: &[(*T, uint)]) {
+    if owns_new_managed::<T>() {
+        register_root_changes_always::<T>(removals, additions)
+    }
+}
+
+/// Unconditionally perform the registration and unregistration of GC
+/// roots, ignoring type information.
+///
+/// See the conditional but otherwise identical
+/// `register_root_changes` for a description.
+pub unsafe fn register_root_changes_always<T>(removals: &[*T],
+                                              additions: &[(*T, uint)]) {
+    let gc = {
+        let mut task = local::Local::borrow(None::<Task>);
+        replace(&mut task.get().gc, GcBorrowed)
+    };
+    let mut gc = match gc {
+        // first GC interaction in this task, so create a new
+        // collector
+        GcUninit => {
+            if additions.len() != 0 {
+                // we need to add some roots, and we need a GC for
+                // that.
+                ~collector::GarbageCollector::new()
+            } else {
+                // we are only removing things, and if the GC doesn't
+                // exist, the pointers are already not registered as
+                // roots.
+                return
+            }
+        }
+        GcExists(gc) => gc,
+        GcBorrowed => fail!("register_root: Gc already borrowed.")
+    };
+
+    for ptr in removals.iter() {
+        gc.unregister_root(*ptr as *());
+    }
+    for &(ptr, length) in additions.iter() {
+        let end = ptr.offset(length as int);
+        gc.register_root(ptr as *(), end as *());
+    }
+
+    {
+        let mut task = local::Local::borrow(None::<Task>);
+        task.get().gc = GcExists(gc);
+    }
+}
+

From 272d90bd8b7d06fedd97550d3643f8704d041c6c Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Wed, 8 Jan 2014 01:03:44 +1100
Subject: [PATCH 07/21] Add the library-defined vector type from rust-core.
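
For reference, a minimal usage sketch of the type this adds (not part
of the patch itself; the calls match the `libvec.rs` API below, the
example values are illustrative):

    use std::libvec::Vec;

    let mut v = Vec::with_capacity(2);
    v.push(1u);
    v.push(2u);
    v.push(3u);                     // len == cap here, so this doubles the allocation
    assert_eq!(v.len(), 3);         // `len` comes via the `Container` impl
    assert_eq!(v.pop(), Some(3u));
    for x in v.move_iter() {        // consumes the vector, freeing as it goes
        println!("{}", x);
    }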
--- src/libstd/lib.rs | 1 + src/libstd/libvec.rs | 195 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 src/libstd/libvec.rs diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 91754a6d956ff..58d206e907e1a 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -112,6 +112,7 @@ pub mod char; pub mod tuple; pub mod vec; +pub mod libvec; pub mod at_vec; pub mod str; diff --git a/src/libstd/libvec.rs b/src/libstd/libvec.rs new file mode 100644 index 0000000000000..3c8a0e8988c12 --- /dev/null +++ b/src/libstd/libvec.rs @@ -0,0 +1,195 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[allow(missing_doc)]; + +use container::Container; +use mem::size_of; +use unstable::intrinsics::move_val_init; +use unstable::raw; +use cast::{forget, transmute}; +use libc::{free, malloc, realloc}; +use ops::Drop; +use vec::{VecIterator, ImmutableVector}; +use libc::{c_void, size_t}; +use ptr::{read_ptr, RawPtr}; +use num::CheckedMul; +use option::{Option, Some, None}; +use iter::{Iterator, DoubleEndedIterator}; + +pub struct Vec { + priv len: uint, + priv cap: uint, + priv ptr: *mut T +} + +impl Vec { + #[inline(always)] + pub fn new() -> Vec { + Vec { len: 0, cap: 0, ptr: 0 as *mut T } + } + + pub fn with_capacity(capacity: uint) -> Vec { + if capacity == 0 { + Vec::new() + } else { + let size = capacity.checked_mul(&size_of::()).expect("out of mem"); + let ptr = unsafe { malloc(size as size_t) }; + if ptr.is_null() { + fail!("null pointer") + } + Vec { len: 0, cap: capacity, ptr: ptr as *mut T } + } + } +} + +impl Container for Vec { + #[inline(always)] + fn len(&self) -> uint { + self.len + } +} + +impl Vec { + #[inline(always)] + pub fn capacity(&self) -> uint { + self.cap + } + + pub fn reserve(&mut self, capacity: uint) { + if capacity >= self.len { + let size = capacity.checked_mul(&size_of::()).expect("out of mem"); + self.cap = capacity; + unsafe { + let ptr = realloc(self.ptr as *mut c_void, size as size_t) as *mut T; + if ptr.is_null() { + fail!("null pointer") + } + self.ptr = ptr; + } + } + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + unsafe { + if self.len == 0 { + free(self.ptr as *c_void); + self.cap = 0; + self.ptr = 0 as *mut T; + } else { + self.ptr = realloc(self.ptr as *mut c_void, + (self.len * size_of::()) as size_t) as *mut T; + self.cap = self.len; + } + } + } + + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + unsafe { + self.len -= 1; + Some(read_ptr(self.as_slice().unsafe_ref(self.len()))) + } + } + } + + #[inline] + pub fn push(&mut self, value: T) { + if self.len == self.cap { + if self.cap == 0 { self.cap += 2 } + let old_size = self.cap * size_of::(); + self.cap = self.cap * 2; + let size = old_size * 2; + if old_size > size { fail!("out of mem") } + unsafe { + self.ptr = realloc(self.ptr as *mut c_void, size as size_t) as *mut T; + } + } + + unsafe { + let end = self.ptr.offset(self.len as int) as *mut T; + move_val_init(&mut *end, value); + self.len += 1; + } + } + + #[inline] + pub fn as_slice<'a>(&'a self) -> &'a [T] { + let slice = raw::Slice { data: self.ptr as *T, len: self.len }; + unsafe { transmute(slice) } + } + + #[inline] + pub fn as_mut_slice<'a>(&'a mut self) -> &'a 
mut [T] { + let slice = raw::Slice { data: self.ptr as *T, len: self.len }; + unsafe { transmute(slice) } + } + + pub fn move_iter(self) -> MoveIterator { + unsafe { + let iter = transmute(self.as_slice().iter()); + let ptr = self.ptr as *mut u8; + forget(self); + MoveIterator { allocation: ptr, iter: iter } + } + } +} + + +#[unsafe_destructor] +impl Drop for Vec { + fn drop(&mut self) { + unsafe { + for x in self.as_slice().iter() { + read_ptr(x as *T); + } + free(self.ptr as *c_void) + } + } +} + +pub struct MoveIterator { + priv allocation: *mut u8, // the block of memory allocated for the vector + priv iter: VecIterator<'static, T> +} + +impl Iterator for MoveIterator { + fn next(&mut self) -> Option { + unsafe { + self.iter.next().map(|x| read_ptr(x)) + } + } + + fn size_hint(&self) -> (uint, Option) { + self.iter.size_hint() + } +} + +impl DoubleEndedIterator for MoveIterator { + fn next_back(&mut self) -> Option { + unsafe { + self.iter.next_back().map(|x| read_ptr(x)) + } + } +} + +#[unsafe_destructor] +impl Drop for MoveIterator { + fn drop(&mut self) { + // destroy the remaining elements + for _x in *self {} + unsafe { + free(self.allocation as *c_void) + } + } +} From 84c6d81a0f4c2fa06f82ae2682115d85ca882f56 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 8 Jan 2014 16:35:55 +1100 Subject: [PATCH 08/21] std::libvec: make it GC safe. --- src/libstd/lib.rs | 1 + src/libstd/libvec.rs | 33 +++++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 58d206e907e1a..104818f1e3065 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -112,6 +112,7 @@ pub mod char; pub mod tuple; pub mod vec; +#[cfg(not(stage0))] pub mod libvec; pub mod at_vec; pub mod str; diff --git a/src/libstd/libvec.rs b/src/libstd/libvec.rs index 3c8a0e8988c12..b0b1921176519 100644 --- a/src/libstd/libvec.rs +++ b/src/libstd/libvec.rs @@ -23,6 +23,7 @@ use ptr::{read_ptr, RawPtr}; use num::CheckedMul; use option::{Option, Some, None}; use iter::{Iterator, DoubleEndedIterator}; +use gc; pub struct Vec { priv len: uint, @@ -41,11 +42,13 @@ impl Vec { Vec::new() } else { let size = capacity.checked_mul(&size_of::()).expect("out of mem"); - let ptr = unsafe { malloc(size as size_t) }; - if ptr.is_null() { - fail!("null pointer") + unsafe { + let ptr = malloc(size as size_t); + if ptr.is_null() { fail!("null pointer") } + + gc::register_root_changes([], [(ptr as *T, capacity)]); + Vec { len: 0, cap: capacity, ptr: ptr as *mut T } } - Vec { len: 0, cap: capacity, ptr: ptr as *mut T } } } } @@ -69,9 +72,10 @@ impl Vec { self.cap = capacity; unsafe { let ptr = realloc(self.ptr as *mut c_void, size as size_t) as *mut T; - if ptr.is_null() { - fail!("null pointer") - } + if ptr.is_null() { fail!("null pointer") } + + gc::register_root_changes([self.ptr as *T], + [(ptr as *T, capacity)]); self.ptr = ptr; } } @@ -81,12 +85,16 @@ impl Vec { pub fn shrink_to_fit(&mut self) { unsafe { if self.len == 0 { + gc::register_root_changes([self.ptr as *T], []); free(self.ptr as *c_void); self.cap = 0; self.ptr = 0 as *mut T; } else { - self.ptr = realloc(self.ptr as *mut c_void, - (self.len * size_of::()) as size_t) as *mut T; + let ptr = realloc(self.ptr as *mut c_void, + (self.len * size_of::()) as size_t) as *mut T; + if ptr.is_null() { fail!("null pointer") } + gc::register_root_changes([self.ptr as *T], [(ptr as *T, self.len)]); + self.cap = self.len; } } @@ -112,7 +120,10 @@ impl Vec { let size = old_size * 2; if old_size > size { fail!("out 
of mem") } unsafe { - self.ptr = realloc(self.ptr as *mut c_void, size as size_t) as *mut T; + let ptr = realloc(self.ptr as *mut c_void, size as size_t) as *mut T; + gc::register_root_changes([self.ptr as *T], + [(ptr as *T, self.cap)]); + self.ptr = ptr; } } @@ -153,6 +164,7 @@ impl Drop for Vec { for x in self.as_slice().iter() { read_ptr(x as *T); } + gc::register_root_changes([self.ptr as *T], []); free(self.ptr as *c_void) } } @@ -189,6 +201,7 @@ impl Drop for MoveIterator { // destroy the remaining elements for _x in *self {} unsafe { + gc::register_root_changes([self.allocation as *T], []); free(self.allocation as *c_void) } } From 5260554390c1c68d461c8408bde2bb501d58e807 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 8 Jan 2014 16:36:51 +1100 Subject: [PATCH 09/21] std: add Uniq, a GC-safe version of ~. --- src/libstd/lib.rs | 3 +++ src/libstd/uniq.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 src/libstd/uniq.rs diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 104818f1e3065..1664aa326b146 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -117,6 +117,9 @@ pub mod libvec; pub mod at_vec; pub mod str; +#[cfg(not(stage0))] +pub mod uniq; + pub mod ascii; pub mod send_str; diff --git a/src/libstd/uniq.rs b/src/libstd/uniq.rs new file mode 100644 index 0000000000000..0066783e47d2d --- /dev/null +++ b/src/libstd/uniq.rs @@ -0,0 +1,66 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[allow(missing_doc)]; +use ops::Drop; +use libc::{malloc, free, size_t, c_void}; +use gc; +use mem; +use ptr; +use ptr::RawPtr; +use unstable::intrinsics::move_val_init; + +#[unsafe_no_drop_flag] +pub struct Uniq { + priv ptr: *mut T +} + +impl Uniq { + pub fn new(value: T) -> Uniq { + unsafe { + let ptr = malloc(mem::size_of::() as size_t) as *mut T; + gc::register_root_changes([], [(ptr as *T, 1)]); + move_val_init(&mut *ptr, value); + Uniq { ptr: ptr } + } + } + + pub fn borrow<'a>(&'a self) -> &'a T { + unsafe { &*self.ptr } + } + pub fn borrow_mut<'a>(&'a mut self) -> &'a mut T { + unsafe { &mut *self.ptr } + } + pub fn move(mut self) -> T { + unsafe { + let val = ptr::read_ptr(self.ptr as *T); + drop_no_inner_dtor(&mut self); + val + } + } +} + +unsafe fn drop_no_inner_dtor(x: &mut Uniq) { + gc::register_root_changes([x.ptr as *T], []); + free(x.ptr as *c_void); + x.ptr = 0 as *mut T; +} + +#[unsafe_destructor] +impl Drop for Uniq { + fn drop(&mut self) { + if !self.ptr.is_null() { + unsafe { + ptr::read_ptr(self.ptr as *T); + drop_no_inner_dtor(self) + } + } + } +} From 100b1d832b86006e6a14e51b5096fc53dd17de44 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Thu, 9 Jan 2014 02:11:56 +1100 Subject: [PATCH 10/21] std::gc: cache small(ish) allocations to avoid thrashing the pointer trie. Allocations are rounded to the next power of two and cached when they become unreachable. Except for those above 1 MB. 
--- src/libstd/gc/collector.rs | 114 +++++++++++++++++++++++++++++-------- src/libstd/gc/ptr_map.rs | 53 +++++++++++++++-- 2 files changed, 138 insertions(+), 29 deletions(-) diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs index 71afb7e950802..3ba810a00f0df 100644 --- a/src/libstd/gc/collector.rs +++ b/src/libstd/gc/collector.rs @@ -12,16 +12,22 @@ use container::Container; use gc::collector::ptr_map::PtrMap; use iter::Iterator; use libc; +use num::BitCount; use option::{Some, None, Option}; use ops::Drop; +use ptr; use ptr::RawPtr; use vec::{MutableVector, OwnedVector, ImmutableVector}; -use unstable::intrinsics; +use uint; mod ptr_map; static DEFAULT_ALLOCS_PER_COLLECTION_MASK: uint = (1 << 10) - 1; +static ALLOC_CACHE_MIN_LOG: uint = 3; +static ALLOC_CACHE_MAX_LOG: uint = 20; + + /// A thread local (almost) conservative garbage collector. /// /// This makes no effort to check global variables, or even @@ -66,27 +72,33 @@ pub struct GarbageCollector { priv gc_ptrs: PtrMap, /// number of GC-able allocations performed. priv gc_allocs: uint, + /// cached allocations, of sizes 8, 16, 32, 64, ... 1 << 20 (1 MB) + /// (inclusive, with 8 at index 0). Anything smaller gets rounded + /// to 8, anything larger is uncached. + priv alloc_cache: [~[uint], .. ALLOC_CACHE_MAX_LOG - ALLOC_CACHE_MIN_LOG + 1], /// the number of allocations to do before collection (in mask /// form, i.e. we are detecting `gc_allocs % (1 << n) == 0` for /// some n). priv gc_allocs_per_collection_mask: uint } -unsafe fn alloc_inner(ptrs: &mut PtrMap, size: uint, scan: bool, - finaliser: Option) -> *mut u8 { - let ptr = if scan { - libc::calloc(size as libc::size_t, 1) - } else { - // no need to clear if we're not going to be scanning it - // anyway. - libc::malloc(size as libc::size_t) - }; - if ptr.is_null() { - intrinsics::abort(); +fn compute_log_rounded_up_size(size: uint) -> uint { + if size <= (1 << ALLOC_CACHE_MIN_LOG) { + // round up to the minimum + ALLOC_CACHE_MIN_LOG + } else { + // for powers of two 1 << n, this gives n + 1, otherwise, + // for a number like `0b101` it gives 3, which is exactly + // what we want. + let raw = uint::bits - size.leading_zeros(); + // power of two + if size & (size - 1) == 0 { + raw - 1 + } else { + raw + } } - ptrs.insert_alloc(ptr as uint, size, scan, finaliser); - ptr as *mut u8 } impl GarbageCollector { @@ -94,6 +106,11 @@ impl GarbageCollector { GarbageCollector { roots: PtrMap::new(), gc_ptrs: PtrMap::new(), + // :( ... at least the compiler will tell us when we have + // the wrong number. + alloc_cache: [~[], ~[], ~[], ~[], ~[], ~[], + ~[], ~[], ~[], ~[], ~[], ~[], + ~[], ~[], ~[], ~[], ~[], ~[]], gc_allocs: 0, gc_allocs_per_collection_mask: DEFAULT_ALLOCS_PER_COLLECTION_MASK } @@ -106,6 +123,49 @@ impl GarbageCollector { } } + unsafe fn alloc_inner(&mut self, size: uint, scan: bool, + finaliser: Option) -> *mut u8 { + let log_next_power_of_two = compute_log_rounded_up_size(size); + + // it's always larger than 3 + let alloc_size = if log_next_power_of_two <= ALLOC_CACHE_MAX_LOG { + match self.alloc_cache[log_next_power_of_two - ALLOC_CACHE_MIN_LOG].pop_opt() { + Some(ptr) => { + // attempt to reuse the metadata we have for that + // allocation already. 
+ let success = self.gc_ptrs.reuse_alloc(ptr, size, scan, finaliser);
+ if success {
+ debug!("using cache for allocation of size {}", size);
+ if scan {
+ // clear memory that we may need to be scanning
+ ptr::set_memory(ptr as *mut u8, 0, size);
+ }
+ return ptr as *mut u8;
+ }
+ }
+ None => {}
+ }
+ // otherwise, just allocate as per usual.
+ 1 << log_next_power_of_two
+ } else {
+ // huge allocations allocate exactly what they want.
+ size
+ };
+
+ let ptr = if scan {
+ libc::calloc(alloc_size as libc::size_t, 1)
+ } else {
+ libc::malloc(alloc_size as libc::size_t)
+ };
+ if ptr.is_null() {
+ fail!("GC failed to allocate.")
+ }
+
+ self.gc_ptrs.insert_alloc(ptr as uint, size, scan, finaliser);
+
+ ptr as *mut u8
+ }
+
/// Allocate `size` bytes of memory such that they are scanned for
/// other GC'd pointers (for use by types like `Gc>`).
///
/// `finaliser` is passed the start of the allocation at some
/// unspecified pointer after the allocation has become
/// unreachable.
pub unsafe fn alloc_gc(&mut self, size: uint, finaliser: Option) -> *mut u8 {
self.gc_allocs += 1;
- alloc_inner(&mut self.gc_ptrs, size, true, finaliser)
+ self.alloc_inner(size, true, finaliser)
}
/// Allocate `size` bytes of memory such that they are not scanned
/// for other GC'd pointers; this should be used for types like
/// `Gc`, or (in theory) `Gc<~Gc>` (note the
/// indirection).
///
/// `finaliser` is passed the start of the allocation at some
/// unspecified pointer after the allocation has become
/// unreachable.
pub unsafe fn alloc_gc_no_scan(&mut self, size: uint,
finaliser: Option) -> *mut u8 {
self.gc_allocs += 1;
- alloc_inner(&mut self.gc_ptrs, size, false, finaliser)
+ self.alloc_inner(size, false, finaliser)
}
/// Register the block of memory [`start`, `end`) for scanning for
@@ -186,19 +246,27 @@ impl GarbageCollector {
// Step 3. sweep all the unreachable ones for deallocation.
let unreachable = gc_ptrs.find_unreachable();
- for &(ptr, finaliser) in unreachable.iter() {
- debug!("freeing {:x}", ptr);
-
+ for &(ptr, size, finaliser) in unreachable.iter() {
match finaliser {
Some(f) => f(ptr as *mut ()),
None => {}
}
- gc_ptrs.remove(ptr);
- libc::free(ptr as *libc::c_void);
+
+ let log_rounded = compute_log_rounded_up_size(size);
+ // a "small" allocation so we cache it.
+ if log_rounded <= ALLOC_CACHE_MAX_LOG {
+ gc_ptrs.mark_unused(ptr);
+ self.alloc_cache[log_rounded - ALLOC_CACHE_MIN_LOG].push(ptr);
+ } else {
+ // a big one, so whatever, the OS can have its memory
+ // back.
+ gc_ptrs.remove(ptr);
+ libc::free(ptr as *libc::c_void);
+ }
}
info!("GC scan: {} dead, {} live, {} scanned: took ms",
- unreachable.len(), gc_ptrs.len(), count);
+ unreachable.len(), gc_ptrs.len(), count);
}
}
@@ -210,7 +278,7 @@ impl Drop for GarbageCollector {
Some(f) => f(ptr as *mut ()),
None => {}
}
- unsafe {libc::free(ptr as *libc::c_void)}
+ unsafe {libc::free(ptr as *libc::c_void);}
}
}
}
diff --git a/src/libstd/gc/ptr_map.rs b/src/libstd/gc/ptr_map.rs
index ba5824853840b..429a56129b14d 100644
--- a/src/libstd/gc/ptr_map.rs
+++ b/src/libstd/gc/ptr_map.rs
@@ -34,6 +34,12 @@ pub struct PtrDescr {
scan: bool
}
+impl PtrDescr {
+ fn is_used(&self) -> bool {
+ self.high != 0
+ }
+}
+
impl PtrMap {
/// Create a new PtrMap.
pub fn new() -> PtrMap {
@@ -56,6 +62,29 @@ impl PtrMap {
self.map.insert(ptr, descr);
}
+ /// Attempt to reuse the allocation starting at `ptr`. Returns
+ /// `true` if it was successfully registered, otherwise `false`
+ /// (attempting to reuse a live allocation, or an allocation that
+ /// wasn't found).
+ pub fn reuse_alloc(&mut self,
+ ptr: uint, length: uint, scan: bool,
+ finaliser: Option) -> bool {
+ match self.map.find_mut(&ptr) {
+ Some(descr) => {
+ if descr.is_used() {
+ warn!("attempting to reuse a used allocation")
+ false // don't overwrite
+ } else {
+ descr.high = ptr + length;
+ descr.finaliser = finaliser;
+ descr.scan = scan;
+ true
+ }
+ }
+ None => false
+ }
+ }
+
/// Mark every registered allocation as unreachable.
pub fn mark_all_unreachable(&mut self) {
for (_, d) in self.map.mut_iter() {
@@ -70,7 +99,7 @@ impl PtrMap {
pub fn mark_reachable_scan_info(&mut self, ptr: uint) -> Option<(uint, bool)> {
match self.map.find_mut(&ptr) {
Some(descr) => {
- if !descr.reachable {
+ if descr.is_used() && !descr.reachable {
descr.reachable = true;
Some((descr.high, descr.scan))
} else {
@@ -82,12 +111,24 @@ impl PtrMap {
}
/// Find the unreachable pointers in the map, returning `[(low,
- /// finaliser)]`.
- pub fn find_unreachable(&mut self) -> ~[(uint, Option)] {
+ /// size, finaliser)]`.
+ pub fn find_unreachable(&mut self) -> ~[(uint, uint, Option)] {
self.map.iter()
- .filter_map(|(low, descr)|
- if !descr.reachable {Some((low, descr.finaliser))} else {None})
- .collect()
+ .filter_map(|(low, descr)| {
+ if descr.is_used() && !descr.reachable {
+ Some((low, descr.high - low, descr.finaliser))
+ } else {
+ None
+ }
+ }).collect()
+ }
+
+ /// Mark an allocation as unused.
+ pub fn mark_unused(&mut self, ptr: uint) {
+ match self.map.find_mut(&ptr) {
+ Some(descr) => descr.high = 0,
+ None => {}
+ }
}
/// Deregister the allocation starting at `ptr`.
pub fn remove(&mut self, ptr: uint) {
self.map.remove(&ptr);
From 7821eb69c1ec6ba44ecc3f37702036293f7f0017 Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Thu, 9 Jan 2014 18:14:50 +1100
Subject: [PATCH 11/21] std::gc: flip an external reachability bit instead of
 the reachability of every pointer individually.

This makes collections 15% faster.
---
 src/libstd/gc/collector.rs | 18 ++++++++++--------
 src/libstd/gc/ptr_map.rs   | 35 +++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs
index 3ba810a00f0df..16163c5a04471 100644
--- a/src/libstd/gc/collector.rs
+++ b/src/libstd/gc/collector.rs
@@ -17,7 +17,7 @@ use option::{Some, None, Option};
use ops::Drop;
use ptr;
use ptr::RawPtr;
-use vec::{MutableVector, OwnedVector, ImmutableVector};
+use vec::{OwnedVector, ImmutableVector};
use uint;
mod ptr_map;
@@ -211,25 +211,27 @@ impl GarbageCollector {
let GarbageCollector { ref mut roots, ref mut gc_ptrs, .. } = *self;
- // Step 1.
- gc_ptrs.mark_all_unreachable();
+ // Step 1. mark any reachable pointers
- // Step 2. mark any reachable pointers
+ // every pointer is considered reachable on this exact line
+ // (new allocations are reachable by default)
+ gc_ptrs.toggle_reachability();
+ // and now everything is considered unreachable.
// the list of pointers that are reachable and scannable, but
// haven't actually been scanned yet.
let mut grey_list = ~[];
- // Step 2.1: search for GC'd pointers in any registered roots.
+ // Step 1.1: search for GC'd pointers in any registered roots.
for (low, descr) in roots.iter() {
mark_words_between(gc_ptrs, &mut grey_list,
low as *uint, descr.high as *uint)
}
- // Step 2.2: search for them on the stack.
+ // Step 1.2: search for them on the stack.
mark_words_between(gc_ptrs, &mut grey_list, stack_end, stack_top as *uint);
- // Step 2.3: search for GC references inside other reachable
+ // Step 1.3: search for GC references inside other reachable
// GC references.
let mut count = 0;
loop {
@@ -244,7 +246,7 @@ impl GarbageCollector {
}
}
- // Step 3. sweep all the unreachable ones for deallocation.
+ // Step 2. sweep all the unreachable ones for deallocation.
let unreachable = gc_ptrs.find_unreachable();
for &(ptr, size, finaliser) in unreachable.iter() {
match finaliser {
diff --git a/src/libstd/gc/ptr_map.rs b/src/libstd/gc/ptr_map.rs
index 429a56129b14d..f387dcb5441b1 100644
--- a/src/libstd/gc/ptr_map.rs
+++ b/src/libstd/gc/ptr_map.rs
@@ -19,6 +19,10 @@ pub struct PtrMap {
// a map from the start of each allocation to a descriptor
// containing information about it.
priv map: TrieMap,
+ // The state of `reachable` that represents whether an allocation
+ // is reachable, i.e. descr.reachable_flag == this.reachable_state
+ // implies the pointer is reachable.
+ priv reachable_state: bool,
}
/// This representation could be optimised.
@@ -27,8 +31,9 @@ pub struct PtrDescr {
high: uint,
// the finaliser to run
finaliser: Option,
- // whether this allocation is reachable
- reachable: bool,
+ // whether this allocation is reachable (see
+ // PtrMap.reachable_state)
+ reachable_flag: bool,
// whether this allocation should be scanned (i.e. whether it
// contains rooted references to GC pointers)
scan: bool
@@ -44,7 +49,8 @@ impl PtrMap {
/// Create a new PtrMap.
pub fn new() -> PtrMap {
PtrMap {
- map: TrieMap::new()
+ map: TrieMap::new(),
+ reachable_state: true
}
}
@@ -55,7 +61,7 @@ impl PtrMap {
finaliser: Option) {
let descr = PtrDescr {
high: ptr + length,
- reachable: false,
+ reachable_flag: self.reachable_state,
scan: scan,
finaliser: finaliser
};
@@ -78,6 +84,7 @@ impl PtrMap {
descr.high = ptr + length;
descr.finaliser = finaliser;
descr.scan = scan;
+ descr.reachable_flag = self.reachable_state;
true
}
}
@@ -85,13 +92,6 @@ impl PtrMap {
}
}
- /// Mark every registered allocation as unreachable.
- pub fn mark_all_unreachable(&mut self) {
- for (_, d) in self.map.mut_iter() {
- d.reachable = false;
- }
- }
-
/// Look up the allocation starting at `ptr` and, if it is
/// currently marked as unreachable, mark it as reachable and
/// retrieve the high end & whether it requires scanning;
@@ -99,8 +99,8 @@ impl PtrMap {
pub fn mark_reachable_scan_info(&mut self, ptr: uint) -> Option<(uint, bool)> {
match self.map.find_mut(&ptr) {
Some(descr) => {
- if descr.is_used() && !descr.reachable {
- descr.reachable = true;
+ if descr.is_used() && descr.reachable_flag != self.reachable_state {
+ descr.reachable_flag = self.reachable_state;
Some((descr.high, descr.scan))
} else {
None
@@ -115,7 +115,7 @@ impl PtrMap {
pub fn find_unreachable(&mut self) -> ~[(uint, uint, Option)] {
self.map.iter()
.filter_map(|(low, descr)| {
- if descr.is_used() && !descr.reachable {
+ if descr.is_used() && descr.reachable_flag != self.reachable_state {
Some((low, descr.high - low, descr.finaliser))
} else {
None
}
}).collect()
}
@@ -131,6 +131,13 @@ impl PtrMap {
}
}
+ /// After a collection this will flip an internal bit so that
+ /// everything is considered unreachable at the start of the next
+ /// collection.
+ pub fn toggle_reachability(&mut self) {
+ self.reachable_state = !self.reachable_state;
+ }
+
/// Deregister the allocation starting at `ptr`.
pub fn remove(&mut self, ptr: uint) {
self.map.remove(&ptr);
From 81163c0a07e8962291d7e2ec5fec3c14467a256d Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Fri, 10 Jan 2014 16:28:17 +1100
Subject: [PATCH 12/21] std::gc: allow finalisers to touch the GC (in a
 restricted way).

This allows Gc>> to work by allowing it to register changes to the GC
roots. An unsafe pointer to the GC is stored in the borrowed version,
so any users should be sure they understand the implementation details
of the GC before calling anything on that GC via that pointer.
---
 src/libstd/gc/collector.rs                   |  1 +
 src/libstd/gc/mod.rs                         | 86 +++++++++++++------
 src/libstd/gc/ptr_map.rs                     |  2 +-
 src/libstd/rt/task.rs                        | 12 ++-
 .../run-pass/gc-with-finaliser-using-gc.rs   | 52 +++++++++++
 5 files changed, 123 insertions(+), 30 deletions(-)
 create mode 100644 src/test/run-pass/gc-with-finaliser-using-gc.rs

diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs
index 16163c5a04471..6cc52ed65815c 100644
--- a/src/libstd/gc/collector.rs
+++ b/src/libstd/gc/collector.rs
@@ -276,6 +276,7 @@ impl Drop for GarbageCollector {
fn drop(&mut self) {
// free all the pointers we're controlling.
for (ptr, descr) in self.gc_ptrs.iter() {
+ // unused pointers have their finalisers cleared.
match descr.finaliser {
Some(f) => f(ptr as *mut ()),
None => {}
diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs
index 43339414b4cb1..cace6e78a6b22 100644
--- a/src/libstd/gc/mod.rs
+++ b/src/libstd/gc/mod.rs
@@ -34,6 +34,8 @@ use vec::ImmutableVector;
use unstable::intrinsics::{owns_new_managed, move_val_init, needs_drop};
+use gc::collector::GarbageCollector;
+
pub mod collector;
fn pointer_run_dtor(p: *mut ()) {
@@ -75,18 +77,22 @@ pub unsafe fn register_root_changes(removals: &[*T],
/// `register_root_changes` for description.
pub unsafe fn register_root_changes_always(removals: &[*T],
additions: &[(*T, uint)]) {
- let gc = {
+ let mut gc = {
let mut task = local::Local::borrow(None::);
- replace(&mut task.get().gc, GcBorrowed)
+
+ // we don't execute any external code inside here and
+ // everything is task local, so Uninit is fine (because
+ // nothing else will read it)
+ replace(&mut task.get().gc, GcUninit)
};
- let mut gc = match gc {
+ match gc {
// first GC interaction in this task, so create a new
// collector
GcUninit => {
if additions.len() != 0 {
// we need to add some roots, and we need a GC for
// that.
- ~collector::GarbageCollector::new()
+ gc = GcExists(~GarbageCollector::new())
} else {
// we are only removing things, and if the GC doesn't
// exist, the pointers are already not registered as
@@ -94,22 +100,38 @@ pub unsafe fn register_root_changes_always(removals: &[*T],
return
}
}
- GcExists(gc) => gc,
- GcBorrowed => fail!("register_root: Gc already borrowed.")
- };
-
- for ptr in removals.iter() {
- gc.unregister_root(*ptr as *());
+ // the task is cleaning up, so registering root changes would
+ // be pointless.
+ GcBorrowed(ptr) if ptr.is_null() => return,
+ _ => {}
}
- for &(ptr, length) in additions.iter() {
- let end = ptr.offset(length as int);
- gc.register_root(ptr as *(), end as *());
- }
- {
- let mut task = local::Local::borrow(None::);
- task.get().gc = GcExists(gc);
+ let gc_ref = match gc {
+ GcUninit => unreachable!(),
+ GcExists(ref mut gc) => &mut **gc,
+ // you might wonder why we can do this safely. We hit this
+ // code path when a collection runs a finaliser that
+ // wishes to change any roots (usually deregistering a
+ // root).
Finalisers run after all the scanning, and we + // don't touch the root information data structure while + // running them, so we're fine to modify it. + // + // (if `unsafe_gc` is null we'd've already returned from + // the check above) + GcBorrowed(unsafe_gc) => &mut *unsafe_gc + }; + + for ptr in removals.iter() { + gc_ref.unregister_root(*ptr as *()); + } + for &(ptr, length) in additions.iter() { + let end = ptr.offset(length as int); + gc_ref.register_root(ptr as *(), end as *()); + } } + + let mut task = local::Local::borrow(None::); + task.get().gc = gc; } /// Immutable garbage-collected pointer type. @@ -137,7 +159,7 @@ impl Gc { #[experimental="not rooted by built-in pointer and vector types"] pub fn new(value: T) -> Gc { let stack_top; - let gc; + let mut gc; { // we need the task-local GC, and some upper bound on the // top of the stack. The borrow is scoped so that we can @@ -150,18 +172,28 @@ impl Gc { (_, t) => stack_top = t, } - // mark the GC as borrowed: unfortunately this means that - // we can't use an GC functions any finalisers, - // e.g. Gc>> will fail/crash when collected. - gc = replace(&mut task.gc, GcBorrowed); + // some contortions to put a *mut GC reference back into + // the task if we're OK to go (i.e. not borrowed already) + // but we may need to construct a new GC and failure is + // not possible (task is borrowed) so... options. + gc = match replace(&mut task.gc, GcUninit) { + // we can't Gc::new while a collection is going on. + GcBorrowed(_) => None, + GcExists(gc) => Some(gc), + GcUninit => Some(~GarbageCollector::new()) + }; + match gc { + // `gc` is behind a ~ pointer, so it doesn't move and + // this raw pointer will be valid until task death. + Some(ref mut gc) => { task.gc = GcBorrowed(&mut **gc as *mut GarbageCollector) } + None => {} + } } let mut gc = match gc { - // first GC allocation in this task, so create a new - // collector - GcUninit => ~collector::GarbageCollector::new(), - GcExists(gc) => gc, - GcBorrowed => fail!("Gc::new: Gc already borrowed.") + // the task is unborrowed, so now we can fail! + None => fail!("Gc::new: Gc already borrowed."), + Some(gc) => gc, }; let size = mem::size_of::(); diff --git a/src/libstd/gc/ptr_map.rs b/src/libstd/gc/ptr_map.rs index f387dcb5441b1..a554393dbd8a8 100644 --- a/src/libstd/gc/ptr_map.rs +++ b/src/libstd/gc/ptr_map.rs @@ -126,7 +126,7 @@ impl PtrMap { /// Mark an allocation as unused. pub fn mark_unused(&mut self, ptr: uint) { match self.map.find_mut(&ptr) { - Some(descr) => descr.high = 0, + Some(descr) => { descr.high = 0; descr.finaliser = None } None => {} } } diff --git a/src/libstd/rt/task.rs b/src/libstd/rt/task.rs index 1eab9c5970388..879d834398943 100644 --- a/src/libstd/rt/task.rs +++ b/src/libstd/rt/task.rs @@ -27,6 +27,7 @@ use logging::Logger; use ops::Drop; use option::{Option, Some, None}; use prelude::drop; +use ptr; use result::{Result, Ok, Err}; use rt::Runtime; use rt::borrowck::BorrowRecord; @@ -68,7 +69,8 @@ pub struct Task { pub enum PossibleGc { GcUninit, - GcBorrowed, + // nullable + GcBorrowed(*mut GarbageCollector), GcExists(~GarbageCollector) } @@ -176,7 +178,13 @@ impl Task { let LocalStorage(ref mut optmap) = task.storage; optmap.take() }; - let gc = replace(&mut task.get().gc, GcUninit); + + // use the null to inform any finalisers that call + // back into the GC that we're collecting everything, + // and so they don't need to/can't do anything with + // it. 
+ let gc = replace(&mut task.get().gc, GcBorrowed(ptr::mut_null())); + drop(task); drop(storage_map); diff --git a/src/test/run-pass/gc-with-finaliser-using-gc.rs b/src/test/run-pass/gc-with-finaliser-using-gc.rs new file mode 100644 index 0000000000000..7fb4c9d5cfafb --- /dev/null +++ b/src/test/run-pass/gc-with-finaliser-using-gc.rs @@ -0,0 +1,52 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[allow(experimental)]; + +// we need to make sure that a finaliser that needs to talk to the GC +// can run (because this happens inside a collection, when the GC is +// borrowed from the task) + +use std::libvec::Vec; +use std::gc::Gc; + +static mut dtor_actually_ran: bool = false; +struct Foo(uint); +impl Drop for Foo { + fn drop(&mut self) { + unsafe {dtor_actually_ran = true;} + } +} + +// put some more data on to the stack, so that the Gc::new() pointer +// below doesn't get picked up by the conservative stack scan of the +// collections in the for loop below. + +#[inline(never)] +fn make_some_stack_frames(n: uint) { + if n == 0 { + let mut v = Vec::new(); + let p = Gc::new(Foo(1)); + v.push(p); + Gc::new(v); + } else { + make_some_stack_frames(n - 1); + } +} + +fn main() { + make_some_stack_frames(100); + + for _ in range(0, 10000) { + Gc::new(10); + } + + assert!(unsafe {dtor_actually_ran}); +} From cc1015f954bc1fb92ec0b338ecbc6b3c093b4814 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sat, 11 Jan 2014 22:36:29 +1100 Subject: [PATCH 13/21] rustc: convert owns_new_managed to reaches_new_managed. When we're tracing, we don't just care about what's stored unboxed in a value, but whether we can possibly get to a #[managed] value via pointers. 
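For example (hypothetical types, not taken from this patch; written in
the period's syntax with `~` owned pointers), both of the following
should answer true to the reachability question, while only the first
stores a managed value unboxed:

    struct Direct   { x: Gc<int>  } // managed value stored inline
    struct Indirect { x: ~Gc<int> } // managed value only behind a ~ pointer

    // reaches_new_managed::<Direct>()   == true
    // reaches_new_managed::<Indirect>() == true: tracing has to follow
    // the owned pointer to find the Gc inside.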
--- src/librustc/middle/trans/intrinsic.rs | 4 ++-- src/librustc/middle/ty.rs | 8 ++++---- src/librustc/middle/typeck/check/mod.rs | 2 +- src/libstd/gc/mod.rs | 6 +++--- src/libstd/unstable/intrinsics.rs | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/librustc/middle/trans/intrinsic.rs b/src/librustc/middle/trans/intrinsic.rs index a44678838417c..2912b90bfd02b 100644 --- a/src/librustc/middle/trans/intrinsic.rs +++ b/src/librustc/middle/trans/intrinsic.rs @@ -418,9 +418,9 @@ pub fn trans_intrinsic(ccx: @CrateContext, let tp_ty = substs.tys[0]; Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).owns_at_managed())); } - "owns_new_managed" => { + "reaches_new_managed" => { let tp_ty = substs.tys[0]; - Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).owns_new_managed())); + Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).reaches_new_managed())); } "visit_tydesc" => { let td = get_param(decl, first_real_arg); diff --git a/src/librustc/middle/ty.rs b/src/librustc/middle/ty.rs index 1a4747444a3c6..0cbe1b5232800 100644 --- a/src/librustc/middle/ty.rs +++ b/src/librustc/middle/ty.rs @@ -1761,13 +1761,13 @@ def_type_content_sets!( OwnsDtor = 0b0000__00000010__0000, OwnsAtManaged /* see [1] below */ = 0b0000__00000100__0000, OwnsAffine = 0b0000__00001000__0000, - OwnsNewManaged = 0b0000__00010000__0000, OwnsAll = 0b0000__11111111__0000, // Things that are reachable by the value in any way (fourth nibble): ReachesNonsendAnnot = 0b0001__00000000__0000, ReachesBorrowed = 0b0010__00000000__0000, // ReachesAtManaged /* see [1] below */ = 0b0100__00000000__0000, + ReachesNewManaged = 0b0100__00000000__0000, ReachesMutable = 0b1000__00000000__0000, ReachesAll = 0b1111__00000000__0000, @@ -1845,8 +1845,8 @@ impl TypeContents { self.intersects(TC::OwnsAtManaged) } - pub fn owns_new_managed(&self) -> bool { - self.intersects(TC::OwnsNewManaged) + pub fn reaches_new_managed(&self) -> bool { + self.intersects(TC::ReachesNewManaged) } pub fn is_freezable(&self, _: ctxt) -> bool { @@ -2162,7 +2162,7 @@ pub fn type_contents(cx: ctxt, ty: t) -> TypeContents { tc | TC::ReachesMutable.when(has_attr(cx, did, "no_freeze")) | TC::ReachesNonsendAnnot.when(has_attr(cx, did, "no_send")) | - TC::OwnsNewManaged.when(has_attr(cx, did, "managed")) + TC::ReachesNewManaged.when(has_attr(cx, did, "managed")) } fn borrowed_contents(region: ty::Region, diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs index 86060e48dae8a..6a5c4ba52bb50 100644 --- a/src/librustc/middle/typeck/check/mod.rs +++ b/src/librustc/middle/typeck/check/mod.rs @@ -4112,7 +4112,7 @@ pub fn check_intrinsic_type(ccx: @CrateCtxt, it: &ast::ForeignItem) { ty::mk_nil()) } "needs_drop" => (1u, ~[], ty::mk_bool()), - "owns_at_managed" | "owns_new_managed" => (1u, ~[], ty::mk_bool()), + "owns_at_managed" | "reaches_new_managed" => (1u, ~[], ty::mk_bool()), "atomic_xchg" | "atomic_xadd" | "atomic_xsub" | "atomic_xchg_acq" | "atomic_xadd_acq" | "atomic_xsub_acq" | "atomic_xchg_rel" | "atomic_xadd_rel" | "atomic_xsub_rel" => { diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs index cace6e78a6b22..eca85ade5838c 100644 --- a/src/libstd/gc/mod.rs +++ b/src/libstd/gc/mod.rs @@ -32,7 +32,7 @@ use rt::task::{Task, GcUninit, GcExists, GcBorrowed}; use util::replace; use vec::ImmutableVector; -use unstable::intrinsics::{owns_new_managed, move_val_init, needs_drop}; +use unstable::intrinsics::{reaches_new_managed, move_val_init, needs_drop}; use gc::collector::GarbageCollector; @@ -65,7 +65,7 @@ fn 
pointer_run_dtor(p: *mut ()) {
#[inline]
pub unsafe fn register_root_changes(removals: &[*T],
additions: &[(*T, uint)]) {
- if owns_new_managed::() {
+ if reaches_new_managed::() {
register_root_changes_always::(removals, additions)
}
}
@@ -213,7 +213,7 @@ impl Gc {
// FIXME: we currently count ~Gc as owning managed,
// but it shouldn't (~, or equivalent) should root the Gc
// itself.
- ptr = if owns_new_managed::() {
+ ptr = if reaches_new_managed::() {
gc.alloc_gc(size, finaliser)
} else {
gc.alloc_gc_no_scan(size, finaliser)
diff --git a/src/libstd/unstable/intrinsics.rs b/src/libstd/unstable/intrinsics.rs
index ca28cbfccdb32..ffe7431d3dcfa 100644
--- a/src/libstd/unstable/intrinsics.rs
+++ b/src/libstd/unstable/intrinsics.rs
@@ -382,7 +382,7 @@ extern "rust-intrinsic" {
/// Returns `true` if a type contains a type marked with #[managed]
#[cfg(not(stage0))]
- pub fn owns_new_managed() -> bool;
+ pub fn reaches_new_managed() -> bool;
pub fn visit_tydesc(td: *TyDesc, tv: &mut TyVisitor);
From c170b5b25713a2977cf462b5aec2bf5e061dbe5c Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Sun, 12 Jan 2014 02:54:34 +1100
Subject: [PATCH 14/21] std::gc: convert to a tracing garbage collector.

For the moment, this is designed around a non-special trait `Trace`,
which allows values to precisely inform the GC of their contained Gc
pointers.

Unfortunately, the fact that the trait is just like any other means
that generic data structures always require a `Trace` bound on their
type parameters, to be able to register themselves with the GC, even
for substitutions of the parameters that don't contain any Gc types at
all.
---
 src/libstd/gc/collector.rs | 218 +++++++++++++++----------------------
 src/libstd/gc/mod.rs       | 181 +++++++++++++++++++++++++++---
 src/libstd/gc/ptr_map.rs   | 115 ++++++++++++-------
 src/libstd/libvec.rs       |  57 +++++++---
 src/libstd/uniq.rs         |  21 +++-
 5 files changed, 390 insertions(+), 202 deletions(-)

diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs
index 6cc52ed65815c..8044bddd7998a 100644
--- a/src/libstd/gc/collector.rs
+++ b/src/libstd/gc/collector.rs
@@ -8,18 +8,18 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use container::Container;
use gc::collector::ptr_map::PtrMap;
use iter::Iterator;
use libc;
use num::BitCount;
use option::{Some, None, Option};
use ops::Drop;
-use ptr;
use ptr::RawPtr;
use vec::{OwnedVector, ImmutableVector};
use uint;
+use gc::GcTracer;
+
mod ptr_map;
static DEFAULT_ALLOCS_PER_COLLECTION_MASK: uint = (1 << 10) - 1;
@@ -27,44 +27,32 @@
static ALLOC_CACHE_MIN_LOG: uint = 3;
static ALLOC_CACHE_MAX_LOG: uint = 20;
+pub type TracingFunc = fn(*(), uint, &mut GcTracer);
-/// A thread local (almost) conservative garbage collector.
-///
-/// This makes no effort to check global variables, or even
-/// thread-local ones.
+
+/// A thread local garbage collector, precise on the heap,
+/// conservative on the stack, neither generational nor incremental.
///
/// # Design
///
-/// This is a very crude mark-and-sweep conservative[1]
-/// non-generational garbage collector. It stores two sets of
-/// pointers, the GC'd pointers themselves, and regions of memory that
-/// are roots for GC'd objects (that is, the regions that could
-/// possibly contain references to GC'd pointers).
-/// -/// For a collection, it scans the roots and the stack to find any -/// bitpatterns that look like GC'd pointers that we know about, and -/// then scans each of these to record all the reachable -/// objects. After doing so, any unreachable objects have their -/// finalisers run and are freed. +/// Currently stores two sets of known pointers: /// -/// Currently, this just calls `malloc` and `free` for every -/// allocation. It could (and should) be reusing allocations. +/// - managed pointers (i.e. allocations entirely under the control of +/// this GC), and +/// - "roots", which are any other pointers/datastructures/memory +/// regions that have registered themselves as possibly containing +/// GC'd pointers (the registration includes a tracing function +/// pointer with which to find these GC'd pointers) /// -/// Also, this only counts pointers to the start of GC'd memory -/// regions as valid. This is fine for a simple type like `Gc`, -/// since the only way to get a pointer actually pointing inside the -/// contents requires a `.borrow()`, which freezes the `Gc` -/// reference that was borrowed. This `Gc` reference is -/// presumably[2] in a root (or some other place that is scanned) and -/// points at the start of the allocation, so the subpointer will -/// always be valid. (Yay for lifetimes & borrowing.) +/// A conservative stack-scan is performed where any bitpatterns that +/// look like pointers from either of the two sets above are taken to +/// be actual references and a tracing is initiated from there. /// -/// [1]: it has some smarts, the user can indicate that an allocation -/// should not be scanned, so that allocations that can never -/// contain a GC pointer are ignored. +/// Any managed pointers that were not visited in this search are +/// considered dead and deallocated. /// -/// [2]: If the `Gc` reference is reachable but not being scanned -/// then the user already has a problem. +/// Allocations and deallocations are performed directly with malloc +/// and free, with caching of small allocations. pub struct GarbageCollector { /// Non-garbage-collectable roots priv roots: PtrMap, @@ -82,7 +70,6 @@ pub struct GarbageCollector { priv gc_allocs_per_collection_mask: uint } - fn compute_log_rounded_up_size(size: uint) -> uint { if size <= (1 << ALLOC_CACHE_MIN_LOG) { // round up to the minimum @@ -123,23 +110,27 @@ impl GarbageCollector { } } - unsafe fn alloc_inner(&mut self, size: uint, scan: bool, - finaliser: Option) -> *mut u8 { + /// Allocate `size` bytes of memory such that they are scanned for + /// other GC'd pointers (for use by types like `Gc>`). + /// + /// `finaliser` is passed the start of the allocation at some + /// unspecified pointer after the allocation has become + /// unreachable. + pub unsafe fn alloc(&mut self, size: uint, + tracer: Option, + finaliser: Option) -> *mut u8 { + self.gc_allocs += 1; let log_next_power_of_two = compute_log_rounded_up_size(size); - // it's always larger than 3 + // it's always larger than ALLOC_CACHE_MIN_LOG let alloc_size = if log_next_power_of_two <= ALLOC_CACHE_MAX_LOG { match self.alloc_cache[log_next_power_of_two - ALLOC_CACHE_MIN_LOG].pop_opt() { Some(ptr) => { // attempt to reuse the metadata we have for that // allocation already. 
- let success = self.gc_ptrs.reuse_alloc(ptr, size, scan, finaliser); + let success = self.gc_ptrs.reuse_alloc(ptr, size, tracer, finaliser); if success { debug!("using cache for allocation of size {}", size); - if scan { - // clear memory that we may need to be scanning - ptr::set_memory(ptr as *mut u8, 0, size); - } return ptr as *mut u8; } } @@ -152,103 +143,93 @@ impl GarbageCollector { size }; - let ptr = if scan { - libc::calloc(alloc_size as libc::size_t, 1) - } else { - libc::malloc(alloc_size as libc::size_t) - }; + let ptr = libc::malloc(alloc_size as libc::size_t); if ptr.is_null() { fail!("GC failed to allocate.") } - self.gc_ptrs.insert_alloc(ptr as uint, size, scan, finaliser); + self.gc_ptrs.insert_alloc(ptr as uint, size, tracer, finaliser); ptr as *mut u8 } - /// Allocate `size` bytes of memory such that they are scanned for - /// other GC'd pointers (for use by types like `Gc>`). - /// - /// `finaliser` is passed the start of the allocation at some - /// unspecified pointer after the allocation has become - /// unreachable. - pub unsafe fn alloc_gc(&mut self, size: uint, finaliser: Option) -> *mut u8 { - self.gc_allocs += 1; - self.alloc_inner(size, true, finaliser) + /// Register the block of memory [`start`, `end`) for tracing when + /// a word matching `start` pointer is seen during a conservative + /// scan. On such a scan, `tracer` is called, passing in the + /// pointer and `metadata` (which can be arbitrary). + pub unsafe fn nongc_register(&mut self, start: *(), metadata: uint, tracer: TracingFunc) { + self.roots.insert_alloc(start as uint, metadata, Some(tracer), None) } - /// Allocate `size` bytes of memory such that they are not scanned - /// for other GC'd pointers; this should be used for types like - /// `Gc`, or (in theory) `Gc<~Gc>` (note the - /// indirection). - /// - /// `finaliser` is passed the start of the allocation at some - /// unspecified pointer after the allocation has become - /// unreachable. - pub unsafe fn alloc_gc_no_scan(&mut self, size: uint, - finaliser: Option) -> *mut u8 { - self.gc_allocs += 1; - self.alloc_inner(size, false, finaliser) + /// Update the metadata word associated with `start`. + pub unsafe fn nongc_update_metadata(&mut self, start: *(), metadata: uint) -> bool { + self.roots.update_metadata(start as uint, metadata) } - /// Register the block of memory [`start`, `end`) for scanning for - /// GC'd pointers. - pub unsafe fn register_root(&mut self, start: *(), end: *()) { - self.roots.insert_alloc(start as uint, end as uint - start as uint, true, None) - } - /// Stop scanning the root starting at `start` for GC'd pointers. - pub unsafe fn unregister_root(&mut self, start: *()) { + /// Stop considering the root starting at `start` for tracing. + pub unsafe fn nongc_unregister(&mut self, start: *()) { self.roots.remove(start as uint); } + /// Check if this is the first time that the non-GC'd pointer + /// `start` has been traced in this iteration. + pub fn nongc_first_trace(&mut self, start: *()) -> bool { + debug!("nongc_first_trace: checking {}", start); + self.roots.mark_reachable_scan_info(start as uint).is_some() + } + + /// Check if this is the first time that the GC'd pointer `start` + /// has been traced in this iteration. + pub fn gc_first_trace(&mut self, start: *()) -> bool { + debug!("gc_first_trace: checking {}", start); + self.gc_ptrs.mark_reachable_scan_info(start as uint).is_some() + } + + /// Run a conservative scan of the words from `start` to `end`. 
+ pub unsafe fn conservative_scan(&mut self, mut start: *uint, end: *uint) { + while start < end { + let ptr = *start; + let trace_info = match self.gc_ptrs.mark_reachable_scan_info(ptr) { + i @ Some(_) => i, + None => self.roots.mark_reachable_scan_info(ptr) + }; + match trace_info { + Some((metadata, Some(tracer))) => { + tracer(ptr as *(), metadata, &mut GcTracer { gc: self }) + } + // don't need no tracing (either not a pointer we + // recognise, or one without a registered tracer) + _ => {} + } + + start = start.offset(1); + } + } + /// Collect garbage. An upper bound on the position of any GC'd /// pointers on the stack should be passed as `stack_top`. pub unsafe fn collect(&mut self, stack_top: uint) { + debug!("collecting"); clear_registers(0, 0, 0, 0, 0, 0); let stack: uint = 1; let stack_end = &stack as *uint; - let GarbageCollector { ref mut roots, ref mut gc_ptrs, .. } = *self; - // Step 1. mark any reachable pointers // every pointer is considered reachable on this exact line // (new allocations are reachable by default) - gc_ptrs.toggle_reachability(); + self.gc_ptrs.toggle_reachability(); + self.roots.inefficient_mark_all_unreachable(); // and now everything is considered unreachable. - // the list of pointers that are reachable and scannable, but - // haven't actually been scanned yet. - let mut grey_list = ~[]; + self.conservative_scan(stack_end, stack_top as *uint); - // Step 1.1: search for GC'd pointers in any registered roots. - for (low, descr) in roots.iter() { - mark_words_between(gc_ptrs, &mut grey_list, - low as *uint, descr.high as *uint) - } - - // Step 1.2: search for them on the stack. - mark_words_between(gc_ptrs, &mut grey_list, stack_end, stack_top as *uint); - - // Step 1.3: search for GC references inside other reachable - // GC references. - let mut count = 0; - loop { - match grey_list.pop_opt() { - Some((low, high)) => { - count += 1; - mark_words_between(gc_ptrs, &mut grey_list, - low as *uint, high as *uint); - } - // everything scanned - None => break - } - } // Step 2. sweep all the unreachable ones for deallocation. - let unreachable = gc_ptrs.find_unreachable(); + let unreachable = self.gc_ptrs.find_unreachable(); for &(ptr, size, finaliser) in unreachable.iter() { + debug!("unreachable: 0x{:x}", ptr); match finaliser { Some(f) => f(ptr as *mut ()), None => {} @@ -257,18 +238,15 @@ impl GarbageCollector { let log_rounded = compute_log_rounded_up_size(size); // a "small" allocation so we cache it. if log_rounded <= ALLOC_CACHE_MAX_LOG { - gc_ptrs.mark_unused(ptr); + self.gc_ptrs.mark_unused(ptr); self.alloc_cache[log_rounded - ALLOC_CACHE_MIN_LOG].push(ptr); } else { // a big one, so whatever, the OS can have its memory // back. - gc_ptrs.remove(ptr); + self.gc_ptrs.remove(ptr); libc::free(ptr as *libc::c_void); } } - - info!("GC scan: {} dead, {} live, {} scanned: took ms", - unreachable.len(), gc_ptrs.len(), count); } } @@ -286,26 +264,6 @@ impl Drop for GarbageCollector { } } -/// Scan the words from `low` to `high`, conservatively registering -/// any GC pointer bit patterns found. -unsafe fn mark_words_between(gc_ptrs: &mut PtrMap, grey_list: &mut ~[(uint, uint)], - mut low: *uint, high: *uint) { - debug!("scanning from {} to {}", low, high); - while low < high { - match gc_ptrs.mark_reachable_scan_info(*low) { - Some((top, scan)) => { - debug!("found {:x} at {:x}", *low, low as uint); - if scan { - grey_list.push((*low, top)); - } - } - None => {} - } - - low = low.offset(1); - } -} - // cargo culted from Boehm. 
#[inline(never)] fn clear_registers(_: uint, _: uint, _: uint, diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs index eca85ade5838c..d3d8bea1b9238 100644 --- a/src/libstd/gc/mod.rs +++ b/src/libstd/gc/mod.rs @@ -43,6 +43,11 @@ fn pointer_run_dtor(p: *mut ()) { ptr::read_ptr(p as *T); } } +fn pointer_trace(p: *(), _: uint, tracer: &mut GcTracer) { + unsafe { + (*(p as *T)).trace(tracer) + } +} /// Possibly register the changes to the GC roots described by the /// arguments. @@ -64,7 +69,7 @@ fn pointer_run_dtor(p: *mut ()) { /// are deallocated or otherwise become invalid. #[inline] pub unsafe fn register_root_changes(removals: &[*T], - additions: &[(*T, uint)]) { + additions: &[(*T, uint, collector::TracingFunc)]) { if reaches_new_managed::() { register_root_changes_always::(removals, additions) } @@ -76,7 +81,7 @@ pub unsafe fn register_root_changes(removals: &[*T], /// See the conditional but otherwise identical /// `register_root_changes` for description. pub unsafe fn register_root_changes_always(removals: &[*T], - additions: &[(*T, uint)]) { + additions: &[(*T, uint, collector::TracingFunc)]) { let mut gc = { let mut task = local::Local::borrow(None::); @@ -122,11 +127,12 @@ pub unsafe fn register_root_changes_always(removals: &[*T], }; for ptr in removals.iter() { - gc_ref.unregister_root(*ptr as *()); + debug!("unregistering {}", *ptr); + gc_ref.nongc_unregister(*ptr as *()); } - for &(ptr, length) in additions.iter() { - let end = ptr.offset(length as int); - gc_ref.register_root(ptr as *(), end as *()); + for &(ptr, metadata, tracer) in additions.iter() { + debug!("registering {} {}", ptr, metadata); + gc_ref.nongc_register(ptr as *(), metadata, tracer) } } @@ -134,6 +140,52 @@ pub unsafe fn register_root_changes_always(removals: &[*T], task.get().gc = gc; } +pub unsafe fn update_metadata(ptr: *T, metadata: uint) { + if reaches_new_managed::() { + update_metadata_always(ptr as *(), metadata) + } +} + +pub unsafe fn update_metadata_always(ptr: *(), metadata: uint) { + debug!("update_metadata_always: setting {} MD to {}", ptr, metadata); + let mut gc = { + let mut task = local::Local::borrow(None::); + + // we don't execute any external code inside here and + // everything is task local, so Uninit is fine (because + // nothing else will read it) + replace(&mut task.get().gc, GcUninit) + }; + match gc { + // can't update the metadata of a bad_pointer + GcUninit => return, + // the task is cleaning up, so registering root changes would + // be pointless. + GcBorrowed(ptr) if ptr.is_null() => return, + _ => {} + } + { + let gc_ref = match gc { + GcUninit => unreachable!(), + GcExists(ref mut gc) => &mut **gc, + // you might wonder why we can do this safely. We hit this + // code path when a collection runs a finaliser that + // wishes to change any roots (usually deregistering a + // root). Finalisers run after all the scanning, and we + // don't touch the root information data structure while + // running them, so we're fine to modify it. + // + // (if `unsafe_gc` is null we'd've already returned from + // the check above) + GcBorrowed(unsafe_gc) => &mut *unsafe_gc + }; + gc_ref.nongc_update_metadata(ptr, metadata); + } + + let mut task = local::Local::borrow(None::); + task.get().gc = gc; +} + /// Immutable garbage-collected pointer type. 
///
/// # Warning
///
priv ptr: *T
}
-impl Gc {
+impl Gc {
/// Construct a new garbage-collected box
#[experimental="not rooted by built-in pointer and vector types"]
pub fn new(value: T) -> Gc {
@@ -213,12 +265,8 @@ impl Gc {
// FIXME: we currently count ~Gc as owning managed,
// but it shouldn't (~, or equivalent) should root the Gc
// itself.
- ptr = if reaches_new_managed::() {
- gc.alloc_gc(size, finaliser)
- } else {
- gc.alloc_gc_no_scan(size, finaliser)
- } as *mut T;
-
+ ptr = gc.alloc(size, Some(pointer_trace::), finaliser) as *mut T;
+ debug!("Gc::new: alloc'd {}", ptr);
move_val_init(&mut *ptr, value);
}
@@ -277,13 +325,118 @@ impl Clone for Gc {
/// The `Freeze` bound restricts this to acyclic graphs where it is well-defined.
///
/// A `Send` bound would also work, but `Send` *or* `Freeze` cannot be expressed.
-impl DeepClone for Gc {
+impl DeepClone for Gc {
#[inline]
fn deep_clone(&self) -> Gc {
Gc::new(unsafe {self.borrow_write_barrier().deep_clone()})
}
}
+/// Stores the appropriate tools for interacting with the garbage
+/// collector while tracing.
+pub struct GcTracer<'a> {
+ priv gc: &'a mut collector::GarbageCollector
+}
+
+impl<'a> GcTracer<'a> {
+ /// Returns true on the first call for a given value of `ptr`,
+ /// which is a "pointer" to some non-GC'd memory region that has
+ /// been previously registered with the GC.
+ pub fn pointer_first_trace(&mut self, ptr: *()) -> bool {
+ self.gc.nongc_first_trace(ptr)
+ }
+
+ pub unsafe fn conservative_scan(&mut self, start: *uint, end: *uint) {
+ self.gc.conservative_scan(start, end)
+ }
+}
+
+/// Give information to the GC about precisely which pieces of memory
+/// contain `Gc` pointers.
+pub trait Trace {
+ /// Trace the value, marking any reachable `Gc` pointers.
+ fn trace(&self, tracer: &mut GcTracer);
+}
+
+impl Trace for Gc {
+ fn trace(&self, tracer: &mut GcTracer) {
+ if tracer.gc.gc_first_trace(self.ptr as *()) {
+ debug!("Gc.trace inner: {}", self.ptr);
+ // only bother tracing if we can get to a Gc pointer.
+ unsafe {
+ if reaches_new_managed::() {
+ (*self.ptr).trace(tracer)
+ }
+ }
+ }
+ }
+}
+
+/// Things that definitely don't contain any GC'd pointers.
+macro_rules! no_gc {
+ ($($t:ty),*) => {
+ $(
+ impl Trace for $t { #[inline(always)] fn trace(&self, _: &mut GcTracer) {} }
+ )*
+ }
+}
+
+no_gc! {
+ int, i8, i16, i32, i64,
+ uint, u8, u16, u32, u64,
+ f32, f64,
+ ~str
+}
+impl<'a> Trace for &'a str { #[inline(always)] fn trace(&self, _: &mut GcTracer) {} }
+
+
+/// These should probably be compiler generated somehow...
+
+// create tracing impls for tuples.
+macro_rules! trace_tuple {
+ () => { impl Trace for () { #[inline(always)] fn trace(&self, _: &mut GcTracer) {} } };
+
+ // we "abuse" `ident`s, so we can reuse them as both the type
+ // parameter and the variable name.
+ ($head:ident $(, $rest:ident)*) => {
+ // create an impl with all our arguments
+ impl<$head: Trace $(, $rest: Trace )*> Trace for ($head, $($rest),*) {
+ fn trace(&self, tracer: &mut GcTracer) {
+ let (ref $head, $( ref $rest ),*) = *self;
+ $head.trace(tracer);
+ $( $rest.trace(tracer); )*
+ }
+ }
+
+ // recursively generate the shorter tuple impls
+ trace_tuple!($($rest),*)
+ }
+}
+trace_tuple! {
+ _1, _2, _3, _4, _5, _6, _7, _8, _9, _10,
+ _11, _12, _13, _14, _15, _16, _17, _18, _19, _20,
+ _21, _22, _23, _24, _25, _26, _27, _28, _29, _30,
+ _31, _32, _33, _34, _35, _36, _37, _38, _39, _40
+}
+
+macro_rules! trace_fixed_vec {
+ ($($e: expr),*) => {
+ $(
+ impl Trace for [T, ..
$e] {
+ fn trace(&self, tracer: &mut GcTracer) {
+ for v in self.iter() {
+ v.trace(tracer)
+ }
+ }
+ }
+ )*
+ }
+}
+trace_fixed_vec! {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 100, 1000, 10000, 100000
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/src/libstd/gc/ptr_map.rs b/src/libstd/gc/ptr_map.rs
index a554393dbd8a8..4faf035eb50d7 100644
--- a/src/libstd/gc/ptr_map.rs
+++ b/src/libstd/gc/ptr_map.rs
@@ -15,33 +15,45 @@
use option::{Option, Some, None};
use trie::{TrieMap, TrieMapIterator};
use vec::ImmutableVector;
+use gc::collector::TracingFunc;
+
pub struct PtrMap {
// a map from the start of each allocation to a descriptor
// containing information about it.
priv map: TrieMap<~PtrDescr>,
- // The state of `reachable` that represents whether an allocation
- // is reachable, i.e. descr.reachable_flag == this.reachable_state
- // implies the pointer is reachable.
- priv reachable_state: bool,
+ // The state of the REACHABLE bit of `PtrDescr.flags` that
+ // represents whether an allocation is reachable.
+ priv reachable_state: Flag,
}
+type Flag = u8;
+static REACHABLE: Flag = 0b0000_0001;
+static USED: Flag = 0b0000_0010;
+
/// This representation could be optimised.
pub struct PtrDescr {
- // the top edge of the allocation.
- high: uint,
+ // arbitrary data associated with this pointer.
+ metadata: uint,
+ // the function to use to perform tracing on this allocation
+ tracer: Option,
// the finaliser to run
finaliser: Option,
- // whether this allocation is reachable (see
- // PtrMap.reachable_state)
- reachable_flag: bool,
- // whether this allocation should be scanned (i.e. whether it
- // contains rooted references to GC pointers)
- scan: bool
+ // tiny properties about this allocation.
+ flags: Flag,
}
impl PtrDescr {
fn is_used(&self) -> bool {
- self.high != 0
+ self.flags & USED == USED
+ }
+ fn is_used_and_unreachable(&self, map_reachable_state: Flag) -> bool {
+ let unreachable = !(self.flags & REACHABLE == map_reachable_state);
+ self.is_used() & unreachable
+ }
+
+ fn set_reachable(&mut self, reachability: Flag) {
+ // filter out the reachable bit and then set it explicitly
+ self.flags = (self.flags & !REACHABLE) | (reachability & REACHABLE);
}
}
@@ -50,20 +62,23 @@ impl PtrMap {
pub fn new() -> PtrMap {
PtrMap {
map: TrieMap::new(),
- reachable_state: true
+ reachable_state: REACHABLE
}
}
- /// Register an allocation starting at `ptr` running for `length`
- /// bytes. `scan` indicates if the allocation should be scanned,
- /// and `finaliser` is the "destructor" to run on the region.
- pub fn insert_alloc(&mut self, ptr: uint, length: uint, scan: bool,
+ /// Register an allocation starting at `ptr`, with an arbitrary
+ /// piece of information `metadata`, a function to trace `tracer`
+ /// and the destructor `finaliser`.
+ pub fn insert_alloc(&mut self,
+ ptr: uint,
+ metadata: uint,
+ tracer: Option,
finaliser: Option) {
- let descr = PtrDescr {
- high: ptr + length,
- reachable_flag: self.reachable_state,
- scan: scan,
- finaliser: finaliser
+ let descr = ~PtrDescr {
+ flags: self.reachable_state | USED,
+ tracer: tracer,
+ finaliser: finaliser,
+ metadata: metadata
};
self.map.insert(ptr, descr);
}
@@ -73,7 +88,9 @@ impl PtrMap {
/// (attempting to reuse a live allocation, or an allocation that
/// wasn't found).
pub fn reuse_alloc(&mut self, - ptr: uint, length: uint, scan: bool, + ptr: uint, + metadata: uint, + tracer: Option, finaliser: Option) -> bool { match self.map.find_mut(&ptr) { Some(descr) => { @@ -81,10 +98,10 @@ impl PtrMap { warn!("attempting to reuse a used allocation") false // don't overwrite } else { - descr.high = ptr + length; descr.finaliser = finaliser; - descr.scan = scan; - descr.reachable_flag = self.reachable_state; + descr.metadata = metadata; + descr.tracer = tracer; + descr.flags = self.reachable_state | USED; true } } @@ -96,12 +113,13 @@ impl PtrMap { /// currently marked as unreachable, mark it as reachable and /// retrieve the high end & whether it requires scanning; /// otherwise, return None. - pub fn mark_reachable_scan_info(&mut self, ptr: uint) -> Option<(uint, bool)> { + pub fn mark_reachable_scan_info(&mut self, ptr: uint) -> Option<(uint, Option)> { match self.map.find_mut(&ptr) { Some(descr) => { - if descr.is_used() && descr.reachable_flag != self.reachable_state { - descr.reachable_flag = self.reachable_state; - Some((descr.high, descr.scan)) + if descr.is_used_and_unreachable(self.reachable_state) { + // mark it reachable + descr.set_reachable(self.reachable_state); + Some((descr.metadata, descr.tracer)) } else { None } @@ -110,13 +128,21 @@ impl PtrMap { } } + /// Set the word of metadata associated with `ptr` to `metadata`. + pub fn update_metadata<'a>(&'a mut self, ptr: uint, metadata: uint) -> bool { + match self.map.find_mut(&ptr) { + Some(ref mut descr) if descr.is_used() => { descr.metadata = metadata; true } + _ => false + } + } + /// Find the unreachable pointers in the map, returing `[(low, - /// size, finaliser)]`. + /// metadata, finaliser)]`. pub fn find_unreachable(&mut self) -> ~[(uint, uint, Option)] { self.map.iter() .filter_map(|(low, descr)| { - if descr.is_used() && descr.reachable_flag != self.reachable_state { - Some((low, descr.high - low, descr.finaliser)) + if descr.is_used_and_unreachable(self.reachable_state) { + Some((low, descr.metadata, descr.finaliser)) } else { None } @@ -126,7 +152,7 @@ impl PtrMap { /// Mark an allocation as unused. pub fn mark_unused(&mut self, ptr: uint) { match self.map.find_mut(&ptr) { - Some(descr) => { descr.high = 0; descr.finaliser = None } + Some(descr) => { descr.finaliser = None; descr.flags &= !USED; } None => {} } } @@ -135,7 +161,17 @@ impl PtrMap { /// everything is considered unreachable at the start of the next /// collection. pub fn toggle_reachability(&mut self) { - self.reachable_state = !self.reachable_state; + self.reachable_state ^= REACHABLE; + } + + /// Manually mark every pointer as unreachable. Prefer + /// `toggle_reachability` when you have the guarantee that all the + /// pointers in the map are currently considered reachable. + pub fn inefficient_mark_all_unreachable(&mut self) { + for (_, descr) in self.map.mut_iter() { + // invert to mark as unreachable + descr.set_reachable(self.reachable_state ^ REACHABLE) + } } /// Deregister the allocation starting at `ptr`. @@ -143,11 +179,12 @@ impl PtrMap { self.map.remove(&ptr); } - /// Iterate over `(low, &'a PtrDescr)`. - pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, PtrDescr> { + /// Iterate over `(low, &'a ~PtrDescr)`. + pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, ~PtrDescr> { self.map.iter() } /// The number of pointers registered. 
+ #[allow(dead_code)] pub fn len(&self) -> uint { self.map.len() } } diff --git a/src/libstd/libvec.rs b/src/libstd/libvec.rs index b0b1921176519..2fb99bc3327dc 100644 --- a/src/libstd/libvec.rs +++ b/src/libstd/libvec.rs @@ -24,6 +24,7 @@ use num::CheckedMul; use option::{Option, Some, None}; use iter::{Iterator, DoubleEndedIterator}; use gc; +use gc::Trace; pub struct Vec { priv len: uint, @@ -31,7 +32,15 @@ pub struct Vec { priv ptr: *mut T } -impl Vec { +pub fn trace(ptr: *(), length: uint, tracer: &mut gc::GcTracer) { + debug!("libvec::trace: {} {}", ptr, length); + let v: &[T] = unsafe {transmute(raw::Slice { data: ptr as *T, len: length })}; + for t in v.iter() { + t.trace(tracer) + } +} + +impl Vec { #[inline(always)] pub fn new() -> Vec { Vec { len: 0, cap: 0, ptr: 0 as *mut T } @@ -46,7 +55,7 @@ impl Vec { let ptr = malloc(size as size_t); if ptr.is_null() { fail!("null pointer") } - gc::register_root_changes([], [(ptr as *T, capacity)]); + gc::register_root_changes([], [(ptr as *T, 0, trace::)]); Vec { len: 0, cap: capacity, ptr: ptr as *mut T } } } @@ -60,7 +69,7 @@ impl Container for Vec { } } -impl Vec { +impl Vec { #[inline(always)] pub fn capacity(&self) -> uint { self.cap @@ -75,7 +84,7 @@ impl Vec { if ptr.is_null() { fail!("null pointer") } gc::register_root_changes([self.ptr as *T], - [(ptr as *T, capacity)]); + [(ptr as *T, self.len, trace::)]); self.ptr = ptr; } } @@ -93,24 +102,13 @@ impl Vec { let ptr = realloc(self.ptr as *mut c_void, (self.len * size_of::()) as size_t) as *mut T; if ptr.is_null() { fail!("null pointer") } - gc::register_root_changes([self.ptr as *T], [(ptr as *T, self.len)]); + gc::register_root_changes([self.ptr as *T], [(ptr as *T, self.len, trace::)]); self.cap = self.len; } } } - pub fn pop(&mut self) -> Option { - if self.len == 0 { - None - } else { - unsafe { - self.len -= 1; - Some(read_ptr(self.as_slice().unsafe_ref(self.len()))) - } - } - } - #[inline] pub fn push(&mut self, value: T) { if self.len == self.cap { @@ -122,7 +120,7 @@ impl Vec { unsafe { let ptr = realloc(self.ptr as *mut c_void, size as size_t) as *mut T; gc::register_root_changes([self.ptr as *T], - [(ptr as *T, self.cap)]); + [(ptr as *T, self.len, trace::)]); self.ptr = ptr; } } @@ -131,6 +129,21 @@ impl Vec { let end = self.ptr.offset(self.len as int) as *mut T; move_val_init(&mut *end, value); self.len += 1; + gc::update_metadata(self.ptr as *T, self.len); + } + } +} + +impl Vec { + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + unsafe { + self.len -= 1; + gc::update_metadata(self.ptr as *T, self.len); + Some(read_ptr(self.as_slice().unsafe_ref(self.len()))) + } } } @@ -170,6 +183,16 @@ impl Drop for Vec { } } +impl Trace for Vec { + fn trace(&self, tracer: &mut gc::GcTracer) { + if tracer.pointer_first_trace(self.ptr as *()) { + for val in self.as_slice().iter() { + val.trace(tracer); + } + } + } +} + pub struct MoveIterator { priv allocation: *mut u8, // the block of memory allocated for the vector priv iter: VecIterator<'static, T> diff --git a/src/libstd/uniq.rs b/src/libstd/uniq.rs index 0066783e47d2d..10ebd955c5f8c 100644 --- a/src/libstd/uniq.rs +++ b/src/libstd/uniq.rs @@ -12,21 +12,28 @@ use ops::Drop; use libc::{malloc, free, size_t, c_void}; use gc; +use gc::Trace; use mem; use ptr; use ptr::RawPtr; use unstable::intrinsics::move_val_init; +fn trace(ptr: *(), _: uint, tracer: &mut gc::GcTracer) { + unsafe { + (*(ptr as *T)).trace(tracer) + } +} + #[unsafe_no_drop_flag] pub struct Uniq { priv ptr: *mut T } -impl Uniq { +impl Uniq 
{ pub fn new(value: T) -> Uniq { unsafe { let ptr = malloc(mem::size_of::() as size_t) as *mut T; - gc::register_root_changes([], [(ptr as *T, 1)]); + gc::register_root_changes([], [(ptr as *T, 0, trace::)]); move_val_init(&mut *ptr, value); Uniq { ptr: ptr } } @@ -64,3 +71,13 @@ impl Drop for Uniq { } } } + +impl Trace for Uniq { + fn trace(&self, tracer: &mut gc::GcTracer) { + if tracer.pointer_first_trace(self.ptr as *()) { + unsafe { + (*self.ptr).trace(tracer) + } + } + } +} From 00fc1b3c474ff3de2a0b4c86ddbafe5193d963c2 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sun, 12 Jan 2014 17:53:44 +1100 Subject: [PATCH 15/21] std::gc: search task-local storage for garbage collected values. Storing a Gc in local data is now safe. --- src/libstd/gc/collector.rs | 8 ++++++ src/libstd/gc/mod.rs | 1 + src/libstd/local_data.rs | 19 ++++++++++++++ src/test/run-pass/gc-store-pointer-in-tls.rs | 27 ++++++++++++++++++++ 4 files changed, 55 insertions(+) create mode 100644 src/test/run-pass/gc-store-pointer-in-tls.rs diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs index 8044bddd7998a..0d5e328fa28fc 100644 --- a/src/libstd/gc/collector.rs +++ b/src/libstd/gc/collector.rs @@ -11,6 +11,7 @@ use gc::collector::ptr_map::PtrMap; use iter::Iterator; use libc; +use local_data; use num::BitCount; use option::{Some, None, Option}; use ops::Drop; @@ -225,6 +226,13 @@ impl GarbageCollector { self.conservative_scan(stack_end, stack_top as *uint); + // conservatively search task-local storage; this could + // possibly use the tydesc to be precise. + local_data::each_unborrowed_value(|ptr, tydesc| { + let end = (ptr as *u8).offset((*tydesc).size as int); + self.conservative_scan(ptr as *uint, end as *uint) + }); + // Step 2. sweep all the unreachable ones for deallocation. let unreachable = self.gc_ptrs.find_unreachable(); diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs index d3d8bea1b9238..7433febef975f 100644 --- a/src/libstd/gc/mod.rs +++ b/src/libstd/gc/mod.rs @@ -382,6 +382,7 @@ macro_rules! no_gc { } no_gc! { + bool, int, i8, i16, i32, i64, uint, u8, u16, u32, u64, f32, f64, diff --git a/src/libstd/local_data.rs b/src/libstd/local_data.rs index 159337bf50335..c872f011c91d4 100644 --- a/src/libstd/local_data.rs +++ b/src/libstd/local_data.rs @@ -44,6 +44,7 @@ use cast; use libc; use prelude::*; use rt::task::{Task, LocalStorage}; +use unstable::intrinsics::TyDesc; use util; /** @@ -132,6 +133,24 @@ fn key_to_key_value(key: Key) -> *libc::c_void { unsafe { cast::transmute(key) } } +/// Unsafely iterate over pointers and type descriptors of the values +/// in task-local storage that don't have an outstanding loan. +#[experimental="interface requires tuning"] +pub unsafe fn each_unborrowed_value(f: |*libc::c_void, *TyDesc|) { + let map = get_local_map(); + + for value in map.iter() { + match *value { + None => {} + Some((_, ref value, NoLoan)) => { + let &(tydesc, ptr): &(**TyDesc, *libc::c_void) = cast::transmute(value); + f(ptr, *tydesc) + } + Some(_) => {} + } + } +} + /// Removes a task-local value from task-local storage. This will return /// Some(value) if the key was present in TLS, otherwise it will return None. /// diff --git a/src/test/run-pass/gc-store-pointer-in-tls.rs b/src/test/run-pass/gc-store-pointer-in-tls.rs new file mode 100644 index 0000000000000..509a15fb73d97 --- /dev/null +++ b/src/test/run-pass/gc-store-pointer-in-tls.rs @@ -0,0 +1,27 @@ +// Copyright 2014 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[allow(experimental)]; + +use std::local_data; +use std::gc::{Gc, set_collection_frequency}; + +local_data_key!(GC_KEY: Gc) + +fn main() { + set_collection_frequency(1); + // we squirrel away a GC pointer, and then check that it doesn't + // get overwritten. + local_data::set(GC_KEY, Gc::new(true)); + + for _ in range(0, 20) {Gc::new(false);} + + local_data::get(GC_KEY, |ptr| assert!(unsafe {*ptr.unwrap().borrow()})); +} From 14aeaefdc9cd6c5be78cd72a6a03b3e207635b21 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 15 Jan 2014 11:09:03 +1100 Subject: [PATCH 16/21] std::gc: avoid putting all unreachable points into a vector. --- src/libstd/gc/collector.rs | 51 ++++++++++++++++++++++++-------------- src/libstd/gc/ptr_map.rs | 32 +++++++++++------------- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs index 0d5e328fa28fc..6c270021486d0 100644 --- a/src/libstd/gc/collector.rs +++ b/src/libstd/gc/collector.rs @@ -233,27 +233,40 @@ impl GarbageCollector { self.conservative_scan(ptr as *uint, end as *uint) }); - // Step 2. sweep all the unreachable ones for deallocation. - let unreachable = self.gc_ptrs.find_unreachable(); - for &(ptr, size, finaliser) in unreachable.iter() { - debug!("unreachable: 0x{:x}", ptr); - match finaliser { - Some(f) => f(ptr as *mut ()), - None => {} - } + let mut bytes_collected = 0u; + let mut large_allocs = ~[]; + self.gc_ptrs.each_unreachable(|ptr, descr| { + debug!("unreachable: 0x{:x}", ptr); + match descr.finaliser { + Some(f) => f(ptr as *mut ()), + None => {} + } - let log_rounded = compute_log_rounded_up_size(size); - // a "small" allocation so we cache it. - if log_rounded <= ALLOC_CACHE_MAX_LOG { - self.gc_ptrs.mark_unused(ptr); - self.alloc_cache[log_rounded - ALLOC_CACHE_MIN_LOG].push(ptr); - } else { - // a big one, so whatever, the OS can have its memory - // back. - self.gc_ptrs.remove(ptr); - libc::free(ptr as *libc::c_void); - } + // GC'd pointers use the metadata to store the size + let log_rounded = compute_log_rounded_up_size(descr.metadata); + // a "small" allocation so we cache it. + if log_rounded <= ALLOC_CACHE_MAX_LOG { + // the each_unreachable driver marks this as + // unused internally. + self.alloc_cache[log_rounded - ALLOC_CACHE_MIN_LOG].push(ptr); + + let actual_size = 1 << log_rounded; + bytes_collected += actual_size; + } else { + large_allocs.push(ptr); + + bytes_collected += descr.metadata; + } + + true + }); + // have to do these removals outside that loop + for &ptr in large_allocs.iter() { + // a big one, so whatever, the OS can have its memory + // back. + self.gc_ptrs.remove(ptr); + libc::free(ptr as *libc::c_void); } } } diff --git a/src/libstd/gc/ptr_map.rs b/src/libstd/gc/ptr_map.rs index 4faf035eb50d7..85165cfc69e1a 100644 --- a/src/libstd/gc/ptr_map.rs +++ b/src/libstd/gc/ptr_map.rs @@ -136,25 +136,21 @@ impl PtrMap { } } - /// Find the unreachable pointers in the map, returing `[(low, - /// metadata, finaliser)]`. 
- pub fn find_unreachable(&mut self) -> ~[(uint, uint, Option)] { - self.map.iter() - .filter_map(|(low, descr)| { - if descr.is_used_and_unreachable(self.reachable_state) { - Some((low, descr.metadata, descr.finaliser)) - } else { - None - } - }).collect() - } - - /// Mark an allocation as unused. - pub fn mark_unused(&mut self, ptr: uint) { - match self.map.find_mut(&ptr) { - Some(descr) => { descr.finaliser = None; descr.flags &= !USED; } - None => {} + /// Find the unreachable pointers in the map, iterating over + /// `(low, descriptor)`. This marks each of these pointers as + /// unused (and clears their destructors) after calling `f`. + #[inline] + pub fn each_unreachable(&mut self, f: |uint, &PtrDescr| -> bool) -> bool { + for (low, descr) in self.map.mut_iter() { + if descr.is_used_and_unreachable(self.reachable_state) { + let cont = f(low, *descr); + // mark as unused + descr.finaliser = None; + descr.flags &= !USED; + if !cont { return false; } + } } + return true; } /// After a collection this will flip an internal bit so that From 9c693197c9549ef222b8c31281cdd8b36537ddd9 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 15 Jan 2014 11:11:13 +1100 Subject: [PATCH 17/21] std::gc: use a inverse-load-factor based collection policy. This makes garbage collections proportionally less frequent as the live heap gets larger. Each collection takes time proportional to the size of the heap, so doing a collection regularly (i.e. O(heap size) collections) gives O(heap size^2) behaviour. Doing the collections less frequently lets us keep O(heap size) performance. --- src/libstd/gc/collector.rs | 70 ++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs index 6c270021486d0..0571958c14f52 100644 --- a/src/libstd/gc/collector.rs +++ b/src/libstd/gc/collector.rs @@ -8,6 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use cmp; use gc::collector::ptr_map::PtrMap; use iter::Iterator; use libc; @@ -23,7 +24,8 @@ use gc::GcTracer; mod ptr_map; -static DEFAULT_ALLOCS_PER_COLLECTION_MASK: uint = (1 << 10) - 1; +static DEFAULT_INVERSE_LOAD_FACTOR: f32 = 2.0; +static MINIMUM_COLLECTION: uint = 65536; static ALLOC_CACHE_MIN_LOG: uint = 3; static ALLOC_CACHE_MAX_LOG: uint = 20; @@ -59,16 +61,29 @@ pub struct GarbageCollector { priv roots: PtrMap, /// Garbage-collectable pointers. priv gc_ptrs: PtrMap, - /// number of GC-able allocations performed. - priv gc_allocs: uint, /// cached allocations, of sizes 8, 16, 32, 64, ... 1 << 20 (1 MB) /// (inclusive, with 8 at index 0). Anything smaller gets rounded /// to 8, anything larger is uncached. priv alloc_cache: [~[uint], .. ALLOC_CACHE_MAX_LOG - ALLOC_CACHE_MIN_LOG + 1], - /// the number of allocations to do before collection (in mask - /// form, i.e. we are detecting `gc_allocs % (1 << n) == 0` for - /// some n). - priv gc_allocs_per_collection_mask: uint + + /// The ratio between (heap size for tracing/"marking") and the + /// number of bytes to allocate ("cons") per collection. + priv inverse_load_factor: f32, + + /// The number of bytes we should allocate before collecting. + priv bytes_for_next_gc: uint, + /// The number of bytes allocated since the last collection. + priv bytes_since_last_gc: uint, + + // the byte size of live allocations, that is, GC pointers + // considered reachable. 
+ priv live_heap_bytes: uint, + // the total number of bytes that have been "allocated" by + // `.alloc` (including the allocations reused from the cache). + priv total_heap_bytes: uint, + + /// number of GC-able allocations performed. + priv gc_allocs: uint, } fn compute_log_rounded_up_size(size: uint) -> uint { @@ -99,14 +114,22 @@ impl GarbageCollector { alloc_cache: [~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[], ~[]], - gc_allocs: 0, - gc_allocs_per_collection_mask: DEFAULT_ALLOCS_PER_COLLECTION_MASK + + inverse_load_factor: DEFAULT_INVERSE_LOAD_FACTOR, + + bytes_for_next_gc: MINIMUM_COLLECTION, + bytes_since_last_gc: 0, + + live_heap_bytes: 0, + total_heap_bytes: 0, + + gc_allocs: 0 } } /// Run a garbage collection if we're due for one. pub unsafe fn occasional_collection(&mut self, stack_top: uint) { - if self.gc_allocs & self.gc_allocs_per_collection_mask == 0 { + if self.bytes_since_last_gc >= self.bytes_for_next_gc { self.collect(stack_top) } } @@ -121,6 +144,7 @@ impl GarbageCollector { tracer: Option, finaliser: Option) -> *mut u8 { self.gc_allocs += 1; + let log_next_power_of_two = compute_log_rounded_up_size(size); // it's always larger than ALLOC_CACHE_MIN_LOG @@ -131,6 +155,11 @@ impl GarbageCollector { // allocation already. let success = self.gc_ptrs.reuse_alloc(ptr, size, tracer, finaliser); if success { + let alloc_size = 1 << log_next_power_of_two; + self.bytes_since_last_gc += alloc_size; + self.total_heap_bytes += alloc_size; + self.live_heap_bytes += alloc_size; + debug!("using cache for allocation of size {}", size); return ptr as *mut u8; } @@ -144,6 +173,10 @@ impl GarbageCollector { size }; + self.bytes_since_last_gc += alloc_size; + self.total_heap_bytes += alloc_size; + self.live_heap_bytes += alloc_size; + let ptr = libc::malloc(alloc_size as libc::size_t); if ptr.is_null() { fail!("GC failed to allocate.") @@ -154,6 +187,14 @@ impl GarbageCollector { ptr as *mut u8 } + pub fn set_inverse_load_factor(&mut self, new_factor: f32) { + if !(new_factor > 1.0) { + fail!("GarbageCollector requires an inverse load factor > 1, not {}", new_factor) + } + + self.inverse_load_factor = new_factor; + } + /// Register the block of memory [`start`, `end`) for tracing when /// a word matching `start` pointer is seen during a conservative /// scan. On such a scan, `tracer` is called, passing in the @@ -252,10 +293,12 @@ impl GarbageCollector { self.alloc_cache[log_rounded - ALLOC_CACHE_MIN_LOG].push(ptr); let actual_size = 1 << log_rounded; + self.live_heap_bytes -= actual_size; bytes_collected += actual_size; } else { large_allocs.push(ptr); + self.live_heap_bytes -= descr.metadata; bytes_collected += descr.metadata; } @@ -268,6 +311,13 @@ impl GarbageCollector { self.gc_ptrs.remove(ptr); libc::free(ptr as *libc::c_void); } + + self.bytes_since_last_gc = 0; + self.bytes_for_next_gc = cmp::max(MINIMUM_COLLECTION, + (self.live_heap_bytes as f32 * + (self.inverse_load_factor - 1.0)) as uint); + info!("Collection: collected {}, leaving {} bytes. next GC in {} bytes.", + bytes_collected, self.live_heap_bytes, self.bytes_for_next_gc); } } From 8e42594811acd0e97cc5ab48f2b6a8c5bd798b98 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 15 Jan 2014 11:16:39 +1100 Subject: [PATCH 18/21] std::gc: remove a branch from `compute_log_rounded_up_size`. 
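
The identity behind the new version: for `size >= 2`, the next power of
two is `1 << (uint::bits - (size - 1).leading_zeros())`. A standalone
sketch of this (in the pre-1.0 Rust dialect of this series;
`log_rounded_up` and the `main` driver are illustrative only, and
`BitCount` is the trait collector.rs already imports for
`leading_zeros`):

    use std::num::BitCount;
    use std::uint;

    // Round `size` up to the next power of two and return its log2.
    // Assumes size >= 2; the real function routes sizes at or below
    // 1 << ALLOC_CACHE_MIN_LOG through the other branch first.
    fn log_rounded_up(size: uint) -> uint {
        uint::bits - (size - 1).leading_zeros()
    }

    fn main() {
        assert_eq!(log_rounded_up(16), 4); // exact power of two: 16 == 1 << 4
        assert_eq!(log_rounded_up(17), 5); // rounds up to 32 == 1 << 5
        assert_eq!(log_rounded_up(9), 4);  // rounds up to 16 == 1 << 4
    }

Subtracting 1 before counting is what removes the power-of-two special
case: for 16 (0b10000), `size - 1` is 0b1111, which has one extra
leading zero, so the count comes out at 4 rather than 5.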
--- src/libstd/gc/collector.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs index 0571958c14f52..018c92ecec727 100644 --- a/src/libstd/gc/collector.rs +++ b/src/libstd/gc/collector.rs @@ -91,16 +91,11 @@ fn compute_log_rounded_up_size(size: uint) -> uint { // round up to the minimum ALLOC_CACHE_MIN_LOG } else { - // for powers of two 1 << n, this gives n + 1, otherwise, - // for a number like `0b101` it gives 3, which is exactly - // what we want. - let raw = uint::bits - size.leading_zeros(); - // power of two - if size & (size - 1) == 0 { - raw - 1 - } else { - raw - } + // This will never underflow, and will always preserve the + // highest bit, except in the case of powers of two; where it + // will increase the number of leading zeros by 1, which is + // exactly what we want (otherwise we'd round 16 up to 32). + uint::bits - (size - 1).leading_zeros() } } From 93716c093b8d84c1cd20264b81b116bdaec7f1d1 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 15 Jan 2014 22:54:38 +1100 Subject: [PATCH 19/21] Use lang items to allow registering ~ and ~[] for scanning with the GC. For the moment, it's just a conservative scan for each, but with some more trickery, we'll be able to get proper tracing. The lang items are just called for literal ~ and ~[] allocations, and also when they are freed. --- src/librustc/middle/lang_items.rs | 6 ++++-- src/librustc/middle/trans/base.rs | 26 +++++++++++++++++++++++--- src/librustc/middle/trans/glue.rs | 14 +++++++++++--- src/libstd/gc/collector.rs | 2 +- src/libstd/gc/mod.rs | 22 ++++++++++++++++++++++ src/libstd/vec.rs | 25 +++++++++++++++++++++++++ 6 files changed, 86 insertions(+), 9 deletions(-) diff --git a/src/librustc/middle/lang_items.rs b/src/librustc/middle/lang_items.rs index a89a9eb750215..232eb53c37e69 100644 --- a/src/librustc/middle/lang_items.rs +++ b/src/librustc/middle/lang_items.rs @@ -207,7 +207,7 @@ pub fn collect_language_items(crate: &ast::Crate, } lets_do_this! { - There are 40 lang items. + There are 42 lang items. // ID, Variant name, Name, Method name; 0, FreezeTraitLangItem, "freeze", freeze_trait; @@ -260,5 +260,7 @@ lets_do_this! 
{ 37, ManagedHeapLangItem, "managed_heap", managed_heap; 38, ExchangeHeapLangItem, "exchange_heap", exchange_heap; 39, GcLangItem, "gc", gc; -} + 40, ManagedPointerNote, "managed_pointer_note", managed_pointer_note; + 41, ManagedPointerUnnote, "managed_pointer_unnote", managed_pointer_unnote; +} diff --git a/src/librustc/middle/trans/base.rs b/src/librustc/middle/trans/base.rs index 46f647cc1a64f..d0965fd744592 100644 --- a/src/librustc/middle/trans/base.rs +++ b/src/librustc/middle/trans/base.rs @@ -37,7 +37,7 @@ use metadata::{csearch, encoder}; use middle::astencode; use middle::lang_items::{LangItem, ExchangeMallocFnLangItem, StartFnLangItem}; use middle::lang_items::{MallocFnLangItem, ClosureExchangeMallocFnLangItem}; -use middle::lang_items::{EhPersonalityLangItem}; +use middle::lang_items::{EhPersonalityLangItem, ManagedPointerNote}; use middle::trans::_match; use middle::trans::adt; use middle::trans::base; @@ -354,17 +354,37 @@ pub fn malloc_raw_dyn<'a>( } } + fn require_gc_fn(bcx: &Block, t: ty::t) -> ast::DefId { + let li = &bcx.tcx().lang_items; + match li.require(ManagedPointerNote) { + Ok(id) => id, + Err(s) => { + bcx.tcx().sess.fatal(format!("allocation of `{}` {}", + bcx.ty_to_str(t), s)); + } + } + } + if heap == heap_exchange { let llty_value = type_of::type_of(ccx, t); - // Allocate space: let r = callee::trans_lang_call( bcx, require_alloc_fn(bcx, t, ExchangeMallocFnLangItem), [size], None); - rslt(r.bcx, PointerCast(r.bcx, r.val, llty_value.ptr_to())) + + if ty::type_contents(bcx.tcx(), t).reaches_new_managed() { + let s = callee::trans_lang_call( + r.bcx, + require_gc_fn(r.bcx, t), + [r.val, size], + None); + rslt(s.bcx, PointerCast(s.bcx, r.val, llty_value.ptr_to())) + } else { + rslt(r.bcx, PointerCast(r.bcx, r.val, llty_value.ptr_to())) + } } else { // we treat ~fn, @fn and @[] as @ here, which isn't ideal let langcall = match heap { diff --git a/src/librustc/middle/trans/glue.rs b/src/librustc/middle/trans/glue.rs index 9b3243fa3ef19..95bbde30a019d 100644 --- a/src/librustc/middle/trans/glue.rs +++ b/src/librustc/middle/trans/glue.rs @@ -17,7 +17,7 @@ use back::abi; use back::link::*; use lib; use lib::llvm::{llvm, ValueRef, True}; -use middle::lang_items::{FreeFnLangItem, ExchangeFreeFnLangItem}; +use middle::lang_items::{FreeFnLangItem, ExchangeFreeFnLangItem, ManagedPointerUnnote}; use middle::trans::adt; use middle::trans::base::*; use middle::trans::callee; @@ -111,7 +111,7 @@ fn simplified_glue_type(tcx: ty::ctxt, field: uint, t: ty::t) -> ty::t { return ty::mk_box(tcx, ty::mk_nil()); } - if field == abi::tydesc_field_drop_glue { + if field == abi::tydesc_field_drop_glue && !ty::type_contents(tcx, t).reaches_new_managed() { match ty::get(t).sty { ty::ty_box(typ) if !ty::type_needs_drop(tcx, typ) => @@ -302,7 +302,15 @@ pub fn make_free_glue<'a>(bcx: &'a Block<'a>, v: ValueRef, t: ty::t) let not_null = IsNotNull(bcx, box_datum.val); with_cond(bcx, not_null, |bcx| { let body_datum = box_datum.box_body(bcx); - let bcx = drop_ty(bcx, body_datum.to_ref_llval(bcx), body_datum.ty); + let mut bcx = drop_ty(bcx, body_datum.to_ref_llval(bcx), body_datum.ty); + + if ty::type_contents(bcx.tcx(), t).reaches_new_managed() { + bcx = callee::trans_lang_call(bcx, + langcall(bcx, None, bcx.ty_to_str(t), + ManagedPointerUnnote), + [PointerCast(bcx, box_datum.val, Type::i8p())], + None).bcx; + } trans_exchange_free(bcx, box_datum.val) }) } diff --git a/src/libstd/gc/collector.rs b/src/libstd/gc/collector.rs index 018c92ecec727..a82ba51e7ed20 100644 --- 
a/src/libstd/gc/collector.rs +++ b/src/libstd/gc/collector.rs @@ -30,7 +30,7 @@ static MINIMUM_COLLECTION: uint = 65536; static ALLOC_CACHE_MIN_LOG: uint = 3; static ALLOC_CACHE_MAX_LOG: uint = 20; -pub type TracingFunc = fn(*(), uint, &mut GcTracer); +pub type TracingFunc = unsafe fn(*(), uint, &mut GcTracer); /// A thread local garbage collector, precise on the head, diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs index 7433febef975f..63c884171acf7 100644 --- a/src/libstd/gc/mod.rs +++ b/src/libstd/gc/mod.rs @@ -140,6 +140,22 @@ pub unsafe fn register_root_changes_always(removals: &[*T], task.get().gc = gc; } + +#[lang="managed_pointer_note"] +pub fn managed_pointer_note(x: *u8, size: uint) { + unsafe { + register_root_changes_always([], + [(x, size, conservative_scan_tracer)]) + } +} +#[lang="managed_pointer_unnote"] +pub fn managed_pointer_unnote(x: *u8) { + unsafe { + register_root_changes_always([x], []); + } +} + + pub unsafe fn update_metadata(ptr: *T, metadata: uint) { if reaches_new_managed::() { update_metadata_always(ptr as *(), metadata) @@ -332,6 +348,12 @@ impl DeepClone for Gc { } } +pub unsafe fn conservative_scan_tracer(ptr: *(), size: uint, tracer: &mut GcTracer) { + let end = (ptr as *u8).offset(size as int) as *uint; + + tracer.conservative_scan(ptr as *uint, end); +} + /// Stores the appropriate tools for interacting with the garbage /// collector while tracing. pub struct GcTracer<'a> { diff --git a/src/libstd/vec.rs b/src/libstd/vec.rs index 797582e57f4ee..27857133de3aa 100644 --- a/src/libstd/vec.rs +++ b/src/libstd/vec.rs @@ -130,6 +130,25 @@ use unstable::raw::{Repr, Slice, Vec}; use unstable::raw::Box; use util; +#[cfg(not(stage0))] +fn pointer_change(delete: Option<*()>, add: Option<(*(), uint)>) { + use unstable::intrinsics::reaches_new_managed; + use gc::{conservative_scan_tracer, register_root_changes_always}; + + unsafe { + if reaches_new_managed::() { + let delete = match delete { Some(p) => &[p], None => &[] }; + let add = match add { + // XXX: this shouldn't be conservative + Some((p, size)) => &[(p, size, conservative_scan_tracer)], + None => &[] + }; + + register_root_changes_always(delete, add) + } + } +} + /** * Creates and initializes an owned vector. * @@ -216,6 +235,8 @@ pub fn with_capacity(capacity: uint) -> ~[T] { let ptr = malloc_raw(size) as *mut Vec<()>; (*ptr).alloc = alloc; (*ptr).fill = 0; + pointer_change::(None, Some((ptr as *(), alloc))); + cast::transmute(ptr) } } @@ -1538,9 +1559,12 @@ impl OwnedVector for ~[T] { if alloc / mem::nonzero_size_of::() != n || size < alloc { fail!("vector size is too large: {}", n); } + let old = *ptr; *ptr = realloc_raw(*ptr as *mut c_void, size) as *mut Vec<()>; (**ptr).alloc = alloc; + + pointer_change::(Some(old as *()), Some((*ptr as *(), alloc))); } } } @@ -3032,6 +3056,7 @@ impl Drop for MoveIterator { // destroy the remaining elements for _x in *self {} unsafe { + pointer_change::(Some(self.allocation as *()), None); exchange_free(self.allocation as *u8 as *c_char) } } From 16b0bb6b9d2c790d05ddef8ecb3743f4ba292107 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Fri, 17 Jan 2014 18:00:14 +1100 Subject: [PATCH 20/21] rustc: #[managed] implies not sendable. With this change, the compiler knows that `~Trait` can never contain or reach any #[managed] pointers, and so doesn't need to touch the GC at all. 
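
The deduction this enables can be illustrated against the widened flag
layout in the diff below. A minimal sketch (the two constants mirror
the new `TC` table; `is_sendable` and the example bit pattern are
hypothetical illustrations, not rustc's actual API):

    // Mirrors the new 8-bit "reaches" group from the diff below.
    static REACHES_NEW_MANAGED: uint = 0b00010000__00000000__0000;
    static NONSENDABLE: uint         = 0b00010111__00000100__0000;

    fn is_sendable(tc: uint) -> bool { tc & NONSENDABLE == 0 }

    fn main() {
        // Roughly the contents a `~int` would get: OwnsOwned | OwnsAffine.
        let tc = 0b00000000__00001001__0000;
        // NONSENDABLE now includes the ReachesNewManaged bit, so knowing
        // a value is sendable also proves it can never reach a Gc<T>,
        // letting trans skip the managed_pointer_note/unnote calls.
        assert!(is_sendable(tc));
        assert!(tc & REACHES_NEW_MANAGED == 0);
    }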
---
 src/librustc/middle/ty.rs | 50 +++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/librustc/middle/ty.rs b/src/librustc/middle/ty.rs
index 0cbe1b5232800..390b56ecc2552 100644
--- a/src/librustc/middle/ty.rs
+++ b/src/librustc/middle/ty.rs
@@ -1750,32 +1750,32 @@ macro_rules! def_type_content_sets(
 
 def_type_content_sets!(
     mod TC {
-        None                                = 0b0000__00000000__0000,
+        None                                = 0b00000000__00000000__0000,
 
         // Things that are interior to the value (first nibble):
-        InteriorUnsized                     = 0b0000__00000000__0001,
-        // InteriorAll                      = 0b0000__00000000__1111,
+        InteriorUnsized                     = 0b00000000__00000000__0001,
+        // InteriorAll                      = 0b00000000__00000000__1111,
 
         // Things that are owned by the value (second and third nibbles):
-        OwnsOwned                           = 0b0000__00000001__0000,
-        OwnsDtor                            = 0b0000__00000010__0000,
-        OwnsAtManaged /* see [1] below */   = 0b0000__00000100__0000,
-        OwnsAffine                          = 0b0000__00001000__0000,
-        OwnsAll                             = 0b0000__11111111__0000,
-
-        // Things that are reachable by the value in any way (fourth nibble):
-        ReachesNonsendAnnot                 = 0b0001__00000000__0000,
-        ReachesBorrowed                     = 0b0010__00000000__0000,
-        // ReachesAtManaged /* see [1] below */ = 0b0100__00000000__0000,
-        ReachesNewManaged                   = 0b0100__00000000__0000,
-        ReachesMutable                      = 0b1000__00000000__0000,
-        ReachesAll                          = 0b1111__00000000__0000,
+        OwnsOwned                           = 0b00000000__00000001__0000,
+        OwnsDtor                            = 0b00000000__00000010__0000,
+        OwnsAtManaged /* see [1] below */   = 0b00000000__00000100__0000,
+        OwnsAffine                          = 0b00000000__00001000__0000,
+        OwnsAll                             = 0b00000000__11111111__0000,
+
+        // Things that are reachable by the value in any way:
+        ReachesNonsendAnnot                 = 0b00000001__00000000__0000,
+        ReachesBorrowed                     = 0b00000010__00000000__0000,
+        // ReachesAtManaged /* see [1] below */ = 0b00000100__00000000__0000,
+        ReachesMutable                      = 0b00001000__00000000__0000,
+        ReachesNewManaged                   = 0b00010000__00000000__0000,
+        ReachesAll                          = 0b11111111__00000000__0000,
 
         // Things that cause values to *move* rather than *copy*
-        Moves                               = 0b0000__00001011__0000,
+        Moves                               = 0b00000000__00001011__0000,
 
         // Things that mean drop glue is necessary
-        NeedsDrop                           = 0b0000__00000111__0000,
+        NeedsDrop                           = 0b00000000__00000111__0000,
 
         // Things that prevent values from being sent
         //
@@ -1784,29 +1784,29 @@ def_type_content_sets!(
         // both ReachesManaged and OwnsManaged so that when
         // a parameter has a bound T:Send, we are able to deduce
         // that it neither reaches nor owns a managed pointer.
-        Nonsendable                         = 0b0111__00000100__0000,
+        Nonsendable                         = 0b00010111__00000100__0000,
 
         // Things that prevent values from being considered freezable
-        Nonfreezable                        = 0b1000__00000000__0000,
+        Nonfreezable                        = 0b00001000__00000000__0000,
 
         // Things that prevent values from being considered 'static
-        Nonstatic                           = 0b0010__00000000__0000,
+        Nonstatic                           = 0b00000010__00000000__0000,
 
         // Things that prevent values from being considered sized
-        Nonsized                            = 0b0000__00000000__0001,
+        Nonsized                            = 0b00000000__00000000__0001,
 
         // Things that make values considered not POD (same as `Moves`)
-        Nonpod                              = 0b0000__00001111__0000,
+        Nonpod                              = 0b00000000__00001111__0000,
 
         // Bits to set when a managed value is encountered
         //
         // [1] Do not set the bits TC::OwnsManaged or
         // TC::ReachesManaged directly, instead reference
         // TC::Managed to set them both at once.
-        AtManaged                           = 0b0100__00000100__0000,
+        AtManaged                           = 0b00000100__00000100__0000,
 
         // All bits
-        All                                 = 0b1111__11111111__1111
+        All                                 = 0b11111111__11111111__1111
     }
 )

From 166890978e0176ac77668d0d2c4b3e347f23356a Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Fri, 17 Jan 2014 18:42:29 +1100
Subject: [PATCH 21/21] std::gc: borrow is no longer unsafe.

Gc's inside ~ or ~[] won't be deallocated, so the major source of
unsafety is gone. (They don't work inside @ still, but that's due for
removal anyway.)
---
 src/libstd/gc/mod.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/libstd/gc/mod.rs b/src/libstd/gc/mod.rs
index 63c884171acf7..14851e222da88 100644
--- a/src/libstd/gc/mod.rs
+++ b/src/libstd/gc/mod.rs
@@ -304,8 +304,8 @@ impl<T> Gc<T> {
     /// box, and so, if that is the only reference to one, then that
     /// `Gc` may be deallocated or the memory reused.
     #[inline]
-    pub unsafe fn borrow<'r>(&'r self) -> &'r T {
-        &*self.ptr
+    pub fn borrow<'r>(&'r self) -> &'r T {
+        unsafe {&*self.ptr}
     }
 }
 
@@ -315,10 +315,10 @@ impl<T> Gc<T> {
     ///
     /// See `.borrow()` for the reason for `unsafe`.
     #[inline]
-    pub unsafe fn borrow_write_barrier<'r>(&'r self) -> &'r T {
+    pub fn borrow_write_barrier<'r>(&'r self) -> &'r T {
         // a completely conservative non-generational GC needs no
         // write barriers.
-        &*self.ptr
+        unsafe {&*self.ptr}
     }
 
     /// Borrow the value contained in the garbage-collected box,
@@ -344,7 +344,7 @@ impl<T> Clone for Gc<T> {
 impl<T: DeepClone> DeepClone for Gc<T> {
     #[inline]
     fn deep_clone(&self) -> Gc<T> {
-        Gc::new(unsafe {self.borrow_write_barrier().deep_clone()})
+        Gc::new(self.borrow_write_barrier().deep_clone())
     }
 }
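
With the series applied end to end, a GC pointer can live inside owned
boxes and vectors and be read without any `unsafe` blocks. A minimal
usage sketch of the resulting API (a hypothetical test; names and
values are illustrative only):

    use std::gc::Gc;

    fn main() {
        // PATCH 19 notes the ~[] allocation with the collector, so the
        // Gc stored inside it is found during conservative scans and
        // kept alive across collections.
        let shared = Gc::new(42);
        let stash = ~[shared.clone()];

        // PATCH 21 makes borrow() safe: no unsafe block required.
        assert_eq!(*shared.borrow(), 42);
        for g in stash.iter() {
            assert_eq!(*g.borrow(), 42);
        }
    }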