diff --git a/src/librustc/middle/region.rs b/src/librustc/middle/region.rs index e4e554b65bf37..f65d3ad464c46 100644 --- a/src/librustc/middle/region.rs +++ b/src/librustc/middle/region.rs @@ -15,6 +15,9 @@ pass builds up the `scope_map`, which describes the parent links in the region hierarchy. The second pass infers which types must be region parameterized. +Most of the documentation on regions can be found in +`middle/typeck/infer/region_inference.rs` + */ diff --git a/src/librustc/middle/typeck/check/_match.rs b/src/librustc/middle/typeck/check/_match.rs index c2bf4594d30e1..45867ae77e080 100644 --- a/src/librustc/middle/typeck/check/_match.rs +++ b/src/librustc/middle/typeck/check/_match.rs @@ -15,6 +15,7 @@ use middle::typeck::check::demand; use middle::typeck::check::{check_block, check_expr_has_type, FnCtxt}; use middle::typeck::check::{instantiate_path, lookup_def}; use middle::typeck::check::{structure_of, valid_range_bounds}; +use middle::typeck::infer; use middle::typeck::require_same_types; use std::hashmap::{HashMap, HashSet}; @@ -29,8 +30,8 @@ pub fn check_match(fcx: @mut FnCtxt, arms: &[ast::arm]) { let tcx = fcx.ccx.tcx; - let pattern_ty = fcx.infcx().next_ty_var(); - check_expr_has_type(fcx, discrim, pattern_ty); + let discrim_ty = fcx.infcx().next_ty_var(); + check_expr_has_type(fcx, discrim, discrim_ty); // Typecheck the patterns first, so that we get types for all the // bindings. @@ -38,17 +39,22 @@ pub fn check_match(fcx: @mut FnCtxt, let pcx = pat_ctxt { fcx: fcx, map: pat_id_map(tcx.def_map, arm.pats[0]), - match_region: ty::re_scope(expr.id), - block_region: ty::re_scope(arm.body.node.id) }; - for arm.pats.iter().advance |p| { check_pat(&pcx, *p, pattern_ty);} + for arm.pats.iter().advance |p| { check_pat(&pcx, *p, discrim_ty);} } + // The result of the match is the common supertype of all the + // arms. Start out the value as bottom, since it's the, well, + // bottom the type lattice, and we'll be moving up the lattice as + // we process each arm. (Note that any match with 0 arms is matching + // on any empty type and is therefore unreachable; should the flow + // of execution reach it, we will fail, so bottom is an appropriate + // type in that case) + let mut result_ty = ty::mk_bot(); + // Now typecheck the blocks. 
- let mut result_ty = fcx.infcx().next_ty_var(); - let mut arm_non_bot = false; - let mut saw_err = false; + let mut saw_err = ty::type_is_error(discrim_ty); for arms.iter().advance |arm| { let mut guard_err = false; let mut guard_bot = false; @@ -75,26 +81,28 @@ pub fn check_match(fcx: @mut FnCtxt, else if guard_bot { fcx.write_bot(arm.body.node.id); } - else if !ty::type_is_bot(bty) { - arm_non_bot = true; // If the match *may* evaluate to a non-_|_ - // expr, the whole thing is non-_|_ - } - demand::suptype(fcx, arm.body.span, result_ty, bty); + + result_ty = + infer::common_supertype( + fcx.infcx(), + infer::MatchExpression(expr.span), + true, // result_ty is "expected" here + result_ty, + bty); } + if saw_err { result_ty = ty::mk_err(); - } - else if !arm_non_bot { + } else if ty::type_is_bot(discrim_ty) { result_ty = ty::mk_bot(); } + fcx.write_ty(expr.id, result_ty); } pub struct pat_ctxt { fcx: @mut FnCtxt, map: PatIdMap, - match_region: ty::Region, // Region for the match as a whole - block_region: ty::Region, // Region for the block of the arm } pub fn check_pat_variant(pcx: &pat_ctxt, pat: @ast::pat, path: @ast::Path, @@ -442,8 +450,8 @@ pub fn check_pat(pcx: &pat_ctxt, pat: @ast::pat, expected: ty::t) { // then the type of x is &M T where M is the mutability // and T is the expected type let region_var = - fcx.infcx().next_region_var_with_lb( - pat.span, pcx.block_region); + fcx.infcx().next_region_var( + infer::PatternRegion(pat.span)); let mt = ty::mt {ty: expected, mutbl: mutbl}; let region_ty = ty::mk_rptr(tcx, region_var, mt); demand::eqtype(fcx, pat.span, region_ty, typ); @@ -544,9 +552,8 @@ pub fn check_pat(pcx: &pat_ctxt, pat: @ast::pat, expected: ty::t) { } ast::pat_vec(ref before, slice, ref after) => { let default_region_var = - fcx.infcx().next_region_var_with_lb( - pat.span, pcx.block_region - ); + fcx.infcx().next_region_var( + infer::PatternRegion(pat.span)); let (elt_type, region_var) = match structure_of( fcx, pat.span, expected @@ -651,3 +658,4 @@ pub fn check_pointer_pat(pcx: &pat_ctxt, #[deriving(Eq)] enum PointerKind { Managed, Send, Borrowed } + diff --git a/src/librustc/middle/typeck/check/demand.rs b/src/librustc/middle/typeck/check/demand.rs index 7ca78068f0702..cf29d3f7f1f57 100644 --- a/src/librustc/middle/typeck/check/demand.rs +++ b/src/librustc/middle/typeck/check/demand.rs @@ -35,7 +35,7 @@ pub fn suptype_with_fn(fcx: @mut FnCtxt, ty_a: ty::t, ty_b: ty::t, handle_err: &fn(span, ty::t, ty::t, &ty::type_err)) { // n.b.: order of actual, expected is reversed - match infer::mk_subty(fcx.infcx(), b_is_expected, sp, + match infer::mk_subty(fcx.infcx(), b_is_expected, infer::Misc(sp), ty_b, ty_a) { result::Ok(()) => { /* ok */ } result::Err(ref err) => { @@ -45,7 +45,7 @@ pub fn suptype_with_fn(fcx: @mut FnCtxt, } pub fn eqtype(fcx: @mut FnCtxt, sp: span, expected: ty::t, actual: ty::t) { - match infer::mk_eqty(fcx.infcx(), false, sp, actual, expected) { + match infer::mk_eqty(fcx.infcx(), false, infer::Misc(sp), actual, expected) { Ok(()) => { /* ok */ } Err(ref err) => { fcx.report_mismatched_types(sp, expected, actual, err); diff --git a/src/librustc/middle/typeck/check/method.rs b/src/librustc/middle/typeck/check/method.rs index 95584889218ab..ee61399113a43 100644 --- a/src/librustc/middle/typeck/check/method.rs +++ b/src/librustc/middle/typeck/check/method.rs @@ -619,14 +619,18 @@ impl<'self> LookupContext<'self> { autoref: None})) } ty::ty_rptr(_, self_mt) => { - let region = self.infcx().next_region_var_nb(self.expr.span); + let region = + 
self.infcx().next_region_var( + infer::Autoref(self.expr.span)); (ty::mk_rptr(tcx, region, self_mt), ty::AutoDerefRef(ty::AutoDerefRef { autoderefs: autoderefs+1, autoref: Some(ty::AutoPtr(region, self_mt.mutbl))})) } ty::ty_evec(self_mt, vstore_slice(_)) => { - let region = self.infcx().next_region_var_nb(self.expr.span); + let region = + self.infcx().next_region_var( + infer::Autoref(self.expr.span)); (ty::mk_evec(tcx, self_mt, vstore_slice(region)), ty::AutoDerefRef(ty::AutoDerefRef { autoderefs: autoderefs, @@ -758,7 +762,9 @@ impl<'self> LookupContext<'self> { -> Option { // This is hokey. We should have mutability inference as a // variable. But for now, try &const, then &, then &mut: - let region = self.infcx().next_region_var_nb(self.expr.span); + let region = + self.infcx().next_region_var( + infer::Autoref(self.expr.span)); for mutbls.iter().advance |mutbl| { let autoref_ty = mk_autoref_ty(*mutbl, region); match self.search_for_method(autoref_ty) { @@ -970,7 +976,8 @@ impl<'self> LookupContext<'self> { let (_, opt_transformed_self_ty, fn_sig) = replace_bound_regions_in_fn_sig( tcx, @Nil, Some(transformed_self_ty), &bare_fn_ty.sig, - |_br| self.fcx.infcx().next_region_var_nb(self.expr.span)); + |br| self.fcx.infcx().next_region_var( + infer::BoundRegionInFnCall(self.expr.span, br))); let transformed_self_ty = opt_transformed_self_ty.get(); let fty = ty::mk_bare_fn(tcx, ty::BareFnTy {sig: fn_sig, ..bare_fn_ty}); debug!("after replacing bound regions, fty=%s", self.ty_to_str(fty)); @@ -982,7 +989,7 @@ impl<'self> LookupContext<'self> { // variables to unify etc). Since we checked beforehand, and // nothing has changed in the meantime, this unification // should never fail. - match self.fcx.mk_subty(false, self.self_expr.span, + match self.fcx.mk_subty(false, infer::Misc(self.self_expr.span), rcvr_ty, transformed_self_ty) { result::Ok(_) => (), result::Err(_) => { diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs index a6bc335bcdbc7..00ebca5abc147 100644 --- a/src/librustc/middle/typeck/check/mod.rs +++ b/src/librustc/middle/typeck/check/mod.rs @@ -467,8 +467,6 @@ pub fn check_fn(ccx: @mut CrateCtxt, let pcx = pat_ctxt { fcx: fcx, map: pat_id_map(tcx.def_map, input.pat), - match_region: region, - block_region: region, }; _match::check_pat(&pcx, input.pat, *arg_ty); } @@ -686,9 +684,14 @@ impl FnCtxt { result::Ok(self.block_region()) } else { result::Err(RegionError { - msg: fmt!("named region `%s` not in scope here", - bound_region_ptr_to_str(self.tcx(), br)), - replacement: self.infcx().next_region_var_nb(span) + msg: { + fmt!("named region `%s` not in scope here", + bound_region_ptr_to_str(self.tcx(), br)) + }, + replacement: { + self.infcx().next_region_var( + infer::BoundRegionError(span)) + } }) } } @@ -698,7 +701,7 @@ impl FnCtxt { impl region_scope for FnCtxt { fn anon_region(&self, span: span) -> Result { - result::Ok(self.infcx().next_region_var_nb(span)) + result::Ok(self.infcx().next_region_var(infer::MiscVariable(span))) } fn self_region(&self, span: span) -> Result { self.search_in_scope_regions(span, ty::br_self) @@ -845,11 +848,11 @@ impl FnCtxt { pub fn mk_subty(&self, a_is_expected: bool, - span: span, + origin: infer::TypeOrigin, sub: ty::t, sup: ty::t) -> Result<(), ty::type_err> { - infer::mk_subty(self.infcx(), a_is_expected, span, sub, sup) + infer::mk_subty(self.infcx(), a_is_expected, origin, sub, sup) } pub fn can_mk_subty(&self, sub: ty::t, sup: ty::t) @@ -857,9 +860,16 @@ impl FnCtxt { 
infer::can_mk_subty(self.infcx(), sub, sup) } - pub fn mk_assignty(&self, expr: @ast::expr, sub: ty::t, sup: ty::t) + pub fn mk_assignty(&self, + expr: @ast::expr, + sub: ty::t, + sup: ty::t) -> Result<(), ty::type_err> { - match infer::mk_coercety(self.infcx(), false, expr.span, sub, sup) { + match infer::mk_coercety(self.infcx(), + false, + infer::ExprAssignable(expr), + sub, + sup) { Ok(None) => result::Ok(()), Err(ref e) => result::Err((*e)), Ok(Some(adjustment)) => { @@ -876,20 +886,19 @@ impl FnCtxt { pub fn mk_eqty(&self, a_is_expected: bool, - span: span, + origin: infer::TypeOrigin, sub: ty::t, sup: ty::t) -> Result<(), ty::type_err> { - infer::mk_eqty(self.infcx(), a_is_expected, span, sub, sup) + infer::mk_eqty(self.infcx(), a_is_expected, origin, sub, sup) } pub fn mk_subr(&self, a_is_expected: bool, - span: span, + origin: infer::SubregionOrigin, sub: ty::Region, - sup: ty::Region) - -> Result<(), ty::type_err> { - infer::mk_subr(self.infcx(), a_is_expected, span, sub, sup) + sup: ty::Region) { + infer::mk_subr(self.infcx(), a_is_expected, origin, sub, sup) } pub fn with_region_lb(@mut self, lb: ast::node_id, f: &fn() -> R) @@ -905,7 +914,9 @@ impl FnCtxt { rp: Option, span: span) -> Option { - rp.map(|_rp| self.infcx().next_region_var_nb(span)) + rp.map( + |_| self.infcx().next_region_var( + infer::BoundRegionInTypeOrImpl(span))) } pub fn type_error_message(&self, @@ -1089,7 +1100,8 @@ pub fn impl_self_ty(vcx: &VtableContext, }; let self_r = if region_param.is_some() { - Some(vcx.infcx.next_region_var_nb(location_info.span)) + Some(vcx.infcx.next_region_var( + infer::BoundRegionInTypeOrImpl(location_info.span))) } else { None }; @@ -1352,7 +1364,8 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, let (_, _, fn_sig) = replace_bound_regions_in_fn_sig( fcx.tcx(), @Nil, None, &fn_sig, - |_br| fcx.infcx().next_region_var_nb(call_expr.span)); + |br| fcx.infcx().next_region_var( + infer::BoundRegionInFnCall(call_expr.span, br))); // Call the generic checker. 
check_argument_types(fcx, call_expr.span, fn_sig.inputs, f, @@ -1423,27 +1436,42 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, // A generic function for checking the then and else in an if // or if-check fn check_then_else(fcx: @mut FnCtxt, - thn: &ast::blk, - elsopt: Option<@ast::expr>, + cond_expr: @ast::expr, + then_blk: &ast::blk, + opt_else_expr: Option<@ast::expr>, id: ast::node_id, - _sp: span) { - let if_t = - match elsopt { - Some(els) => { - let if_t = fcx.infcx().next_ty_var(); - check_block(fcx, thn); - let thn_t = fcx.node_ty(thn.node.id); - demand::suptype(fcx, thn.span, if_t, thn_t); - check_expr_has_type(fcx, els, if_t); - if_t - } - None => { - check_block_no_value(fcx, thn); - ty::mk_nil() - } - }; + sp: span, + expected: Option) { + check_expr_has_type(fcx, cond_expr, ty::mk_bool()); + + let branches_ty = match opt_else_expr { + Some(else_expr) => { + check_block_with_expected(fcx, then_blk, expected); + let then_ty = fcx.node_ty(then_blk.node.id); + check_expr_with_opt_hint(fcx, else_expr, expected); + let else_ty = fcx.expr_ty(else_expr); + infer::common_supertype(fcx.infcx(), + infer::IfExpression(sp), + true, + then_ty, + else_ty) + } + None => { + check_block_no_value(fcx, then_blk); + ty::mk_nil() + } + }; + + let cond_ty = fcx.expr_ty(cond_expr); + let if_ty = if ty::type_is_error(cond_ty) { + ty::mk_err() + } else if ty::type_is_bot(cond_ty) { + ty::mk_bot() + } else { + branches_ty + }; - fcx.write_ty(id, if_t); + fcx.write_ty(id, if_ty); } fn lookup_op_method(fcx: @mut FnCtxt, @@ -2085,7 +2113,7 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, let expected_sty = unpack_expected(fcx, expected, |x| Some(copy *x)); let inner_ty = match expected_sty { Some(ty::ty_closure(ref fty)) => { - match fcx.mk_subty(false, expr.span, + match fcx.mk_subty(false, infer::Misc(expr.span), fty.sig.output, ty::mk_bool()) { result::Ok(_) => { ty::mk_closure(tcx, ty::ClosureTy { @@ -2395,7 +2423,8 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, // Finally, borrowck is charged with guaranteeing that the // value whose address was taken can actually be made to live // as long as it needs to live. 
- let region = fcx.infcx().next_region_var_nb(expr.span); + let region = fcx.infcx().next_region_var( + infer::AddrOfRegion(expr.span)); let tm = ty::mt { ty: fcx.expr_ty(oprnd), mutbl: mutbl }; let oprnd_t = if ty::type_is_error(tm.ty) { @@ -2437,7 +2466,7 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, Some(t) => t, None => fcx.ret_ty }; match expr_opt { - None => match fcx.mk_eqty(false, expr.span, + None => match fcx.mk_eqty(false, infer::Misc(expr.span), ret_ty, ty::mk_nil()) { result::Ok(_) => { /* fall through */ } result::Err(_) => { @@ -2487,25 +2516,9 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, fcx.write_nil(id); } } - ast::expr_if(cond, ref thn, elsopt) => { - check_expr_has_type(fcx, cond, ty::mk_bool()); - check_then_else(fcx, thn, elsopt, id, expr.span); - let cond_ty = fcx.expr_ty(cond); - let then_ty = fcx.node_ty(thn.node.id); - let else_is_bot = elsopt.map_default(false, |els| { - ty::type_is_bot(fcx.expr_ty(*els))}); - if ty::type_is_error(cond_ty) || ty::type_is_error(then_ty) { - fcx.write_error(id); - } - else if elsopt.map_default(false, |els| { - ty::type_is_error(fcx.expr_ty(*els)) }) { - fcx.write_error(id); - } - else if ty::type_is_bot(cond_ty) || - (ty::type_is_bot(then_ty) && else_is_bot) { - fcx.write_bot(id); - } - // Other cases were handled by check_then_else + ast::expr_if(cond, ref then_blk, opt_else_expr) => { + check_then_else(fcx, cond, then_blk, opt_else_expr, + id, expr.span, expected); } ast::expr_while(cond, ref body) => { check_expr_has_type(fcx, cond, ty::mk_bool()); @@ -2533,30 +2546,6 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, } ast::expr_match(discrim, ref arms) => { _match::check_match(fcx, expr, discrim, *arms); - let discrim_ty = fcx.expr_ty(discrim); - let arm_tys = arms.map(|a| fcx.node_ty(a.body.node.id)); - if ty::type_is_error(discrim_ty) || - arm_tys.iter().any_(|t| ty::type_is_error(*t)) { - fcx.write_error(id); - } - // keep in mind that `all` returns true in the empty vec case, - // which is what we want - else if ty::type_is_bot(discrim_ty) || - arm_tys.iter().all(|t| ty::type_is_bot(*t)) { - fcx.write_bot(id); - } - else { - // Find the first non-_|_ arm. - // We know there's at least one because we already checked - // for n=0 as well as all arms being _|_ in the previous - // `if`. 
- for arm_tys.iter().advance |arm_ty| { - if !ty::type_is_bot(*arm_ty) { - fcx.write_ty(id, *arm_ty); - break; - } - } - } } ast::expr_fn_block(ref decl, ref body) => { check_expr_fn(fcx, expr, None, @@ -2686,7 +2675,7 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, let el = ty::sequence_element_type(fcx.tcx(), t1); infer::mk_eqty(fcx.infcx(), false, - sp, el, t2).is_ok() + infer::Misc(sp), el, t2).is_ok() } } @@ -2907,8 +2896,6 @@ pub fn check_decl_local(fcx: @mut FnCtxt, local: @ast::local) { let pcx = pat_ctxt { fcx: fcx, map: pat_id_map(tcx.def_map, local.node.pat), - match_region: region, - block_region: region, }; _match::check_pat(&pcx, local.node.pat, t); let pat_ty = fcx.node_ty(local.node.pat.id); @@ -3412,7 +3399,7 @@ pub fn ast_expr_vstore_to_vstore(fcx: @mut FnCtxt, ast::expr_vstore_uniq => ty::vstore_uniq, ast::expr_vstore_box | ast::expr_vstore_mut_box => ty::vstore_box, ast::expr_vstore_slice | ast::expr_vstore_mut_slice => { - let r = fcx.infcx().next_region_var_nb(e.span); + let r = fcx.infcx().next_region_var(infer::AddrOfSlice(e.span)); ty::vstore_slice(r) } } diff --git a/src/librustc/middle/typeck/check/regionck.rs b/src/librustc/middle/typeck/check/regionck.rs index 80faad15695c3..2e41649e4dbb2 100644 --- a/src/librustc/middle/typeck/check/regionck.rs +++ b/src/librustc/middle/typeck/check/regionck.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/* +/*! The region check is a final pass that runs over the AST after we have inferred the type constraints but before we have actually finalized @@ -35,7 +35,9 @@ use middle::typeck::check::FnCtxt; use middle::typeck::check::regionmanip::relate_nested_regions; use middle::typeck::infer::resolve_and_force_all_but_regions; use middle::typeck::infer::resolve_type; -use util::ppaux::{note_and_explain_region, ty_to_str, region_to_str}; +use middle::typeck::infer; +use util::ppaux::{note_and_explain_region, ty_to_str, + region_to_str}; use middle::pat_util; use std::result; @@ -224,7 +226,9 @@ fn constrain_bindings_in_pat(pat: @ast::pat, rcx: @mut Rcx) { // variable's type enclose at least the variable's scope. 
let encl_region = tcx.region_maps.encl_region(id); - constrain_regions_in_type_of_node(rcx, id, encl_region, span); + constrain_regions_in_type_of_node( + rcx, id, encl_region, + infer::BindingTypeIsNotValidAtDecl(span)); } } @@ -298,7 +302,8 @@ fn visit_expr(expr: @ast::expr, (rcx, v): (@mut Rcx, rvt)) { // // FIXME(#6268) remove to support nested method calls constrain_regions_in_type_of_node( - rcx, expr.id, ty::re_scope(expr.id), expr.span); + rcx, expr.id, ty::re_scope(expr.id), + infer::AutoBorrow(expr.span)); } } _ => {} @@ -361,8 +366,11 @@ fn visit_expr(expr: @ast::expr, (rcx, v): (@mut Rcx, rvt)) { match ty::get(target_ty).sty { ty::ty_trait(_, _, ty::RegionTraitStore(trait_region), _, _) => { let source_ty = rcx.fcx.expr_ty(source); - constrain_regions_in_type(rcx, trait_region, - expr.span, source_ty); + constrain_regions_in_type( + rcx, + trait_region, + infer::RelateObjectBound(expr.span), + source_ty); } _ => () } @@ -379,7 +387,8 @@ fn visit_expr(expr: @ast::expr, (rcx, v): (@mut Rcx, rvt)) { // // FIXME(#6268) nested method calls requires that this rule change let ty0 = rcx.resolve_node_type(expr.id); - constrain_regions_in_type(rcx, ty::re_scope(expr.id), expr.span, ty0); + constrain_regions_in_type(rcx, ty::re_scope(expr.id), + infer::AddrOf(expr.span), ty0); } ast::expr_match(discr, ref arms) => { @@ -418,20 +427,8 @@ fn constrain_callee(rcx: @mut Rcx, match ty::get(callee_ty).sty { ty::ty_bare_fn(*) => { } ty::ty_closure(ref closure_ty) => { - match rcx.fcx.mk_subr(true, callee_expr.span, - call_region, closure_ty.region) { - result::Err(_) => { - tcx.sess.span_err( - callee_expr.span, - fmt!("cannot invoke closure outside of its lifetime")); - note_and_explain_region( - tcx, - "the closure is only valid for ", - closure_ty.region, - ""); - } - result::Ok(_) => {} - } + rcx.fcx.mk_subr(true, infer::InvokeClosure(callee_expr.span), + call_region, closure_ty.region); } _ => { // this should not happen, but it does if the program is @@ -479,7 +476,8 @@ fn constrain_call(rcx: @mut Rcx, // ensure that any regions appearing in the argument type are // valid for at least the lifetime of the function: constrain_regions_in_type_of_node( - rcx, arg_expr.id, callee_region, arg_expr.span); + rcx, arg_expr.id, callee_region, + infer::CallArg(arg_expr.span)); // unfortunately, there are two means of taking implicit // references, and we need to propagate constraints as a @@ -493,7 +491,7 @@ fn constrain_call(rcx: @mut Rcx, // as loop above, but for receiver for receiver.iter().advance |&r| { constrain_regions_in_type_of_node( - rcx, r.id, callee_region, r.span); + rcx, r.id, callee_region, infer::CallRcvr(r.span)); if implicitly_ref_args { guarantor::for_by_ref(rcx, r, callee_scope); } @@ -502,7 +500,8 @@ fn constrain_call(rcx: @mut Rcx, // constrain regions that may appear in the return type to be // valid for the function call: constrain_regions_in_type( - rcx, callee_region, call_expr.span, fn_sig.output); + rcx, callee_region, infer::CallReturn(call_expr.span), + fn_sig.output); } fn constrain_derefs(rcx: @mut Rcx, @@ -545,20 +544,8 @@ pub fn mk_subregion_due_to_derefence(rcx: @mut Rcx, deref_span: span, minimum_lifetime: ty::Region, maximum_lifetime: ty::Region) { - match rcx.fcx.mk_subr(true, deref_span, - minimum_lifetime, maximum_lifetime) { - result::Ok(*) => {} - result::Err(*) => { - rcx.tcx().sess.span_err( - deref_span, - fmt!("dereference of reference outside its lifetime")); - note_and_explain_region( - rcx.tcx(), - "the reference is only valid for ", - 
maximum_lifetime, - ""); - } - } + rcx.fcx.mk_subr(true, infer::DerefPointer(deref_span), + minimum_lifetime, maximum_lifetime) } @@ -581,19 +568,8 @@ fn constrain_index(rcx: @mut Rcx, match ty::get(indexed_ty).sty { ty::ty_estr(ty::vstore_slice(r_ptr)) | ty::ty_evec(_, ty::vstore_slice(r_ptr)) => { - match rcx.fcx.mk_subr(true, index_expr.span, r_index_expr, r_ptr) { - result::Ok(*) => {} - result::Err(*) => { - tcx.sess.span_err( - index_expr.span, - fmt!("index of slice outside its lifetime")); - note_and_explain_region( - tcx, - "the slice is only valid for ", - r_ptr, - ""); - } - } + rcx.fcx.mk_subr(true, infer::IndexSlice(index_expr.span), + r_index_expr, r_ptr); } _ => {} @@ -616,25 +592,8 @@ fn constrain_free_variables(rcx: @mut Rcx, let def = freevar.def; let en_region = encl_region_of_def(rcx.fcx, def); debug!("en_region = %s", en_region.repr(tcx)); - match rcx.fcx.mk_subr(true, freevar.span, - region, en_region) { - result::Ok(()) => {} - result::Err(_) => { - tcx.sess.span_err( - freevar.span, - "captured variable does not outlive the enclosing closure"); - note_and_explain_region( - tcx, - "captured variable is valid for ", - en_region, - ""); - note_and_explain_region( - tcx, - "closure is valid for ", - region, - ""); - } - } + rcx.fcx.mk_subr(true, infer::FreeVariable(freevar.span), + region, en_region); } } @@ -642,7 +601,7 @@ fn constrain_regions_in_type_of_node( rcx: @mut Rcx, id: ast::node_id, minimum_lifetime: ty::Region, - span: span) -> bool + origin: infer::SubregionOrigin) -> bool { //! Guarantees that any lifetimes which appear in the type of //! the node `id` (after applying adjustments) are valid for at @@ -655,18 +614,18 @@ fn constrain_regions_in_type_of_node( // report errors later on in the writeback phase. let ty0 = rcx.resolve_node_type(id); let adjustment = rcx.fcx.inh.adjustments.find_copy(&id); - let ty = ty::adjust_ty(tcx, span, ty0, adjustment); + let ty = ty::adjust_ty(tcx, origin.span(), ty0, adjustment); debug!("constrain_regions_in_type_of_node(\ ty=%s, ty0=%s, id=%d, minimum_lifetime=%?, adjustment=%?)", ty_to_str(tcx, ty), ty_to_str(tcx, ty0), id, minimum_lifetime, adjustment); - constrain_regions_in_type(rcx, minimum_lifetime, span, ty) + constrain_regions_in_type(rcx, minimum_lifetime, origin, ty) } fn constrain_regions_in_type( rcx: @mut Rcx, minimum_lifetime: ty::Region, - span: span, + origin: infer::SubregionOrigin, ty: ty::t) -> bool { /*! @@ -700,40 +659,14 @@ fn constrain_regions_in_type( // (e.g., the `&` in `fn(&T)`). Such regions need not be // constrained by `minimum_lifetime` as they are placeholders // for regions that are as-yet-unknown. 
+ } else if r_sub == minimum_lifetime { + rcx.fcx.mk_subr( + true, origin, + r_sub, r_sup); } else { - match rcx.fcx.mk_subr(true, span, r_sub, r_sup) { - result::Err(_) => { - if r_sub == minimum_lifetime { - tcx.sess.span_err( - span, - fmt!("reference is not valid outside of its lifetime")); - note_and_explain_region( - tcx, - "the reference is only valid for ", - r_sup, - ""); - } else { - tcx.sess.span_err( - span, - fmt!("in type `%s`, pointer has a longer lifetime than \ - the data it references", - rcx.fcx.infcx().ty_to_str(ty))); - note_and_explain_region( - tcx, - "the pointer is valid for ", - r_sub, - ""); - note_and_explain_region( - tcx, - "but the referenced data is only valid for ", - r_sup, - ""); - } - rcx.errors_reported += 1u; - } - result::Ok(()) => { - } - } + rcx.fcx.mk_subr( + true, infer::ReferenceOutlivesReferent(ty, origin.span()), + r_sub, r_sup); } } @@ -788,8 +721,9 @@ pub mod guarantor { */ - use middle::typeck::check::regionck::{Rcx, infallibly_mk_subr}; + use middle::typeck::check::regionck::Rcx; use middle::typeck::check::regionck::mk_subregion_due_to_derefence; + use middle::typeck::infer; use middle::ty; use syntax::ast; use syntax::codemap::span; @@ -869,9 +803,11 @@ pub mod guarantor { rcx: @mut Rcx, expr: @ast::expr, sub_region: ty::Region, - sup_region: Option) { + sup_region: Option) + { for sup_region.iter().advance |r| { - infallibly_mk_subr(rcx, true, expr.span, sub_region, *r); + rcx.fcx.mk_subr(true, infer::Reborrow(expr.span), + sub_region, *r); } } } @@ -929,7 +865,7 @@ pub mod guarantor { let tcx = rcx.fcx.ccx.tcx; debug!("rptr_ty=%s", ty_to_str(tcx, rptr_ty)); let r = ty::ty_region(tcx, span, rptr_ty); - infallibly_mk_subr(rcx, true, span, r, bound); + rcx.fcx.mk_subr(true, infer::Reborrow(span), r, bound); } } @@ -1259,27 +1195,3 @@ pub mod guarantor { } } - -pub fn infallibly_mk_subr(rcx: @mut Rcx, - a_is_expected: bool, - span: span, - a: ty::Region, - b: ty::Region) { - /*! - * Constrains `a` to be a subregion of `b`. In many cases, we - * know that this can never yield an error due to the way that - * region inferencing works. Therefore just report a bug if we - * ever see `Err(_)`. - */ - - match rcx.fcx.mk_subr(a_is_expected, span, a, b) { - result::Ok(()) => {} - result::Err(e) => { - rcx.fcx.ccx.tcx.sess.span_bug( - span, - fmt!("Supposedly infallible attempt to \ - make %? < %? failed: %?", - a, b, e)); - } - } -} diff --git a/src/librustc/middle/typeck/check/vtable.rs b/src/librustc/middle/typeck/check/vtable.rs index 7a3c02efebec1..d908633443961 100644 --- a/src/librustc/middle/typeck/check/vtable.rs +++ b/src/librustc/middle/typeck/check/vtable.rs @@ -101,18 +101,18 @@ fn lookup_vtables(vcx: &VtableContext, // Substitute the values of the type parameters that may // appear in the bound. 
- let trait_ref = (*trait_ref).subst(tcx, substs); + let trait_ref = trait_ref.subst(tcx, substs); debug!("after subst: %s", trait_ref.repr(tcx)); - match lookup_vtable(vcx, location_info, *ty, &trait_ref, is_early) { + match lookup_vtable(vcx, location_info, *ty, trait_ref, is_early) { Some(vtable) => param_result.push(vtable), None => { vcx.tcx().sess.span_fatal( location_info.span, fmt!("failed to find an implementation of \ trait %s for %s", - vcx.infcx.trait_ref_to_str(&trait_ref), + vcx.infcx.trait_ref_to_str(trait_ref), vcx.infcx.ty_to_str(*ty))); } } @@ -152,8 +152,8 @@ fn fixup_substs(vcx: &VtableContext, location_info: &LocationInfo, fn relate_trait_refs(vcx: &VtableContext, location_info: &LocationInfo, - act_trait_ref: &ty::TraitRef, - exp_trait_ref: &ty::TraitRef) + act_trait_ref: @ty::TraitRef, + exp_trait_ref: @ty::TraitRef) { /*! * @@ -162,8 +162,11 @@ fn relate_trait_refs(vcx: &VtableContext, * error otherwise. */ - match infer::mk_sub_trait_refs(vcx.infcx, false, location_info.span, - act_trait_ref, exp_trait_ref) + match infer::mk_sub_trait_refs(vcx.infcx, + false, + infer::RelateTraitRefs(location_info.span), + act_trait_ref, + exp_trait_ref) { result::Ok(()) => {} // Ok. result::Err(ref err) => { @@ -191,7 +194,7 @@ fn relate_trait_refs(vcx: &VtableContext, fn lookup_vtable(vcx: &VtableContext, location_info: &LocationInfo, ty: ty::t, - trait_ref: &ty::TraitRef, + trait_ref: @ty::TraitRef, is_early: bool) -> Option { @@ -304,7 +307,8 @@ fn lookup_vtable(vcx: &VtableContext, } = impl_self_ty(vcx, location_info, im.did); match infer::mk_subty(vcx.infcx, false, - location_info.span, + infer::RelateSelfType( + location_info.span), ty, for_ty) { result::Err(_) => loop, @@ -337,11 +341,10 @@ fn lookup_vtable(vcx: &VtableContext, vcx.infcx.trait_ref_to_str(trait_ref), vcx.infcx.trait_ref_to_str(of_trait_ref)); - let of_trait_ref = - (*of_trait_ref).subst(tcx, &substs); + let of_trait_ref = of_trait_ref.subst(tcx, &substs); relate_trait_refs( vcx, location_info, - &of_trait_ref, trait_ref); + of_trait_ref, trait_ref); // Recall that trait_ref -- the trait type // we're casting to -- is the trait with @@ -450,7 +453,7 @@ fn fixup_ty(vcx: &VtableContext, fn connect_trait_tps(vcx: &VtableContext, location_info: &LocationInfo, impl_substs: &ty::substs, - trait_ref: &ty::TraitRef, + trait_ref: @ty::TraitRef, impl_did: ast::def_id) { let tcx = vcx.tcx(); @@ -461,8 +464,8 @@ fn connect_trait_tps(vcx: &VtableContext, "connect_trait_tps invoked on a type impl") }; - let impl_trait_ref = (*impl_trait_ref).subst(tcx, impl_substs); - relate_trait_refs(vcx, location_info, &impl_trait_ref, trait_ref); + let impl_trait_ref = impl_trait_ref.subst(tcx, impl_substs); + relate_trait_refs(vcx, location_info, impl_trait_ref, trait_ref); } fn insert_vtables(fcx: @mut FnCtxt, @@ -581,7 +584,7 @@ pub fn early_resolve_expr(ex: @ast::expr, ccx: fcx.ccx, infcx: fcx.infcx() }; - let target_trait_ref = ty::TraitRef { + let target_trait_ref = @ty::TraitRef { def_id: target_def_id, substs: ty::substs { tps: copy target_substs.tps, @@ -593,7 +596,7 @@ pub fn early_resolve_expr(ex: @ast::expr, lookup_vtable(&vcx, location_info, mt.ty, - &target_trait_ref, + target_trait_ref, is_early); match vtable_opt { Some(vtable) => { @@ -622,7 +625,8 @@ pub fn early_resolve_expr(ex: @ast::expr, ty::RegionTraitStore(rb)) => { infer::mk_subr(fcx.infcx(), false, - ex.span, + infer::RelateObjectBound( + ex.span), rb, ra); } diff --git a/src/librustc/middle/typeck/coherence.rs b/src/librustc/middle/typeck/coherence.rs index 
8748d3dcd23be..24ac63ac7b079 100644 --- a/src/librustc/middle/typeck/coherence.rs +++ b/src/librustc/middle/typeck/coherence.rs @@ -36,6 +36,7 @@ use middle::typeck::infer::combine::Combine; use middle::typeck::infer::InferCtxt; use middle::typeck::infer::{new_infer_ctxt, resolve_ivar}; use middle::typeck::infer::{resolve_nested_tvar, resolve_type}; +use middle::typeck::infer; use syntax::ast::{crate, def_id, def_struct, def_ty}; use syntax::ast::{item, item_enum, item_impl, item_mod, item_struct}; use syntax::ast::{local_crate, method, trait_ref, ty_path}; @@ -546,10 +547,10 @@ impl CoherenceChecker { pub fn universally_quantify_polytype(&self, polytype: ty_param_bounds_and_ty) -> UniversalQuantificationResult { - // NDM--this span is bogus. let self_region = polytype.generics.region_param.map( - |_r| self.inference_context.next_region_var_nb(dummy_sp())); + |_| self.inference_context.next_region_var( + infer::BoundRegionInCoherence)); let bounds_count = polytype.generics.type_param_defs.len(); let type_parameters = self.inference_context.next_ty_vars(bounds_count); @@ -580,11 +581,9 @@ impl CoherenceChecker { b: &'a UniversalQuantificationResult) -> bool { - let mut might_unify = true; - let _ = do self.inference_context.probe { - let result = self.inference_context.sub(true, dummy_sp()) - .tys(a.monotype, b.monotype); - if result.is_ok() { + match infer::can_mk_subty(self.inference_context, + a.monotype, b.monotype) { + Ok(_) => { // Check to ensure that each parameter binding respected its // kind bounds. let xs = [a, b]; @@ -604,8 +603,7 @@ impl CoherenceChecker { self.inference_context.tcx, resolved_ty) { - might_unify = false; - break; + return false; } } Err(*) => { @@ -615,13 +613,13 @@ impl CoherenceChecker { } } } - } else { - might_unify = false; + true } - result - }; - might_unify + Err(_) => { + false + } + } } pub fn get_self_type_for_implementation(&self, implementation: @Impl) diff --git a/src/librustc/middle/typeck/collect.rs b/src/librustc/middle/typeck/collect.rs index 85bd2bc2d75ea..de05aca61caf5 100644 --- a/src/librustc/middle/typeck/collect.rs +++ b/src/librustc/middle/typeck/collect.rs @@ -615,7 +615,8 @@ pub fn compare_impl_method(tcx: ty::ctxt, }; debug!("trait_fty (post-subst): %s", trait_fty.repr(tcx)); - match infer::mk_subty(infcx, false, cm.span, impl_fty, trait_fty) { + match infer::mk_subty(infcx, false, infer::MethodCompatCheck(cm.span), + impl_fty, trait_fty) { result::Ok(()) => {} result::Err(ref terr) => { tcx.sess.span_err( diff --git a/src/librustc/middle/typeck/infer/coercion.rs b/src/librustc/middle/typeck/infer/coercion.rs index 63f882f5e541e..03d243797b32d 100644 --- a/src/librustc/middle/typeck/infer/coercion.rs +++ b/src/librustc/middle/typeck/infer/coercion.rs @@ -70,7 +70,7 @@ use middle::ty::{AutoDerefRef}; use middle::ty::{vstore_slice, vstore_box, vstore_uniq}; use middle::ty::{mt}; use middle::ty; -use middle::typeck::infer::{CoerceResult, resolve_type}; +use middle::typeck::infer::{CoerceResult, resolve_type, Coercion}; use middle::typeck::infer::combine::CombineFields; use middle::typeck::infer::sub::Sub; use middle::typeck::infer::to_str::InferStr; @@ -165,7 +165,7 @@ impl Coerce { } Err(e) => { self.infcx.tcx.sess.span_bug( - self.span, + self.trace.origin.span(), fmt!("Failed to resolve even without \ any force options: %?", e)); } @@ -189,7 +189,7 @@ impl Coerce { // yield. 
let sub = Sub(**self); - let r_borrow = self.infcx.next_region_var_nb(self.span); + let r_borrow = self.infcx.next_region_var(Coercion(self.trace)); let inner_ty = match *sty_a { ty::ty_box(mt_a) => mt_a.ty, @@ -227,7 +227,7 @@ impl Coerce { } }; - let r_a = self.infcx.next_region_var_nb(self.span); + let r_a = self.infcx.next_region_var(Coercion(self.trace)); let a_borrowed = ty::mk_estr(self.infcx.tcx, vstore_slice(r_a)); if_ok!(self.subtype(a_borrowed, b)); Ok(Some(@AutoDerefRef(AutoDerefRef { @@ -247,7 +247,7 @@ impl Coerce { b.inf_str(self.infcx)); let sub = Sub(**self); - let r_borrow = self.infcx.next_region_var_nb(self.span); + let r_borrow = self.infcx.next_region_var(Coercion(self.trace)); let ty_inner = match *sty_a { ty::ty_evec(mt, _) => mt.ty, _ => { @@ -285,7 +285,7 @@ impl Coerce { } }; - let r_borrow = self.infcx.next_region_var_nb(self.span); + let r_borrow = self.infcx.next_region_var(Coercion(self.trace)); let a_borrowed = ty::mk_closure( self.infcx.tcx, ty::ClosureTy { diff --git a/src/librustc/middle/typeck/infer/combine.rs b/src/librustc/middle/typeck/infer/combine.rs index adc263cbc4d8b..f03f01732291f 100644 --- a/src/librustc/middle/typeck/infer/combine.rs +++ b/src/librustc/middle/typeck/infer/combine.rs @@ -65,6 +65,7 @@ use middle::typeck::infer::sub::Sub; use middle::typeck::infer::to_str::InferStr; use middle::typeck::infer::unify::{InferCtxtMethods}; use middle::typeck::infer::{InferCtxt, cres, ures}; +use middle::typeck::infer::{TypeOrigin, TypeTrace}; use util::common::indent; use std::result::{iter_vec2, map_vec2}; @@ -79,7 +80,7 @@ pub trait Combine { fn infcx(&self) -> @mut InferCtxt; fn tag(&self) -> ~str; fn a_is_expected(&self) -> bool; - fn span(&self) -> span; + fn trace(&self) -> TypeTrace; fn sub(&self) -> Sub; fn lub(&self) -> Lub; @@ -121,7 +122,7 @@ pub trait Combine { pub struct CombineFields { infcx: @mut InferCtxt, a_is_expected: bool, - span: span, + trace: TypeTrace, } pub fn expected_found( diff --git a/src/librustc/middle/typeck/infer/doc.rs b/src/librustc/middle/typeck/infer/doc.rs new file mode 100644 index 0000000000000..11bfbc637169e --- /dev/null +++ b/src/librustc/middle/typeck/infer/doc.rs @@ -0,0 +1,243 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +# Type inference engine + +This is loosely based on standard HM-type inference, but with an +extension to try and accommodate subtyping. There is nothing +principled about this extension; it's sound---I hope!---but it's a +heuristic, ultimately, and does not guarantee that it finds a valid +typing even if one exists (in fact, there are known scenarios where it +fails, some of which may eventually become problematic). + +## Key idea + +The main change is that each type variable T is associated with a +lower-bound L and an upper-bound U. L and U begin as bottom and top, +respectively, but gradually narrow in response to new constraints +being introduced. When a variable is finally resolved to a concrete +type, it can (theoretically) select any type that is a supertype of L +and a subtype of U. 
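To make the bounded-variable scheme just described concrete, here is a minimal toy sketch in modern Rust, with an invented four-point lattice and hypothetical names such as `TyVar` and `require_subtype_of`; it is not the compiler's actual representation. Each variable starts at the bounds [Bot, Top], and the bounds only move toward each other as constraints are added, failing if they would cross:

    // Hypothetical toy model of the bounded type variables described above;
    // the lattice and every name here are invented for illustration and are
    // not the compiler's actual data structures.
    #[derive(Clone, Copy)]
    enum Ty { Bot, Int, Object, Top } // four-point chain: Bot <: Int <: Object <: Top

    fn rank(t: Ty) -> u8 {
        match t { Ty::Bot => 0, Ty::Int => 1, Ty::Object => 2, Ty::Top => 3 }
    }

    // On a linear chain, `a <: b` is just a rank comparison.
    fn subtype(a: Ty, b: Ty) -> bool { rank(a) <= rank(b) }

    // Invariants from the text: lb <: ub, and the bounds are concrete types.
    struct TyVar { lb: Ty, ub: Ty }

    impl TyVar {
        fn new() -> TyVar { TyVar { lb: Ty::Bot, ub: Ty::Top } }

        // "t <: var": raise the lower bound (LUB on a chain is the maximum).
        fn require_supertype_of(&mut self, t: Ty) -> Result<(), &'static str> {
            let lb = if subtype(self.lb, t) { t } else { self.lb };
            if subtype(lb, self.ub) { self.lb = lb; Ok(()) } else { Err("bounds would cross") }
        }

        // "var <: t": lower the upper bound (GLB on a chain is the minimum).
        fn require_subtype_of(&mut self, t: Ty) -> Result<(), &'static str> {
            let ub = if subtype(t, self.ub) { t } else { self.ub };
            if subtype(self.lb, ub) { self.ub = ub; Ok(()) } else { Err("bounds would cross") }
        }
    }

    fn main() {
        let mut v = TyVar::new();                        // starts at [Bot, Top]
        v.require_supertype_of(Ty::Int).unwrap();        // lower bound rises to Int
        v.require_subtype_of(Ty::Object).unwrap();       // upper bound drops to Object
        // v may now resolve to any T with Int <: T <: Object.
        assert!(v.require_subtype_of(Ty::Bot).is_err()); // Int <: Bot fails
    }
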
+ +There are several critical invariants which we maintain: + +- the upper-bound of a variable only becomes lower and the lower-bound + only becomes higher over time; +- the lower-bound L is always a subtype of the upper bound U; +- the lower-bound L and upper-bound U never refer to other type variables, + but only to types (though those types may contain type variables). + +> An aside: if the terms upper- and lower-bound confuse you, think of +> "supertype" and "subtype". The upper-bound is a "supertype" +> (super=upper in Latin, or something like that anyway) and the lower-bound +> is a "subtype" (sub=lower in Latin). I find it helps to visualize +> a simple class hierarchy, like Java minus interfaces and +> primitive types. The class Object is at the root (top) and other +> types lie in between. The bottom type is then the Null type. +> So the tree looks like: +> +> Object +> / \ +> String Other +> \ / +> (null) +> +> So the upper bound type is the "supertype" and the lower bound is the +> "subtype" (also, super and sub mean upper and lower in Latin, or something +> like that anyway). + +## Satisfying constraints + +At a primitive level, there is only one form of constraint that the +inference understands: a subtype relation. So the outside world can +say "make type A a subtype of type B". If there are variables +involved, the inferencer will adjust their upper- and lower-bounds as +needed to ensure that this relation is satisfied. (We also allow "make +type A equal to type B", but this is translated into "A <: B" and "B +<: A") + +As stated above, we always maintain the invariant that type bounds +never refer to other variables. This keeps the inference relatively +simple, avoiding the scenario of having a kind of graph where we have +to pump constraints along and reach a fixed point, but it does impose +some heuristics in the case where the user is relating two type +variables A <: B. + +Combining two variables such that variable A will forever be a subtype +of variable B is the trickiest part of the algorithm because there is +often no right choice---that is, the right choice will depend on +future constraints which we do not yet know. The problem comes about +because both A and B have bounds that can be adjusted in the future. +Let's look at some of the cases that can come up. + +Imagine, to start, the best case, where both A and B have an upper and +lower bound (that is, the bounds are not top nor bot respectively). In +that case, if we're lucky, A.ub <: B.lb, and so we know that whatever +A and B should become, they will forever have the desired subtyping +relation. We can just leave things as they are. + +### Option 1: Unify + +However, suppose that A.ub is *not* a subtype of B.lb. In +that case, we must make a decision. One option is to unify A +and B so that they are one variable whose bounds are: + + UB = GLB(A.ub, B.ub) + LB = LUB(A.lb, B.lb) + +(Note that we will have to verify that LB <: UB; if it does not, the +types are not intersecting and there is an error) In that case, A <: B +holds trivially because A==B. However, we have now lost some +flexibility, because perhaps the user intended for A and B to end up +as different types and not the same type. + +Pictorally, what this does is to take two distinct variables with +(hopefully not completely) distinct type ranges and produce one with +the intersection. 
+ + B.ub B.ub + /\ / + A.ub / \ A.ub / + / \ / \ \ / + / X \ UB + / / \ \ / \ + / / / \ / / + \ \ / / \ / + \ X / LB + \ / \ / / \ + \ / \ / / \ + A.lb B.lb A.lb B.lb + + +### Option 2: Relate UB/LB + +Another option is to keep A and B as distinct variables but set their +bounds in such a way that, whatever happens, we know that A <: B will hold. +This can be achieved by ensuring that A.ub <: B.lb. In practice there +are two ways to do that, depicted pictorally here: + + Before Option #1 Option #2 + + B.ub B.ub B.ub + /\ / \ / \ + A.ub / \ A.ub /(B')\ A.ub /(B')\ + / \ / \ \ / / \ / / + / X \ __UB____/ UB / + / / \ \ / | | / + / / / \ / | | / + \ \ / / /(A')| | / + \ X / / LB ______LB/ + \ / \ / / / \ / (A')/ \ + \ / \ / \ / \ \ / \ + A.lb B.lb A.lb B.lb A.lb B.lb + +In these diagrams, UB and LB are defined as before. As you can see, +the new ranges `A'` and `B'` are quite different from the range that +would be produced by unifying the variables. + +### What we do now + +Our current technique is to *try* (transactionally) to relate the +existing bounds of A and B, if there are any (i.e., if `UB(A) != top +&& LB(B) != bot`). If that succeeds, we're done. If it fails, then +we merge A and B into same variable. + +This is not clearly the correct course. For example, if `UB(A) != +top` but `LB(B) == bot`, we could conceivably set `LB(B)` to `UB(A)` +and leave the variables unmerged. This is sometimes the better +course, it depends on the program. + +The main case which fails today that I would like to support is: + + fn foo(x: T, y: T) { ... } + + fn bar() { + let x: @mut int = @mut 3; + let y: @int = @3; + foo(x, y); + } + +In principle, the inferencer ought to find that the parameter `T` to +`foo(x, y)` is `@const int`. Today, however, it does not; this is +because the type variable `T` is merged with the type variable for +`X`, and thus inherits its UB/LB of `@mut int`. This leaves no +flexibility for `T` to later adjust to accommodate `@int`. + +### What to do when not all bounds are present + +In the prior discussion we assumed that A.ub was not top and B.lb was +not bot. Unfortunately this is rarely the case. Often type variables +have "lopsided" bounds. For example, if a variable in the program has +been initialized but has not been used, then its corresponding type +variable will have a lower bound but no upper bound. When that +variable is then used, we would like to know its upper bound---but we +don't have one! In this case we'll do different things depending on +how the variable is being used. + +## Transactional support + +Whenever we adjust merge variables or adjust their bounds, we always +keep a record of the old value. This allows the changes to be undone. + +## Regions + +I've only talked about type variables here, but region variables +follow the same principle. They have upper- and lower-bounds. A +region A is a subregion of a region B if A being valid implies that B +is valid. This basically corresponds to the block nesting structure: +the regions for outer block scopes are superregions of those for inner +block scopes. + +## Integral and floating-point type variables + +There is a third variety of type variable that we use only for +inferring the types of unsuffixed integer literals. Integral type +variables differ from general-purpose type variables in that there's +no subtyping relationship among the various integral types, so instead +of associating each variable with an upper and lower bound, we just +use simple unification. 
Each integer variable is associated with at +most one integer type. Floating point types are handled similarly to +integral types. + +## GLB/LUB + +Computing the greatest-lower-bound and least-upper-bound of two +types/regions is generally straightforward except when type variables +are involved. In that case, we follow a similar "try to use the bounds +when possible but otherwise merge the variables" strategy. In other +words, `GLB(A, B)` where `A` and `B` are variables will often result +in `A` and `B` being merged and the result being `A`. + +## Type coercion + +We have a notion of assignability which differs somewhat from +subtyping; in particular it may cause region borrowing to occur. See +the big comment later in this file on Type Coercion for specifics. + +### In conclusion + +I showed you three ways to relate `A` and `B`. There are also more, +of course, though I'm not sure if there are any more sensible options. +The main point is that there are various options, each of which +produce a distinct range of types for `A` and `B`. Depending on what +the correct values for A and B are, one of these options will be the +right choice: but of course we don't know the right values for A and B +yet, that's what we're trying to find! In our code, we opt to unify +(Option #1). + +# Implementation details + +We make use of a trait-like impementation strategy to consolidate +duplicated code between subtypes, GLB, and LUB computations. See the +section on "Type Combining" below for details. + +*/ \ No newline at end of file diff --git a/src/librustc/middle/typeck/infer/error_reporting.rs b/src/librustc/middle/typeck/infer/error_reporting.rs new file mode 100644 index 0000000000000..e533f019b4692 --- /dev/null +++ b/src/librustc/middle/typeck/infer/error_reporting.rs @@ -0,0 +1,472 @@ +// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +Error Reporting Code for the inference engine + +Because of the way inference, and in particular region inference, +works, it often happens that errors are not detected until far after +the relevant line of code has been type-checked. Therefore, there is +an elaborate system to track why a particular constraint in the +inference graph arose so that we can explain to the user what gave +rise to a patricular error. + +The basis of the system are the "origin" types. An "origin" is the +reason that a constraint or inference variable arose. There are +different "origin" enums for different kinds of constraints/variables +(e.g., `TypeOrigin`, `RegionVariableOrigin`). An origin always has +a span, but also more information so that we can generate a meaningful +error message. + +Having a catalogue of all the different reasons an error can arise is +also useful for other reasons, like cross-referencing FAQs etc, though +we are not really taking advantage of this yet. + +# Region Inference + +Region inference is particularly tricky because it always succeeds "in +the moment" and simply registers a constraint. Then, at the end, we +can compute the full graph and report errors, so we need to be able to +store and later report what gave rise to the conflicting constraints. 
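To illustrate the origin-tracking idea this patch is built around, here is a hypothetical miniature in modern Rust rather than rustc's actual types: the origin variants and messages echo ones added by the patch (`InvokeClosure`, `DerefPointer`, `FreeVariable`), while `Span`, `Constraint`, and `RegionCtxt` are invented. Constraints are recorded unconditionally together with their origin, and only once the whole graph is solved are the unsatisfiable ones reported, using the stored origin to explain them:

    // Hypothetical miniature of the origin-tracking scheme; `Span`,
    // `Constraint`, and `RegionCtxt` are invented, while the origin variants
    // and messages echo ones added by this patch.
    #[derive(Clone, Copy, Debug)]
    struct Span { lo: u32, hi: u32 } // toy stand-in for a source span

    // Why a `sub <= sup` region constraint was introduced.
    #[derive(Clone, Copy, Debug)]
    enum SubregionOrigin {
        InvokeClosure(Span), // a closure was called
        DerefPointer(Span),  // a reference was dereferenced
        FreeVariable(Span),  // a closure captured a variable
    }

    struct Constraint { sub: u32, sup: u32, origin: SubregionOrigin } // toy region ids

    struct RegionCtxt { constraints: Vec<Constraint> }

    impl RegionCtxt {
        // Registering a constraint always "succeeds in the moment"; we just
        // remember it along with the reason it arose.
        fn mk_subregion(&mut self, origin: SubregionOrigin, sub: u32, sup: u32) {
            self.constraints.push(Constraint { sub, sup, origin });
        }

        // Later, once the full graph is known, unsatisfiable constraints are
        // reported by consulting the origin stored with each one.
        fn report_errors(&self, unsatisfiable: impl Fn(&Constraint) -> bool) {
            for c in &self.constraints {
                if !unsatisfiable(c) { continue; }
                let (sp, msg) = match c.origin {
                    SubregionOrigin::InvokeClosure(sp) =>
                        (sp, "cannot invoke closure outside of its lifetime"),
                    SubregionOrigin::DerefPointer(sp) =>
                        (sp, "dereference of reference outside its lifetime"),
                    SubregionOrigin::FreeVariable(sp) =>
                        (sp, "captured variable does not outlive the enclosing closure"),
                };
                eprintln!("{:?}: {} (constraint r{} <= r{})", sp, msg, c.sub, c.sup);
            }
        }
    }

    fn main() {
        let mut rcx = RegionCtxt { constraints: Vec::new() };
        rcx.mk_subregion(SubregionOrigin::InvokeClosure(Span { lo: 10, hi: 20 }), 1, 2);
        rcx.mk_subregion(SubregionOrigin::FreeVariable(Span { lo: 30, hi: 35 }), 3, 4);
        // Pretend the solver later found the second constraint unsatisfiable.
        rcx.report_errors(|c| matches!(c.origin, SubregionOrigin::FreeVariable(_)));
    }
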
+ +# Subtype Trace + +Determing whether `T1 <: T2` often involves a number of subtypes and +subconstraints along the way. A "TypeTrace" is an extended version +of an origin that traces the types and other values that were being +compared. It is not necessarily comprehensive (in fact, at the time of +this writing it only tracks the root values being compared) but I'd +like to extend it to include significant "waypoints". For example, if +you are comparing `(T1, T2) <: (T3, T4)`, and the problem is that `T2 +<: T4` fails, I'd like the trace to include enough information to say +"in the 2nd element of the tuple". Similarly, failures when comparing +arguments or return types in fn types should be able to cite the +specific position, etc. + +# Reality vs plan + +Of course, there is still a LOT of code in typeck that has yet to be +ported to this system, and which relies on string concatenation at the +time of error detection. + +*/ + +use std::prelude::*; +use middle::ty; +use middle::ty::Region; +use middle::typeck::infer; +use middle::typeck::infer::InferCtxt; +use middle::typeck::infer::TypeTrace; +use middle::typeck::infer::TypeOrigin; +use middle::typeck::infer::SubregionOrigin; +use middle::typeck::infer::RegionVariableOrigin; +use middle::typeck::infer::Types; +use middle::typeck::infer::TraitRefs; +use middle::typeck::infer::ValuePairs; +use middle::typeck::infer::region_inference::RegionResolutionError; +use middle::typeck::infer::region_inference::ConcreteFailure; +use middle::typeck::infer::region_inference::SubSupConflict; +use middle::typeck::infer::region_inference::SupSupConflict; +use syntax::opt_vec; +use syntax::opt_vec::OptVec; +use util::ppaux::UserString; +use util::ppaux::note_and_explain_region; + +pub trait ErrorReporting { + pub fn report_region_errors(@mut self, + errors: &OptVec); + + pub fn report_and_explain_type_error(@mut self, + trace: TypeTrace, + terr: &ty::type_err); + + fn values_str(@mut self, values: &ValuePairs) -> Option<~str>; + + fn expected_found_str( + @mut self, + exp_found: &ty::expected_found) + -> Option<~str>; + + fn report_concrete_failure(@mut self, + origin: SubregionOrigin, + sub: Region, + sup: Region); + + fn report_sub_sup_conflict(@mut self, + var_origin: RegionVariableOrigin, + sub_origin: SubregionOrigin, + sub_region: Region, + sup_origin: SubregionOrigin, + sup_region: Region); + + fn report_sup_sup_conflict(@mut self, + var_origin: RegionVariableOrigin, + origin1: SubregionOrigin, + region1: Region, + origin2: SubregionOrigin, + region2: Region); +} + + +impl ErrorReporting for InferCtxt { + pub fn report_region_errors(@mut self, + errors: &OptVec) { + for errors.iter().advance |error| { + match *error { + ConcreteFailure(origin, sub, sup) => { + self.report_concrete_failure(origin, sub, sup); + } + + SubSupConflict(var_origin, + sub_origin, sub_r, + sup_origin, sup_r) => { + self.report_sub_sup_conflict(var_origin, + sub_origin, sub_r, + sup_origin, sup_r); + } + + SupSupConflict(var_origin, + origin1, r1, + origin2, r2) => { + self.report_sup_sup_conflict(var_origin, + origin1, r1, + origin2, r2); + } + } + } + } + + pub fn report_and_explain_type_error(@mut self, + trace: TypeTrace, + terr: &ty::type_err) { + let tcx = self.tcx; + + let expected_found_str = match self.values_str(&trace.values) { + Some(v) => v, + None => { + return; /* derived error */ + } + }; + + let message_root_str = match trace.origin { + infer::Misc(_) => "mismatched types", + infer::MethodCompatCheck(_) => "method not compatible with trait", + 
infer::ExprAssignable(_) => "mismatched types", + infer::RelateTraitRefs(_) => "mismatched traits", + infer::RelateSelfType(_) => "mismatched types", + infer::MatchExpression(_) => "match arms have incompatible types", + infer::IfExpression(_) => "if and else have incompatible types", + }; + + self.tcx.sess.span_err( + trace.origin.span(), + fmt!("%s: %s (%s)", + message_root_str, + expected_found_str, + ty::type_err_to_str(tcx, terr))); + + ty::note_and_explain_type_err(self.tcx, terr); + } + + fn values_str(@mut self, values: &ValuePairs) -> Option<~str> { + /*! + * Returns a string of the form "expected `%s` but found `%s`", + * or None if this is a derived error. + */ + match *values { + infer::Types(ref exp_found) => { + self.expected_found_str(exp_found) + } + infer::TraitRefs(ref exp_found) => { + self.expected_found_str(exp_found) + } + } + } + + fn expected_found_str( + @mut self, + exp_found: &ty::expected_found) + -> Option<~str> + { + let expected = exp_found.expected.resolve(self); + if expected.contains_error() { + return None; + } + + let found = exp_found.found.resolve(self); + if found.contains_error() { + return None; + } + + Some(fmt!("expected `%s` but found `%s`", + expected.user_string(self.tcx), + found.user_string(self.tcx))) + } + + fn report_concrete_failure(@mut self, + origin: SubregionOrigin, + sub: Region, + sup: Region) { + match origin { + infer::Subtype(trace) => { + let terr = ty::terr_regions_does_not_outlive(sup, sub); + self.report_and_explain_type_error(trace, &terr); + } + infer::Reborrow(span) => { + self.tcx.sess.span_err( + span, + "lifetime of borrowed pointer outlines \ + lifetime of borrowed content..."); + note_and_explain_region( + self.tcx, + "...the borrowed pointer is valid for ", + sub, + "..."); + note_and_explain_region( + self.tcx, + "...but the borrowed content is only valid for ", + sup, + ""); + } + infer::InvokeClosure(span) => { + self.tcx.sess.span_err( + span, + "cannot invoke closure outside of its lifetime"); + note_and_explain_region( + self.tcx, + "the closure is only valid for ", + sup, + ""); + } + infer::DerefPointer(span) => { + self.tcx.sess.span_err( + span, + "dereference of reference outside its lifetime"); + note_and_explain_region( + self.tcx, + "the reference is only valid for ", + sup, + ""); + } + infer::FreeVariable(span) => { + self.tcx.sess.span_err( + span, + "captured variable does not outlive the enclosing closure"); + note_and_explain_region( + self.tcx, + "captured variable is valid for ", + sup, + ""); + note_and_explain_region( + self.tcx, + "closure is valid for ", + sub, + ""); + } + infer::IndexSlice(span) => { + self.tcx.sess.span_err( + span, + fmt!("index of slice outside its lifetime")); + note_and_explain_region( + self.tcx, + "the slice is only valid for ", + sup, + ""); + } + infer::RelateObjectBound(span) => { + self.tcx.sess.span_err( + span, + "lifetime of the source pointer does not outlive \ + lifetime bound of the object type"); + note_and_explain_region( + self.tcx, + "object type is valid for ", + sub, + ""); + note_and_explain_region( + self.tcx, + "source pointer is only valid for ", + sup, + ""); + } + infer::CallRcvr(span) => { + self.tcx.sess.span_err( + span, + "lifetime of method receiver does not outlive \ + the method call"); + note_and_explain_region( + self.tcx, + "the receiver is only valid for ", + sup, + ""); + } + infer::CallArg(span) => { + self.tcx.sess.span_err( + span, + "lifetime of function argument does not outlive \ + the function call"); + 
note_and_explain_region( + self.tcx, + "the function argument is only valid for ", + sup, + ""); + } + infer::CallReturn(span) => { + self.tcx.sess.span_err( + span, + "lifetime of return value does not outlive \ + the function call"); + note_and_explain_region( + self.tcx, + "the return value is only valid for ", + sup, + ""); + } + infer::AddrOf(span) => { + self.tcx.sess.span_err( + span, + "borrowed pointer is not valid \ + at the time of borrow"); + note_and_explain_region( + self.tcx, + "the borrow is only valid for ", + sup, + ""); + } + infer::AutoBorrow(span) => { + self.tcx.sess.span_err( + span, + "automatically borrowed pointer is not valid \ + at the time of borrow"); + note_and_explain_region( + self.tcx, + "the automatic borrow is only valid for ", + sup, + ""); + } + infer::BindingTypeIsNotValidAtDecl(span) => { + self.tcx.sess.span_err( + span, + "lifetime of variable does not enclose its declaration"); + note_and_explain_region( + self.tcx, + "the variable is only valid for ", + sup, + ""); + } + infer::ReferenceOutlivesReferent(ty, span) => { + self.tcx.sess.span_err( + origin.span(), + fmt!("in type `%s`, pointer has a longer lifetime than \ + the data it references", + ty.user_string(self.tcx))); + note_and_explain_region( + self.tcx, + "the pointer is valid for ", + sub, + ""); + note_and_explain_region( + self.tcx, + "but the referenced data is only valid for ", + sup, + ""); + } + } + } + + fn report_sub_sup_conflict(@mut self, + var_origin: RegionVariableOrigin, + sub_origin: SubregionOrigin, + sub_region: Region, + sup_origin: SubregionOrigin, + sup_region: Region) { + self.tcx.sess.span_err( + var_origin.span(), + fmt!("cannot infer an appropriate lifetime \ + due to conflicting requirements")); + + note_and_explain_region( + self.tcx, + "first, the lifetime cannot outlive ", + sup_region, + "..."); + + self.tcx.sess.span_note( + sup_origin.span(), + fmt!("...due to the following expression")); + + note_and_explain_region( + self.tcx, + "but, the lifetime must be valid for ", + sub_region, + "..."); + + self.tcx.sess.span_note( + sub_origin.span(), + fmt!("...due to the following expression")); + } + + fn report_sup_sup_conflict(@mut self, + var_origin: RegionVariableOrigin, + origin1: SubregionOrigin, + region1: Region, + origin2: SubregionOrigin, + region2: Region) { + self.tcx.sess.span_err( + var_origin.span(), + fmt!("cannot infer an appropriate lifetime \ + due to conflicting requirements")); + + note_and_explain_region( + self.tcx, + "first, the lifetime must be contained by ", + region1, + "..."); + + self.tcx.sess.span_note( + origin1.span(), + fmt!("...due to the following expression")); + + note_and_explain_region( + self.tcx, + "but, the lifetime must also be contained by ", + region2, + "..."); + + self.tcx.sess.span_note( + origin2.span(), + fmt!("...due to the following expression")); + } +} + +trait Resolvable { + fn resolve(&self, infcx: @mut InferCtxt) -> Self; + fn contains_error(&self) -> bool; +} + +impl Resolvable for ty::t { + fn resolve(&self, infcx: @mut InferCtxt) -> ty::t { + infcx.resolve_type_vars_if_possible(*self) + } + fn contains_error(&self) -> bool { + ty::type_is_error(*self) + } +} + +impl Resolvable for @ty::TraitRef { + fn resolve(&self, infcx: @mut InferCtxt) -> @ty::TraitRef { + @infcx.resolve_type_vars_in_trait_ref_if_possible(*self) + } + fn contains_error(&self) -> bool { + ty::trait_ref_contains_error(*self) + } +} + diff --git a/src/librustc/middle/typeck/infer/glb.rs b/src/librustc/middle/typeck/infer/glb.rs index 
0dd45919be14c..fefbf2336c29f 100644 --- a/src/librustc/middle/typeck/infer/glb.rs +++ b/src/librustc/middle/typeck/infer/glb.rs @@ -18,6 +18,7 @@ use middle::typeck::infer::lub::Lub; use middle::typeck::infer::sub::Sub; use middle::typeck::infer::to_str::InferStr; use middle::typeck::infer::{cres, InferCtxt}; +use middle::typeck::infer::{TypeTrace, Subtype}; use middle::typeck::infer::fold_regions_in_sig; use middle::typeck::isr_alist; use syntax::ast; @@ -37,7 +38,7 @@ impl Combine for Glb { fn infcx(&self) -> @mut InferCtxt { self.infcx } fn tag(&self) -> ~str { ~"glb" } fn a_is_expected(&self) -> bool { self.a_is_expected } - fn span(&self) -> span { self.span } + fn trace(&self) -> TypeTrace { self.trace } fn sub(&self) -> Sub { Sub(**self) } fn lub(&self) -> Lub { Lub(**self) } @@ -127,9 +128,7 @@ impl Combine for Glb { a.inf_str(self.infcx), b.inf_str(self.infcx)); - do indent { - self.infcx.region_vars.glb_regions(self.span, a, b) - } + Ok(self.infcx.region_vars.glb_regions(Subtype(self.trace), a, b)) } fn contraregions(&self, a: ty::Region, b: ty::Region) @@ -181,11 +180,11 @@ impl Combine for Glb { // Instantiate each bound region with a fresh region variable. let (a_with_fresh, a_isr) = self.infcx.replace_bound_regions_with_fresh_regions( - self.span, a); + self.trace, a); let a_vars = var_ids(self, a_isr); let (b_with_fresh, b_isr) = self.infcx.replace_bound_regions_with_fresh_regions( - self.span, b); + self.trace, b); let b_vars = var_ids(self, b_isr); // Collect constraints. @@ -277,7 +276,7 @@ impl Combine for Glb { } this.infcx.tcx.sess.span_bug( - this.span, + this.trace.origin.span(), fmt!("could not find original bound region for %?", r)); } diff --git a/src/librustc/middle/typeck/infer/lattice.rs b/src/librustc/middle/typeck/infer/lattice.rs index 73e43c6c07629..b1a6aefd17949 100644 --- a/src/librustc/middle/typeck/infer/lattice.rs +++ b/src/librustc/middle/typeck/infer/lattice.rs @@ -530,7 +530,7 @@ pub fn var_ids(this: &T, isr: isr_alist) -> ~[RegionVid] { ty::re_infer(ty::ReVar(r)) => { result.push(r); } r => { this.infcx().tcx.sess.span_bug( - this.span(), + this.trace().origin.span(), fmt!("Found non-region-vid: %?", r)); } } diff --git a/src/librustc/middle/typeck/infer/lub.rs b/src/librustc/middle/typeck/infer/lub.rs index ad063be86146a..efb1dc200b521 100644 --- a/src/librustc/middle/typeck/infer/lub.rs +++ b/src/librustc/middle/typeck/infer/lub.rs @@ -19,6 +19,7 @@ use middle::typeck::infer::sub::Sub; use middle::typeck::infer::to_str::InferStr; use middle::typeck::infer::{cres, InferCtxt}; use middle::typeck::infer::fold_regions_in_sig; +use middle::typeck::infer::{TypeTrace, Subtype}; use middle::typeck::isr_alist; use util::common::indent; use util::ppaux::mt_to_str; @@ -44,7 +45,7 @@ impl Combine for Lub { fn infcx(&self) -> @mut InferCtxt { self.infcx } fn tag(&self) -> ~str { ~"lub" } fn a_is_expected(&self) -> bool { self.a_is_expected } - fn span(&self) -> span { self.span } + fn trace(&self) -> TypeTrace { self.trace } fn sub(&self) -> Sub { Sub(**self) } fn lub(&self) -> Lub { Lub(**self) } @@ -119,9 +120,7 @@ impl Combine for Lub { a.inf_str(self.infcx), b.inf_str(self.infcx)); - do indent { - self.infcx.region_vars.lub_regions(self.span, a, b) - } + Ok(self.infcx.region_vars.lub_regions(Subtype(self.trace), a, b)) } fn fn_sigs(&self, a: &ty::FnSig, b: &ty::FnSig) -> cres { @@ -137,10 +136,10 @@ impl Combine for Lub { // Instantiate each bound region with a fresh region variable. 
let (a_with_fresh, a_isr) = self.infcx.replace_bound_regions_with_fresh_regions( - self.span, a); + self.trace, a); let (b_with_fresh, _) = self.infcx.replace_bound_regions_with_fresh_regions( - self.span, b); + self.trace, b); // Collect constraints. let sig0 = if_ok!(super_fn_sigs(self, &a_with_fresh, &b_with_fresh)); @@ -196,7 +195,7 @@ impl Combine for Lub { } this.infcx.tcx.sess.span_bug( - this.span, + this.trace.origin.span(), fmt!("Region %? is not associated with \ any bound region from A!", r0)); } diff --git a/src/librustc/middle/typeck/infer/mod.rs b/src/librustc/middle/typeck/infer/mod.rs index 28d943b580797..3360edc6a4678 100644 --- a/src/librustc/middle/typeck/infer/mod.rs +++ b/src/librustc/middle/typeck/infer/mod.rs @@ -8,239 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -# Type inference engine - -This is loosely based on standard HM-type inference, but with an -extension to try and accommodate subtyping. There is nothing -principled about this extension; it's sound---I hope!---but it's a -heuristic, ultimately, and does not guarantee that it finds a valid -typing even if one exists (in fact, there are known scenarios where it -fails, some of which may eventually become problematic). - -## Key idea - -The main change is that each type variable T is associated with a -lower-bound L and an upper-bound U. L and U begin as bottom and top, -respectively, but gradually narrow in response to new constraints -being introduced. When a variable is finally resolved to a concrete -type, it can (theoretically) select any type that is a supertype of L -and a subtype of U. - -There are several critical invariants which we maintain: - -- the upper-bound of a variable only becomes lower and the lower-bound - only becomes higher over time; -- the lower-bound L is always a subtype of the upper bound U; -- the lower-bound L and upper-bound U never refer to other type variables, - but only to types (though those types may contain type variables). - -> An aside: if the terms upper- and lower-bound confuse you, think of -> "supertype" and "subtype". The upper-bound is a "supertype" -> (super=upper in Latin, or something like that anyway) and the lower-bound -> is a "subtype" (sub=lower in Latin). I find it helps to visualize -> a simple class hierarchy, like Java minus interfaces and -> primitive types. The class Object is at the root (top) and other -> types lie in between. The bottom type is then the Null type. -> So the tree looks like: -> -> Object -> / \ -> String Other -> \ / -> (null) -> -> So the upper bound type is the "supertype" and the lower bound is the -> "subtype" (also, super and sub mean upper and lower in Latin, or something -> like that anyway). - -## Satisfying constraints - -At a primitive level, there is only one form of constraint that the -inference understands: a subtype relation. So the outside world can -say "make type A a subtype of type B". If there are variables -involved, the inferencer will adjust their upper- and lower-bounds as -needed to ensure that this relation is satisfied. (We also allow "make -type A equal to type B", but this is translated into "A <: B" and "B -<: A") - -As stated above, we always maintain the invariant that type bounds -never refer to other variables. 
This keeps the inference relatively -simple, avoiding the scenario of having a kind of graph where we have -to pump constraints along and reach a fixed point, but it does impose -some heuristics in the case where the user is relating two type -variables A <: B. - -Combining two variables such that variable A will forever be a subtype -of variable B is the trickiest part of the algorithm because there is -often no right choice---that is, the right choice will depend on -future constraints which we do not yet know. The problem comes about -because both A and B have bounds that can be adjusted in the future. -Let's look at some of the cases that can come up. - -Imagine, to start, the best case, where both A and B have an upper and -lower bound (that is, the bounds are not top nor bot respectively). In -that case, if we're lucky, A.ub <: B.lb, and so we know that whatever -A and B should become, they will forever have the desired subtyping -relation. We can just leave things as they are. - -### Option 1: Unify - -However, suppose that A.ub is *not* a subtype of B.lb. In -that case, we must make a decision. One option is to unify A -and B so that they are one variable whose bounds are: - - UB = GLB(A.ub, B.ub) - LB = LUB(A.lb, B.lb) - -(Note that we will have to verify that LB <: UB; if it does not, the -types are not intersecting and there is an error) In that case, A <: B -holds trivially because A==B. However, we have now lost some -flexibility, because perhaps the user intended for A and B to end up -as different types and not the same type. - -Pictorally, what this does is to take two distinct variables with -(hopefully not completely) distinct type ranges and produce one with -the intersection. - - B.ub B.ub - /\ / - A.ub / \ A.ub / - / \ / \ \ / - / X \ UB - / / \ \ / \ - / / / \ / / - \ \ / / \ / - \ X / LB - \ / \ / / \ - \ / \ / / \ - A.lb B.lb A.lb B.lb - - -### Option 2: Relate UB/LB - -Another option is to keep A and B as distinct variables but set their -bounds in such a way that, whatever happens, we know that A <: B will hold. -This can be achieved by ensuring that A.ub <: B.lb. In practice there -are two ways to do that, depicted pictorally here: - - Before Option #1 Option #2 - - B.ub B.ub B.ub - /\ / \ / \ - A.ub / \ A.ub /(B')\ A.ub /(B')\ - / \ / \ \ / / \ / / - / X \ __UB____/ UB / - / / \ \ / | | / - / / / \ / | | / - \ \ / / /(A')| | / - \ X / / LB ______LB/ - \ / \ / / / \ / (A')/ \ - \ / \ / \ / \ \ / \ - A.lb B.lb A.lb B.lb A.lb B.lb - -In these diagrams, UB and LB are defined as before. As you can see, -the new ranges `A'` and `B'` are quite different from the range that -would be produced by unifying the variables. - -### What we do now - -Our current technique is to *try* (transactionally) to relate the -existing bounds of A and B, if there are any (i.e., if `UB(A) != top -&& LB(B) != bot`). If that succeeds, we're done. If it fails, then -we merge A and B into same variable. - -This is not clearly the correct course. For example, if `UB(A) != -top` but `LB(B) == bot`, we could conceivably set `LB(B)` to `UB(A)` -and leave the variables unmerged. This is sometimes the better -course, it depends on the program. - -The main case which fails today that I would like to support is: - - fn foo(x: T, y: T) { ... } - - fn bar() { - let x: @mut int = @mut 3; - let y: @int = @3; - foo(x, y); - } - -In principle, the inferencer ought to find that the parameter `T` to -`foo(x, y)` is `@const int`. 
Today, however, it does not; this is -because the type variable `T` is merged with the type variable for -`X`, and thus inherits its UB/LB of `@mut int`. This leaves no -flexibility for `T` to later adjust to accommodate `@int`. - -### What to do when not all bounds are present - -In the prior discussion we assumed that A.ub was not top and B.lb was -not bot. Unfortunately this is rarely the case. Often type variables -have "lopsided" bounds. For example, if a variable in the program has -been initialized but has not been used, then its corresponding type -variable will have a lower bound but no upper bound. When that -variable is then used, we would like to know its upper bound---but we -don't have one! In this case we'll do different things depending on -how the variable is being used. - -## Transactional support - -Whenever we adjust merge variables or adjust their bounds, we always -keep a record of the old value. This allows the changes to be undone. - -## Regions - -I've only talked about type variables here, but region variables -follow the same principle. They have upper- and lower-bounds. A -region A is a subregion of a region B if A being valid implies that B -is valid. This basically corresponds to the block nesting structure: -the regions for outer block scopes are superregions of those for inner -block scopes. - -## Integral and floating-point type variables - -There is a third variety of type variable that we use only for -inferring the types of unsuffixed integer literals. Integral type -variables differ from general-purpose type variables in that there's -no subtyping relationship among the various integral types, so instead -of associating each variable with an upper and lower bound, we just -use simple unification. Each integer variable is associated with at -most one integer type. Floating point types are handled similarly to -integral types. - -## GLB/LUB - -Computing the greatest-lower-bound and least-upper-bound of two -types/regions is generally straightforward except when type variables -are involved. In that case, we follow a similar "try to use the bounds -when possible but otherwise merge the variables" strategy. In other -words, `GLB(A, B)` where `A` and `B` are variables will often result -in `A` and `B` being merged and the result being `A`. - -## Type coercion - -We have a notion of assignability which differs somewhat from -subtyping; in particular it may cause region borrowing to occur. See -the big comment later in this file on Type Coercion for specifics. - -### In conclusion - -I showed you three ways to relate `A` and `B`. There are also more, -of course, though I'm not sure if there are any more sensible options. -The main point is that there are various options, each of which -produce a distinct range of types for `A` and `B`. Depending on what -the correct values for A and B are, one of these options will be the -right choice: but of course we don't know the right values for A and B -yet, that's what we're trying to find! In our code, we opt to unify -(Option #1). - -# Implementation details - -We make use of a trait-like impementation strategy to consolidate -duplicated code between subtypes, GLB, and LUB computations. See the -section on "Type Combining" below for details. - -*/ +/*! 
See doc.rs for documentation */ pub use middle::ty::IntVarValue; @@ -260,11 +28,14 @@ use middle::typeck::infer::combine::{Combine, CombineFields, eq_tys}; use middle::typeck::infer::region_inference::{RegionVarBindings}; use middle::typeck::infer::resolve::{resolver}; use middle::typeck::infer::sub::Sub; +use middle::typeck::infer::lub::Lub; use middle::typeck::infer::to_str::InferStr; use middle::typeck::infer::unify::{ValsAndBindings, Root}; +use middle::typeck::infer::error_reporting::ErrorReporting; use middle::typeck::isr_alist; use util::common::indent; -use util::ppaux::{bound_region_to_str, ty_to_str, trait_ref_to_str}; +use util::ppaux::{bound_region_to_str, ty_to_str, trait_ref_to_str, Repr, + UserString}; use std::result; use std::vec; @@ -275,17 +46,20 @@ use syntax::ast; use syntax::codemap; use syntax::codemap::span; +pub mod doc; pub mod macros; pub mod combine; pub mod glb; pub mod lattice; pub mod lub; +#[path = "region_inference/mod.rs"] pub mod region_inference; pub mod resolve; pub mod sub; pub mod to_str; pub mod unify; pub mod coercion; +pub mod error_reporting; pub type Bound = Option; pub struct Bounds { @@ -319,6 +93,133 @@ pub struct InferCtxt { region_vars: RegionVarBindings, } +/// Why did we require that the two types be related? +/// +/// See `error_reporting.rs` for more details +pub enum TypeOrigin { + // Not yet categorized in a better way + Misc(span), + + // Checking that method of impl is compatible with trait + MethodCompatCheck(span), + + // Checking that this expression can be assigned where it needs to be + ExprAssignable(@ast::expr), + + // Relating trait refs when resolving vtables + RelateTraitRefs(span), + + // Relating trait refs when resolving vtables + RelateSelfType(span), + + // Computing common supertype in a match expression + MatchExpression(span), + + // Computing common supertype in an if expression + IfExpression(span), +} + +/// See `error_reporting.rs` for more details +pub enum ValuePairs { + Types(ty::expected_found), + TraitRefs(ty::expected_found<@ty::TraitRef>), +} + +/// The trace designates the path through inference that we took to +/// encounter an error or subtyping constraint. +/// +/// See `error_reporting.rs` for more details. +pub struct TypeTrace { + origin: TypeOrigin, + values: ValuePairs, +} + +/// The origin of a `r1 <= r2` constraint. 
+/// +/// See `error_reporting.rs` for more details +pub enum SubregionOrigin { + // Arose from a subtyping relation + Subtype(TypeTrace), + + // Invocation of closure must be within its lifetime + InvokeClosure(span), + + // Dereference of borrowed pointer must be within its lifetime + DerefPointer(span), + + // Closure bound must not outlive captured free variables + FreeVariable(span), + + // Index into slice must be within its lifetime + IndexSlice(span), + + // When casting `&'a T` to an `&'b Trait` object, + // relating `'a` to `'b` + RelateObjectBound(span), + + // Creating a pointer `b` to contents of another borrowed pointer + Reborrow(span), + + // (&'a &'b T) where a >= b + ReferenceOutlivesReferent(ty::t, span), + + // A `ref b` whose region does not enclose the decl site + BindingTypeIsNotValidAtDecl(span), + + // Regions appearing in a method receiver must outlive method call + CallRcvr(span), + + // Regions appearing in a function argument must outlive func call + CallArg(span), + + // Region in return type of invoked fn must enclose call + CallReturn(span), + + // Region resulting from a `&` expr must enclose the `&` expr + AddrOf(span), + + // An auto-borrow that does not enclose the expr where it occurs + AutoBorrow(span), +} + +/// Reasons to create a region inference variable +/// +/// See `error_reporting.rs` for more details +pub enum RegionVariableOrigin { + // Region variables created for ill-categorized reasons, + // mostly indicates places in need of refactoring + MiscVariable(span), + + // Regions created by a `&P` or `[...]` pattern + PatternRegion(span), + + // Regions created by `&` operator + AddrOfRegion(span), + + // Regions created by `&[...]` literal + AddrOfSlice(span), + + // Regions created as part of an autoref of a method receiver + Autoref(span), + + // Regions created as part of an automatic coercion + Coercion(TypeTrace), + + // Region variables created for bound regions + // in a function or method that is called + BoundRegionInFnCall(span, ty::bound_region), + + // Region variables created for bound regions + // when doing subtyping/lub/glb computations + BoundRegionInFnType(span, ty::bound_region), + + BoundRegionInTypeOrImpl(span), + + BoundRegionInCoherence, + + BoundRegionError(span), +} + pub enum fixup_err { unresolved_int_ty(IntVid), unresolved_ty(TyVid), @@ -364,16 +265,51 @@ pub fn new_infer_ctxt(tcx: ty::ctxt) -> @mut InferCtxt { } } +pub fn common_supertype(cx: @mut InferCtxt, + origin: TypeOrigin, + a_is_expected: bool, + a: ty::t, + b: ty::t) + -> ty::t { + /*! + * Computes the least upper-bound of `a` and `b`. If this is + * not possible, reports an error and returns ty::err. 
+ */ + + debug!("common_supertype(%s, %s)", a.inf_str(cx), b.inf_str(cx)); + + let trace = TypeTrace { + origin: origin, + values: Types(expected_found(a_is_expected, a, b)) + }; + + let result = do cx.commit { + cx.lub(a_is_expected, trace).tys(a, b) + }; + + match result { + Ok(t) => t, + Err(ref err) => { + cx.report_and_explain_type_error(trace, err); + ty::mk_err() + } + } +} + pub fn mk_subty(cx: @mut InferCtxt, a_is_expected: bool, - span: span, + origin: TypeOrigin, a: ty::t, b: ty::t) -> ures { debug!("mk_subty(%s <: %s)", a.inf_str(cx), b.inf_str(cx)); do indent { do cx.commit { - cx.sub(a_is_expected, span).tys(a, b) + let trace = TypeTrace { + origin: origin, + values: Types(expected_found(a_is_expected, a, b)) + }; + cx.sub(a_is_expected, trace).tys(a, b) } }.to_ures() } @@ -382,35 +318,40 @@ pub fn can_mk_subty(cx: @mut InferCtxt, a: ty::t, b: ty::t) -> ures { debug!("can_mk_subty(%s <: %s)", a.inf_str(cx), b.inf_str(cx)); do indent { do cx.probe { - cx.sub(true, codemap::dummy_sp()).tys(a, b) + let trace = TypeTrace { + origin: Misc(codemap::dummy_sp()), + values: Types(expected_found(true, a, b)) + }; + cx.sub(true, trace).tys(a, b) } }.to_ures() } pub fn mk_subr(cx: @mut InferCtxt, a_is_expected: bool, - span: span, + origin: SubregionOrigin, a: ty::Region, - b: ty::Region) - -> ures { + b: ty::Region) { debug!("mk_subr(%s <: %s)", a.inf_str(cx), b.inf_str(cx)); - do indent { - do cx.commit { - cx.sub(a_is_expected, span).regions(a, b) - } - }.to_ures() + cx.region_vars.start_snapshot(); + cx.region_vars.make_subregion(origin, a, b); + cx.region_vars.commit(); } pub fn mk_eqty(cx: @mut InferCtxt, a_is_expected: bool, - span: span, + origin: TypeOrigin, a: ty::t, b: ty::t) -> ures { debug!("mk_eqty(%s <: %s)", a.inf_str(cx), b.inf_str(cx)); do indent { do cx.commit { - let suber = cx.sub(a_is_expected, span); + let trace = TypeTrace { + origin: origin, + values: Types(expected_found(a_is_expected, a, b)) + }; + let suber = cx.sub(a_is_expected, trace); eq_tys(&suber, a, b) } }.to_ures() @@ -418,31 +359,49 @@ pub fn mk_eqty(cx: @mut InferCtxt, pub fn mk_sub_trait_refs(cx: @mut InferCtxt, a_is_expected: bool, - span: span, - a: &ty::TraitRef, - b: &ty::TraitRef) + origin: TypeOrigin, + a: @ty::TraitRef, + b: @ty::TraitRef) -> ures { debug!("mk_sub_trait_refs(%s <: %s)", a.inf_str(cx), b.inf_str(cx)); do indent { do cx.commit { - let suber = cx.sub(a_is_expected, span); + let trace = TypeTrace { + origin: origin, + values: TraitRefs(expected_found(a_is_expected, a, b)) + }; + let suber = cx.sub(a_is_expected, trace); suber.trait_refs(a, b) } }.to_ures() } +fn expected_found(a_is_expected: bool, + a: T, + b: T) -> ty::expected_found { + if a_is_expected { + ty::expected_found {expected: a, found: b} + } else { + ty::expected_found {expected: b, found: a} + } +} + pub fn mk_coercety(cx: @mut InferCtxt, a_is_expected: bool, - span: span, + origin: TypeOrigin, a: ty::t, b: ty::t) -> CoerceResult { debug!("mk_coercety(%s -> %s)", a.inf_str(cx), b.inf_str(cx)); do indent { do cx.commit { - Coerce(cx.combine_fields(a_is_expected, span)).tys(a, b) + let trace = TypeTrace { + origin: origin, + values: Types(expected_found(a_is_expected, a, b)) + }; + Coerce(cx.combine_fields(a_is_expected, trace)).tys(a, b) } } } @@ -451,8 +410,11 @@ pub fn can_mk_coercety(cx: @mut InferCtxt, a: ty::t, b: ty::t) -> ures { debug!("can_mk_coercety(%s -> %s)", a.inf_str(cx), b.inf_str(cx)); do indent { do cx.probe { - let span = codemap::dummy_sp(); - Coerce(cx.combine_fields(true, span)).tys(a, b) + let 
trace = TypeTrace { + origin: Misc(codemap::dummy_sp()), + values: Types(expected_found(true, a, b)) + }; + Coerce(cx.combine_fields(true, trace)).tys(a, b) } }.to_ures() } @@ -535,15 +497,21 @@ struct Snapshot { } impl InferCtxt { - pub fn combine_fields(@mut self, a_is_expected: bool, span: span) + pub fn combine_fields(@mut self, + a_is_expected: bool, + trace: TypeTrace) -> CombineFields { CombineFields {infcx: self, a_is_expected: a_is_expected, - span: span} + trace: trace} } - pub fn sub(@mut self, a_is_expected: bool, span: span) -> Sub { - Sub(self.combine_fields(a_is_expected, span)) + pub fn sub(@mut self, a_is_expected: bool, trace: TypeTrace) -> Sub { + Sub(self.combine_fields(a_is_expected, trace)) + } + + pub fn lub(@mut self, a_is_expected: bool, trace: TypeTrace) -> Lub { + Lub(self.combine_fields(a_is_expected, trace)) } pub fn in_snapshot(&self) -> bool { @@ -663,31 +631,13 @@ impl InferCtxt { ty::mk_float_var(self.tcx, self.next_float_var_id()) } - pub fn next_region_var_nb(&mut self, span: span) -> ty::Region { - ty::re_infer(ty::ReVar(self.region_vars.new_region_var(span))) - } - - pub fn next_region_var_with_lb(&mut self, - span: span, - lb_region: ty::Region) - -> ty::Region { - let region_var = self.next_region_var_nb(span); - - // add lb_region as a lower bound on the newly built variable - assert!(self.region_vars.make_subregion(span, - lb_region, - region_var).is_ok()); - - return region_var; - } - - pub fn next_region_var(&mut self, span: span, scope_id: ast::node_id) - -> ty::Region { - self.next_region_var_with_lb(span, ty::re_scope(scope_id)) + pub fn next_region_var(&mut self, origin: RegionVariableOrigin) -> ty::Region { + ty::re_infer(ty::ReVar(self.region_vars.new_region_var(origin))) } - pub fn resolve_regions(&mut self) { - self.region_vars.resolve_regions(); + pub fn resolve_regions(@mut self) { + let errors = self.region_vars.resolve_regions(); + self.report_region_errors(&errors); // see error_reporting.rs } pub fn ty_to_str(@mut self, t: ty::t) -> ~str { @@ -809,17 +759,13 @@ impl InferCtxt { } pub fn replace_bound_regions_with_fresh_regions(&mut self, - span: span, + trace: TypeTrace, fsig: &ty::FnSig) - -> (ty::FnSig, - isr_alist) { + -> (ty::FnSig, isr_alist) { let(isr, _, fn_sig) = replace_bound_regions_in_fn_sig(self.tcx, @Nil, None, fsig, |br| { - // N.B.: The name of the bound region doesn't have anything to - // do with the region variable that's created for it. The - // only thing we're doing with `br` here is using it in the - // debug message. 
- let rvar = self.next_region_var_nb(span); + let rvar = self.next_region_var( + BoundRegionInFnType(trace.origin.span(), br)); debug!("Bound region %s maps to %?", bound_region_to_str(self.tcx, "", false, br), rvar); @@ -838,3 +784,125 @@ pub fn fold_regions_in_sig( ty::fold_regions(tcx, t, |r, in_fn| fldr(r, in_fn)) } } + +impl TypeTrace { + pub fn span(&self) -> span { + self.origin.span() + } +} + +impl Repr for TypeTrace { + fn repr(&self, tcx: ty::ctxt) -> ~str { + fmt!("TypeTrace(%s)", self.origin.repr(tcx)) + } +} + +impl TypeOrigin { + pub fn span(&self) -> span { + match *self { + MethodCompatCheck(span) => span, + ExprAssignable(expr) => expr.span, + Misc(span) => span, + RelateTraitRefs(span) => span, + RelateSelfType(span) => span, + MatchExpression(span) => span, + IfExpression(span) => span, + } + } +} + +impl Repr for TypeOrigin { + fn repr(&self, tcx: ty::ctxt) -> ~str { + match *self { + MethodCompatCheck(a) => fmt!("MethodCompatCheck(%s)", a.repr(tcx)), + ExprAssignable(a) => fmt!("ExprAssignable(%s)", a.repr(tcx)), + Misc(a) => fmt!("Misc(%s)", a.repr(tcx)), + RelateTraitRefs(a) => fmt!("RelateTraitRefs(%s)", a.repr(tcx)), + RelateSelfType(a) => fmt!("RelateSelfType(%s)", a.repr(tcx)), + MatchExpression(a) => fmt!("MatchExpression(%s)", a.repr(tcx)), + IfExpression(a) => fmt!("IfExpression(%s)", a.repr(tcx)), + } + } +} + +impl SubregionOrigin { + pub fn span(&self) -> span { + match *self { + Subtype(a) => a.span(), + InvokeClosure(a) => a, + DerefPointer(a) => a, + FreeVariable(a) => a, + IndexSlice(a) => a, + RelateObjectBound(a) => a, + Reborrow(a) => a, + ReferenceOutlivesReferent(_, a) => a, + BindingTypeIsNotValidAtDecl(a) => a, + CallRcvr(a) => a, + CallArg(a) => a, + CallReturn(a) => a, + AddrOf(a) => a, + AutoBorrow(a) => a, + } + } +} + +impl Repr for SubregionOrigin { + fn repr(&self, tcx: ty::ctxt) -> ~str { + match *self { + Subtype(a) => fmt!("Subtype(%s)", a.repr(tcx)), + InvokeClosure(a) => fmt!("InvokeClosure(%s)", a.repr(tcx)), + DerefPointer(a) => fmt!("DerefPointer(%s)", a.repr(tcx)), + FreeVariable(a) => fmt!("FreeVariable(%s)", a.repr(tcx)), + IndexSlice(a) => fmt!("IndexSlice(%s)", a.repr(tcx)), + RelateObjectBound(a) => fmt!("RelateObjectBound(%s)", a.repr(tcx)), + Reborrow(a) => fmt!("Reborrow(%s)", a.repr(tcx)), + ReferenceOutlivesReferent(_, a) => fmt!("ReferenceOutlivesReferent(%s)", a.repr(tcx)), + BindingTypeIsNotValidAtDecl(a) => fmt!("BindingTypeIsNotValidAtDecl(%s)", a.repr(tcx)), + CallRcvr(a) => fmt!("CallRcvr(%s)", a.repr(tcx)), + CallArg(a) => fmt!("CallArg(%s)", a.repr(tcx)), + CallReturn(a) => fmt!("CallReturn(%s)", a.repr(tcx)), + AddrOf(a) => fmt!("AddrOf(%s)", a.repr(tcx)), + AutoBorrow(a) => fmt!("AutoBorrow(%s)", a.repr(tcx)), + } + } +} + +impl RegionVariableOrigin { + pub fn span(&self) -> span { + match *self { + MiscVariable(a) => a, + PatternRegion(a) => a, + AddrOfRegion(a) => a, + AddrOfSlice(a) => a, + Autoref(a) => a, + Coercion(a) => a.span(), + BoundRegionInFnCall(a, _) => a, + BoundRegionInFnType(a, _) => a, + BoundRegionInTypeOrImpl(a) => a, + BoundRegionInCoherence => codemap::dummy_sp(), + BoundRegionError(a) => a, + } + } +} + +impl Repr for RegionVariableOrigin { + fn repr(&self, tcx: ty::ctxt) -> ~str { + match *self { + MiscVariable(a) => fmt!("MiscVariable(%s)", a.repr(tcx)), + PatternRegion(a) => fmt!("PatternRegion(%s)", a.repr(tcx)), + AddrOfRegion(a) => fmt!("AddrOfRegion(%s)", a.repr(tcx)), + AddrOfSlice(a) => fmt!("AddrOfSlice(%s)", a.repr(tcx)), + Autoref(a) => fmt!("Autoref(%s)", a.repr(tcx)), + 
Coercion(a) => fmt!("Coercion(%s)", a.repr(tcx)), + BoundRegionInFnCall(a, b) => fmt!("BoundRegionInFnCall(%s,%s)", + a.repr(tcx), b.repr(tcx)), + BoundRegionInFnType(a, b) => fmt!("BoundRegionInFnType(%s,%s)", + a.repr(tcx), b.repr(tcx)), + BoundRegionInTypeOrImpl(a) => fmt!("BoundRegionInTypeOrImpl(%s)", + a.repr(tcx)), + BoundRegionInCoherence => fmt!("BoundRegionInCoherence"), + BoundRegionError(a) => fmt!("BoundRegionError(%s)", a.repr(tcx)), + } + } +} + diff --git a/src/librustc/middle/typeck/infer/region_inference/doc.rs b/src/librustc/middle/typeck/infer/region_inference/doc.rs new file mode 100644 index 0000000000000..df6d5dc1b2060 --- /dev/null +++ b/src/librustc/middle/typeck/infer/region_inference/doc.rs @@ -0,0 +1,773 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +Region inference module. + +# Terminology + +Note that we use the terms region and lifetime interchangeably, +though the term `lifetime` is preferred. + +# Introduction + +Region inference uses a somewhat more involved algorithm than type +inference. It is not the most efficient thing ever written though it +seems to work well enough in practice (famous last words). The reason +that we use a different algorithm is because, unlike with types, it is +impractical to hand-annotate with regions (in some cases, there aren't +even the requisite syntactic forms). So we have to get it right, and +it's worth spending more time on a more involved analysis. Moreover, +regions are a simpler case than types: they don't have aggregate +structure, for example. + +Unlike normal type inference, which is similar in spirit to H-M and thus +works progressively, the region type inference works by accumulating +constraints over the course of a function. Finally, at the end of +processing a function, we process and solve the constraints all at +once. + +The constraints are always of one of three possible forms: + +- ConstrainVarSubVar(R_i, R_j) states that region variable R_i + must be a subregion of R_j +- ConstrainRegSubVar(R, R_i) states that the concrete region R + (which must not be a variable) must be a subregion of the varibale R_i +- ConstrainVarSubReg(R_i, R) is the inverse + +# Building up the constraints + +Variables and constraints are created using the following methods: + +- `new_region_var()` creates a new, unconstrained region variable; +- `make_subregion(R_i, R_j)` states that R_i is a subregion of R_j +- `lub_regions(R_i, R_j) -> R_k` returns a region R_k which is + the smallest region that is greater than both R_i and R_j +- `glb_regions(R_i, R_j) -> R_k` returns a region R_k which is + the greatest region that is smaller than both R_i and R_j + +The actual region resolution algorithm is not entirely +obvious, though it is also not overly complex. + +## Snapshotting + +It is also permitted to try (and rollback) changes to the graph. This +is done by invoking `start_snapshot()`, which returns a value. Then +later you can call `rollback_to()` which undoes the work. +Alternatively, you can call `commit()` which ends all snapshots. +Snapshots can be recursive---so you can start a snapshot when another +is in progress, but only the root snapshot can "commit". 
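+
+As a rough sketch of how this interface is used (compare `mk_subr` in
+`infer/mod.rs`, which follows the same pattern; the names `origin`,
+`a`, `b`, and `keep_the_change` below are placeholders, not actual
+compiler code):
+
+    // Tentatively record the constraint `a <= b` inside a snapshot,
+    // then either keep the change or undo it.
+    let snapshot = region_vars.start_snapshot();
+    region_vars.make_subregion(origin, a, b);
+    if keep_the_change {
+        region_vars.commit();
+    } else {
+        region_vars.rollback_to(snapshot);
+    }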
+ +# Resolving constraints + +The constraint resolution algorithm is not super complex but also not +entirely obvious. Here I describe the problem somewhat abstractly, +then describe how the current code works. There may be other, smarter +ways of doing this with which I am unfamiliar and can't be bothered to +research at the moment. - NDM + +## The problem + +Basically our input is a directed graph where nodes can be divided +into two categories: region variables and concrete regions. Each edge +`R -> S` in the graph represents a constraint that the region `R` is a +subregion of the region `S`. + +Region variable nodes can have arbitrary degree. There is one region +variable node per region variable. + +Each concrete region node is associated with some, well, concrete +region: e.g., a free lifetime, or the region for a particular scope. +Note that there may be more than one concrete region node for a +particular region value. Moreover, because of how the graph is built, +we know that all concrete region nodes have either in-degree 1 or +out-degree 1. + +Before resolution begins, we build up the constraints in a hashmap +that maps `Constraint` keys to spans. During resolution, we construct +the actual `Graph` structure that we describe here. + +## Our current algorithm + +We divide region variables into two groups: Expanding and Contracting. +Expanding region variables are those that have a concrete region +predecessor (direct or indirect). Contracting region variables are +all others. + +We first resolve the values of Expanding region variables and then +process Contracting ones. We currently use an iterative, fixed-point +procedure (but read on, I believe this could be replaced with a linear +walk). Basically we iterate over the edges in the graph, ensuring +that, if the source of the edge has a value, then this value is a +subregion of the target value. If the target does not yet have a +value, it takes the value from the source. If the target already had +a value, then the resulting value is Least Upper Bound of the old and +new values. When we are done, each Expanding node will have the +smallest region that it could possibly have and still satisfy the +constraints. + +We next process the Contracting nodes. Here we again iterate over the +edges, only this time we move values from target to source (if the +source is a Contracting node). For each contracting node, we compute +its value as the GLB of all its successors. Basically contracting +nodes ensure that there is overlap between their successors; we will +ultimately infer the largest overlap possible. + +# The Region Hierarchy + +## Without closures + +Let's first consider the region hierarchy without thinking about +closures, because they add a lot of complications. The region +hierarchy *basically* mirrors the lexical structure of the code. +There is a region for every piece of 'evaluation' that occurs, meaning +every expression, block, and pattern (patterns are considered to +"execute" by testing the value they are applied to and creating any +relevant bindings). So, for example: + + fn foo(x: int, y: int) { // -+ + // +------------+ // | + // | +-----+ // | + // | +-+ +-+ +-+ // | + // | | | | | | | // | + // v v v v v v v // | + let z = x + y; // | + ... // | + } // -+ + + fn bar() { ... } + +In this example, there is a region for the fn body block as a whole, +and then a subregion for the declaration of the local variable. +Within that, there are sublifetimes for the assignment pattern and +also the expression `x + y`. 
The expression itself has sublifetimes +for evaluating `x` and and `y`. + +## Function calls + +Function calls are a bit tricky. I will describe how we handle them +*now* and then a bit about how we can improve them (Issue #6268). + +Consider a function call like `func(expr1, expr2)`, where `func`, +`arg1`, and `arg2` are all arbitrary expressions. Currently, +we construct a region hierarchy like: + + +----------------+ + | | + +--+ +---+ +---+| + v v v v v vv + func(expr1, expr2) + +Here you can see that the call as a whole has a region and the +function plus arguments are subregions of that. As a side-effect of +this, we get a lot of spurious errors around nested calls, in +particular when combined with `&mut` functions. For example, a call +like this one + + self.foo(self.bar()) + +where both `foo` and `bar` are `&mut self` functions will always yield +an error. + +Here is a more involved example (which is safe) so we can see what's +going on: + + struct Foo { f: uint, g: uint } + ... + fn add(p: &mut uint, v: uint) { + *p += v; + } + ... + fn inc(p: &mut uint) -> uint { + *p += 1; *p + } + fn weird() { + let mut x: ~Foo = ~Foo { ... }; + 'a: add(&mut (*x).f, + 'b: inc(&mut (*x).f)) // (*) + } + +The important part is the line marked `(*)` which contains a call to +`add()`. The first argument is a mutable borrow of the field `f`. The +second argument also borrows the field `f`. Now, in the current borrow +checker, the first borrow is given the lifetime of the call to +`add()`, `'a`. The second borrow is given the lifetime of `'b` of the +call to `inc()`. Because `'b` is considered to be a sublifetime of +`'a`, an error is reported since there are two co-existing mutable +borrows of the same data. + +However, if we were to examine the lifetimes a bit more carefully, we +can see that this error is unnecessary. Let's examine the lifetimes +involved with `'a` in detail. We'll break apart all the steps involved +in a call expression: + + 'a: { + 'a_arg1: let a_temp1: ... = add; + 'a_arg2: let a_temp2: &'a mut uint = &'a mut (*x).f; + 'a_arg3: let a_temp3: uint = { + let b_temp1: ... = inc; + let b_temp2: &'b = &'b mut (*x).f; + 'b_call: b_temp1(b_temp2) + }; + 'a_call: a_temp1(a_temp2, a_temp3) // (**) + } + +Here we see that the lifetime `'a` includes a number of substatements. +In particular, there is this lifetime I've called `'a_call` that +corresponds to the *actual execution of the function `add()`*, after +all arguments have been evaluated. There is a corresponding lifetime +`'b_call` for the execution of `inc()`. If we wanted to be precise +about it, the lifetime of the two borrows should be `'a_call` and +`'b_call` respectively, since the borrowed pointers that were created +will not be dereferenced except during the execution itself. + +However, this model by itself is not sound. The reason is that +while the two borrowed pointers that are created will never be used +simultaneously, it is still true that the first borrowed pointer is +*created* before the second argument is evaluated, and so even though +it will not be *dereferenced* during the evaluation of the second +argument, it can still be *invalidated* by that evaluation. Consider +this similar but unsound example: + + struct Foo { f: uint, g: uint } + ... + fn add(p: &mut uint, v: uint) { + *p += v; + } + ... + fn consume(x: ~Foo) -> uint { + x.f + x.g + } + fn weird() { + let mut x: ~Foo = ~Foo { ... 
}; + 'a: add(&mut (*x).f, consume(x)) // (*) + } + +In this case, the second argument to `add` actually consumes `x`, thus +invalidating the first argument. + +So, for now, we exclude the `call` lifetimes from our model. +Eventually I would like to include them, but we will have to make the +borrow checker handle this situation correctly. In particular, if +there is a borrowed pointer created whose lifetime does not enclose +the borrow expression, we must issue sufficient restrictions to ensure +that the pointee remains valid. + +## Adding closures + +The other significant complication to the region hierarchy is +closures. I will describe here how closures should work, though some +of the work to implement this model is ongoing at the time of this +writing. + +The body of closures are type-checked along with the function that +creates them. However, unlike other expressions that appear within the +function body, it is not entirely obvious when a closure body executes +with respect to the other expressions. This is because the closure +body will execute whenever the closure is called; however, we can +never know precisely when the closure will be called, especially +without some sort of alias analysis. + +However, we can place some sort of limits on when the closure +executes. In particular, the type of every closure `fn:'r K` includes +a region bound `'r`. This bound indicates the maximum lifetime of that +closure; once we exit that region, the closure cannot be called +anymore. Therefore, we say that the lifetime of the closure body is a +sublifetime of the closure bound, but the closure body itself is unordered +with respect to other parts of the code. + +For example, consider the following fragment of code: + + 'a: { + let closure: fn:'a() = || 'b: { + 'c: ... + }; + 'd: ... + } + +Here we have four lifetimes, `'a`, `'b`, `'c`, and `'d`. The closure +`closure` is bounded by the lifetime `'a`. The lifetime `'b` is the +lifetime of the closure body, and `'c` is some statement within the +closure body. Finally, `'d` is a statement within the outer block that +created the closure. + +We can say that the closure body `'b` is a sublifetime of `'a` due to +the closure bound. By the usual lexical scoping conventions, the +statement `'c` is clearly a sublifetime of `'b`, and `'d` is a +sublifetime of `'d`. However, there is no ordering between `'c` and +`'d` per se (this kind of ordering between statements is actually only +an issue for dataflow; passes like the borrow checker must assume that +closures could execute at any time from the moment they are created +until they go out of scope). + +### Complications due to closure bound inference + +There is only one problem with the above model: in general, we do not +actually *know* the closure bounds during region inference! In fact, +closure bounds are almost always region variables! This is very tricky +because the inference system implicitly assumes that we can do things +like compute the LUB of two scoped lifetimes without needing to know +the values of any variables. + +Here is an example to illustrate the problem: + + fn identify(x: T) -> T { x } + + fn foo() { // 'foo is the function body + 'a: { + let closure = identity(|| 'b: { + 'c: ... + }); + 'd: closure(); + } + 'e: ...; + } + +In this example, the closure bound is not explicit. At compile time, +we will create a region variable (let's call it `V0`) to represent the +closure bound. + +The primary difficulty arises during the constraint propagation phase. 
+Imagine there is some variable with incoming edges from `'c` and `'d`. +This means that the value of the variable must be `LUB('c, +'d)`. However, without knowing what the closure bound `V0` is, we +can't compute the LUB of `'c` and `'d`! Any we don't know the closure +bound until inference is done. + +The solution is to rely on the fixed point nature of inference. +Basically, when we must compute `LUB('c, 'd)`, we just use the current +value for `V0` as the closure's bound. If `V0`'s binding should +change, then we will do another round of inference, and the result of +`LUB('c, 'd)` will change. + +One minor implication of this is that the graph does not in fact track +the full set of dependencies between edges. We cannot easily know +whether the result of a LUB computation will change, since there may +be indirect dependencies on other variables that are not reflected on +the graph. Therefore, we must *always* iterate over all edges when +doing the fixed point calculation, not just those adjacent to nodes +whose values have changed. + +Were it not for this requirement, we could in fact avoid fixed-point +iteration altogether. In that universe, we could instead first +identify and remove strongly connected components (SCC) in the graph. +Note that such components must consist solely of region variables; all +of these variables can effectively be unified into a single variable. +Once SCCs are removed, we are left with a DAG. At this point, we +could walk the DAG in toplogical order once to compute the expanding +nodes, and again in reverse topological order to compute the +contracting nodes. However, as I said, this does not work given the +current treatment of closure bounds, but perhaps in the future we can +address this problem somehow and make region inference somewhat more +efficient. Note that this is solely a matter of performance, not +expressiveness. + +# Skolemization and functions + +One of the trickiest and most subtle aspects of regions is dealing +with the fact that region variables are bound in function types. I +strongly suggest that if you want to understand the situation, you +read this paper (which is, admittedly, very long, but you don't have +to read the whole thing): + +http://research.microsoft.com/en-us/um/people/simonpj/papers/higher-rank/ + +Although my explanation will never compete with SPJ's (for one thing, +his is approximately 100 pages), I will attempt to explain the basic +problem and also how we solve it. Note that the paper only discusses +subtyping, not the computation of LUB/GLB. + +The problem we are addressing is that there is a kind of subtyping +between functions with bound region parameters. Consider, for +example, whether the following relation holds: + + fn(&'a int) <: &fn(&'b int)? (Yes, a => b) + +The answer is that of course it does. These two types are basically +the same, except that in one we used the name `a` and one we used +the name `b`. + +In the examples that follow, it becomes very important to know whether +a lifetime is bound in a function type (that is, is a lifetime +parameter) or appears free (is defined in some outer scope). +Therefore, from now on I will write the bindings explicitly, using a +notation like `fn(&'a int)` to indicate that `a` is a lifetime +parameter. + +Now let's consider two more function types. Here, we assume that the +`self` lifetime is defined somewhere outside and hence is not a +lifetime parameter bound by the function type (it "appears free"): + + fn(&'a int) <: &fn(&'self int)? 
(Yes, a => self) + +This subtyping relation does in fact hold. To see why, you have to +consider what subtyping means. One way to look at `T1 <: T2` is to +say that it means that it is always ok to treat an instance of `T1` as +if it had the type `T2`. So, with our functions, it is always ok to +treat a function that can take pointers with any lifetime as if it +were a function that can only take a pointer with the specific +lifetime `&self`. After all, `&self` is a lifetime, after all, and +the function can take values of any lifetime. + +You can also look at subtyping as the *is a* relationship. This amounts +to the same thing: a function that accepts pointers with any lifetime +*is a* function that accepts pointers with some specific lifetime. + +So, what if we reverse the order of the two function types, like this: + + fn(&'self int) <: &fn(&'a int)? (No) + +Does the subtyping relationship still hold? The answer of course is +no. In this case, the function accepts *only the lifetime `&self`*, +so it is not reasonable to treat it as if it were a function that +accepted any lifetime. + +What about these two examples: + + fn(&'a int, &'b int) <: &fn(&'a int, &'a int)? (Yes) + fn(&'a int, &'a int) <: &fn(&'a int, &'b int)? (No) + +Here, it is true that functions which take two pointers with any two +lifetimes can be treated as if they only accepted two pointers with +the same lifetime, but not the reverse. + +## The algorithm + +Here is the algorithm we use to perform the subtyping check: + +1. Replace all bound regions in the subtype with new variables +2. Replace all bound regions in the supertype with skolemized + equivalents. A "skolemized" region is just a new fresh region + name. +3. Check that the parameter and return types match as normal +4. Ensure that no skolemized regions 'leak' into region variables + visible from "the outside" + +Let's walk through some examples and see how this algorithm plays out. + +#### First example + +We'll start with the first example, which was: + + 1. fn(&'a T) <: &fn(&'b T)? Yes: a -> b + +After steps 1 and 2 of the algorithm we will have replaced the types +like so: + + 1. fn(&'A T) <: &fn(&'x T)? + +Here the upper case `&A` indicates a *region variable*, that is, a +region whose value is being inferred by the system. I also replaced +`&b` with `&x`---I'll use letters late in the alphabet (`x`, `y`, `z`) +to indicate skolemized region names. We can assume they don't appear +elsewhere. Note that neither the sub- nor the supertype bind any +region names anymore (as indicated by the absence of `<` and `>`). + +The next step is to check that the parameter types match. Because +parameters are contravariant, this means that we check whether: + + &'x T <: &'A T + +Region pointers are contravariant so this implies that + + &A <= &x + +must hold, where `<=` is the subregion relationship. Processing +*this* constrain simply adds a constraint into our graph that `&A <= +&x` and is considered successful (it can, for example, be satisfied by +choosing the value `&x` for `&A`). + +So far we have encountered no error, so the subtype check succeeds. + +#### The third example + +Now let's look first at the third example, which was: + + 3. fn(&'self T) <: &fn(&'b T)? No! + +After steps 1 and 2 of the algorithm we will have replaced the types +like so: + + 3. fn(&'self T) <: &fn(&'x T)? + +This looks pretty much the same as before, except that on the LHS +`&self` was not bound, and hence was left as-is and not replaced with +a variable. 
The next step is again to check that the parameter types +match. This will ultimately require (as before) that `&self` <= `&x` +must hold: but this does not hold. `self` and `x` are both distinct +free regions. So the subtype check fails. + +#### Checking for skolemization leaks + +You may be wondering about that mysterious last step in the algorithm. +So far it has not been relevant. The purpose of that last step is to +catch something like *this*: + + fn() -> fn(&'a T) <: &fn() -> fn(&'b T)? No. + +Here the function types are the same but for where the binding occurs. +The subtype returns a function that expects a value in precisely one +region. The supertype returns a function that expects a value in any +region. If we allow an instance of the subtype to be used where the +supertype is expected, then, someone could call the fn and think that +the return value has type `fn(&'b T)` when it really has type +`fn(&'a T)` (this is case #3, above). Bad. + +So let's step through what happens when we perform this subtype check. +We first replace the bound regions in the subtype (the supertype has +no bound regions). This gives us: + + fn() -> fn(&'A T) <: &fn() -> fn(&'b T)? + +Now we compare the return types, which are covariant, and hence we have: + + fn(&'A T) <: &fn(&'b T)? + +Here we skolemize the bound region in the supertype to yield: + + fn(&'A T) <: &fn(&'x T)? + +And then proceed to compare the argument types: + + &'x T <: &'A T + &A <= &x + +Finally, this is where it gets interesting! This is where an error +*should* be reported. But in fact this will not happen. The reason why +is that `A` is a variable: we will infer that its value is the fresh +region `x` and think that everything is happy. In fact, this behavior +is *necessary*, it was key to the first example we walked through. + +The difference between this example and the first one is that the variable +`A` already existed at the point where the skolemization occurred. In +the first example, you had two functions: + + fn(&'a T) <: &fn(&'b T) + +and hence `&A` and `&x` were created "together". In general, the +intention of the skolemized names is that they are supposed to be +fresh names that could never be equal to anything from the outside. +But when inference comes into play, we might not be respecting this +rule. + +So the way we solve this is to add a fourth step that examines the +constraints that refer to skolemized names. Basically, consider a +non-directed verison of the constraint graph. Let `Tainted(x)` be the +set of all things reachable from a skolemized variable `x`. +`Tainted(x)` should not contain any regions that existed before the +step at which the skolemization was performed. So this case here +would fail because `&x` was created alone, but is relatable to `&A`. + +## Computing the LUB and GLB + +The paper I pointed you at is written for Haskell. It does not +therefore considering subtyping and in particular does not consider +LUB or GLB computation. We have to consider this. Here is the +algorithm I implemented. + +First though, let's discuss what we are trying to compute in more +detail. The LUB is basically the "common supertype" and the GLB is +"common subtype"; one catch is that the LUB should be the +*most-specific* common supertype and the GLB should be *most general* +common subtype (as opposed to any common supertype or any common +subtype). 
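+
+(As a point of comparison, for plain scoped regions the answer can be
+read directly off the nesting structure; for instance, if one scope is
+nested inside another then, in the `re_scope` notation used elsewhere
+in the compiler,
+
+    LUB(re_scope(inner), re_scope(outer)) = re_scope(outer)
+    GLB(re_scope(inner), re_scope(outer)) = re_scope(inner)
+
+The subtle cases discussed below involve bound regions in fn types.)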
+ +Anyway, to help clarify, here is a table containing some +function pairs and their LUB/GLB: + +``` +Type 1 Type 2 LUB GLB +fn(&a) fn(&X) fn(&X) fn(&a) +fn(&A) fn(&X) -- fn(&a) +fn(&a, &b) fn(&x, &x) fn(&a, &a) fn(&a, &b) +fn(&a, &b, &a) fn(&x, &y, &y) fn(&a, &a, &a) fn(&a,&b,&c) +``` + +### Conventions + +I use lower-case letters (e.g., `&a`) for bound regions and upper-case +letters for free regions (`&A`). Region variables written with a +dollar-sign (e.g., `$a`). I will try to remember to enumerate the +bound-regions on the fn type as well (e.g., `fn(&a)`). + +### High-level summary + +Both the LUB and the GLB algorithms work in a similar fashion. They +begin by replacing all bound regions (on both sides) with fresh region +inference variables. Therefore, both functions are converted to types +that contain only free regions. We can then compute the LUB/GLB in a +straightforward way, as described in `combine.rs`. This results in an +interim type T. The algorithms then examine the regions that appear +in T and try to, in some cases, replace them with bound regions to +yield the final result. + +To decide whether to replace a region `R` that appears in `T` with a +bound region, the algorithms make use of two bits of information. +First is a set `V` that contains all region variables created as part +of the LUB/GLB computation. `V` will contain the region variables +created to replace the bound regions in the input types, but it also +contains 'intermediate' variables created to represent the LUB/GLB of +individual regions. Basically, when asked to compute the LUB/GLB of a +region variable with another region, the inferencer cannot oblige +immediately since the valuese of that variables are not known. +Therefore, it creates a new variable that is related to the two +regions. For example, the LUB of two variables `$x` and `$y` is a +fresh variable `$z` that is constrained such that `$x <= $z` and `$y +<= $z`. So `V` will contain these intermediate variables as well. + +The other important factor in deciding how to replace a region in T is +the function `Tainted($r)` which, for a region variable, identifies +all regions that the region variable is related to in some way +(`Tainted()` made an appearance in the subtype computation as well). + +### LUB + +The LUB algorithm proceeds in three steps: + +1. Replace all bound regions (on both sides) with fresh region + inference variables. +2. Compute the LUB "as normal", meaning compute the GLB of each + pair of argument types and the LUB of the return types and + so forth. Combine those to a new function type `F`. +3. Replace each region `R` that appears in `F` as follows: + - Let `V` be the set of variables created during the LUB + computational steps 1 and 2, as described in the previous section. + - If `R` is not in `V`, replace `R` with itself. + - If `Tainted(R)` contains a region that is not in `V`, + replace `R` with itself. + - Otherwise, select the earliest variable in `Tainted(R)` that originates + from the left-hand side and replace `R` with the bound region that + this variable was a replacement for. + +So, let's work through the simplest example: `fn(&A)` and `fn(&a)`. +In this case, `&a` will be replaced with `$a` and the interim LUB type +`fn($b)` will be computed, where `$b=GLB(&A,$a)`. Therefore, `V = +{$a, $b}` and `Tainted($b) = { $b, $a, &A }`. When we go to replace +`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`, +we leave `$b` as is. 
When region inference happens, `$b` will be +resolved to `&A`, as we wanted. + +Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. In +this case, we'll end up with a (pre-replacement) LUB type of `fn(&g, +&h)` and a graph that looks like: + +``` + $a $b *--$x + \ \ / / + \ $h-* / + $g-----------* +``` + +Here `$g` and `$h` are fresh variables that are created to represent +the LUB/GLB of things requiring inference. This means that `V` and +`Tainted` will look like: + +``` +V = {$a, $b, $g, $h, $x} +Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x } +``` + +Therefore we replace both `$g` and `$h` with `$a`, and end up +with the type `fn(&a, &a)`. + +### GLB + +The procedure for computing the GLB is similar. The difference lies +in computing the replacements for the various variables. For each +region `R` that appears in the type `F`, we again compute `Tainted(R)` +and examine the results: + +1. If `R` is not in `V`, it is not replaced. +2. Else, if `Tainted(R)` contains only variables in `V`, and it + contains exactly one variable from the LHS and one variable from + the RHS, then `R` can be mapped to the bound version of the + variable from the LHS. +3. Else, if `Tainted(R)` contains no variable from the LHS and no + variable from the RHS, then `R` can be mapped to itself. +4. Else, `R` is mapped to a fresh bound variable. + +These rules are pretty complex. Let's look at some examples to see +how they play out. + +Out first example was `fn(&a)` and `fn(&X)`. In this case, `&a` will +be replaced with `$a` and we will ultimately compute a +(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`. +Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}. To find the +replacement for `$g` we consult the rules above: +- Rule (1) does not apply because `$g \in V` +- Rule (2) does not apply because `&X \in Tainted($g)` +- Rule (3) does not apply because `$a \in Tainted($g)` +- Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`. +So our final result is `fn(&z)`, which is correct. + +The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again +have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`. +Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`. In this case, +by rule (3), `$g` is mapped to itself, and hence the result is +`fn($g)`. This result is correct (in this case, at least), but it is +indicative of a case that *can* lead us into concluding that there is +no GLB when in fact a GLB does exist. See the section "Questionable +Results" below for more details. + +The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as +before, we'll end up with `F=fn($g, $h)` where `Tainted($g) = +Tainted($h) = {$g, $h, $a, $b, $c}`. Only rule (4) applies and hence +we'll select fresh bound variables `y` and `z` and wind up with +`fn(&y, &z)`. + +For the last example, let's consider what may seem trivial, but is +not: `fn(&a, &a)` and `fn(&b, &b)`. In this case, we'll get `F=fn($g, +$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a, +$x}`. Both of these sets contain exactly one bound variable from each +side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which +is the desired result. + +### Shortcomings and correctness + +You may be wondering whether this algorithm is correct. The answer is +"sort of". There are definitely cases where they fail to compute a +result even though a correct result exists. I believe, though, that +if they succeed, then the result is valid, and I will attempt to +convince you. 
The basic argument is that the "pre-replacement" step +computes a set of constraints. The replacements, then, attempt to +satisfy those constraints, using bound identifiers where needed. + +For now I will briefly go over the cases for LUB/GLB and identify +their intent: + +- LUB: + - The region variables that are substituted in place of bound regions + are intended to collect constraints on those bound regions. + - If Tainted(R) contains only values in V, then this region is unconstrained + and can therefore be generalized, otherwise it cannot. +- GLB: + - The region variables that are substituted in place of bound regions + are intended to collect constraints on those bound regions. + - If Tainted(R) contains exactly one variable from each side, and + only variables in V, that indicates that those two bound regions + must be equated. + - Otherwise, if Tainted(R) references any variables from left or right + side, then it is trying to combine a bound region with a free one or + multiple bound regions, so we need to select fresh bound regions. + +Sorry this is more of a shorthand to myself. I will try to write up something +more convincing in the future. + +#### Where are the algorithms wrong? + +- The pre-replacement computation can fail even though using a + bound-region would have succeeded. +- We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the + GLB of $a and $b. But if inference finds that $a and $b must be mapped + to regions without a GLB, then this is effectively a failure to compute + the GLB. However, the result `fn<$c>(fn($c))` is a valid GLB. + +*/ diff --git a/src/librustc/middle/typeck/infer/region_inference.rs b/src/librustc/middle/typeck/infer/region_inference/mod.rs similarity index 52% rename from src/librustc/middle/typeck/infer/region_inference.rs rename to src/librustc/middle/typeck/infer/region_inference/mod.rs index db17405fc2663..96cb5d3c747c3 100644 --- a/src/librustc/middle/typeck/infer/region_inference.rs +++ b/src/librustc/middle/typeck/infer/region_inference/mod.rs @@ -8,533 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Region inference module. - -# Introduction - -Region inference uses a somewhat more involved algorithm than type -inference. It is not the most efficient thing ever written though it -seems to work well enough in practice (famous last words). The reason -that we use a different algorithm is because, unlike with types, it is -impractical to hand-annotate with regions (in some cases, there aren't -even the requisite syntactic forms). So we have to get it right, and -it's worth spending more time on a more involved analysis. Moreover, -regions are a simpler case than types: they don't have aggregate -structure, for example. - -Unlike normal type inference, which is similar in spirit to H-M and thus -works progressively, the region type inference works by accumulating -constraints over the course of a function. Finally, at the end of -processing a function, we process and solve the constraints all at -once. 
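To make that shape concrete, here is a small, self-contained sketch of the
accumulate-then-solve structure. This is not the code in this module (the
real `RegionVarBindings` appears further down, with richer constraint and
origin types); the names are simplified stand-ins and the string origins are
invented for illustration:

```
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct RegionVid(usize);

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Constraint {
    VarSubVar(RegionVid, RegionVid),
    // The real module also has region-sub-variable and variable-sub-region
    // forms (and, after this change, region-sub-region); elided here.
}

#[derive(Default)]
struct RegionVarBindings {
    num_vars: usize,
    // Each constraint is stored with the reason it was added, so that an
    // error found at solve time can still point at a useful location.
    constraints: HashMap<Constraint, &'static str>,
}

impl RegionVarBindings {
    fn new_region_var(&mut self) -> RegionVid {
        self.num_vars += 1;
        RegionVid(self.num_vars - 1)
    }

    // Recording a subregion relationship does no checking at all; it just
    // files the constraint away for the final solve.
    fn make_subregion(&mut self, origin: &'static str,
                      sub: RegionVid, sup: RegionVid) {
        self.constraints.insert(Constraint::VarSubVar(sub, sup), origin);
    }

    // At the end of the function, every accumulated constraint is handed
    // to the solver at once (the passes described below).
    fn resolve_regions(&self) -> Vec<(Constraint, &'static str)> {
        self.constraints.iter().map(|(c, o)| (*c, *o)).collect()
    }
}

fn main() {
    let mut rvb = RegionVarBindings::default();
    let a = rvb.new_region_var();
    let b = rvb.new_region_var();
    rvb.make_subregion("call expression", a, b);
    assert_eq!(rvb.resolve_regions().len(), 1);
}
```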
- -The constraints are always of one of three possible forms: - -- ConstrainVarSubVar(R_i, R_j) states that region variable R_i - must be a subregion of R_j -- ConstrainRegSubVar(R, R_i) states that the concrete region R - (which must not be a variable) must be a subregion of the varibale R_i -- ConstrainVarSubReg(R_i, R) is the inverse - -# Building up the constraints - -Variables and constraints are created using the following methods: - -- `new_region_var()` creates a new, unconstrained region variable; -- `make_subregion(R_i, R_j)` states that R_i is a subregion of R_j -- `lub_regions(R_i, R_j) -> R_k` returns a region R_k which is - the smallest region that is greater than both R_i and R_j -- `glb_regions(R_i, R_j) -> R_k` returns a region R_k which is - the greatest region that is smaller than both R_i and R_j - -The actual region resolution algorithm is not entirely -obvious, though it is also not overly complex. I'll explain -the algorithm as it currently works, then explain a somewhat -more complex variant that would probably scale better for -large graphs (and possibly all graphs). - -## Snapshotting - -It is also permitted to try (and rollback) changes to the graph. This -is done by invoking `start_snapshot()`, which returns a value. Then -later you can call `rollback_to()` which undoes the work. -Alternatively, you can call `commit()` which ends all snapshots. -Snapshots can be recursive---so you can start a snapshot when another -is in progress, but only the root snapshot can "commit". - -# Resolving constraints - -The constraint resolution algorithm is not super complex but also not -entirely obvious. Here I describe the problem somewhat abstractly, -then describe how the current code works, and finally describe a -better solution that is as of yet unimplemented. There may be other, -smarter ways of doing this with which I am unfamiliar and can't be -bothered to research at the moment. - NDM - -## The problem - -Basically our input is a directed graph where nodes can be divided -into two categories: region variables and concrete regions. Each edge -`R -> S` in the graph represents a constraint that the region `R` is a -subregion of the region `S`. - -Region variable nodes can have arbitrary degree. There is one region -variable node per region variable. - -Each concrete region node is associated with some, well, concrete -region: e.g., a free lifetime, or the region for a particular scope. -Note that there may be more than one concrete region node for a -particular region value. Moreover, because of how the graph is built, -we know that all concrete region nodes have either in-degree 1 or -out-degree 1. - -Before resolution begins, we build up the constraints in a hashmap -that maps `Constraint` keys to spans. During resolution, we construct -the actual `Graph` structure that we describe here. - -## Our current algorithm - -We divide region variables into two groups: Expanding and Contracting. -Expanding region variables are those that have a concrete region -predecessor (direct or indirect). Contracting region variables are -all others. - -We first resolve the values of Expanding region variables and then -process Contracting ones. We currently use an iterative, fixed-point -procedure (but read on, I believe this could be replaced with a linear -walk). Basically we iterate over the edges in the graph, ensuring -that, if the source of the edge has a value, then this value is a -subregion of the target value. 
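In code, this expansion pass can be pictured with a toy model in which a
region is just a number, a larger number stands for a larger region, and the
LUB of two regions is therefore the larger of the two numbers (only an
illustration; the real subregion relation is not a total order):

```
// Toy model of the expansion pass: regions are numbers, `sub <= sup` is the
// numeric ordering, and the LUB of two regions is the larger number.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Node {
    NoValue,
    Value(u32),
}

// `concrete_edges` holds constraints "concrete region R <= variable V";
// `var_edges` holds constraints "variable A <= variable B".
fn expand(num_vars: usize,
          concrete_edges: &[(u32, usize)],
          var_edges: &[(usize, usize)]) -> Vec<Node> {
    let mut values = vec![Node::NoValue; num_vars];
    // Iterate to a fixed point, pushing values upward along the edges.
    let mut changed = true;
    while changed {
        changed = false;
        for &(r, v) in concrete_edges {
            changed |= grow(&mut values[v], r);
        }
        for &(a, b) in var_edges {
            if let Node::Value(r) = values[a] {
                changed |= grow(&mut values[b], r);
            }
        }
    }
    values
}

// Move `node` up to the LUB of its old value and `r`.
fn grow(node: &mut Node, r: u32) -> bool {
    match *node {
        Node::NoValue => { *node = Node::Value(r); true }
        Node::Value(old) if old < r => { *node = Node::Value(r); true }
        Node::Value(_) => false,
    }
}

fn main() {
    // R0 <= $0, R5 <= $1, $0 <= $1: expansion gives $0 = R0 and $1 = R5.
    let values = expand(2, &[(0, 0), (5, 1)], &[(0, 1)]);
    assert_eq!(values, vec![Node::Value(0), Node::Value(5)]);
}
```

In prose, the same rule is as follows.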
If the target does not yet have a -value, it takes the value from the source. If the target already had -a value, then the resulting value is Least Upper Bound of the old and -new values. When we are done, each Expanding node will have the -smallest region that it could possibly have and still satisfy the -constraints. - -We next process the Contracting nodes. Here we again iterate over the -edges, only this time we move values from target to source (if the -source is a Contracting node). For each contracting node, we compute -its value as the GLB of all its successors. Basically contracting -nodes ensure that there is overlap between their successors; we will -ultimately infer the largest overlap possible. - -### A better algorithm - -Fixed-point iteration is not necessary. What we ought to do is first -identify and remove strongly connected components (SCC) in the graph. -Note that such components must consist solely of region variables; all -of these variables can effectively be unified into a single variable. - -Once SCCs are removed, we are left with a DAG. At this point, we can -walk the DAG in toplogical order once to compute the expanding nodes, -and again in reverse topological order to compute the contracting -nodes. The main reason I did not write it this way is that I did not -feel like implementing the SCC and toplogical sort algorithms at the -moment. - -# Skolemization and functions - -One of the trickiest and most subtle aspects of regions is dealing -with the fact that region variables are bound in function types. I -strongly suggest that if you want to understand the situation, you -read this paper (which is, admittedly, very long, but you don't have -to read the whole thing): - -http://research.microsoft.com/en-us/um/people/simonpj/papers/higher-rank/ - -Although my explanation will never compete with SPJ's (for one thing, -his is approximately 100 pages), I will attempt to explain the basic -problem and also how we solve it. Note that the paper only discusses -subtyping, not the computation of LUB/GLB. - -The problem we are addressing is that there is a kind of subtyping -between functions with bound region parameters. Consider, for -example, whether the following relation holds: - - fn(&'a int) <: &fn(&'b int)? (Yes, a => b) - -The answer is that of course it does. These two types are basically -the same, except that in one we used the name `a` and one we used -the name `b`. - -In the examples that follow, it becomes very important to know whether -a lifetime is bound in a function type (that is, is a lifetime -parameter) or appears free (is defined in some outer scope). -Therefore, from now on I will write the bindings explicitly, using a -notation like `fn(&'a int)` to indicate that `a` is a lifetime -parameter. - -Now let's consider two more function types. Here, we assume that the -`self` lifetime is defined somewhere outside and hence is not a -lifetime parameter bound by the function type (it "appears free"): - - fn(&'a int) <: &fn(&'self int)? (Yes, a => self) - -This subtyping relation does in fact hold. To see why, you have to -consider what subtyping means. One way to look at `T1 <: T2` is to -say that it means that it is always ok to treat an instance of `T1` as -if it had the type `T2`. So, with our functions, it is always ok to -treat a function that can take pointers with any lifetime as if it -were a function that can only take a pointer with the specific -lifetime `&self`. 
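A self-contained illustration of this direction (the helper names here are
invented, and `'static` plays the part that the specific lifetime `&self`
plays in the text):

```
// A function that works for *any* lifetime...
fn first<'a>(x: &'a i32) -> i32 {
    *x
}

// ...and a caller that demands a function over one specific lifetime.
fn call_with_static(f: fn(&'static i32) -> i32) -> i32 {
    static N: i32 = 3;
    f(&N)
}

// A function that insists on the specific lifetime.
fn needs_static(x: &'static i32) -> i32 {
    *x
}

fn main() {
    // Accepted: `first` has the fully general type
    // `for<'a> fn(&'a i32) -> i32` and may be passed where
    // `fn(&'static i32) -> i32` is expected.
    assert_eq!(call_with_static(first), 3);
    assert_eq!(needs_static(&7), 7);

    // Rejected (the reversed direction discussed below): a function that
    // only accepts `'static` cannot stand in for one that must accept any
    // lifetime. Uncommenting the next line fails to compile.
    // let _f: for<'r> fn(&'r i32) -> i32 = needs_static;
}
```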
After all, `&self` is a lifetime, after all, and -the function can take values of any lifetime. - -You can also look at subtyping as the *is a* relationship. This amounts -to the same thing: a function that accepts pointers with any lifetime -*is a* function that accepts pointers with some specific lifetime. - -So, what if we reverse the order of the two function types, like this: - - fn(&'self int) <: &fn(&'a int)? (No) - -Does the subtyping relationship still hold? The answer of course is -no. In this case, the function accepts *only the lifetime `&self`*, -so it is not reasonable to treat it as if it were a function that -accepted any lifetime. - -What about these two examples: - - fn(&'a int, &'b int) <: &fn(&'a int, &'a int)? (Yes) - fn(&'a int, &'a int) <: &fn(&'a int, &'b int)? (No) - -Here, it is true that functions which take two pointers with any two -lifetimes can be treated as if they only accepted two pointers with -the same lifetime, but not the reverse. - -## The algorithm - -Here is the algorithm we use to perform the subtyping check: - -1. Replace all bound regions in the subtype with new variables -2. Replace all bound regions in the supertype with skolemized - equivalents. A "skolemized" region is just a new fresh region - name. -3. Check that the parameter and return types match as normal -4. Ensure that no skolemized regions 'leak' into region variables - visible from "the outside" - -Let's walk through some examples and see how this algorithm plays out. - -#### First example - -We'll start with the first example, which was: - - 1. fn(&'a T) <: &fn(&'b T)? Yes: a -> b - -After steps 1 and 2 of the algorithm we will have replaced the types -like so: - - 1. fn(&'A T) <: &fn(&'x T)? - -Here the upper case `&A` indicates a *region variable*, that is, a -region whose value is being inferred by the system. I also replaced -`&b` with `&x`---I'll use letters late in the alphabet (`x`, `y`, `z`) -to indicate skolemized region names. We can assume they don't appear -elsewhere. Note that neither the sub- nor the supertype bind any -region names anymore (as indicated by the absence of `<` and `>`). - -The next step is to check that the parameter types match. Because -parameters are contravariant, this means that we check whether: - - &'x T <: &'A T - -Region pointers are contravariant so this implies that - - &A <= &x - -must hold, where `<=` is the subregion relationship. Processing -*this* constrain simply adds a constraint into our graph that `&A <= -&x` and is considered successful (it can, for example, be satisfied by -choosing the value `&x` for `&A`). - -So far we have encountered no error, so the subtype check succeeds. - -#### The third example - -Now let's look first at the third example, which was: - - 3. fn(&'self T) <: &fn(&'b T)? No! - -After steps 1 and 2 of the algorithm we will have replaced the types -like so: - - 3. fn(&'self T) <: &fn(&'x T)? - -This looks pretty much the same as before, except that on the LHS -`&self` was not bound, and hence was left as-is and not replaced with -a variable. The next step is again to check that the parameter types -match. This will ultimately require (as before) that `&self` <= `&x` -must hold: but this does not hold. `self` and `x` are both distinct -free regions. So the subtype check fails. - -#### Checking for skolemization leaks - -You may be wondering about that mysterious last step in the algorithm. -So far it has not been relevant. 
The purpose of that last step is to -catch something like *this*: - - fn() -> fn(&'a T) <: &fn() -> fn(&'b T)? No. - -Here the function types are the same but for where the binding occurs. -The subtype returns a function that expects a value in precisely one -region. The supertype returns a function that expects a value in any -region. If we allow an instance of the subtype to be used where the -supertype is expected, then, someone could call the fn and think that -the return value has type `fn(&'b T)` when it really has type -`fn(&'a T)` (this is case #3, above). Bad. - -So let's step through what happens when we perform this subtype check. -We first replace the bound regions in the subtype (the supertype has -no bound regions). This gives us: - - fn() -> fn(&'A T) <: &fn() -> fn(&'b T)? - -Now we compare the return types, which are covariant, and hence we have: - - fn(&'A T) <: &fn(&'b T)? - -Here we skolemize the bound region in the supertype to yield: - - fn(&'A T) <: &fn(&'x T)? - -And then proceed to compare the argument types: - - &'x T <: &'A T - &A <= &x - -Finally, this is where it gets interesting! This is where an error -*should* be reported. But in fact this will not happen. The reason why -is that `A` is a variable: we will infer that its value is the fresh -region `x` and think that everything is happy. In fact, this behavior -is *necessary*, it was key to the first example we walked through. - -The difference between this example and the first one is that the variable -`A` already existed at the point where the skolemization occurred. In -the first example, you had two functions: - - fn(&'a T) <: &fn(&'b T) - -and hence `&A` and `&x` were created "together". In general, the -intention of the skolemized names is that they are supposed to be -fresh names that could never be equal to anything from the outside. -But when inference comes into play, we might not be respecting this -rule. - -So the way we solve this is to add a fourth step that examines the -constraints that refer to skolemized names. Basically, consider a -non-directed verison of the constraint graph. Let `Tainted(x)` be the -set of all things reachable from a skolemized variable `x`. -`Tainted(x)` should not contain any regions that existed before the -step at which the skolemization was performed. So this case here -would fail because `&x` was created alone, but is relatable to `&A`. - -## Computing the LUB and GLB - -The paper I pointed you at is written for Haskell. It does not -therefore considering subtyping and in particular does not consider -LUB or GLB computation. We have to consider this. Here is the -algorithm I implemented. - -First though, let's discuss what we are trying to compute in more -detail. The LUB is basically the "common supertype" and the GLB is -"common subtype"; one catch is that the LUB should be the -*most-specific* common supertype and the GLB should be *most general* -common subtype (as opposed to any common supertype or any common -subtype). - -Anyway, to help clarify, here is a table containing some -function pairs and their LUB/GLB: - -``` -Type 1 Type 2 LUB GLB -fn(&a) fn(&X) fn(&X) fn(&a) -fn(&A) fn(&X) -- fn(&a) -fn(&a, &b) fn(&x, &x) fn(&a, &a) fn(&a, &b) -fn(&a, &b, &a) fn(&x, &y, &y) fn(&a, &a, &a) fn(&a,&b,&c) -``` - -### Conventions - -I use lower-case letters (e.g., `&a`) for bound regions and upper-case -letters for free regions (`&A`). Region variables written with a -dollar-sign (e.g., `$a`). 
I will try to remember to enumerate the -bound-regions on the fn type as well (e.g., `fn(&a)`). - -### High-level summary - -Both the LUB and the GLB algorithms work in a similar fashion. They -begin by replacing all bound regions (on both sides) with fresh region -inference variables. Therefore, both functions are converted to types -that contain only free regions. We can then compute the LUB/GLB in a -straightforward way, as described in `combine.rs`. This results in an -interim type T. The algorithms then examine the regions that appear -in T and try to, in some cases, replace them with bound regions to -yield the final result. - -To decide whether to replace a region `R` that appears in `T` with a -bound region, the algorithms make use of two bits of information. -First is a set `V` that contains all region variables created as part -of the LUB/GLB computation. `V` will contain the region variables -created to replace the bound regions in the input types, but it also -contains 'intermediate' variables created to represent the LUB/GLB of -individual regions. Basically, when asked to compute the LUB/GLB of a -region variable with another region, the inferencer cannot oblige -immediately since the valuese of that variables are not known. -Therefore, it creates a new variable that is related to the two -regions. For example, the LUB of two variables `$x` and `$y` is a -fresh variable `$z` that is constrained such that `$x <= $z` and `$y -<= $z`. So `V` will contain these intermediate variables as well. - -The other important factor in deciding how to replace a region in T is -the function `Tainted($r)` which, for a region variable, identifies -all regions that the region variable is related to in some way -(`Tainted()` made an appearance in the subtype computation as well). - -### LUB - -The LUB algorithm proceeds in three steps: - -1. Replace all bound regions (on both sides) with fresh region - inference variables. -2. Compute the LUB "as normal", meaning compute the GLB of each - pair of argument types and the LUB of the return types and - so forth. Combine those to a new function type `F`. -3. Replace each region `R` that appears in `F` as follows: - - Let `V` be the set of variables created during the LUB - computational steps 1 and 2, as described in the previous section. - - If `R` is not in `V`, replace `R` with itself. - - If `Tainted(R)` contains a region that is not in `V`, - replace `R` with itself. - - Otherwise, select the earliest variable in `Tainted(R)` that originates - from the left-hand side and replace `R` with the bound region that - this variable was a replacement for. - -So, let's work through the simplest example: `fn(&A)` and `fn(&a)`. -In this case, `&a` will be replaced with `$a` and the interim LUB type -`fn($b)` will be computed, where `$b=GLB(&A,$a)`. Therefore, `V = -{$a, $b}` and `Tainted($b) = { $b, $a, &A }`. When we go to replace -`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`, -we leave `$b` as is. When region inference happens, `$b` will be -resolved to `&A`, as we wanted. - -Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. In -this case, we'll end up with a (pre-replacement) LUB type of `fn(&g, -&h)` and a graph that looks like: - -``` - $a $b *--$x - \ \ / / - \ $h-* / - $g-----------* -``` - -Here `$g` and `$h` are fresh variables that are created to represent -the LUB/GLB of things requiring inference. 
This means that `V` and -`Tainted` will look like: - -``` -V = {$a, $b, $g, $h, $x} -Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x } -``` - -Therefore we replace both `$g` and `$h` with `$a`, and end up -with the type `fn(&a, &a)`. - -### GLB - -The procedure for computing the GLB is similar. The difference lies -in computing the replacements for the various variables. For each -region `R` that appears in the type `F`, we again compute `Tainted(R)` -and examine the results: - -1. If `R` is not in `V`, it is not replaced. -2. Else, if `Tainted(R)` contains only variables in `V`, and it - contains exactly one variable from the LHS and one variable from - the RHS, then `R` can be mapped to the bound version of the - variable from the LHS. -3. Else, if `Tainted(R)` contains no variable from the LHS and no - variable from the RHS, then `R` can be mapped to itself. -4. Else, `R` is mapped to a fresh bound variable. - -These rules are pretty complex. Let's look at some examples to see -how they play out. - -Out first example was `fn(&a)` and `fn(&X)`. In this case, `&a` will -be replaced with `$a` and we will ultimately compute a -(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`. -Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}. To find the -replacement for `$g` we consult the rules above: -- Rule (1) does not apply because `$g \in V` -- Rule (2) does not apply because `&X \in Tainted($g)` -- Rule (3) does not apply because `$a \in Tainted($g)` -- Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`. -So our final result is `fn(&z)`, which is correct. - -The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again -have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`. -Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`. In this case, -by rule (3), `$g` is mapped to itself, and hence the result is -`fn($g)`. This result is correct (in this case, at least), but it is -indicative of a case that *can* lead us into concluding that there is -no GLB when in fact a GLB does exist. See the section "Questionable -Results" below for more details. - -The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as -before, we'll end up with `F=fn($g, $h)` where `Tainted($g) = -Tainted($h) = {$g, $h, $a, $b, $c}`. Only rule (4) applies and hence -we'll select fresh bound variables `y` and `z` and wind up with -`fn(&y, &z)`. - -For the last example, let's consider what may seem trivial, but is -not: `fn(&a, &a)` and `fn(&b, &b)`. In this case, we'll get `F=fn($g, -$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a, -$x}`. Both of these sets contain exactly one bound variable from each -side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which -is the desired result. - -### Shortcomings and correctness - -You may be wondering whether this algorithm is correct. The answer is -"sort of". There are definitely cases where they fail to compute a -result even though a correct result exists. I believe, though, that -if they succeed, then the result is valid, and I will attempt to -convince you. The basic argument is that the "pre-replacement" step -computes a set of constraints. The replacements, then, attempt to -satisfy those constraints, using bound identifiers where needed. - -For now I will briefly go over the cases for LUB/GLB and identify -their intent: - -- LUB: - - The region variables that are substituted in place of bound regions - are intended to collect constraints on those bound regions. 
- - If Tainted(R) contains only values in V, then this region is unconstrained - and can therefore be generalized, otherwise it cannot. -- GLB: - - The region variables that are substituted in place of bound regions - are intended to collect constraints on those bound regions. - - If Tainted(R) contains exactly one variable from each side, and - only variables in V, that indicates that those two bound regions - must be equated. - - Otherwise, if Tainted(R) references any variables from left or right - side, then it is trying to combine a bound region with a free one or - multiple bound regions, so we need to select fresh bound regions. - -Sorry this is more of a shorthand to myself. I will try to write up something -more convincing in the future. - -#### Where are the algorithms wrong? - -- The pre-replacement computation can fail even though using a - bound-region would have succeeded. -- We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the - GLB of $a and $b. But if inference finds that $a and $b must be mapped - to regions without a GLB, then this is effectively a failure to compute - the GLB. However, the result `fn<$c>(fn($c))` is a valid GLB. - -*/ +/*! See doc.rs */ use middle::ty; @@ -542,8 +16,10 @@ use middle::ty::{FreeRegion, Region, RegionVid}; use middle::ty::{re_empty, re_static, re_infer, re_free, re_bound}; use middle::ty::{re_scope, ReVar, ReSkolemized, br_fresh}; use middle::typeck::infer::cres; +use middle::typeck::infer::{RegionVariableOrigin, SubregionOrigin}; +use middle::typeck::infer; use util::common::indenter; -use util::ppaux::note_and_explain_region; +use util::ppaux::{note_and_explain_region, Repr, UserString}; use std::cell::Cell; use std::hashmap::{HashMap, HashSet}; @@ -551,12 +27,17 @@ use std::uint; use std::vec; use syntax::codemap::span; use syntax::ast; +use syntax::opt_vec; +use syntax::opt_vec::OptVec; -#[deriving(Eq,IterBytes)] +mod doc; + +#[deriving(Eq, IterBytes)] enum Constraint { ConstrainVarSubVar(RegionVid, RegionVid), ConstrainRegSubVar(Region, RegionVid), - ConstrainVarSubReg(RegionVid, Region) + ConstrainVarSubReg(RegionVid, Region), + ConstrainRegSubReg(Region, Region), } #[deriving(Eq, IterBytes)] @@ -576,12 +57,37 @@ enum CombineMapType { Lub, Glb } +pub enum RegionResolutionError { + /// `ConcreteFailure(o, a, b)`: + /// + /// `o` requires that `a <= b`, but this does not hold + ConcreteFailure(SubregionOrigin, Region, Region), + + /// `SubSupConflict(v, sub_origin, sub_r, sup_origin, sup_r)`: + /// + /// Could not infer a value for `v` because `sub_r <= v` (due to + /// `sub_origin`) but `v <= sup_r` (due to `sup_origin`) and + /// `sub_r <= sup_r` does not hold. + SubSupConflict(RegionVariableOrigin, + SubregionOrigin, Region, + SubregionOrigin, Region), + + /// `SupSupConflict(v, origin1, r1, origin2, r2)`: + /// + /// Could not infer a value for `v` because `v <= r1` (due to + /// `origin1`) and `v <= r2` (due to `origin2`) and + /// `r1` and `r2` have no intersection. 
+ SupSupConflict(RegionVariableOrigin, + SubregionOrigin, Region, + SubregionOrigin, Region), +} + type CombineMap = HashMap; pub struct RegionVarBindings { tcx: ty::ctxt, - var_spans: ~[span], - constraints: HashMap, + var_origins: ~[RegionVariableOrigin], + constraints: HashMap, lubs: CombineMap, glbs: CombineMap, skolemization_count: uint, @@ -606,7 +112,7 @@ pub struct RegionVarBindings { pub fn RegionVarBindings(tcx: ty::ctxt) -> RegionVarBindings { RegionVarBindings { tcx: tcx, - var_spans: ~[], + var_origins: ~[], values: Cell::new_empty(), constraints: HashMap::new(), lubs: HashMap::new(), @@ -647,8 +153,8 @@ impl RegionVarBindings { match undo_item { Snapshot => {} AddVar(vid) => { - assert_eq!(self.var_spans.len(), vid.to_uint() + 1); - self.var_spans.pop(); + assert_eq!(self.var_origins.len(), vid.to_uint() + 1); + self.var_origins.pop(); } AddConstraint(ref constraint) => { self.constraints.remove(constraint); @@ -664,18 +170,18 @@ impl RegionVarBindings { } pub fn num_vars(&mut self) -> uint { - self.var_spans.len() + self.var_origins.len() } - pub fn new_region_var(&mut self, span: span) -> RegionVid { + pub fn new_region_var(&mut self, origin: RegionVariableOrigin) -> RegionVid { let id = self.num_vars(); - self.var_spans.push(span); + self.var_origins.push(origin); let vid = RegionVid { id: id }; if self.in_snapshot() { self.undo_log.push(AddVar(vid)); } - debug!("created new region variable %? with span %?", - vid, self.tcx.sess.codemap.span_to_str(span)); + debug!("created new region variable %? with origin %?", + vid, origin.repr(self.tcx)); return vid; } @@ -705,109 +211,106 @@ impl RegionVarBindings { re_bound(br_fresh(sc)) } - pub fn add_constraint(&mut self, constraint: Constraint, span: span) { + pub fn add_constraint(&mut self, + constraint: Constraint, + origin: SubregionOrigin) { // cannot add constraints once regions are resolved assert!(self.values.is_empty()); debug!("RegionVarBindings: add_constraint(%?)", constraint); - if self.constraints.insert(constraint, span) { + if self.constraints.insert(constraint, origin) { if self.in_snapshot() { self.undo_log.push(AddConstraint(constraint)); } } } - pub fn make_subregion(&mut self, span: span, sub: Region, sup: Region) - -> cres<()> { + pub fn make_subregion(&mut self, + origin: SubregionOrigin, + sub: Region, + sup: Region) { // cannot add constraints once regions are resolved assert!(self.values.is_empty()); debug!("RegionVarBindings: make_subregion(%?, %?)", sub, sup); match (sub, sup) { (re_infer(ReVar(sub_id)), re_infer(ReVar(sup_id))) => { - self.add_constraint(ConstrainVarSubVar(sub_id, sup_id), span); - Ok(()) + self.add_constraint(ConstrainVarSubVar(sub_id, sup_id), origin); } (r, re_infer(ReVar(sup_id))) => { - self.add_constraint(ConstrainRegSubVar(r, sup_id), span); - Ok(()) + self.add_constraint(ConstrainRegSubVar(r, sup_id), origin); } (re_infer(ReVar(sub_id)), r) => { - self.add_constraint(ConstrainVarSubReg(sub_id, r), span); - Ok(()) + self.add_constraint(ConstrainVarSubReg(sub_id, r), origin); } (re_bound(br), _) => { self.tcx.sess.span_bug( - span, + origin.span(), fmt!("Cannot relate bound region as subregion: %?", br)); } (_, re_bound(br)) => { self.tcx.sess.span_bug( - span, + origin.span(), fmt!("Cannot relate bound region as superregion: %?", br)); } _ => { - if self.is_subregion_of(sub, sup) { - Ok(()) - } else { - Err(ty::terr_regions_does_not_outlive(sub, sup)) - } + self.add_constraint(ConstrainRegSubReg(sub, sup), origin); } } } - pub fn lub_regions(&mut self, span: span, a: Region, b: 
Region) - -> cres { + pub fn lub_regions(&mut self, + origin: SubregionOrigin, + a: Region, + b: Region) + -> Region { // cannot add constraints once regions are resolved assert!(self.values.is_empty()); debug!("RegionVarBindings: lub_regions(%?, %?)", a, b); match (a, b) { - (re_static, _) | (_, re_static) => { - Ok(re_static) // nothing lives longer than static - } - - (re_infer(ReVar(*)), _) | (_, re_infer(ReVar(*))) => { - self.combine_vars( - Lub, a, b, span, - |this, old_r, new_r| this.make_subregion(span, old_r, new_r)) - } + (re_static, _) | (_, re_static) => { + re_static // nothing lives longer than static + } - _ => { - Ok(self.lub_concrete_regions(a, b)) - } + _ => { + self.combine_vars( + Lub, a, b, origin, + |this, old_r, new_r| + this.make_subregion(origin, old_r, new_r)) + } } } - pub fn glb_regions(&mut self, span: span, a: Region, b: Region) - -> cres { + pub fn glb_regions(&mut self, + origin: SubregionOrigin, + a: Region, + b: Region) + -> Region { // cannot add constraints once regions are resolved assert!(self.values.is_empty()); debug!("RegionVarBindings: glb_regions(%?, %?)", a, b); match (a, b) { - (re_static, r) | (r, re_static) => { - // static lives longer than everything else - Ok(r) - } - - (re_infer(ReVar(*)), _) | (_, re_infer(ReVar(*))) => { - self.combine_vars( - Glb, a, b, span, - |this, old_r, new_r| this.make_subregion(span, new_r, old_r)) - } + (re_static, r) | (r, re_static) => { + // static lives longer than everything else + r + } - _ => { - self.glb_concrete_regions(a, b) - } + _ => { + self.combine_vars( + Glb, a, b, origin, + |this, old_r, new_r| + this.make_subregion(origin, new_r, old_r)) + } } } pub fn resolve_var(&mut self, rid: RegionVid) -> ty::Region { if self.values.is_empty() { self.tcx.sess.span_bug( - self.var_spans[rid.to_uint()], + self.var_origins[rid.to_uint()].span(), fmt!("Attempt to resolve region variable before values have \ been computed!")); } @@ -830,46 +333,41 @@ impl RegionVarBindings { } } + fn combine_map<'a>(&'a mut self, + t: CombineMapType) + -> &'a mut CombineMap + { + match t { + Glb => &mut self.glbs, + Lub => &mut self.lubs, + } + } + pub fn combine_vars(&mut self, t: CombineMapType, a: Region, b: Region, - span: span, + origin: SubregionOrigin, relate: &fn(this: &mut RegionVarBindings, old_r: Region, - new_r: Region) -> cres<()>) - -> cres { + new_r: Region)) + -> Region { let vars = TwoRegions { a: a, b: b }; - let c; - { - // FIXME (#3850): shouldn't need a scope, nor should this need to be - // done twice to get the maps out - { - let combines = match t { - Glb => &self.glbs, Lub => &self.lubs - }; - match combines.find(&vars) { - Some(&c) => return Ok(re_infer(ReVar(c))), - None => () - } - } - c = self.new_region_var(span); - { - let combines = match t { - Glb => &mut self.glbs, Lub => &mut self.lubs - }; - combines.insert(vars, c); + match self.combine_map(t).find(&vars) { + Some(&c) => { + return re_infer(ReVar(c)); } + None => {} } + let c = self.new_region_var(infer::MiscVariable(origin.span())); + self.combine_map(t).insert(vars, c); if self.in_snapshot() { self.undo_log.push(AddCombination(t, vars)); } - do relate(self, a, re_infer(ReVar(c))).then { - do relate(self, b, re_infer(ReVar(c))).then { - debug!("combine_vars() c=%?", c); - Ok(re_infer(ReVar(c))) - } - } + relate(self, a, re_infer(ReVar(c))); + relate(self, b, re_infer(ReVar(c))); + debug!("combine_vars() c=%?", c); + re_infer(ReVar(c)) } pub fn vars_created_since_snapshot(&mut self, snapshot: uint) @@ -924,6 +422,9 @@ impl RegionVarBindings 
{ AddConstraint(ConstrainVarSubReg(ref a, ref b)) => { Some((re_infer(ReVar(*a)), *b)) } + AddConstraint(ConstrainRegSubReg(a, b)) => { + Some((a, b)) + } _ => { None } @@ -931,11 +432,11 @@ impl RegionVarBindings { match regs { None => {} - Some((ref r1, ref r2)) => { + Some((r1, r2)) => { result_set = - consider_adding_edge(result_set, &r, r1, r2); + consider_adding_edge(result_set, r, r1, r2); result_set = - consider_adding_edge(result_set, &r, r2, r1); + consider_adding_edge(result_set, r, r2, r1); } } @@ -948,14 +449,14 @@ impl RegionVarBindings { return result_set; fn consider_adding_edge(result_set: ~[Region], - r: &Region, - r1: &Region, - r2: &Region) -> ~[Region] + r: Region, + r1: Region, + r2: Region) -> ~[Region] { let mut result_set = result_set; - if *r == *r1 { // Clearly, this is potentially inefficient. - if !result_set.iter().any_(|x| x == r2) { - result_set.push(*r2); + if r == r1 { // Clearly, this is potentially inefficient. + if !result_set.iter().any_(|x| *x == r2) { + result_set.push(r2); } } return result_set; @@ -969,10 +470,12 @@ impl RegionVarBindings { constraints, assuming such values can be found; if they cannot, errors are reported. */ - pub fn resolve_regions(&mut self) { + pub fn resolve_regions(&mut self) -> OptVec { debug!("RegionVarBindings: resolve_regions()"); - let v = self.infer_variable_values(); + let mut errors = opt_vec::Empty; + let v = self.infer_variable_values(&mut errors); self.values.put_back(v); + errors } } @@ -994,7 +497,7 @@ impl RegionVarBindings { (re_infer(ReVar(v_id)), _) | (_, re_infer(ReVar(v_id))) => { self.tcx.sess.span_bug( - self.var_spans[v_id.to_uint()], + self.var_origins[v_id.to_uint()].span(), fmt!("lub_concrete_regions invoked with \ non-concrete regions: %?, %?", a, b)); } @@ -1096,7 +599,7 @@ impl RegionVarBindings { (re_infer(ReVar(v_id)), _) | (_, re_infer(ReVar(v_id))) => { self.tcx.sess.span_bug( - self.var_spans[v_id.to_uint()], + self.var_origins[v_id.to_uint()].span(), fmt!("glb_concrete_regions invoked with \ non-concrete regions: %?, %?", a, b)); } @@ -1173,9 +676,11 @@ impl RegionVarBindings { } } - fn report_type_error(&mut self, span: span, terr: &ty::type_err) { + fn report_type_error(&mut self, + origin: SubregionOrigin, + terr: &ty::type_err) { let terr_str = ty::type_err_to_str(self.tcx, terr); - self.tcx.sess.span_err(span, terr_str); + self.tcx.sess.span_err(origin.span(), terr_str); } fn intersect_scopes(&self, @@ -1210,7 +715,7 @@ enum Classification { Expanding, Contracting } enum GraphNodeValue { NoValue, Value(Region), ErrorValue } struct GraphNode { - span: span, + origin: RegionVariableOrigin, classification: Classification, value: GraphNodeValue, head_edge: [uint, ..2], @@ -1219,7 +724,6 @@ struct GraphNode { struct GraphEdge { next_edge: [uint, ..2], constraint: Constraint, - span: span, } struct Graph { @@ -1227,20 +731,23 @@ struct Graph { edges: ~[GraphEdge], } -struct SpannedRegion { +struct RegionAndOrigin { region: Region, - span: span, + origin: SubregionOrigin, } impl RegionVarBindings { - pub fn infer_variable_values(&mut self) -> ~[GraphNodeValue] { + fn infer_variable_values(&mut self, + errors: &mut OptVec) + -> ~[GraphNodeValue] { let mut graph = self.construct_graph(); self.expansion(&mut graph); self.contraction(&mut graph); - self.extract_values_and_report_conflicts(&graph) + self.collect_concrete_region_errors(&graph, errors); + self.extract_values_and_collect_conflicts(&graph, errors) } - pub fn construct_graph(&mut self) -> Graph { + fn construct_graph(&mut self) -> Graph 
{ let num_vars = self.num_vars(); let num_edges = self.constraints.len(); @@ -1251,7 +758,7 @@ impl RegionVarBindings { // those nodes that have a concrete region predecessor to // Expanding. classification: Contracting, - span: self.var_spans[var_idx], + origin: self.var_origins[var_idx], value: NoValue, head_edge: [uint::max_value, uint::max_value] } @@ -1259,11 +766,10 @@ impl RegionVarBindings { // It would be nice to write this using map(): let mut edges = vec::with_capacity(num_edges); - for self.constraints.iter().advance |(constraint, span)| { + for self.constraints.iter().advance |(constraint, _)| { edges.push(GraphEdge { next_edge: [uint::max_value, uint::max_value], constraint: *constraint, - span: *span }); } @@ -1284,6 +790,10 @@ impl RegionVarBindings { ConstrainVarSubReg(a_id, _) => { insert_edge(&mut graph, a_id, Outgoing, edge_idx); } + ConstrainRegSubReg(*) => { + // Relations between two concrete regions do not + // require an edge in the graph. + } } } @@ -1305,7 +815,7 @@ impl RegionVarBindings { } } - pub fn expansion(&mut self, graph: &mut Graph) { + fn expansion(&mut self, graph: &mut Graph) { do iterate_until_fixed_point(~"Expansion", graph) |nodes, edge| { match edge.constraint { ConstrainRegSubVar(a_region, b_vid) => { @@ -1325,15 +835,19 @@ impl RegionVarBindings { // This is a contraction constraint. Ignore it. false } + ConstrainRegSubReg(*) => { + // No region variables involved. Ignore. + false + } } } } - pub fn expand_node(&mut self, - a_region: Region, - b_vid: RegionVid, - b_node: &mut GraphNode) - -> bool { + fn expand_node(&mut self, + a_region: Region, + b_vid: RegionVid, + b_node: &mut GraphNode) + -> bool { debug!("expand_node(%?, %? == %?)", a_region, b_vid, b_node.value); @@ -1365,7 +879,8 @@ impl RegionVarBindings { } } - pub fn contraction(&mut self, graph: &mut Graph) { + fn contraction(&mut self, + graph: &mut Graph) { do iterate_until_fixed_point(~"Contraction", graph) |nodes, edge| { match edge.constraint { ConstrainRegSubVar(*) => { @@ -1385,15 +900,19 @@ impl RegionVarBindings { let a_node = &mut nodes[a_vid.to_uint()]; self.contract_node(a_vid, a_node, b_region) } + ConstrainRegSubReg(*) => { + // No region variables involved. Ignore. + false + } } } } - pub fn contract_node(&mut self, - a_vid: RegionVid, - a_node: &mut GraphNode, - b_region: Region) - -> bool { + fn contract_node(&mut self, + a_vid: RegionVid, + a_node: &mut GraphNode, + b_region: Region) + -> bool { debug!("contract_node(%? 
== %?/%?, %?)", a_vid, a_node.value, a_node.classification, b_region); @@ -1461,9 +980,44 @@ impl RegionVarBindings { } } - pub fn extract_values_and_report_conflicts(&mut self, graph: &Graph) - -> ~[GraphNodeValue] { - debug!("extract_values_and_report_conflicts()"); + fn collect_concrete_region_errors( + &mut self, + graph: &Graph, + errors: &mut OptVec) + { + let num_edges = graph.edges.len(); + for uint::range(0, num_edges) |edge_idx| { + let edge = &graph.edges[edge_idx]; + let origin = self.constraints.get_copy(&edge.constraint); + + let (sub, sup) = match edge.constraint { + ConstrainVarSubVar(*) | + ConstrainRegSubVar(*) | + ConstrainVarSubReg(*) => { + loop; + } + ConstrainRegSubReg(sub, sup) => { + (sub, sup) + } + }; + + if self.is_subregion_of(sub, sup) { + loop; + } + + debug!("ConcreteFailure: !(sub <= sup): sub=%?, sup=%?", + sub, sup); + errors.push(ConcreteFailure(origin, sub, sup)); + } + } + + fn extract_values_and_collect_conflicts( + &mut self, + graph: &Graph, + errors: &mut OptVec) + -> ~[GraphNodeValue] + { + debug!("extract_values_and_collect_conflicts()"); // This is the best way that I have found to suppress // duplicate and related errors. Basically we keep a set of @@ -1516,12 +1070,12 @@ impl RegionVarBindings { let node_vid = RegionVid { id: idx }; match node.classification { Expanding => { - self.report_error_for_expanding_node( - graph, dup_vec, node_vid); + self.collect_error_for_expanding_node( + graph, dup_vec, node_vid, errors); } Contracting => { - self.report_error_for_contracting_node( - graph, dup_vec, node_vid); + self.collect_error_for_contracting_node( + graph, dup_vec, node_vid, errors); } } } @@ -1531,10 +1085,13 @@ impl RegionVarBindings { }).collect() } - pub fn report_error_for_expanding_node(&mut self, - graph: &Graph, - dup_vec: &mut [uint], - node_idx: RegionVid) { + fn collect_error_for_expanding_node( + &mut self, + graph: &Graph, + dup_vec: &mut [uint], + node_idx: RegionVid, + errors: &mut OptVec) + { // Errors in expanding nodes result from a lower-bound that is // not contained by an upper-bound. 
let (lower_bounds, lower_dup) = @@ -1550,50 +1107,33 @@ impl RegionVarBindings { for upper_bounds.iter().advance |upper_bound| { if !self.is_subregion_of(lower_bound.region, upper_bound.region) { - - self.tcx.sess.span_err( - self.var_spans[node_idx.to_uint()], - fmt!("cannot infer an appropriate lifetime \ - due to conflicting requirements")); - - note_and_explain_region( - self.tcx, - "first, the lifetime cannot outlive ", - upper_bound.region, - "..."); - - self.tcx.sess.span_note( - upper_bound.span, - fmt!("...due to the following expression")); - - note_and_explain_region( - self.tcx, - "but, the lifetime must be valid for ", + errors.push(SubSupConflict( + self.var_origins[node_idx.to_uint()], + lower_bound.origin, lower_bound.region, - "..."); - - self.tcx.sess.span_note( - lower_bound.span, - fmt!("...due to the following expression")); - + upper_bound.origin, + upper_bound.region)); return; } } } self.tcx.sess.span_bug( - self.var_spans[node_idx.to_uint()], - fmt!("report_error_for_expanding_node() could not find error \ + self.var_origins[node_idx.to_uint()].span(), + fmt!("collect_error_for_expanding_node() could not find error \ for var %?, lower_bounds=%s, upper_bounds=%s", node_idx, lower_bounds.map(|x| x.region).repr(self.tcx), upper_bounds.map(|x| x.region).repr(self.tcx))); } - pub fn report_error_for_contracting_node(&mut self, - graph: &Graph, - dup_vec: &mut [uint], - node_idx: RegionVid) { + fn collect_error_for_contracting_node( + &mut self, + graph: &Graph, + dup_vec: &mut [uint], + node_idx: RegionVid, + errors: &mut OptVec) + { // Errors in contracting nodes result from two upper-bounds // that have no intersection. let (upper_bounds, dup_found) = @@ -1609,32 +1149,12 @@ impl RegionVarBindings { upper_bound_2.region) { Ok(_) => {} Err(_) => { - - self.tcx.sess.span_err( - self.var_spans[node_idx.to_uint()], - fmt!("cannot infer an appropriate lifetime \ - due to conflicting requirements")); - - note_and_explain_region( - self.tcx, - "first, the lifetime must be contained by ", + errors.push(SupSupConflict( + self.var_origins[node_idx.to_uint()], + upper_bound_1.origin, upper_bound_1.region, - "..."); - - self.tcx.sess.span_note( - upper_bound_1.span, - fmt!("...due to the following expression")); - - note_and_explain_region( - self.tcx, - "but, the lifetime must also be contained by ", - upper_bound_2.region, - "..."); - - self.tcx.sess.span_note( - upper_bound_2.span, - fmt!("...due to the following expression")); - + upper_bound_2.origin, + upper_bound_2.region)); return; } } @@ -1642,23 +1162,23 @@ impl RegionVarBindings { } self.tcx.sess.span_bug( - self.var_spans[node_idx.to_uint()], - fmt!("report_error_for_contracting_node() could not find error \ + self.var_origins[node_idx.to_uint()].span(), + fmt!("collect_error_for_contracting_node() could not find error \ for var %?, upper_bounds=%s", node_idx, upper_bounds.map(|x| x.region).repr(self.tcx))); } - pub fn collect_concrete_regions(&mut self, - graph: &Graph, - orig_node_idx: RegionVid, - dir: Direction, - dup_vec: &mut [uint]) - -> (~[SpannedRegion], bool) { + fn collect_concrete_regions(&mut self, + graph: &Graph, + orig_node_idx: RegionVid, + dir: Direction, + dup_vec: &mut [uint]) + -> (~[RegionAndOrigin], bool) { struct WalkState { set: HashSet, stack: ~[RegionVid], - result: ~[SpannedRegion], + result: ~[RegionAndOrigin], dup_found: bool } let mut state = WalkState { @@ -1720,17 +1240,19 @@ impl RegionVarBindings { ConstrainRegSubVar(region, _) | ConstrainVarSubReg(_, region) => { - 
state.result.push(SpannedRegion { + state.result.push(RegionAndOrigin { region: region, - span: edge.span + origin: this.constraints.get_copy(&edge.constraint) }); } + + ConstrainRegSubReg(*) => {} } } } } - pub fn each_edge(&mut self, + pub fn each_edge(&self, graph: &Graph, node_idx: RegionVid, dir: Direction, diff --git a/src/librustc/middle/typeck/infer/sub.rs b/src/librustc/middle/typeck/infer/sub.rs index 905e86a73f064..72178500b54ed 100644 --- a/src/librustc/middle/typeck/infer/sub.rs +++ b/src/librustc/middle/typeck/infer/sub.rs @@ -20,6 +20,7 @@ use middle::typeck::infer::InferCtxt; use middle::typeck::infer::lattice::CombineFieldsLatticeMethods; use middle::typeck::infer::lub::Lub; use middle::typeck::infer::to_str::InferStr; +use middle::typeck::infer::{TypeTrace, Subtype}; use util::common::{indent, indenter}; use util::ppaux::bound_region_to_str; @@ -36,7 +37,7 @@ impl Combine for Sub { fn infcx(&self) -> @mut InferCtxt { self.infcx } fn tag(&self) -> ~str { ~"sub" } fn a_is_expected(&self) -> bool { self.a_is_expected } - fn span(&self) -> span { self.span } + fn trace(&self) -> TypeTrace { self.trace } fn sub(&self) -> Sub { Sub(**self) } fn lub(&self) -> Lub { Lub(**self) } @@ -62,12 +63,8 @@ impl Combine for Sub { self.tag(), a.inf_str(self.infcx), b.inf_str(self.infcx)); - do indent { - match self.infcx.region_vars.make_subregion(self.span, a, b) { - Ok(()) => Ok(a), - Err(ref e) => Err((*e)) - } - } + self.infcx.region_vars.make_subregion(Subtype(self.trace), a, b); + Ok(a) } fn mts(&self, a: &ty::mt, b: &ty::mt) -> cres { @@ -170,7 +167,7 @@ impl Combine for Sub { // region variable. let (a_sig, _) = self.infcx.replace_bound_regions_with_fresh_regions( - self.span, a); + self.trace, a); // Second, we instantiate each bound region in the supertype with a // fresh concrete region. diff --git a/src/librustc/middle/typeck/mod.rs b/src/librustc/middle/typeck/mod.rs index 4eaa0b69d9ccd..bbcfc73853a1e 100644 --- a/src/librustc/middle/typeck/mod.rs +++ b/src/librustc/middle/typeck/mod.rs @@ -263,7 +263,7 @@ pub fn require_same_types( } } - match infer::mk_eqty(l_infcx, t1_is_expected, span, t1, t2) { + match infer::mk_eqty(l_infcx, t1_is_expected, infer::Misc(span), t1, t2) { result::Ok(()) => true, result::Err(ref terr) => { l_tcx.sess.span_err(span, msg() + ": " + diff --git a/src/librustc/util/ppaux.rs b/src/librustc/util/ppaux.rs index 80344a9894fb1..6289407114534 100644 --- a/src/librustc/util/ppaux.rs +++ b/src/librustc/util/ppaux.rs @@ -608,6 +608,12 @@ impl Repr for @ast::pat { } } +impl Repr for ty::bound_region { + fn repr(&self, tcx: ctxt) -> ~str { + bound_region_ptr_to_str(tcx, *self) + } +} + impl Repr for ty::Region { fn repr(&self, tcx: ctxt) -> ~str { region_to_str(tcx, "", false, *self) @@ -793,6 +799,19 @@ impl Repr for ty::BuiltinBounds { } } +impl Repr for span { + fn repr(&self, tcx: ctxt) -> ~str { + tcx.sess.codemap.span_to_str(*self) + } +} + +impl UserString for @A { + fn user_string(&self, tcx: ctxt) -> ~str { + let this: &A = &**self; + this.user_string(tcx) + } +} + impl UserString for ty::BuiltinBounds { fn user_string(&self, tcx: ctxt) -> ~str { if self.is_empty() { ~"" } else { diff --git a/src/test/compile-fail/arc-rw-cond-shouldnt-escape.rs b/src/test/compile-fail/arc-rw-cond-shouldnt-escape.rs index 82868647e57da..b00b701191e2e 100644 --- a/src/test/compile-fail/arc-rw-cond-shouldnt-escape.rs +++ b/src/test/compile-fail/arc-rw-cond-shouldnt-escape.rs @@ -8,7 +8,7 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. -// error-pattern: reference is not valid outside of its lifetime +// error-pattern: lifetime of return value does not outlive the function call extern mod extra; use extra::arc; fn main() { diff --git a/src/test/compile-fail/arc-rw-state-shouldnt-escape.rs b/src/test/compile-fail/arc-rw-state-shouldnt-escape.rs index 6bd32866f8989..001e6cf922f67 100644 --- a/src/test/compile-fail/arc-rw-state-shouldnt-escape.rs +++ b/src/test/compile-fail/arc-rw-state-shouldnt-escape.rs @@ -8,14 +8,15 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// error-pattern: reference is not valid outside of its lifetime extern mod extra; use extra::arc; fn main() { let x = ~arc::RWARC(1); - let mut y = None; + let mut y = None; //~ ERROR lifetime of variable does not enclose its declaration do x.write |one| { y = Some(one); } *y.unwrap() = 2; + //~^ ERROR lifetime of return value does not outlive the function call + //~^^ ERROR dereference of reference outside its lifetime } diff --git a/src/test/compile-fail/arc-rw-write-mode-cond-shouldnt-escape.rs b/src/test/compile-fail/arc-rw-write-mode-cond-shouldnt-escape.rs index 534475319033f..59e899dbbf2ef 100644 --- a/src/test/compile-fail/arc-rw-write-mode-cond-shouldnt-escape.rs +++ b/src/test/compile-fail/arc-rw-write-mode-cond-shouldnt-escape.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// error-pattern: reference is not valid outside of its lifetime +// error-pattern: lifetime of variable does not enclose its declaration extern mod extra; use extra::arc; fn main() { diff --git a/src/test/compile-fail/arc-rw-write-mode-shouldnt-escape.rs b/src/test/compile-fail/arc-rw-write-mode-shouldnt-escape.rs index decb7b8af9f3a..2599fb4dfa0c7 100644 --- a/src/test/compile-fail/arc-rw-write-mode-shouldnt-escape.rs +++ b/src/test/compile-fail/arc-rw-write-mode-shouldnt-escape.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// error-pattern: reference is not valid outside of its lifetime +// error-pattern: lifetime of variable does not enclose its declaration extern mod extra; use extra::arc; fn main() { diff --git a/src/test/compile-fail/borrowck-move-by-capture.rs b/src/test/compile-fail/borrowck-move-by-capture.rs index 0efde1df6c22b..4ee824d1d49ad 100644 --- a/src/test/compile-fail/borrowck-move-by-capture.rs +++ b/src/test/compile-fail/borrowck-move-by-capture.rs @@ -1,6 +1,4 @@ -extern mod extra; - -fn main() { +pub fn main() { let foo = ~3; let _pfoo = &foo; let _f: @fn() -> int = || *foo + 5; diff --git a/src/test/compile-fail/if-branch-types.rs b/src/test/compile-fail/if-branch-types.rs index 74baceb39d32c..1c6dd0ef9f657 100644 --- a/src/test/compile-fail/if-branch-types.rs +++ b/src/test/compile-fail/if-branch-types.rs @@ -8,6 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// error-pattern:mismatched types - -fn main() { let x = if true { 10i } else { 10u }; } +fn main() { + let x = if true { 10i } else { 10u }; + //~^ ERROR if and else have incompatible types: expected `int` but found `uint` +} diff --git a/src/test/compile-fail/kindck-owned-trait-contains.rs b/src/test/compile-fail/kindck-owned-trait-contains.rs index 6bb90bff228d4..33e122867bb3d 100644 --- a/src/test/compile-fail/kindck-owned-trait-contains.rs +++ b/src/test/compile-fail/kindck-owned-trait-contains.rs @@ -23,13 +23,13 @@ fn main() { // Error results because the type of is inferred to be // @repeat<&'blk int> where blk is the lifetime of the block below. - let y = { //~ ERROR reference is not valid + let y = { //~ ERROR lifetime of variable does not enclose its declaration let x: &'blk int = &3; repeater(@x) }; - assert!(3 == *(y.get())); //~ ERROR dereference of reference outside its lifetime - //~^ ERROR reference is not valid outside of its lifetime - //~^^ ERROR reference is not valid outside of its lifetime - //~^^^ ERROR reference is not valid outside of its lifetime + assert!(3 == *(y.get())); + //~^ ERROR dereference of reference outside its lifetime + //~^^ ERROR automatically borrowed pointer is not valid at the time of borrow + //~^^^ ERROR lifetime of return value does not outlive the function call //~^^^^ ERROR cannot infer an appropriate lifetime } diff --git a/src/test/compile-fail/lub-if.rs b/src/test/compile-fail/lub-if.rs new file mode 100644 index 0000000000000..358c61921470f --- /dev/null +++ b/src/test/compile-fail/lub-if.rs @@ -0,0 +1,52 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Test that we correctly consider the type of `match` to be the LUB +// of the various arms, particularly in the case where regions are +// involved. + +pub fn opt_str0<'a>(maybestr: &'a Option<~str>) -> &'a str { + if maybestr.is_none() { + "(none)" + } else { + let s: &'a str = *maybestr.get_ref(); + s + } +} + +pub fn opt_str1<'a>(maybestr: &'a Option<~str>) -> &'a str { + if maybestr.is_some() { + let s: &'a str = *maybestr.get_ref(); + s + } else { + "(none)" + } +} + +pub fn opt_str2<'a>(maybestr: &'a Option<~str>) -> &'static str { + if maybestr.is_none() { //~ ERROR mismatched types + "(none)" + } else { + let s: &'a str = *maybestr.get_ref(); + s + } +} + +pub fn opt_str3<'a>(maybestr: &'a Option<~str>) -> &'static str { + if maybestr.is_some() { //~ ERROR mismatched types + let s: &'a str = *maybestr.get_ref(); + s + } else { + "(none)" + } +} + + +fn main() {} \ No newline at end of file diff --git a/src/test/compile-fail/lub-match.rs b/src/test/compile-fail/lub-match.rs new file mode 100644 index 0000000000000..2a61b72997d1f --- /dev/null +++ b/src/test/compile-fail/lub-match.rs @@ -0,0 +1,55 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +// Test that we correctly consider the type of `match` to be the LUB +// of the various arms, particularly in the case where regions are +// involved. + +pub fn opt_str0<'a>(maybestr: &'a Option<~str>) -> &'a str { + match *maybestr { + Some(ref s) => { + let s: &'a str = *s; + s + } + None => "(none)", + } +} + +pub fn opt_str1<'a>(maybestr: &'a Option<~str>) -> &'a str { + match *maybestr { + None => "(none)", + Some(ref s) => { + let s: &'a str = *s; + s + } + } +} + +pub fn opt_str2<'a>(maybestr: &'a Option<~str>) -> &'static str { + match *maybestr { //~ ERROR mismatched types + None => "(none)", + Some(ref s) => { + let s: &'a str = *s; + s + } + } +} + +pub fn opt_str3<'a>(maybestr: &'a Option<~str>) -> &'static str { + match *maybestr { //~ ERROR mismatched types + Some(ref s) => { + let s: &'a str = *s; + s + } + None => "(none)", + } +} + +fn main() {} \ No newline at end of file diff --git a/src/test/compile-fail/regions-bounds.rs b/src/test/compile-fail/regions-bounds.rs index 35ba386243807..ab2ac6cc0e5b9 100644 --- a/src/test/compile-fail/regions-bounds.rs +++ b/src/test/compile-fail/regions-bounds.rs @@ -17,10 +17,12 @@ struct a_class<'self> { x:&'self int } fn a_fn1<'a,'b>(e: an_enum<'a>) -> an_enum<'b> { return e; //~ ERROR mismatched types: expected `an_enum<'b>` but found `an_enum<'a>` + //~^ ERROR cannot infer an appropriate lifetime } fn a_fn3<'a,'b>(e: a_class<'a>) -> a_class<'b> { return e; //~ ERROR mismatched types: expected `a_class<'b>` but found `a_class<'a>` + //~^ ERROR cannot infer an appropriate lifetime } fn a_fn4<'a,'b>() { diff --git a/src/test/compile-fail/regions-escape-bound-fn-2.rs b/src/test/compile-fail/regions-escape-bound-fn-2.rs index 9cee55643f89c..305aa68528493 100644 --- a/src/test/compile-fail/regions-escape-bound-fn-2.rs +++ b/src/test/compile-fail/regions-escape-bound-fn-2.rs @@ -15,6 +15,6 @@ fn with_int(f: &fn(x: &int)) { fn main() { let mut x = None; - //~^ ERROR reference is not valid outside of its lifetime + //~^ ERROR lifetime of variable does not enclose its declaration with_int(|y| x = Some(y)); } diff --git a/src/test/compile-fail/regions-escape-via-trait-or-not.rs b/src/test/compile-fail/regions-escape-via-trait-or-not.rs index aa431d6b81c6e..5b6dc1b2f4fb4 100644 --- a/src/test/compile-fail/regions-escape-via-trait-or-not.rs +++ b/src/test/compile-fail/regions-escape-via-trait-or-not.rs @@ -23,7 +23,7 @@ fn with(f: &fn(x: &int) -> R) -> int { } fn return_it() -> int { - with(|o| o) //~ ERROR reference is not valid outside of its lifetime + with(|o| o) //~ ERROR lifetime of function argument does not outlive the function call } fn main() { diff --git a/src/test/run-pass/issue-4325.rs b/src/test/compile-fail/regions-free-region-ordering-incorrect.rs similarity index 60% rename from src/test/run-pass/issue-4325.rs rename to src/test/compile-fail/regions-free-region-ordering-incorrect.rs index 8e65c15f1c4ae..c2bd64fddafce 100644 --- a/src/test/run-pass/issue-4325.rs +++ b/src/test/compile-fail/regions-free-region-ordering-incorrect.rs @@ -8,16 +8,23 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +// Test that free regions ordering only goes one way. That is, +// we have `&'a Node<'self, T>`, which implies that `'a <= 'self`, +// but not `'self <= 'a`. Hence returning `&self.val` (which has lifetime +// `'a`) where `'self` is expected yields an error. +// +// This test began its life as a test for issue #4325. 
+
 struct Node<'self, T> {
     val: T,
     next: Option<&'self Node<'self, T>>
 }
 impl<'self, T> Node<'self, T> {
-    fn get(&self) -> &'self T {
+    fn get<'a>(&'a self) -> &'self T {
         match self.next {
             Some(ref next) => next.get(),
-            None => &self.val
+            None => &self.val //~ ERROR cannot infer an appropriate lifetime
         }
     }
 }
diff --git a/src/test/compile-fail/regions-infer-at-fn-not-param.rs b/src/test/compile-fail/regions-infer-at-fn-not-param.rs
index c8813b73e6b3a..488d1f3940d6e 100644
--- a/src/test/compile-fail/regions-infer-at-fn-not-param.rs
+++ b/src/test/compile-fail/regions-infer-at-fn-not-param.rs
@@ -20,7 +20,10 @@ struct not_parameterized2 {
     g: @fn()
 }
-fn take1(p: parameterized1) -> parameterized1 { p } //~ ERROR mismatched types
+fn take1(p: parameterized1) -> parameterized1 { p }
+//~^ ERROR mismatched types
+//~^^ ERROR cannot infer an appropriate lifetime
+
 fn take3(p: not_parameterized1) -> not_parameterized1 { p }
 fn take4(p: not_parameterized2) -> not_parameterized2 { p }
diff --git a/src/test/compile-fail/regions-infer-covariance-due-to-arg.rs b/src/test/compile-fail/regions-infer-covariance-due-to-arg.rs
index 85cc6e6ce2487..c33ca2dab2ee0 100644
--- a/src/test/compile-fail/regions-infer-covariance-due-to-arg.rs
+++ b/src/test/compile-fail/regions-infer-covariance-due-to-arg.rs
@@ -22,6 +22,7 @@ fn to_same_lifetime<'r>(bi: covariant<'r>) {
 fn to_shorter_lifetime<'r>(bi: covariant<'r>) {
     let bj: covariant<'blk> = bi; //~ ERROR mismatched types
+    //~^ ERROR cannot infer an appropriate lifetime
 }
 fn to_longer_lifetime<'r>(bi: covariant<'r>) -> covariant<'static> {
diff --git a/src/test/compile-fail/regions-infer-not-param.rs b/src/test/compile-fail/regions-infer-not-param.rs
index a0ecb08a08957..fa853b82d9ebd 100644
--- a/src/test/compile-fail/regions-infer-not-param.rs
+++ b/src/test/compile-fail/regions-infer-not-param.rs
@@ -23,6 +23,11 @@ struct indirect2<'self> {
 }
 fn take_direct(p: direct) -> direct { p } //~ ERROR mismatched types
+//~^ ERROR cannot infer an appropriate lifetime
+
 fn take_indirect1(p: indirect1) -> indirect1 { p }
+
 fn take_indirect2(p: indirect2) -> indirect2 { p } //~ ERROR mismatched types
+//~^ ERROR cannot infer an appropriate lifetime
+
 fn main() {}
diff --git a/src/test/compile-fail/regions-infer-paramd-indirect.rs b/src/test/compile-fail/regions-infer-paramd-indirect.rs
index e8d66ab297b71..0b4aa44010bdc 100644
--- a/src/test/compile-fail/regions-infer-paramd-indirect.rs
+++ b/src/test/compile-fail/regions-infer-paramd-indirect.rs
@@ -30,6 +30,7 @@ impl<'self> set_f<'self> for c<'self> {
     fn set_f_bad(&self, b: @b) {
         self.f = b; //~ ERROR mismatched types: expected `@@&'self int` but found `@@&int`
+        //~^ ERROR cannot infer an appropriate lifetime
     }
 }
diff --git a/src/test/compile-fail/regions-nested-fns.rs b/src/test/compile-fail/regions-nested-fns.rs
index 74399967446ea..244e9cc06a1ac 100644
--- a/src/test/compile-fail/regions-nested-fns.rs
+++ b/src/test/compile-fail/regions-nested-fns.rs
@@ -22,6 +22,7 @@ fn nested<'x>(x: &'x int) {
     ignore::<&fn<'z>(&'z int) -> &'z int>(|z| {
         if false { return x; } //~ ERROR mismatched types
+        //~^ ERROR cannot infer an appropriate lifetime
         if false { return ay; }
         return z;
     });
diff --git a/src/test/compile-fail/regions-ret-borrowed-1.rs b/src/test/compile-fail/regions-ret-borrowed-1.rs
index a572d90313b6a..542711687190e 100644
--- a/src/test/compile-fail/regions-ret-borrowed-1.rs
+++ b/src/test/compile-fail/regions-ret-borrowed-1.rs
@@ -18,6 +18,8 @@ fn with<'a, R>(f: &fn(x: &'a int) -> R) -> R {
 fn return_it<'a>() -> &'a int {
     with(|o| o) //~ ERROR mismatched types
+    //~^ ERROR lifetime of return value does not outlive the function call
+    //~^^ ERROR cannot infer an appropriate lifetime
 }
 fn main() {
diff --git a/src/test/compile-fail/regions-ret-borrowed.rs b/src/test/compile-fail/regions-ret-borrowed.rs
index ec9a908ba9876..4d646aa364a40 100644
--- a/src/test/compile-fail/regions-ret-borrowed.rs
+++ b/src/test/compile-fail/regions-ret-borrowed.rs
@@ -21,6 +21,8 @@ fn with(f: &fn(x: &int) -> R) -> R {
 fn return_it() -> &int {
     with(|o| o) //~ ERROR mismatched types
+    //~^ ERROR lifetime of return value does not outlive the function call
+    //~^^ ERROR cannot infer an appropriate lifetime
 }
 fn main() {
diff --git a/src/test/compile-fail/sync-cond-shouldnt-escape.rs b/src/test/compile-fail/sync-cond-shouldnt-escape.rs
index b22d4d3b2e2e3..2006027e79707 100644
--- a/src/test/compile-fail/sync-cond-shouldnt-escape.rs
+++ b/src/test/compile-fail/sync-cond-shouldnt-escape.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// error-pattern: reference is not valid outside of its lifetime
+// error-pattern: lifetime of variable does not enclose its declaration
 extern mod extra;
 use extra::sync;
diff --git a/src/test/compile-fail/sync-rwlock-cond-shouldnt-escape.rs b/src/test/compile-fail/sync-rwlock-cond-shouldnt-escape.rs
index 518e67800d763..4108201f91159 100644
--- a/src/test/compile-fail/sync-rwlock-cond-shouldnt-escape.rs
+++ b/src/test/compile-fail/sync-rwlock-cond-shouldnt-escape.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// error-pattern: reference is not valid outside of its lifetime
+// error-pattern: lifetime of method receiver does not outlive the method call
 extern mod extra;
 use extra::sync;
 fn main() {
diff --git a/src/test/compile-fail/sync-rwlock-write-mode-cond-shouldnt-escape.rs b/src/test/compile-fail/sync-rwlock-write-mode-cond-shouldnt-escape.rs
index 09b83887bcfd1..43b4d9aabb876 100644
--- a/src/test/compile-fail/sync-rwlock-write-mode-cond-shouldnt-escape.rs
+++ b/src/test/compile-fail/sync-rwlock-write-mode-cond-shouldnt-escape.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// error-pattern: reference is not valid outside of its lifetime
+// error-pattern: lifetime of variable does not enclose its declaration
 extern mod extra;
 use extra::sync;
 fn main() {
diff --git a/src/test/compile-fail/sync-rwlock-write-mode-shouldnt-escape.rs b/src/test/compile-fail/sync-rwlock-write-mode-shouldnt-escape.rs
index 679c4a72598ea..15af7be524687 100644
--- a/src/test/compile-fail/sync-rwlock-write-mode-shouldnt-escape.rs
+++ b/src/test/compile-fail/sync-rwlock-write-mode-shouldnt-escape.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// error-pattern: reference is not valid outside of its lifetime
+// error-pattern: lifetime of variable does not enclose its declaration
 extern mod extra;
 use extra::sync;
 fn main() {