Implement GLB algorithm. (Issue #2263)

r=brson

Implement GLB algorithm. (Issue #2263)
r=brson
c3a74d87 · Niko Matsakis · 3b71d144 · c3a74d87 · c3a74d87 · c3a74d87
15 changed file
--- a/src/librustc/metadata/tyencode.rs
+++ b/src/librustc/metadata/tyencode.rs
@@ -188,6 +188,9 @@ fn enc_bound_region(w: io::Writer, cx: @ctxt, br: ty::bound_region) {
        w.write_char('|');
        enc_bound_region(w, cx, *br);
      }
+      ty::br_fresh(id) => {
+        w.write_uint(id);
+      }
    }
 }


--- a/src/librustc/middle/astencode.rs
+++ b/src/librustc/middle/astencode.rs
@@ -418,7 +418,8 @@ fn tr(xcx: extended_decode_ctxt) -> ty::Region {
 impl ty::bound_region: tr {
    fn tr(xcx: extended_decode_ctxt) -> ty::bound_region {
        match self {
-            ty::br_anon(_) | ty::br_named(_) | ty::br_self => self,
+            ty::br_anon(_) | ty::br_named(_) | ty::br_self |
+            ty::br_fresh(_) => self,
            ty::br_cap_avoid(id, br) => ty::br_cap_avoid(xcx.tr_id(id),
                                                         @br.tr(xcx))
        }

--- a/src/librustc/middle/ty.rs
+++ b/src/librustc/middle/ty.rs
@@ -72,7 +72,7 @@
 export expr_is_lval, expr_kind;
 export ExprKind, LvalueExpr, RvalueDatumExpr, RvalueDpsExpr, RvalueStmtExpr;
 export field_ty;
-export fold_ty, fold_sty_to_ty, fold_region, fold_regions;
+export fold_ty, fold_sty_to_ty, fold_region, fold_regions, fold_sig;
 export apply_op_on_t_to_ty_fn;
 export fold_regions_and_ty, walk_regions_and_ty;
 export field;
@@ -145,7 +145,7 @@
 export Region, bound_region, encl_region;
 export re_bound, re_free, re_scope, re_static, re_infer;
 export ReVar, ReSkolemized;
-export br_self, br_anon, br_named, br_cap_avoid;
+export br_self, br_anon, br_named, br_cap_avoid, br_fresh;
 export get, type_has_params, type_needs_infer, type_has_regions;
 export type_is_region_ptr;
 export type_id;
@@ -606,6 +606,9 @@ enum bound_region {
    /// Named region parameters for functions (a in &a/T)
    br_named(ast::ident),

+    /// Fresh bound identifiers created during GLB computations.
+    br_fresh(uint),
+
    /**
     * Handles capture-avoiding substitution in a rather subtle case.  If you
     * have a closure whose argument types are being inferred based on the
@@ -1311,6 +1314,17 @@ fn fold_sty_to_ty(tcx: ty::ctxt, sty: &sty, foldop: fn(t) -> t) -> t {
    mk_t(tcx, fold_sty(sty, foldop))
 }

+fn fold_sig(sig: &FnSig, fldop: fn(t) -> t) -> FnSig {
+    let args = do sig.inputs.map |arg| {
+        { mode: arg.mode, ty: fldop(arg.ty) }
+    };
+
+    FnSig {
+        inputs: move args,
+        output: fldop(sig.output)
+    }
+}
+
 fn fold_sty(sty: &sty, fldop: fn(t) -> t) -> sty {
    fn fold_substs(substs: &substs, fldop: fn(t) -> t) -> substs {
        {self_r: substs.self_r,
@@ -1489,8 +1503,8 @@ fn apply_op_on_t_to_ty_fn(
 fn fold_regions(
    cx: ctxt,
    ty: t,
-    fldr: fn(r: Region, in_fn: bool) -> Region) -> t {
-
+    fldr: fn(r: Region, in_fn: bool) -> Region) -> t
+{
    fn do_fold(cx: ctxt, ty: t, in_fn: bool,
               fldr: fn(Region, bool) -> Region) -> t {
        if !type_has_regions(ty) { return ty; }
@@ -2744,7 +2758,10 @@ impl bound_region : to_bytes::IterBytes {
          to_bytes::iter_bytes_2(&2u8, ident, lsb0, f),

          ty::br_cap_avoid(ref id, ref br) =>
-          to_bytes::iter_bytes_3(&3u8, id, br, lsb0, f)
+          to_bytes::iter_bytes_3(&3u8, id, br, lsb0, f),
+
+          ty::br_fresh(ref x) =>
+          to_bytes::iter_bytes_2(&4u8, x, lsb0, f)
        }
    }
 }
@@ -4493,6 +4510,12 @@ impl bound_region : cmp::Eq {
                    _ => false
                }
            }
+            br_fresh(e0a) => {
+                match (*other) {
+                    br_fresh(e0b) => e0a == e0b,
+                    _ => false
+                }
+            }
        }
    }
    pure fn ne(&self, other: &bound_region) -> bool { !(*self).eq(other) }

--- a/src/librustc/middle/typeck/infer/assignment.rs
+++ b/src/librustc/middle/typeck/infer/assignment.rs
@@ -80,7 +80,7 @@ fn to_ares(+c: cres<ty::t>) -> ares {

 impl Assign {
    fn tys(a: ty::t, b: ty::t) -> ares {
-        debug!("Assign.tys(%s -> %s)",
+        debug!("Assign.tys(%s => %s)",
               a.to_str(self.infcx),
               b.to_str(self.infcx));
        let _r = indenter();
@@ -139,7 +139,7 @@ fn assign_tys_or_sub(
        a: ty::t, b: ty::t,
        +a_bnd: Option<ty::t>, +b_bnd: Option<ty::t>) -> ares {

-        debug!("Assign.assign_tys_or_sub(%s -> %s, %s -> %s)",
+        debug!("Assign.assign_tys_or_sub(%s => %s, %s => %s)",
               a.to_str(self.infcx), b.to_str(self.infcx),
               a_bnd.to_str(self.infcx), b_bnd.to_str(self.infcx));
        let _r = indenter();

--- a/src/librustc/middle/typeck/infer/combine.rs
+++ b/src/librustc/middle/typeck/infer/combine.rs
@@ -69,6 +69,7 @@ trait combine {
    fn infcx() -> infer_ctxt;
    fn tag() -> ~str;
    fn a_is_expected() -> bool;
+    fn span() -> span;

    fn sub() -> Sub;
    fn lub() -> Lub;

--- a/src/librustc/middle/typeck/infer/glb.rs
+++ b/src/librustc/middle/typeck/infer/glb.rs
@@ -15,6 +15,8 @@
 use middle::typeck::infer::sub::Sub;
 use middle::typeck::infer::to_str::ToStr;

+use std::list;
+
 use syntax::ast::{Many, Once};

 enum Glb = combine_fields;  // "greatest lower bound" (common subtype)
@@ -23,6 +25,7 @@ impl Glb: combine {
    fn infcx() -> infer_ctxt { self.infcx }
    fn tag() -> ~str { ~"glb" }
    fn a_is_expected() -> bool { self.a_is_expected }
+    fn span() -> span { self.span }

    fn sub() -> Sub { Sub(*self) }
    fn lub() -> Lub { Lub(*self) }
@@ -144,7 +147,122 @@ fn args(a: ty::arg, b: ty::arg) -> cres<ty::arg> {
    }

    fn fns(a: &ty::FnTy, b: &ty::FnTy) -> cres<ty::FnTy> {
-        super_fns(&self, a, b)
+        // Note: this is a subtle algorithm.  For a full explanation,
+        // please see the large comment in `region_inference.rs`.
+
+        debug!("%s.fns(%?, %?)",
+               self.tag(), a.to_str(self.infcx), b.to_str(self.infcx));
+        let _indenter = indenter();
+
+        // Take a snapshot.  We'll never roll this back, but in later
+        // phases we do want to be able to examine "all bindings that
+        // were created as part of this type comparison", and making a
+        // snapshot is a convenient way to do that.
+        let snapshot = self.infcx.region_vars.start_snapshot();
+
+        // Instantiate each bound region with a fresh region variable.
+        let (a_with_fresh, a_isr) =
+            self.infcx.replace_bound_regions_with_fresh_regions(
+                self.span, a);
+        let a_vars = var_ids(&self, a_isr);
+        let (b_with_fresh, b_isr) =
+            self.infcx.replace_bound_regions_with_fresh_regions(
+                self.span, b);
+        let b_vars = var_ids(&self, b_isr);
+
+        // Collect constraints.
+        let fn_ty0 = if_ok!(super_fns(&self, &a_with_fresh, &b_with_fresh));
+        debug!("fn_ty0 = %s", fn_ty0.to_str(self.infcx));
+
+        // Generalize the regions appearing in fn_ty0 if possible
+        let new_vars =
+            self.infcx.region_vars.vars_created_since_snapshot(snapshot);
+        let fn_ty1 =
+            self.infcx.fold_regions_in_sig(
+                &fn_ty0,
+                |r, _in_fn| generalize_region(&self, snapshot,
+                                              new_vars, a_isr, a_vars, b_vars,
+                                              r));
+        debug!("fn_ty1 = %s", fn_ty1.to_str(self.infcx));
+        return Ok(move fn_ty1);
+
+        fn generalize_region(self: &Glb,
+                             snapshot: uint,
+                             new_vars: &[RegionVid],
+                             a_isr: isr_alist,
+                             a_vars: &[RegionVid],
+                             b_vars: &[RegionVid],
+                             r0: ty::Region) -> ty::Region {
+            if !is_var_in_set(new_vars, r0) {
+                return r0;
+            }
+
+            let tainted = self.infcx.region_vars.tainted(snapshot, r0);
+
+            let mut a_r = None, b_r = None, only_new_vars = true;
+            for tainted.each |r| {
+                if is_var_in_set(a_vars, *r) {
+                    if a_r.is_some() {
+                        return fresh_bound_variable(self);
+                    } else {
+                        a_r = Some(*r);
+                    }
+                } else if is_var_in_set(b_vars, *r) {
+                    if b_r.is_some() {
+                        return fresh_bound_variable(self);
+                    } else {
+                        b_r = Some(*r);
+                    }
+                } else if !is_var_in_set(new_vars, *r) {
+                    only_new_vars = false;
+                }
+            }
+
+                // NB---I do not believe this algorithm computes
+                // (necessarily) the GLB.  As written it can
+                // spuriously fail.  In particular, if there is a case
+                // like: fn(fn(&a)) and fn(fn(&b)), where a and b are
+                // free, it will return fn(&c) where c = GLB(a,b).  If
+                // however this GLB is not defined, then the result is
+                // an error, even though something like
+                // "fn<X>(fn(&X))" where X is bound would be a
+                // subtype of both of those.
+                //
+                // The problem is that if we were to return a bound
+                // variable, we'd be computing a lower-bound, but not
+                // necessarily the *greatest* lower-bound.
+
+            if a_r.is_some() && b_r.is_some() && only_new_vars {
+                // Related to exactly one bound variable from each fn:
+                return rev_lookup(self, a_isr, a_r.get());
+            } else if a_r.is_none() && b_r.is_none() {
+                // Not related to bound variables from either fn:
+                return r0;
+            } else {
+                // Other:
+                return fresh_bound_variable(self);
+            }
+        }
+
+        fn rev_lookup(self: &Glb,
+                      a_isr: isr_alist,
+                      r: ty::Region) -> ty::Region
+        {
+            for list::each(a_isr) |pair| {
+                let (a_br, a_r) = *pair;
+                if a_r == r {
+                    return ty::re_bound(a_br);
+                }
+            }
+
+            self.infcx.tcx.sess.span_bug(
+                self.span,
+                fmt!("could not find original bound region for %?", r));
+        }
+
+        fn fresh_bound_variable(self: &Glb) -> ty::Region {
+            self.infcx.region_vars.new_bound()
+        }
    }

    fn fn_metas(a: &ty::FnMeta, b: &ty::FnMeta) -> cres<ty::FnMeta> {

--- a/src/librustc/middle/typeck/infer/lattice.rs
+++ b/src/librustc/middle/typeck/infer/lattice.rs
@@ -14,6 +14,8 @@
 use middle::typeck::infer::unify::*;
 use middle::typeck::infer::to_str::ToStr;

+use std::list;
+
 // ______________________________________________________________________
 // Lattice operations on variables
 //
@@ -158,3 +160,29 @@ fn lattice_var_and_t<L:lattice_ops combine>(
      }
    }
 }
+
+// ___________________________________________________________________________
+// Random utility functions used by LUB/GLB when computing LUB/GLB of
+// fn types
+
+fn var_ids<T: combine>(self: &T, isr: isr_alist) -> ~[RegionVid] {
+    let mut result = ~[];
+    for list::each(isr) |pair| {
+        match pair.second() {
+            ty::re_infer(ty::ReVar(r)) => { result.push(r); }
+            r => {
+                self.infcx().tcx.sess.span_bug(
+                    self.span(),
+                    fmt!("Found non-region-vid: %?", r));
+            }
+        }
+    }
+    return result;
+}
+
+fn is_var_in_set(new_vars: &[RegionVid], r: ty::Region) -> bool {
+    match r {
+        ty::re_infer(ty::ReVar(ref v)) => new_vars.contains(v),
+        _ => false
+    }
+}
--- a/src/librustc/middle/typeck/infer/lub.rs
+++ b/src/librustc/middle/typeck/infer/lub.rs
@@ -31,6 +31,7 @@ impl Lub: combine {
    fn infcx() -> infer_ctxt { self.infcx }
    fn tag() -> ~str { ~"lub" }
    fn a_is_expected() -> bool { self.a_is_expected }
+    fn span() -> span { self.span }

    fn sub() -> Sub { Sub(*self) }
    fn lub() -> Lub { Lub(*self) }
@@ -139,12 +140,10 @@ fn fns(a: &ty::FnTy, b: &ty::FnTy) -> cres<ty::FnTy> {
        let new_vars =
            self.infcx.region_vars.vars_created_since_snapshot(snapshot);
        let fn_ty1 =
-            ty::apply_op_on_t_to_ty_fn(
-                self.infcx.tcx, &fn_ty0,
-                |t| ty::fold_regions(
-                    self.infcx.tcx, t,
-                    |r, _in_fn| generalize_region(&self, snapshot,
-                                                  new_vars, a_isr, r)));
+            self.infcx.fold_regions_in_sig(
+                &fn_ty0,
+                |r, _in_fn| generalize_region(&self, snapshot, new_vars,
+                                              a_isr, r));
        return Ok(move fn_ty1);

        fn generalize_region(self: &Lub,
@@ -153,7 +152,7 @@ fn generalize_region(self: &Lub,
                             a_isr: isr_alist,
                             r0: ty::Region) -> ty::Region {
            // Regions that pre-dated the LUB computation stay as they are.
-            if !is_new_var(new_vars, r0) {
+            if !is_var_in_set(new_vars, r0) {
                debug!("generalize_region(r0=%?): not new variable", r0);
                return r0;
            }
@@ -163,7 +162,7 @@ fn generalize_region(self: &Lub,
            // Variables created during LUB computation which are
            // *related* to regions that pre-date the LUB computation
            // stay as they are.
-            if !tainted.all(|r| is_new_var(new_vars, *r)) {
+            if !tainted.all(|r| is_var_in_set(new_vars, *r)) {
                debug!("generalize_region(r0=%?): \
                        non-new-variables found in %?",
                       r0, tainted);
@@ -190,13 +189,6 @@ fn generalize_region(self: &Lub,
                fmt!("Region %? is not associated with \
                      any bound region from A!", r0));
        }
-
-        fn is_new_var(new_vars: &[RegionVid], r: ty::Region) -> bool {
-            match r {
-                ty::re_infer(ty::ReVar(ref v)) => new_vars.contains(v),
-                _ => false
-            }
-        }
    }

    fn fn_metas(a: &ty::FnMeta, b: &ty::FnMeta) -> cres<ty::FnMeta> {

--- a/src/librustc/middle/typeck/infer/mod.rs
+++ b/src/librustc/middle/typeck/infer/mod.rs
@@ -799,5 +799,16 @@ fn replace_bound_regions_with_fresh_regions(
        (fn_ty, isr)
    }

+    fn fold_regions_in_sig(
+        &self,
+        fn_ty: &ty::FnTy,
+        fldr: &fn(r: ty::Region, in_fn: bool) -> ty::Region) -> ty::FnTy
+    {
+        let sig = do ty::fold_sig(&fn_ty.sig) |t| {
+            ty::fold_regions(self.tcx, t, fldr)
+        };
+        ty::FnTyBase {meta: fn_ty.meta, sig: sig}
+    }
+
 }

--- a/src/librustc/middle/typeck/infer/region_inference.rs
+++ b/src/librustc/middle/typeck/infer/region_inference.rs
@@ -337,6 +337,61 @@ fn<a>(&a/T) <: fn<b>(&b/T)
 LUB or GLB computation.  We have to consider this.  Here is the
 algorithm I implemented.

+First though, let's discuss what we are trying to compute in more
+detail.  The LUB is basically the "common supertype" and the GLB is
+"common subtype"; one catch is that the LUB should be the
+*most-specific* common supertype and the GLB should be *most general*
+common subtype (as opposed to any common supertype or any common
+subtype).
+
+Anyway, to help clarify, here is a table containing some
+function pairs and their LUB/GLB:
+
+```
+Type 1              Type 2              LUB               GLB
+fn<a>(&a)           fn(&X)              fn(&X)            fn<a>(&a)
+fn(&A)              fn(&X)              --                fn<a>(&a)
+fn<a,b>(&a, &b)     fn<x>(&x, &x)       fn<a>(&a, &a)     fn<a,b>(&a, &b)
+fn<a,b>(&a, &b, &a) fn<x,y>(&x, &y, &y) fn<a>(&a, &a, &a) fn<a,b,c>(&a,&b,&c)
+```
+
+### Conventions
+
+I use lower-case letters (e.g., `&a`) for bound regions and upper-case
+letters for free regions (`&A`).  Region variables written with a
+dollar-sign (e.g., `$a`).  I will try to remember to enumerate the
+bound-regions on the fn type as well (e.g., `fn<a>(&a)`).
+
+### High-level summary
+
+Both the LUB and the GLB algorithms work in a similar fashion.  They
+begin by replacing all bound regions (on both sides) with fresh region
+inference variables.  Therefore, both functions are converted to types
+that contain only free regions.  We can then compute the LUB/GLB in a
+straightforward way, as described in `combine.rs`.  This results in an
+interim type T.  The algorithms then examine the regions that appear
+in T and try to, in some cases, replace them with bound regions to
+yield the final result.
+
+To decide whether to replace a region `R` that appears in `T` with a
+bound region, the algorithms make use of two bits of information.
+First is a set `V` that contains all region variables created as part
+of the LUB/GLB computation. `V` will contain the region variables
+created to replace the bound regions in the input types, but it also
+contains 'intermediate' variables created to represent the LUB/GLB of
+individual regions.  Basically, when asked to compute the LUB/GLB of a
+region variable with another region, the inferencer cannot oblige
+immediately since the valuese of that variables are not known.
+Therefore, it creates a new variable that is related to the two
+regions.  For example, the LUB of two variables `$x` and `$y` is a
+fresh variable `$z` that is constrained such that `$x <= $z` and `$y
+<= $z`.  So `V` will contain these intermediate variables as well.
+
+The other important factor in deciding how to replace a region in T is
+the function `Tainted($r)` which, for a region variable, identifies
+all regions that the region variable is related to in some way
+(`Tainted()` made an appearance in the subtype computation as well).
+
 ### LUB

 The LUB algorithm proceeds in three steps:
@@ -345,47 +400,28 @@ fn<a>(&a/T) <: fn<b>(&b/T)
   inference variables.
 2. Compute the LUB "as normal", meaning compute the GLB of each
   pair of argument types and the LUB of the return types and
-   so forth.  Combine those to a new function type F.
-3. Map the regions appearing in `F` using the procedure described below.
-
-For each region `R` that appears in `F`, we may need to replace it
-with a bound region.  Let `V` be the set of fresh variables created as
-part of the LUB procedure (either in step 1 or step 2).  You may be
-wondering how variables can be created in step 2.  The answer is that
-when we are asked to compute the LUB or GLB of two region variables,
-we do so by producing a new region variable that is related to those
-two variables.  i.e., The LUB of two variables `$x` and `$y` is a
-fresh variable `$z` that is constrained such that `$x <= $z` and `$y
-<= $z`.
-
-To decide how to replace a region `R`, we must examine `Tainted(R)`.
-This function searches through the constraints which were generated
-when computing the bounds of all the argument and return types and
-produces a list of all regions to which `R` is related, directly or
-indirectly.
-
-If `R` is not in `V` or `Tainted(R)` contains any region that is not
-in `V`, then `R` is not replaced (that is, `R` is mapped to itself).
-Otherwise, if `Tainted(R)` is a subset of `V`, then we select the
-earliest variable in `Tainted(R)` that originates from the left-hand
-side and replace `R` with a bound version of that variable.
-
-So, let's work through the simplest example: `fn(&A)` and `fn(&a)`.
-In this case, `&a` will be replaced with `$a` (the $ indicates an
-inference variable) which will be linked to the free region `&A`, and
-hence `V = { $a }` and `Tainted($a) = { &A }`.  Since `$a` is not a
-member of `V`, we leave `$a` as is.  When region inference happens,
-`$a` will be resolved to `&A`, as we wanted.
-
-So, let's work through the simplest example: `fn(&A)` and `fn(&a)`.
-In this case, `&a` will be replaced with `$a` (the $ indicates an
-inference variable) which will be linked to the free region `&A`, and
-hence `V = { $a }` and `Tainted($a) = { $a, &A }`.  Since `&A` is not a
-member of `V`, we leave `$a` as is.  When region inference happens,
-`$a` will be resolved to `&A`, as we wanted.
-
-Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`.
-In this case, we'll end up with a graph that looks like:
+   so forth.  Combine those to a new function type `F`.
+3. Replace each region `R` that appears in `F` as follows:
+   - Let `V` be the set of variables created during the LUB
+     computational steps 1 and 2, as described in the previous section.
+   - If `R` is not in `V`, replace `R` with itself.
+   - If `Tainted(R)` contains a region that is not in `V`,
+     replace `R` with itself.
+   - Otherwise, select the earliest variable in `Tainted(R)` that originates
+     from the left-hand side and replace `R` with the bound region that
+     this variable was a replacement for.
+
+So, let's work through the simplest example: `fn(&A)` and `fn<a>(&a)`.
+In this case, `&a` will be replaced with `$a` and the interim LUB type
+`fn($b)` will be computed, where `$b=GLB(&A,$a)`.  Therefore, `V =
+{$a, $b}` and `Tainted($b) = { $b, $a, &A }`.  When we go to replace
+`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`,
+we leave `$b` as is.  When region inference happens, `$b` will be
+resolved to `&A`, as we wanted.
+
+Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`.  In
+this case, we'll end up with a (pre-replacement) LUB type of `fn(&g,
+&h)` and a graph that looks like:

 ```
     $a        $b     *--$x
@@ -399,8 +435,8 @@ fn<a>(&a/T) <: fn<b>(&b/T)
 `Tainted` will look like:

 ```
-V = {$a, $b, $x}
-Tainted($g) = Tainted($h) = { $a, $b, $h, $x }
+V = {$a, $b, $g, $h, $x}
+Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x }
 ```

 Therefore we replace both `$g` and `$h` with `$a`, and end up
@@ -413,40 +449,90 @@ fn<a>(&a/T) <: fn<b>(&b/T)
 region `R` that appears in the type `F`, we again compute `Tainted(R)`
 and examine the results:

-1. If `Tainted(R) = {R}` is a singleton set, replace `R` with itself.
+1. If `R` is not in `V`, it is not replaced.
 2. Else, if `Tainted(R)` contains only variables in `V`, and it
   contains exactly one variable from the LHS and one variable from
   the RHS, then `R` can be mapped to the bound version of the
   variable from the LHS.
-3. Else, `R` is mapped to a fresh bound variable.
+3. Else, if `Tainted(R)` contains no variable from the LHS and no
+   variable from the RHS, then `R` can be mapped to itself.
+4. Else, `R` is mapped to a fresh bound variable.

 These rules are pretty complex.  Let's look at some examples to see
 how they play out.

-Out first example was `fn(&a)` and `fn(&X)`---in
-this case, the LUB will be a variable `$g`, and `Tainted($g) =
-{$g,$a,$x}`.  By these rules, we'll replace `$g` with a fresh bound
-variable, so the result is `fn(&z)`, which is fine.
-
-The next example is `fn(&A)` and `fn(&Z)`. XXX
-
-The next example is `fn(&a, &b)` and `fn(&x, &x)`. In this case, as
-before, we'll end up with `F=fn(&g, &h)` where `Tainted($g) =
-Tainted($h) = {$g, $a, $b, $x}`.  This means that we'll select fresh
-bound varibales `g` and `h` and wind up with `fn(&g, &h)`.
+Out first example was `fn(&a)` and `fn(&X)`.  In this case, `&a` will
+be replaced with `$a` and we will ultimately compute a
+(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`.
+Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}.  To find the
+replacement for `$g` we consult the rules above:
+- Rule (1) does not apply because `$g \in V`
+- Rule (2) does not apply because `&X \in Tainted($g)`
+- Rule (3) does not apply because `$a \in Tainted($g)`
+- Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`.
+So our final result is `fn(&z)`, which is correct.
+
+The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again
+have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`.
+Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`.  In this case,
+by rule (3), `$g` is mapped to itself, and hence the result is
+`fn($g)`.  This result is correct (in this case, at least), but it is
+indicative of a case that *can* lead us into concluding that there is
+no GLB when in fact a GLB does exist.  See the section "Questionable
+Results" below for more details.
+
+The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as
+before, we'll end up with `F=fn($g, $h)` where `Tainted($g) =
+Tainted($h) = {$g, $h, $a, $b, $c}`.  Only rule (4) applies and hence
+we'll select fresh bound variables `y` and `z` and wind up with
+`fn(&y, &z)`.

 For the last example, let's consider what may seem trivial, but is
-not: `fn(&a, &a)` and `fn(&x, &x)`.  In this case, we'll get `F=fn(&g,
-&h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a,
+not: `fn(&a, &a)` and `fn(&b, &b)`.  In this case, we'll get `F=fn($g,
+$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a,
 $x}`.  Both of these sets contain exactly one bound variable from each
-side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`.
-Horray!
-
-### Why are these correct?
-
-You may be wondering whether this algorithm is correct.  So am I.  But
-I believe it is.  (Justification forthcoming, haven't had time to
-write it)
+side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which
+is the desired result.
+
+### Shortcomings and correctness
+
+You may be wondering whether this algorithm is correct.  The answer is
+"sort of".  There are definitely cases where they fail to compute a
+result even though a correct result exists.  I believe, though, that
+if they succeed, then the result is valid, and I will attempt to
+convince you.  The basic argument is that the "pre-replacement" step
+computes a set of constraints.  The replacements, then, attempt to
+satisfy those constraints, using bound identifiers where needed.
+
+For now I will briefly go over the cases for LUB/GLB and identify
+their intent:
+
+- LUB:
+  - The region variables that are substituted in place of bound regions
+    are intended to collect constraints on those bound regions.
+  - If Tainted(R) contains only values in V, then this region is unconstrained
+    and can therefore be generalized, otherwise it cannot.
+- GLB:
+  - The region variables that are substituted in place of bound regions
+    are intended to collect constraints on those bound regions.
+  - If Tainted(R) contains exactly one variable from each side, and
+    only variables in V, that indicates that those two bound regions
+    must be equated.
+  - Otherwise, if Tainted(R) references any variables from left or right
+    side, then it is trying to combine a bound region with a free one or
+    multiple bound regions, so we need to select fresh bound regions.
+
+Sorry this is more of a shorthand to myself.  I will try to write up something
+more convincing in the future.
+
+#### Where are the algorithms wrong?
+
+- The pre-replacement computation can fail even though using a
+  bound-region would have succeeded.
+- We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the
+  GLB of $a and $b.  But if inference finds that $a and $b must be mapped
+  to regions without a GLB, then this is effectively a failure to compute
+  the GLB.  However, the result `fn<$c>(fn($c))` is a valid GLB.

 */

@@ -457,7 +543,7 @@ fn<a>(&a/T) <: fn<b>(&b/T)
 use middle::region;
 use middle::ty;
 use middle::ty::{Region, RegionVid, re_static, re_infer, re_free, re_bound};
-use middle::ty::{re_scope, ReVar, ReSkolemized};
+use middle::ty::{re_scope, ReVar, ReSkolemized, br_fresh};
 use middle::typeck::infer::to_str::ToStr;
 use syntax::codemap;
 use util::ppaux::note_and_explain_region;
@@ -553,6 +639,7 @@ struct RegionVarBindings {
    lubs: CombineMap,
    glbs: CombineMap,
    mut skolemization_count: uint,
+    mut bound_count: uint,

    // The undo log records actions that might later be undone.
    //
@@ -579,6 +666,7 @@ fn RegionVarBindings(tcx: ty::ctxt) -> RegionVarBindings {
        lubs: CombineMap(),
        glbs: CombineMap(),
        skolemization_count: 0,
+        bound_count: 0,
        undo_log: ~[]
    }
 }
@@ -655,6 +743,26 @@ fn new_skolemized(&self, br: ty::bound_region) -> Region {
        re_infer(ReSkolemized(sc, br))
    }

+    fn new_bound(&self) -> Region {
+        // Creates a fresh bound variable for use in GLB computations.
+        // See discussion of GLB computation in the large comment at
+        // the top of this file for more details.
+        //
+        // This computation is mildly wrong in the face of rollover.
+        // It's conceivable, if unlikely, that one might wind up with
+        // accidental capture for nested functions in that case, if
+        // the outer function had bound regions created a very long
+        // time before and the inner function somehow wound up rolling
+        // over such that supposedly fresh identifiers were in fact
+        // shadowed.  We should convert our bound_region
+        // representation to use deBruijn indices or something like
+        // that to eliminate that possibility.
+
+        let sc = self.bound_count;
+        self.bound_count += 1;
+        re_bound(br_fresh(sc))
+    }
+
    fn add_constraint(&self, +constraint: Constraint, span: span) {
        // cannot add constraints once regions are resolved
        assert self.values.is_empty();
@@ -687,6 +795,16 @@ fn make_subregion(&self, span: span,
            self.add_constraint(ConstrainVarSubReg(sub_id, r), span);
            Ok(())
          }
+          (re_bound(br), _) => {
+            self.tcx.sess.span_bug(
+                span,
+                fmt!("Cannot relate bound region as subregion: %?", br));
+          }
+          (_, re_bound(br)) => {
+            self.tcx.sess.span_bug(
+                span,
+                fmt!("Cannot relate bound region as superregion: %?", br));
+          }
          _ => {
            if self.is_subregion_of(sub, sup) {
                Ok(())

--- a/src/librustc/middle/typeck/infer/sub.rs
+++ b/src/librustc/middle/typeck/infer/sub.rs
@@ -24,6 +24,7 @@ impl Sub: combine {
    fn infcx() -> infer_ctxt { self.infcx }
    fn tag() -> ~str { ~"sub" }
    fn a_is_expected() -> bool { self.a_is_expected }
+    fn span() -> span { self.span }

    fn sub() -> Sub { Sub(*self) }
    fn lub() -> Lub { Lub(*self) }

--- a/src/librustc/middle/typeck/infer/test.rs
+++ b/src/librustc/middle/typeck/infer/test.rs
@@ -16,7 +16,7 @@

 */

-
+use dvec::DVec;
 use std::getopts;
 use std::map::HashMap;
 use std::getopts;
@@ -36,7 +36,8 @@
 struct Env {
    crate: @ast::crate,
    tcx: ty::ctxt,
-    infcx: infer::infer_ctxt
+    infcx: infer::infer_ctxt,
+    err_messages: @DVec<~str>
 }

 struct RH {
@@ -44,10 +45,14 @@ struct RH {
    sub: &[RH]
 }

+const EMPTY_SOURCE_STR: &str = "/* Hello, world! */";
+
 fn setup_env(test_name: &str, source_string: &str) -> Env {
-    let matches = &getopts(~[~"-Z", ~"verbose"], optgroups()).get();
-    let sessopts = build_session_options(~"rustc", matches, diagnostic::emit);
-    let sess = build_session(sessopts, diagnostic::emit);
+    let messages = @DVec();
+    let matches = getopts(~[~"-Z", ~"verbose"], optgroups()).get();
+    let diag = diagnostic::collect(messages);
+    let sessopts = build_session_options(~"rustc", &matches, diag);
+    let sess = build_session(sessopts, diag);
    let cfg = build_configuration(sess, ~"whatever", str_input(~""));
    let dm = HashMap();
    let amap = HashMap();
@@ -66,7 +71,10 @@ fn setup_env(test_name: &str, source_string: &str) -> Env {

    let infcx = infer::new_infer_ctxt(tcx);

-    return Env { crate: crate, tcx: tcx, infcx: infcx };
+    return Env {crate: crate,
+                tcx: tcx,
+                infcx: infcx,
+                err_messages: messages};
 }

 impl Env {
@@ -210,6 +218,22 @@ fn t_rptr_static(&self) -> ty::t {

    fn lub() -> Lub { Lub(self.infcx.combine_fields(true, dummy_sp())) }

+    fn glb() -> Glb { Glb(self.infcx.combine_fields(true, dummy_sp())) }
+
+    fn resolve_regions(exp_count: uint) {
+        debug!("resolve_regions(%u)", exp_count);
+
+        self.infcx.resolve_regions();
+        if self.err_messages.len() != exp_count {
+            for self.err_messages.each |msg| {
+                debug!("Error encountered: %s", *msg);
+            }
+            fmt!("Resolving regions encountered %u errors but expected %u!",
+                 self.err_messages.len(),
+                 exp_count);
+        }
+    }
+
    /// Checks that `LUB(t1,t2) == t_lub`
    fn check_lub(&self, t1: ty::t, t2: ty::t, t_lub: ty::t) {
        match self.lub().tys(t1, t2) {
@@ -222,6 +246,30 @@ fn check_lub(&self, t1: ty::t, t2: ty::t, t_lub: ty::t) {
                // sanity check for good measure:
                self.assert_subtype(t1, t);
                self.assert_subtype(t2, t);
+
+                self.resolve_regions(0);
+            }
+        }
+    }
+
+    /// Checks that `GLB(t1,t2) == t_glb`
+    fn check_glb(&self, t1: ty::t, t2: ty::t, t_glb: ty::t) {
+        debug!("check_glb(t1=%s, t2=%s, t_glb=%s)",
+               self.ty_to_str(t1),
+               self.ty_to_str(t2),
+               self.ty_to_str(t_glb));
+        match self.glb().tys(t1, t2) {
+            Err(e) => {
+                fail fmt!("Unexpected error computing LUB: %?", e)
+            }
+            Ok(t) => {
+                self.assert_eq(t, t_glb);
+
+                // sanity check for good measure:
+                self.assert_subtype(t, t1);
+                self.assert_subtype(t, t2);
+
+                self.resolve_regions(0);
            }
        }
    }
@@ -236,11 +284,22 @@ fn check_no_lub(&self, t1: ty::t, t2: ty::t) {
            }
        }
    }
+
+    /// Checks that `GLB(t1,t2)` is undefined
+    fn check_no_glb(&self, t1: ty::t, t2: ty::t) {
+        match self.glb().tys(t1, t2) {
+            Err(_) => {}
+            Ok(t) => {
+                fail fmt!("Unexpected success computing GLB: %?",
+                          self.ty_to_str(t))
+            }
+        }
+    }
 }

 #[test]
 fn contravariant_region_ptr() {
-    let env = setup_env("contravariant_region_ptr", "");
+    let env = setup_env("contravariant_region_ptr", EMPTY_SOURCE_STR);
    env.create_simple_region_hierarchy();
    let t_rptr1 = env.t_rptr_scope(1);
    let t_rptr10 = env.t_rptr_scope(10);
@@ -251,7 +310,7 @@ fn contravariant_region_ptr() {

 #[test]
 fn lub_bound_bound() {
-    let env = setup_env("lub_bound_bound", "");
+    let env = setup_env("lub_bound_bound", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_bound2 = env.t_rptr_bound(2);
    env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()),
@@ -261,7 +320,7 @@ fn lub_bound_bound() {

 #[test]
 fn lub_bound_free() {
-    let env = setup_env("lub_bound_free", "");
+    let env = setup_env("lub_bound_free", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_free1 = env.t_rptr_free(0, 1);
    env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()),
@@ -271,7 +330,7 @@ fn lub_bound_free() {

 #[test]
 fn lub_bound_static() {
-    let env = setup_env("lub_bound_static", "");
+    let env = setup_env("lub_bound_static", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_static = env.t_rptr_static();
    env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()),
@@ -281,7 +340,7 @@ fn lub_bound_static() {

 #[test]
 fn lub_bound_bound_inverse_order() {
-    let env = setup_env("lub_bound_bound_inverse_order", "");
+    let env = setup_env("lub_bound_bound_inverse_order", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_bound2 = env.t_rptr_bound(2);
    env.check_lub(env.t_fn([t_rptr_bound1, t_rptr_bound2], t_rptr_bound1),
@@ -291,7 +350,7 @@ fn lub_bound_bound_inverse_order() {

 #[test]
 fn lub_free_free() {
-    let env = setup_env("lub_free_free", "");
+    let env = setup_env("lub_free_free", EMPTY_SOURCE_STR);
    let t_rptr_free1 = env.t_rptr_free(0, 1);
    let t_rptr_free2 = env.t_rptr_free(0, 2);
    let t_rptr_static = env.t_rptr_static();
@@ -302,10 +361,50 @@ fn lub_free_free() {

 #[test]
 fn lub_returning_scope() {
-    let env = setup_env("lub_returning_scope", "");
+    let env = setup_env("lub_returning_scope", EMPTY_SOURCE_STR);
    let t_rptr_scope10 = env.t_rptr_scope(10);
    let t_rptr_scope11 = env.t_rptr_scope(11);
    env.check_no_lub(env.t_fn([], t_rptr_scope10),
                     env.t_fn([], t_rptr_scope11));
 }

+#[test]
+fn glb_free_free_with_common_scope() {
+    let env = setup_env("glb_free_free", EMPTY_SOURCE_STR);
+    let t_rptr_free1 = env.t_rptr_free(0, 1);
+    let t_rptr_free2 = env.t_rptr_free(0, 2);
+    let t_rptr_scope = env.t_rptr_scope(0);
+    env.check_glb(env.t_fn([t_rptr_free1], env.t_int()),
+                  env.t_fn([t_rptr_free2], env.t_int()),
+                  env.t_fn([t_rptr_scope], env.t_int()));
+}
+
+#[test]
+fn glb_bound_bound() {
+    let env = setup_env("glb_bound_bound", EMPTY_SOURCE_STR);
+    let t_rptr_bound1 = env.t_rptr_bound(1);
+    let t_rptr_bound2 = env.t_rptr_bound(2);
+    env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()),
+                  env.t_fn([t_rptr_bound2], env.t_int()),
+                  env.t_fn([t_rptr_bound1], env.t_int()));
+}
+
+#[test]
+fn glb_bound_free() {
+    let env = setup_env("glb_bound_free", EMPTY_SOURCE_STR);
+    let t_rptr_bound1 = env.t_rptr_bound(1);
+    let t_rptr_free1 = env.t_rptr_free(0, 1);
+    env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()),
+                  env.t_fn([t_rptr_free1], env.t_int()),
+                  env.t_fn([t_rptr_bound1], env.t_int()));
+}
+
+#[test]
+fn glb_bound_static() {
+    let env = setup_env("glb_bound_static", EMPTY_SOURCE_STR);
+    let t_rptr_bound1 = env.t_rptr_bound(1);
+    let t_rptr_static = env.t_rptr_static();
+    env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()),
+                  env.t_fn([t_rptr_static], env.t_int()),
+                  env.t_fn([t_rptr_bound1], env.t_int()));
+}
--- a/src/librustc/util/ppaux.rs
+++ b/src/librustc/util/ppaux.rs
@@ -13,7 +13,8 @@
 use middle::ty::{arg, canon_mode};
 use middle::ty::{bound_copy, bound_const, bound_durable, bound_owned,
        bound_trait};
-use middle::ty::{bound_region, br_anon, br_named, br_self, br_cap_avoid};
+use middle::ty::{bound_region, br_anon, br_named, br_self, br_cap_avoid,
+                 br_fresh};
 use middle::ty::{ctxt, field, method};
 use middle::ty::{mt, t, param_bound};
 use middle::ty::{re_bound, re_free, re_scope, re_infer, re_static, Region};
@@ -94,6 +95,7 @@ fn explain_region_and_span(cx: ctxt, region: ty::Region)
        let prefix = match br {
          br_anon(idx) => fmt!("the anonymous lifetime #%u defined on",
                               idx + 1),
+          br_fresh(_) => fmt!("an anonymous lifetime defined on"),
          _ => fmt!("the lifetime %s as defined on",
                    bound_region_to_str(cx, br))
        };
@@ -140,6 +142,7 @@ fn bound_region_to_str_adorned(cx: ctxt, prefix: &str,
      br_named(id)         => fmt!("%s%s%s", prefix, cx.sess.str_of(id), sep),
      br_self              => fmt!("%sself%s", prefix, sep),
      br_anon(_)           => prefix.to_str(),
+      br_fresh(_)          => prefix.to_str(),
      br_cap_avoid(_, br)  => bound_region_to_str_adorned(cx, prefix,
                                                          *br, sep)
    }

--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -315,6 +315,10 @@ pub fn adjust_span(&self, sp: span) -> span {
    }

    pub fn span_to_str(&self, sp: span) -> ~str {
+        if self.files.len() == 0 && sp == ast_util::dummy_sp() {
+            return ~"no-location";
+        }
+
        let lo = self.lookup_char_pos_adj(sp.lo);
        let hi = self.lookup_char_pos_adj(sp.hi);
        return fmt!("%s:%u:%u: %u:%u", lo.filename,

--- a/src/libsyntax/diagnostic.rs
+++ b/src/libsyntax/diagnostic.rs
@@ -17,9 +17,11 @@
 use core::option;
 use core::str;
 use core::vec;
+use core::dvec::DVec;
+
 use std::term;

-export emitter, emit;
+export emitter, collect, emit;
 export level, fatal, error, warning, note;
 export span_handler, handler, mk_span_handler, mk_handler;
 export codemap_span_handler, codemap_handler;
@@ -143,7 +145,7 @@ fn mk_handler(emitter: Option<emitter>) -> handler {
      Some(e) => e,
      None => {
        let f = fn@(cmsp: Option<(@codemap::CodeMap, span)>,
-            msg: &str, t: level) {
+                    msg: &str, t: level) {
            emit(cmsp, msg, t);
        };
        f
@@ -206,6 +208,12 @@ fn print_diagnostic(topic: ~str, lvl: level, msg: &str) {
    io::stderr().write_str(fmt!(" %s\n", msg));
 }

+fn collect(messages: @DVec<~str>)
+    -> fn@(Option<(@codemap::CodeMap, span)>, &str, level)
+{
+    |_o, msg: &str, _l| { messages.push(msg.to_str()); }
+}
+
 fn emit(cmsp: Option<(@codemap::CodeMap, span)>, msg: &str, lvl: level) {
    match cmsp {
      Some((cm, sp)) => {