diff --git a/src/libcollections/hash/mod.rs b/src/libcollections/hash/mod.rs index 4173ffc5d2fd87460e12357d8dfcb39c3c298a84..1dc2539c592e9d68564aaa26055ff39f4b750437 100644 --- a/src/libcollections/hash/mod.rs +++ b/src/libcollections/hash/mod.rs @@ -8,58 +8,56 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * Generic hashing support. - * - * This module provides a generic way to compute the hash of a value. The - * simplest way to make a type hashable is to use `#[deriving(Hash)]`: - * - * # Example - * - * ```rust - * use std::hash; - * use std::hash::Hash; - * - * #[deriving(Hash)] - * struct Person { - * id: uint, - * name: String, - * phone: u64, - * } - * - * let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 }; - * let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 }; - * - * assert!(hash::hash(&person1) != hash::hash(&person2)); - * ``` - * - * If you need more control over how a value is hashed, you need to implement - * the trait `Hash`: - * - * ```rust - * use std::hash; - * use std::hash::Hash; - * use std::hash::sip::SipState; - * - * struct Person { - * id: uint, - * name: String, - * phone: u64, - * } - * - * impl Hash for Person { - * fn hash(&self, state: &mut SipState) { - * self.id.hash(state); - * self.phone.hash(state); - * } - * } - * - * let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 }; - * let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 }; - * - * assert!(hash::hash(&person1) == hash::hash(&person2)); - * ``` - */ +//! Generic hashing support. +//! +//! This module provides a generic way to compute the hash of a value. The +//! simplest way to make a type hashable is to use `#[deriving(Hash)]`: +//! +//! # Example +//! +//! ```rust +//! use std::hash; +//! use std::hash::Hash; +//! +//! #[deriving(Hash)] +//! struct Person { +//! id: uint, +//! name: String, +//! phone: u64, +//! } +//! +//! let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 }; +//! let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 }; +//! +//! assert!(hash::hash(&person1) != hash::hash(&person2)); +//! ``` +//! +//! If you need more control over how a value is hashed, you need to implement +//! the trait `Hash`: +//! +//! ```rust +//! use std::hash; +//! use std::hash::Hash; +//! use std::hash::sip::SipState; +//! +//! struct Person { +//! id: uint, +//! name: String, +//! phone: u64, +//! } +//! +//! impl Hash for Person { +//! fn hash(&self, state: &mut SipState) { +//! self.id.hash(state); +//! self.phone.hash(state); +//! } +//! } +//! +//! let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 }; +//! let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 }; +//! +//! assert!(hash::hash(&person1) == hash::hash(&person2)); +//! ``` #![allow(unused_must_use)] diff --git a/src/libcore/clone.rs b/src/libcore/clone.rs index d13daf0964a1aeb0ce74132ffa308372243485c5..9f928f57e9e400c856c27b81511c8141c8d81557 100644 --- a/src/libcore/clone.rs +++ b/src/libcore/clone.rs @@ -8,18 +8,16 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! The `Clone` trait for types that cannot be 'implicitly copied' - -In Rust, some simple types are "implicitly copyable" and when you -assign them or pass them as arguments, the receiver will get a copy, -leaving the original value in place. 
These types do not require -allocation to copy and do not have finalizers (i.e. they do not -contain owned boxes or implement `Drop`), so the compiler considers -them cheap and safe to copy. For other types copies must be made -explicitly, by convention implementing the `Clone` trait and calling -the `clone` method. - -*/ +//! The `Clone` trait for types that cannot be 'implicitly copied' +//! +//! In Rust, some simple types are "implicitly copyable" and when you +//! assign them or pass them as arguments, the receiver will get a copy, +//! leaving the original value in place. These types do not require +//! allocation to copy and do not have finalizers (i.e. they do not +//! contain owned boxes or implement `Drop`), so the compiler considers +//! them cheap and safe to copy. For other types copies must be made +//! explicitly, by convention implementing the `Clone` trait and calling +//! the `clone` method. #![unstable] diff --git a/src/libcore/finally.rs b/src/libcore/finally.rs index 2e358e7a74b648dd0667ae6b052a6601f48c2d14..8bfdd934477239adafb2a6c56dfdd6b3eb3d5563 100644 --- a/src/libcore/finally.rs +++ b/src/libcore/finally.rs @@ -8,27 +8,25 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! -The Finally trait provides a method, `finally` on -stack closures that emulates Java-style try/finally blocks. - -Using the `finally` method is sometimes convenient, but the type rules -prohibit any shared, mutable state between the "try" case and the -"finally" case. For advanced cases, the `try_finally` function can -also be used. See that function for more details. - -# Example - -``` -use std::finally::Finally; - -(|| { - // ... -}).finally(|| { - // this code is always run -}) -``` -*/ +//! The Finally trait provides a method, `finally` on +//! stack closures that emulates Java-style try/finally blocks. +//! +//! Using the `finally` method is sometimes convenient, but the type rules +//! prohibit any shared, mutable state between the "try" case and the +//! "finally" case. For advanced cases, the `try_finally` function can +//! also be used. See that function for more details. +//! +//! # Example +//! +//! ``` +//! use std::finally::Finally; +//! +//! (|| { +//! // ... +//! }).finally(|| { +//! // this code is always run +//! }) +//! ``` #![experimental] diff --git a/src/libcore/intrinsics.rs b/src/libcore/intrinsics.rs index 067ef47a86b9f9a44bc35ba7bae49310971892d5..78c74075d4867107131380631481c8d2ac0f0197 100644 --- a/src/libcore/intrinsics.rs +++ b/src/libcore/intrinsics.rs @@ -8,38 +8,36 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! rustc compiler intrinsics. - -The corresponding definitions are in librustc/middle/trans/foreign.rs. - -# Volatiles - -The volatile intrinsics provide operations intended to act on I/O -memory, which are guaranteed to not be reordered by the compiler -across other volatile intrinsics. See the LLVM documentation on -[[volatile]]. - -[volatile]: http://llvm.org/docs/LangRef.html#volatile-memory-accesses - -# Atomics - -The atomic intrinsics provide common atomic operations on machine -words, with multiple possible memory orderings. They obey the same -semantics as C++11. See the LLVM documentation on [[atomics]]. - -[atomics]: http://llvm.org/docs/Atomics.html - -A quick refresher on memory ordering: - -* Acquire - a barrier for acquiring a lock. Subsequent reads and writes - take place after the barrier. 
-* Release - a barrier for releasing a lock. Preceding reads and writes - take place before the barrier. -* Sequentially consistent - sequentially consistent operations are - guaranteed to happen in order. This is the standard mode for working - with atomic types and is equivalent to Java's `volatile`. - -*/ +//! rustc compiler intrinsics. +//! +//! The corresponding definitions are in librustc/middle/trans/foreign.rs. +//! +//! # Volatiles +//! +//! The volatile intrinsics provide operations intended to act on I/O +//! memory, which are guaranteed to not be reordered by the compiler +//! across other volatile intrinsics. See the LLVM documentation on +//! [[volatile]]. +//! +//! [volatile]: http://llvm.org/docs/LangRef.html#volatile-memory-accesses +//! +//! # Atomics +//! +//! The atomic intrinsics provide common atomic operations on machine +//! words, with multiple possible memory orderings. They obey the same +//! semantics as C++11. See the LLVM documentation on [[atomics]]. +//! +//! [atomics]: http://llvm.org/docs/Atomics.html +//! +//! A quick refresher on memory ordering: +//! +//! * Acquire - a barrier for acquiring a lock. Subsequent reads and writes +//! take place after the barrier. +//! * Release - a barrier for releasing a lock. Preceding reads and writes +//! take place before the barrier. +//! * Sequentially consistent - sequentially consistent operations are +//! guaranteed to happen in order. This is the standard mode for working +//! with atomic types and is equivalent to Java's `volatile`. #![experimental] #![allow(missing_docs)] diff --git a/src/libcore/iter.rs b/src/libcore/iter.rs index 496e7979b726eeec3def4f13d6fee4ad05769d18..2d488a4b15563196d0f2a5526d3171941e00f68c 100644 --- a/src/libcore/iter.rs +++ b/src/libcore/iter.rs @@ -8,55 +8,51 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Composable external iterators - -# The `Iterator` trait - -This module defines Rust's core iteration trait. The `Iterator` trait has one -unimplemented method, `next`. All other methods are derived through default -methods to perform operations such as `zip`, `chain`, `enumerate`, and `fold`. - -The goal of this module is to unify iteration across all containers in Rust. -An iterator can be considered as a state machine which is used to track which -element will be yielded next. - -There are various extensions also defined in this module to assist with various -types of iteration, such as the `DoubleEndedIterator` for iterating in reverse, -the `FromIterator` trait for creating a container from an iterator, and much -more. - -## Rust's `for` loop - -The special syntax used by rust's `for` loop is based around the `Iterator` -trait defined in this module. For loops can be viewed as a syntactical expansion -into a `loop`, for example, the `for` loop in this example is essentially -translated to the `loop` below. - -```rust -let values = vec![1i, 2, 3]; - -// "Syntactical sugar" taking advantage of an iterator -for &x in values.iter() { - println!("{}", x); -} - -// Rough translation of the iteration without a `for` iterator. -let mut it = values.iter(); -loop { - match it.next() { - Some(&x) => { - println!("{}", x); - } - None => { break } - } -} -``` - -This `for` loop syntax can be applied to any iterator over any type. - -*/ +//! Composable external iterators +//! +//! # The `Iterator` trait +//! +//! This module defines Rust's core iteration trait. The `Iterator` trait has one +//! unimplemented method, `next`. 
All other methods are derived through default +//! methods to perform operations such as `zip`, `chain`, `enumerate`, and `fold`. +//! +//! The goal of this module is to unify iteration across all containers in Rust. +//! An iterator can be considered as a state machine which is used to track which +//! element will be yielded next. +//! +//! There are various extensions also defined in this module to assist with various +//! types of iteration, such as the `DoubleEndedIterator` for iterating in reverse, +//! the `FromIterator` trait for creating a container from an iterator, and much +//! more. +//! +//! ## Rust's `for` loop +//! +//! The special syntax used by rust's `for` loop is based around the `Iterator` +//! trait defined in this module. For loops can be viewed as a syntactical expansion +//! into a `loop`, for example, the `for` loop in this example is essentially +//! translated to the `loop` below. +//! +//! ```rust +//! let values = vec![1i, 2, 3]; +//! +//! // "Syntactical sugar" taking advantage of an iterator +//! for &x in values.iter() { +//! println!("{}", x); +//! } +//! +//! // Rough translation of the iteration without a `for` iterator. +//! let mut it = values.iter(); +//! loop { +//! match it.next() { +//! Some(&x) => { +//! println!("{}", x); +//! } +//! None => { break } +//! } +//! } +//! ``` +//! +//! This `for` loop syntax can be applied to any iterator over any type. pub use self::MinMaxResult::*; diff --git a/src/libcore/kinds.rs b/src/libcore/kinds.rs index 6489101f7b980d184ac187a88044f218b9138d1b..0c2cb9d5910056cd385c6ab84d0de9f2493b96aa 100644 --- a/src/libcore/kinds.rs +++ b/src/libcore/kinds.rs @@ -8,17 +8,14 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! -Primitive traits representing basic 'kinds' of types - -Rust types can be classified in various useful ways according to -intrinsic properties of the type. These classifications, often called -'kinds', are represented as traits. - -They cannot be implemented by user code, but are instead implemented -by the compiler automatically for the types to which they apply. - -*/ +//! Primitive traits representing basic 'kinds' of types +//! +//! Rust types can be classified in various useful ways according to +//! intrinsic properties of the type. These classifications, often called +//! 'kinds', are represented as traits. +//! +//! They cannot be implemented by user code, but are instead implemented +//! by the compiler automatically for the types to which they apply. /// Types able to be transferred across task boundaries. #[lang="send"] diff --git a/src/libcore/ops.rs b/src/libcore/ops.rs index 185c937eb6b37b610300215c791ec9f6ae103e6b..519dfd47fd8e33d651907d478887015814ef29fc 100644 --- a/src/libcore/ops.rs +++ b/src/libcore/ops.rs @@ -8,52 +8,48 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * - * Overloadable operators - * - * Implementing these traits allows you to get an effect similar to - * overloading operators. - * - * The values for the right hand side of an operator are automatically - * borrowed, so `a + b` is sugar for `a.add(&b)`. - * - * All of these traits are imported by the prelude, so they are available in - * every Rust program. - * - * # Example - * - * This example creates a `Point` struct that implements `Add` and `Sub`, and then - * demonstrates adding and subtracting two `Point`s. 

- *
- * ```rust
- * #[deriving(Show)]
- * struct Point {
- *     x: int,
- *     y: int
- * }
- *
- * impl Add<Point, Point> for Point {
- *     fn add(&self, other: &Point) -> Point {
- *         Point {x: self.x + other.x, y: self.y + other.y}
- *     }
- * }
- *
- * impl Sub<Point, Point> for Point {
- *     fn sub(&self, other: &Point) -> Point {
- *         Point {x: self.x - other.x, y: self.y - other.y}
- *     }
- * }
- * fn main() {
- *     println!("{}", Point {x: 1, y: 0} + Point {x: 2, y: 3});
- *     println!("{}", Point {x: 1, y: 0} - Point {x: 2, y: 3});
- * }
- * ```
- *
- * See the documentation for each trait for a minimum implementation that prints
- * something to the screen.
- *
- */
+//! Overloadable operators
+//!
+//! Implementing these traits allows you to get an effect similar to
+//! overloading operators.
+//!
+//! The values for the right hand side of an operator are automatically
+//! borrowed, so `a + b` is sugar for `a.add(&b)`.
+//!
+//! All of these traits are imported by the prelude, so they are available in
+//! every Rust program.
+//!
+//! # Example
+//!
+//! This example creates a `Point` struct that implements `Add` and `Sub`, and then
+//! demonstrates adding and subtracting two `Point`s.
+//!
+//! ```rust
+//! #[deriving(Show)]
+//! struct Point {
+//!     x: int,
+//!     y: int
+//! }
+//!
+//! impl Add<Point, Point> for Point {
+//!     fn add(&self, other: &Point) -> Point {
+//!         Point {x: self.x + other.x, y: self.y + other.y}
+//!     }
+//! }
+//!
+//! impl Sub<Point, Point> for Point {
+//!     fn sub(&self, other: &Point) -> Point {
+//!         Point {x: self.x - other.x, y: self.y - other.y}
+//!     }
+//! }
+//! fn main() {
+//!     println!("{}", Point {x: 1, y: 0} + Point {x: 2, y: 3});
+//!     println!("{}", Point {x: 1, y: 0} - Point {x: 2, y: 3});
+//! }
+//! ```
+//!
+//! See the documentation for each trait for a minimum implementation that prints
+//! something to the screen.

 use kinds::Sized;
diff --git a/src/libflate/lib.rs b/src/libflate/lib.rs
index 568210118a8ae0f0aa071ed84d3b4463a31e9312..36a04392c36f3028619b000b719cc263ff720c31 100644
--- a/src/libflate/lib.rs
+++ b/src/libflate/lib.rs
@@ -8,15 +8,11 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-
-Simple [DEFLATE][def]-based compression. This is a wrapper around the
-[`miniz`][mz] library, which is a one-file pure-C implementation of zlib.
-
-[def]: https://en.wikipedia.org/wiki/DEFLATE
-[mz]: https://code.google.com/p/miniz/
-
-*/
+//! Simple [DEFLATE][def]-based compression. This is a wrapper around the
+//! [`miniz`][mz] library, which is a one-file pure-C implementation of zlib.
+//!
+//! [def]: https://en.wikipedia.org/wiki/DEFLATE
+//! [mz]: https://code.google.com/p/miniz/

 #![crate_name = "flate"]
 #![experimental]
diff --git a/src/libgraphviz/lib.rs b/src/libgraphviz/lib.rs
index f149ec509af053f0be6eb05e03526372c774f54d..04eeeb62e1d359cb31fa193aaafd7840589ddcd5 100644
--- a/src/libgraphviz/lib.rs
+++ b/src/libgraphviz/lib.rs
@@ -8,260 +8,258 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*! Generate files suitable for use with [Graphviz](http://www.graphviz.org/)
-
-The `render` function generates output (e.g. an `output.dot` file) for
-use with [Graphviz](http://www.graphviz.org/) by walking a labelled
-graph. (Graphviz can then automatically lay out the nodes and edges
-of the graph, and also optionally render the graph as an image or
-other [output formats](
-http://www.graphviz.org/content/output-formats), such as SVG.)
- -Rather than impose some particular graph data structure on clients, -this library exposes two traits that clients can implement on their -own structs before handing them over to the rendering function. - -Note: This library does not yet provide access to the full -expressiveness of the [DOT language]( -http://www.graphviz.org/doc/info/lang.html). For example, there are -many [attributes](http://www.graphviz.org/content/attrs) related to -providing layout hints (e.g. left-to-right versus top-down, which -algorithm to use, etc). The current intention of this library is to -emit a human-readable .dot file with very regular structure suitable -for easy post-processing. - -# Examples - -The first example uses a very simple graph representation: a list of -pairs of ints, representing the edges (the node set is implicit). -Each node label is derived directly from the int representing the node, -while the edge labels are all empty strings. - -This example also illustrates how to use `CowVec` to return -an owned vector or a borrowed slice as appropriate: we construct the -node vector from scratch, but borrow the edge list (rather than -constructing a copy of all the edges from scratch). - -The output from this example renders five nodes, with the first four -forming a diamond-shaped acyclic graph and then pointing to the fifth -which is cyclic. - -```rust -use graphviz as dot; - -type Nd = int; -type Ed = (int,int); -struct Edges(Vec); - -pub fn render_to(output: &mut W) { - let edges = Edges(vec!((0,1), (0,2), (1,3), (2,3), (3,4), (4,4))); - dot::render(&edges, output).unwrap() -} - -impl<'a> dot::Labeller<'a, Nd, Ed> for Edges { - fn graph_id(&'a self) -> dot::Id<'a> { dot::Id::new("example1").unwrap() } - - fn node_id(&'a self, n: &Nd) -> dot::Id<'a> { - dot::Id::new(format!("N{}", *n)).unwrap() - } -} - -impl<'a> dot::GraphWalk<'a, Nd, Ed> for Edges { - fn nodes(&self) -> dot::Nodes<'a,Nd> { - // (assumes that |N| \approxeq |E|) - let &Edges(ref v) = self; - let mut nodes = Vec::with_capacity(v.len()); - for &(s,t) in v.iter() { - nodes.push(s); nodes.push(t); - } - nodes.sort(); - nodes.dedup(); - nodes.into_cow() - } - - fn edges(&'a self) -> dot::Edges<'a,Ed> { - let &Edges(ref edges) = self; - edges.as_slice().into_cow() - } - - fn source(&self, e: &Ed) -> Nd { let &(s,_) = e; s } - - fn target(&self, e: &Ed) -> Nd { let &(_,t) = e; t } -} - -# pub fn main() { render_to(&mut Vec::new()) } -``` - -```no_run -# pub fn render_to(output: &mut W) { unimplemented!() } -pub fn main() { - use std::io::File; - let mut f = File::create(&Path::new("example1.dot")); - render_to(&mut f) -} -``` - -Output from first example (in `example1.dot`): - -```ignore -digraph example1 { - N0[label="N0"]; - N1[label="N1"]; - N2[label="N2"]; - N3[label="N3"]; - N4[label="N4"]; - N0 -> N1[label=""]; - N0 -> N2[label=""]; - N1 -> N3[label=""]; - N2 -> N3[label=""]; - N3 -> N4[label=""]; - N4 -> N4[label=""]; -} -``` - -The second example illustrates using `node_label` and `edge_label` to -add labels to the nodes and edges in the rendered graph. The graph -here carries both `nodes` (the label text to use for rendering a -particular node), and `edges` (again a list of `(source,target)` -indices). - -This example also illustrates how to use a type (in this case the edge -type) that shares substructure with the graph: the edge type here is a -direct reference to the `(source,target)` pair stored in the graph's -internal vector (rather than passing around a copy of the pair -itself). 
Note that this implies that `fn edges(&'a self)` must -construct a fresh `Vec<&'a (uint,uint)>` from the `Vec<(uint,uint)>` -edges stored in `self`. - -Since both the set of nodes and the set of edges are always -constructed from scratch via iterators, we use the `collect()` method -from the `Iterator` trait to collect the nodes and edges into freshly -constructed growable `Vec` values (rather use the `into_cow` -from the `IntoCow` trait as was used in the first example -above). - -The output from this example renders four nodes that make up the -Hasse-diagram for the subsets of the set `{x, y}`. Each edge is -labelled with the ⊆ character (specified using the HTML character -entity `&sube`). - -```rust -use graphviz as dot; - -type Nd = uint; -type Ed<'a> = &'a (uint, uint); -struct Graph { nodes: Vec<&'static str>, edges: Vec<(uint,uint)> } - -pub fn render_to(output: &mut W) { - let nodes = vec!("{x,y}","{x}","{y}","{}"); - let edges = vec!((0,1), (0,2), (1,3), (2,3)); - let graph = Graph { nodes: nodes, edges: edges }; - - dot::render(&graph, output).unwrap() -} - -impl<'a> dot::Labeller<'a, Nd, Ed<'a>> for Graph { - fn graph_id(&'a self) -> dot::Id<'a> { dot::Id::new("example2").unwrap() } - fn node_id(&'a self, n: &Nd) -> dot::Id<'a> { - dot::Id::new(format!("N{}", n)).unwrap() - } - fn node_label<'a>(&'a self, n: &Nd) -> dot::LabelText<'a> { - dot::LabelStr(self.nodes[*n].as_slice().into_cow()) - } - fn edge_label<'a>(&'a self, _: &Ed) -> dot::LabelText<'a> { - dot::LabelStr("⊆".into_cow()) - } -} - -impl<'a> dot::GraphWalk<'a, Nd, Ed<'a>> for Graph { - fn nodes(&self) -> dot::Nodes<'a,Nd> { range(0,self.nodes.len()).collect() } - fn edges(&'a self) -> dot::Edges<'a,Ed<'a>> { self.edges.iter().collect() } - fn source(&self, e: &Ed) -> Nd { let & &(s,_) = e; s } - fn target(&self, e: &Ed) -> Nd { let & &(_,t) = e; t } -} - -# pub fn main() { render_to(&mut Vec::new()) } -``` - -```no_run -# pub fn render_to(output: &mut W) { unimplemented!() } -pub fn main() { - use std::io::File; - let mut f = File::create(&Path::new("example2.dot")); - render_to(&mut f) -} -``` - -The third example is similar to the second, except now each node and -edge now carries a reference to the string label for each node as well -as that node's index. (This is another illustration of how to share -structure with the graph itself, and why one might want to do so.) - -The output from this example is the same as the second example: the -Hasse-diagram for the subsets of the set `{x, y}`. 
- -```rust -use graphviz as dot; - -type Nd<'a> = (uint, &'a str); -type Ed<'a> = (Nd<'a>, Nd<'a>); -struct Graph { nodes: Vec<&'static str>, edges: Vec<(uint,uint)> } - -pub fn render_to(output: &mut W) { - let nodes = vec!("{x,y}","{x}","{y}","{}"); - let edges = vec!((0,1), (0,2), (1,3), (2,3)); - let graph = Graph { nodes: nodes, edges: edges }; - - dot::render(&graph, output).unwrap() -} - -impl<'a> dot::Labeller<'a, Nd<'a>, Ed<'a>> for Graph { - fn graph_id(&'a self) -> dot::Id<'a> { dot::Id::new("example3").unwrap() } - fn node_id(&'a self, n: &Nd<'a>) -> dot::Id<'a> { - dot::Id::new(format!("N{}", n.val0())).unwrap() - } - fn node_label<'a>(&'a self, n: &Nd<'a>) -> dot::LabelText<'a> { - let &(i, _) = n; - dot::LabelStr(self.nodes[i].as_slice().into_cow()) - } - fn edge_label<'a>(&'a self, _: &Ed<'a>) -> dot::LabelText<'a> { - dot::LabelStr("⊆".into_cow()) - } -} - -impl<'a> dot::GraphWalk<'a, Nd<'a>, Ed<'a>> for Graph { - fn nodes(&'a self) -> dot::Nodes<'a,Nd<'a>> { - self.nodes.iter().map(|s|s.as_slice()).enumerate().collect() - } - fn edges(&'a self) -> dot::Edges<'a,Ed<'a>> { - self.edges.iter() - .map(|&(i,j)|((i, self.nodes[i].as_slice()), - (j, self.nodes[j].as_slice()))) - .collect() - } - fn source(&self, e: &Ed<'a>) -> Nd<'a> { let &(s,_) = e; s } - fn target(&self, e: &Ed<'a>) -> Nd<'a> { let &(_,t) = e; t } -} - -# pub fn main() { render_to(&mut Vec::new()) } -``` - -```no_run -# pub fn render_to(output: &mut W) { unimplemented!() } -pub fn main() { - use std::io::File; - let mut f = File::create(&Path::new("example3.dot")); - render_to(&mut f) -} -``` - -# References - -* [Graphviz](http://www.graphviz.org/) - -* [DOT language](http://www.graphviz.org/doc/info/lang.html) - -*/ +//! Generate files suitable for use with [Graphviz](http://www.graphviz.org/) +//! +//! The `render` function generates output (e.g. an `output.dot` file) for +//! use with [Graphviz](http://www.graphviz.org/) by walking a labelled +//! graph. (Graphviz can then automatically lay out the nodes and edges +//! of the graph, and also optionally render the graph as an image or +//! other [output formats]( +//! http://www.graphviz.org/content/output-formats), such as SVG.) +//! +//! Rather than impose some particular graph data structure on clients, +//! this library exposes two traits that clients can implement on their +//! own structs before handing them over to the rendering function. +//! +//! Note: This library does not yet provide access to the full +//! expressiveness of the [DOT language]( +//! http://www.graphviz.org/doc/info/lang.html). For example, there are +//! many [attributes](http://www.graphviz.org/content/attrs) related to +//! providing layout hints (e.g. left-to-right versus top-down, which +//! algorithm to use, etc). The current intention of this library is to +//! emit a human-readable .dot file with very regular structure suitable +//! for easy post-processing. +//! +//! # Examples +//! +//! The first example uses a very simple graph representation: a list of +//! pairs of ints, representing the edges (the node set is implicit). +//! Each node label is derived directly from the int representing the node, +//! while the edge labels are all empty strings. +//! +//! This example also illustrates how to use `CowVec` to return +//! an owned vector or a borrowed slice as appropriate: we construct the +//! node vector from scratch, but borrow the edge list (rather than +//! constructing a copy of all the edges from scratch). +//! +//! 
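(The `Nodes` and `Edges` collections used by `GraphWalk` are
+//! clone-on-write vectors. As a rough sketch, not the exact definitions
+//! in this crate's source, the aliases look like this:)
+//!
+//! ```ignore
+//! // Hypothetical shape of the aliases; see this crate's source for
+//! // the real definitions.
+//! pub type Nodes<'a, N> = CowVec<'a, N>;
+//! pub type Edges<'a, E> = CowVec<'a, E>;
+//! ```
+//!
+//! 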
The output from this example renders five nodes, with the first four
+//! forming a diamond-shaped acyclic graph and then pointing to the fifth
+//! which is cyclic.
+//!
+//! ```rust
+//! use graphviz as dot;
+//!
+//! type Nd = int;
+//! type Ed = (int,int);
+//! struct Edges(Vec<Ed>);
+//!
+//! pub fn render_to<W: Writer>(output: &mut W) {
+//!     let edges = Edges(vec!((0,1), (0,2), (1,3), (2,3), (3,4), (4,4)));
+//!     dot::render(&edges, output).unwrap()
+//! }
+//!
+//! impl<'a> dot::Labeller<'a, Nd, Ed> for Edges {
+//!     fn graph_id(&'a self) -> dot::Id<'a> { dot::Id::new("example1").unwrap() }
+//!
+//!     fn node_id(&'a self, n: &Nd) -> dot::Id<'a> {
+//!         dot::Id::new(format!("N{}", *n)).unwrap()
+//!     }
+//! }
+//!
+//! impl<'a> dot::GraphWalk<'a, Nd, Ed> for Edges {
+//!     fn nodes(&self) -> dot::Nodes<'a,Nd> {
+//!         // (assumes that |N| \approxeq |E|)
+//!         let &Edges(ref v) = self;
+//!         let mut nodes = Vec::with_capacity(v.len());
+//!         for &(s,t) in v.iter() {
+//!             nodes.push(s); nodes.push(t);
+//!         }
+//!         nodes.sort();
+//!         nodes.dedup();
+//!         nodes.into_cow()
+//!     }
+//!
+//!     fn edges(&'a self) -> dot::Edges<'a,Ed> {
+//!         let &Edges(ref edges) = self;
+//!         edges.as_slice().into_cow()
+//!     }
+//!
+//!     fn source(&self, e: &Ed) -> Nd { let &(s,_) = e; s }
+//!
+//!     fn target(&self, e: &Ed) -> Nd { let &(_,t) = e; t }
+//! }
+//!
+//! # pub fn main() { render_to(&mut Vec::new()) }
+//! ```
+//!
+//! ```no_run
+//! # pub fn render_to<W: Writer>(output: &mut W) { unimplemented!() }
+//! pub fn main() {
+//!     use std::io::File;
+//!     let mut f = File::create(&Path::new("example1.dot"));
+//!     render_to(&mut f)
+//! }
+//! ```
+//!
+//! Output from first example (in `example1.dot`):
+//!
+//! ```ignore
+//! digraph example1 {
+//!     N0[label="N0"];
+//!     N1[label="N1"];
+//!     N2[label="N2"];
+//!     N3[label="N3"];
+//!     N4[label="N4"];
+//!     N0 -> N1[label=""];
+//!     N0 -> N2[label=""];
+//!     N1 -> N3[label=""];
+//!     N2 -> N3[label=""];
+//!     N3 -> N4[label=""];
+//!     N4 -> N4[label=""];
+//! }
+//! ```
+//!
+//! The second example illustrates using `node_label` and `edge_label` to
+//! add labels to the nodes and edges in the rendered graph. The graph
+//! here carries both `nodes` (the label text to use for rendering a
+//! particular node), and `edges` (again a list of `(source,target)`
+//! indices).
+//!
+//! This example also illustrates how to use a type (in this case the edge
+//! type) that shares substructure with the graph: the edge type here is a
+//! direct reference to the `(source,target)` pair stored in the graph's
+//! internal vector (rather than passing around a copy of the pair
+//! itself). Note that this implies that `fn edges(&'a self)` must
+//! construct a fresh `Vec<&'a (uint,uint)>` from the `Vec<(uint,uint)>`
+//! edges stored in `self`.
+//!
+//! Since both the set of nodes and the set of edges are always
+//! constructed from scratch via iterators, we use the `collect()` method
+//! from the `Iterator` trait to collect the nodes and edges into freshly
+//! constructed growable `Vec` values (rather than using the `into_cow`
+//! from the `IntoCow` trait as was used in the first example
+//! above).
+//!
+//! The output from this example renders four nodes that make up the
+//! Hasse-diagram for the subsets of the set `{x, y}`. Each edge is
+//! labelled with the ⊆ character (specified using the HTML character
+//! entity `&sube`).
+//!
+//! ```rust
+//! use graphviz as dot;
+//!
+//! type Nd = uint;
+//! type Ed<'a> = &'a (uint, uint);
+//! struct Graph { nodes: Vec<&'static str>, edges: Vec<(uint,uint)> }
+//!
+//! pub fn render_to<W: Writer>(output: &mut W) {
+//!     let nodes = vec!("{x,y}","{x}","{y}","{}");
+//!     let edges = vec!((0,1), (0,2), (1,3), (2,3));
+//!     let graph = Graph { nodes: nodes, edges: edges };
+//!
+//!     dot::render(&graph, output).unwrap()
+//! }
+//!
+//! impl<'a> dot::Labeller<'a, Nd, Ed<'a>> for Graph {
+//!     fn graph_id(&'a self) -> dot::Id<'a> { dot::Id::new("example2").unwrap() }
+//!     fn node_id(&'a self, n: &Nd) -> dot::Id<'a> {
+//!         dot::Id::new(format!("N{}", n)).unwrap()
+//!     }
+//!     fn node_label<'a>(&'a self, n: &Nd) -> dot::LabelText<'a> {
+//!         dot::LabelStr(self.nodes[*n].as_slice().into_cow())
+//!     }
+//!     fn edge_label<'a>(&'a self, _: &Ed) -> dot::LabelText<'a> {
+//!         dot::LabelStr("⊆".into_cow())
+//!     }
+//! }
+//!
+//! impl<'a> dot::GraphWalk<'a, Nd, Ed<'a>> for Graph {
+//!     fn nodes(&self) -> dot::Nodes<'a,Nd> { range(0,self.nodes.len()).collect() }
+//!     fn edges(&'a self) -> dot::Edges<'a,Ed<'a>> { self.edges.iter().collect() }
+//!     fn source(&self, e: &Ed) -> Nd { let & &(s,_) = e; s }
+//!     fn target(&self, e: &Ed) -> Nd { let & &(_,t) = e; t }
+//! }
+//!
+//! # pub fn main() { render_to(&mut Vec::new()) }
+//! ```
+//!
+//! ```no_run
+//! # pub fn render_to<W: Writer>(output: &mut W) { unimplemented!() }
+//! pub fn main() {
+//!     use std::io::File;
+//!     let mut f = File::create(&Path::new("example2.dot"));
+//!     render_to(&mut f)
+//! }
+//! ```
+//!
+//! The third example is similar to the second, except now each node and
+//! edge carries a reference to the string label for each node as well
+//! as that node's index. (This is another illustration of how to share
+//! structure with the graph itself, and why one might want to do so.)
+//!
+//! The output from this example is the same as the second example: the
+//! Hasse-diagram for the subsets of the set `{x, y}`.
+//!
+//! ```rust
+//! use graphviz as dot;
+//!
+//! type Nd<'a> = (uint, &'a str);
+//! type Ed<'a> = (Nd<'a>, Nd<'a>);
+//! struct Graph { nodes: Vec<&'static str>, edges: Vec<(uint,uint)> }
+//!
+//! pub fn render_to<W: Writer>(output: &mut W) {
+//!     let nodes = vec!("{x,y}","{x}","{y}","{}");
+//!     let edges = vec!((0,1), (0,2), (1,3), (2,3));
+//!     let graph = Graph { nodes: nodes, edges: edges };
+//!
+//!     dot::render(&graph, output).unwrap()
+//! }
+//!
+//! impl<'a> dot::Labeller<'a, Nd<'a>, Ed<'a>> for Graph {
+//!     fn graph_id(&'a self) -> dot::Id<'a> { dot::Id::new("example3").unwrap() }
+//!     fn node_id(&'a self, n: &Nd<'a>) -> dot::Id<'a> {
+//!         dot::Id::new(format!("N{}", n.val0())).unwrap()
+//!     }
+//!     fn node_label<'a>(&'a self, n: &Nd<'a>) -> dot::LabelText<'a> {
+//!         let &(i, _) = n;
+//!         dot::LabelStr(self.nodes[i].as_slice().into_cow())
+//!     }
+//!     fn edge_label<'a>(&'a self, _: &Ed<'a>) -> dot::LabelText<'a> {
+//!         dot::LabelStr("⊆".into_cow())
+//!     }
+//! }
+//!
+//! impl<'a> dot::GraphWalk<'a, Nd<'a>, Ed<'a>> for Graph {
+//!     fn nodes(&'a self) -> dot::Nodes<'a,Nd<'a>> {
+//!         self.nodes.iter().map(|s|s.as_slice()).enumerate().collect()
+//!     }
+//!     fn edges(&'a self) -> dot::Edges<'a,Ed<'a>> {
+//!         self.edges.iter()
+//!             .map(|&(i,j)|((i, self.nodes[i].as_slice()),
+//!                           (j, self.nodes[j].as_slice())))
+//!             .collect()
+//!     }
+//!     fn source(&self, e: &Ed<'a>) -> Nd<'a> { let &(s,_) = e; s }
+//!     fn target(&self, e: &Ed<'a>) -> Nd<'a> { let &(_,t) = e; t }
+//! }
+//!
+//! # pub fn main() { render_to(&mut Vec::new()) }
+//! ```
+//!
+//! ```no_run
+//! # pub fn render_to<W: Writer>(output: &mut W) { unimplemented!() }
+//! pub fn main() {
+//!     use std::io::File;
+//!     
let mut f = File::create(&Path::new("example3.dot")); +//! render_to(&mut f) +//! } +//! ``` +//! +//! # References +//! +//! * [Graphviz](http://www.graphviz.org/) +//! +//! * [DOT language](http://www.graphviz.org/doc/info/lang.html) #![crate_name = "graphviz"] #![experimental] diff --git a/src/liblibc/lib.rs b/src/liblibc/lib.rs index 10610b705840650b6a227d901ea903f62a99e01b..0014a3e3941d75875bd29349d1188335ddad571f 100644 --- a/src/liblibc/lib.rs +++ b/src/liblibc/lib.rs @@ -19,59 +19,57 @@ html_root_url = "http://doc.rust-lang.org/nightly/", html_playground_url = "http://play.rust-lang.org/")] -/*! -* Bindings for the C standard library and other platform libraries -* -* **NOTE:** These are *architecture and libc* specific. On Linux, these -* bindings are only correct for glibc. -* -* This module contains bindings to the C standard library, organized into -* modules by their defining standard. Additionally, it contains some assorted -* platform-specific definitions. For convenience, most functions and types -* are reexported, so `use libc::*` will import the available C bindings as -* appropriate for the target platform. The exact set of functions available -* are platform specific. -* -* *Note:* Because these definitions are platform-specific, some may not appear -* in the generated documentation. -* -* We consider the following specs reasonably normative with respect to -* interoperating with the C standard library (libc/msvcrt): -* -* * ISO 9899:1990 ('C95', 'ANSI C', 'Standard C'), NA1, 1995. -* * ISO 9899:1999 ('C99' or 'C9x'). -* * ISO 9945:1988 / IEEE 1003.1-1988 ('POSIX.1'). -* * ISO 9945:2001 / IEEE 1003.1-2001 ('POSIX:2001', 'SUSv3'). -* * ISO 9945:2008 / IEEE 1003.1-2008 ('POSIX:2008', 'SUSv4'). -* -* Note that any reference to the 1996 revision of POSIX, or any revs between -* 1990 (when '88 was approved at ISO) and 2001 (when the next actual -* revision-revision happened), are merely additions of other chapters (1b and -* 1c) outside the core interfaces. -* -* Despite having several names each, these are *reasonably* coherent -* point-in-time, list-of-definition sorts of specs. You can get each under a -* variety of names but will wind up with the same definition in each case. -* -* See standards(7) in linux-manpages for more details. -* -* Our interface to these libraries is complicated by the non-universality of -* conformance to any of them. About the only thing universally supported is -* the first (C95), beyond that definitions quickly become absent on various -* platforms. -* -* We therefore wind up dividing our module-space up (mostly for the sake of -* sanity while editing, filling-in-details and eliminating duplication) into -* definitions common-to-all (held in modules named c95, c99, posix88, posix01 -* and posix08) and definitions that appear only on *some* platforms (named -* 'extra'). This would be things like significant OSX foundation kit, or Windows -* library kernel32.dll, or various fancy glibc, Linux or BSD extensions. -* -* In addition to the per-platform 'extra' modules, we define a module of -* 'common BSD' libc routines that never quite made it into POSIX but show up -* in multiple derived systems. This is the 4.4BSD r2 / 1995 release, the final -* one from Berkeley after the lawsuits died down and the CSRG dissolved. -*/ +//! Bindings for the C standard library and other platform libraries +//! +//! **NOTE:** These are *architecture and libc* specific. On Linux, these +//! bindings are only correct for glibc. +//! +//! 
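As a quick taste, a C function can be called directly through these
+//! bindings. The following is a minimal sketch, assuming a POSIX target
+//! where `getpid` is re-exported at the crate root:
+//!
+//! ```no_run
+//! extern crate libc;
+//!
+//! fn main() {
+//!     // Calling into C is unsafe; getpid(2) itself cannot fail.
+//!     let pid = unsafe { libc::getpid() };
+//!     println!("current process id: {}", pid);
+//! }
+//! ```
+//!
+//! 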
This module contains bindings to the C standard library, organized into
+//! modules by their defining standard. Additionally, it contains some assorted
+//! platform-specific definitions. For convenience, most functions and types
+//! are reexported, so `use libc::*` will import the available C bindings as
+//! appropriate for the target platform. The exact set of functions available
+//! is platform-specific.
+//!
+//! *Note:* Because these definitions are platform-specific, some may not appear
+//! in the generated documentation.
+//!
+//! We consider the following specs reasonably normative with respect to
+//! interoperating with the C standard library (libc/msvcrt):
+//!
+//! * ISO 9899:1990 ('C95', 'ANSI C', 'Standard C'), NA1, 1995.
+//! * ISO 9899:1999 ('C99' or 'C9x').
+//! * ISO 9945:1988 / IEEE 1003.1-1988 ('POSIX.1').
+//! * ISO 9945:2001 / IEEE 1003.1-2001 ('POSIX:2001', 'SUSv3').
+//! * ISO 9945:2008 / IEEE 1003.1-2008 ('POSIX:2008', 'SUSv4').
+//!
+//! Note that any reference to the 1996 revision of POSIX, or any revs between
+//! 1990 (when '88 was approved at ISO) and 2001 (when the next actual
+//! revision-revision happened), are merely additions of other chapters (1b and
+//! 1c) outside the core interfaces.
+//!
+//! Despite having several names each, these are *reasonably* coherent
+//! point-in-time, list-of-definition sorts of specs. You can get each under a
+//! variety of names but will wind up with the same definition in each case.
+//!
+//! See standards(7) in linux-manpages for more details.
+//!
+//! Our interface to these libraries is complicated by the non-universality of
+//! conformance to any of them. About the only thing universally supported is
+//! the first (C95); beyond that, definitions quickly become absent on various
+//! platforms.
+//!
+//! We therefore wind up dividing our module-space up (mostly for the sake of
+//! sanity while editing, filling-in-details and eliminating duplication) into
+//! definitions common-to-all (held in modules named c95, c99, posix88, posix01
+//! and posix08) and definitions that appear only on *some* platforms (named
+//! 'extra'). This would be things like significant OSX foundation kit, or Windows
+//! library kernel32.dll, or various fancy glibc, Linux or BSD extensions.
+//!
+//! In addition to the per-platform 'extra' modules, we define a module of
+//! 'common BSD' libc routines that never quite made it into POSIX but show up
+//! in multiple derived systems. This is the 4.4BSD r2 / 1995 release, the final
+//! one from Berkeley after the lawsuits died down and the CSRG dissolved.

 #![allow(non_camel_case_types)]
 #![allow(non_snake_case)]
diff --git a/src/librand/distributions/mod.rs b/src/librand/distributions/mod.rs
index 5bbddcb7c1652892d1594d3d59df2f652f18cb65..0fa989bf0b2b9b4e8713bc1cc2581e5fba7a31bd 100644
--- a/src/librand/distributions/mod.rs
+++ b/src/librand/distributions/mod.rs
@@ -8,17 +8,14 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-Sampling from random distributions.
-
-This is a generalization of `Rand` to allow parameters to control the
-exact properties of the generated values, e.g. the mean and standard
-deviation of a normal distribution. The `Sample` trait is the most
-general, and allows for generating values that change some state
-internally. The `IndependentSample` trait is for generating values
-that do not need to record state.
-
-*/
+//! Sampling from random distributions.
+//!
+//! 
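For example, a value can be drawn from a normal distribution as follows
+//! (a minimal sketch, assuming the `Normal` distribution and the
+//! `IndependentSample` trait exported by this module):
+//!
+//! ```rust
+//! use std::rand;
+//! use std::rand::distributions::{Normal, IndependentSample};
+//!
+//! fn main() {
+//!     // A normal distribution with mean 2.0 and standard deviation 3.0.
+//!     let normal = Normal::new(2.0, 3.0);
+//!     let v = normal.ind_sample(&mut rand::task_rng());
+//!     println!("{} is from a N(2, 9) distribution", v);
+//! }
+//! ```
+//!
+//! 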
This is a generalization of `Rand` to allow parameters to control the
+//! exact properties of the generated values, e.g. the mean and standard
+//! deviation of a normal distribution. The `Sample` trait is the most
+//! general, and allows for generating values that change some state
+//! internally. The `IndependentSample` trait is for generating values
+//! that do not need to record state.

 #![experimental]
diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs
index f272bb52a1436807cb0e24c231899fa18e12c57d..c599a0f2daf7f194249e0780867a745b8a3d8486 100644
--- a/src/librustc/lib.rs
+++ b/src/librustc/lib.rs
@@ -8,15 +8,11 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-
-The Rust compiler.
-
-# Note
-
-This API is completely unstable and subject to change.
-
-*/
+//! The Rust compiler.
+//!
+//! # Note
+//!
+//! This API is completely unstable and subject to change.

 #![crate_name = "rustc"]
 #![experimental]
diff --git a/src/librustc/middle/astencode.rs b/src/librustc/middle/astencode.rs
index 7986a526b23b16188b9af78a43ff8ddce5851a6b..523e997a8deec149995e228cf531ddc7719841b9 100644
--- a/src/librustc/middle/astencode.rs
+++ b/src/librustc/middle/astencode.rs
@@ -196,53 +196,38 @@ fn reserve_id_range(sess: &Session,
 }

 impl<'a, 'b, 'tcx> DecodeContext<'a, 'b, 'tcx> {
+    /// Translates an internal id, meaning a node id that is known to refer to some part of the
+    /// item currently being inlined, such as a local variable or argument. All naked node-ids
+    /// that appear in types have this property, since if something might refer to an external item
+    /// we would use a def-id to allow for the possibility that the item resides in another crate.
     pub fn tr_id(&self, id: ast::NodeId) -> ast::NodeId {
-        /*!
-         * Translates an internal id, meaning a node id that is known
-         * to refer to some part of the item currently being inlined,
-         * such as a local variable or argument. All naked node-ids
-         * that appear in types have this property, since if something
-         * might refer to an external item we would use a def-id to
-         * allow for the possibility that the item resides in another
-         * crate.
-         */
-
         // from_id_range should be non-empty
         assert!(!self.from_id_range.empty());
         (id - self.from_id_range.min + self.to_id_range.min)
     }
+
+    /// Translates an EXTERNAL def-id, converting the crate number from the one used in the encoded
+    /// data to the current crate numbers. By external, I mean that it will be translated to a
+    /// reference to the item in its original crate, as opposed to being translated to a reference
+    /// to the inlined version of the item. This is typically, but not always, what you want,
+    /// because most def-ids refer to external things like types or other fns that may or may not
+    /// be inlined. Note that even when the inlined function is referencing itself recursively, we
+    /// would want `tr_def_id` for that reference; conceptually the function calls the original,
+    /// non-inlined version, and trans deals with linking that recursive call to the inlined copy.
+    ///
+    /// However, there are a *few* cases where def-ids are used but we know that the thing being
+    /// referenced is in fact *internal* to the item being inlined. In those cases, you should use
+    /// `tr_intern_def_id()` below.
     pub fn tr_def_id(&self, did: ast::DefId) -> ast::DefId {
-        /*!
-         * Translates an EXTERNAL def-id, converting the crate number
-         * from the one used in the encoded data to the current crate
-         * numbers.. 
By external, I mean that it be translated to a - * reference to the item in its original crate, as opposed to - * being translated to a reference to the inlined version of - * the item. This is typically, but not always, what you - * want, because most def-ids refer to external things like - * types or other fns that may or may not be inlined. Note - * that even when the inlined function is referencing itself - * recursively, we would want `tr_def_id` for that - * reference--- conceptually the function calls the original, - * non-inlined version, and trans deals with linking that - * recursive call to the inlined copy. - * - * However, there are a *few* cases where def-ids are used but - * we know that the thing being referenced is in fact *internal* - * to the item being inlined. In those cases, you should use - * `tr_intern_def_id()` below. - */ decoder::translate_def_id(self.cdata, did) } - pub fn tr_intern_def_id(&self, did: ast::DefId) -> ast::DefId { - /*! - * Translates an INTERNAL def-id, meaning a def-id that is - * known to refer to some part of the item currently being - * inlined. In that case, we want to convert the def-id to - * refer to the current crate and to the new, inlined node-id. - */ + /// Translates an INTERNAL def-id, meaning a def-id that is + /// known to refer to some part of the item currently being + /// inlined. In that case, we want to convert the def-id to + /// refer to the current crate and to the new, inlined node-id. + pub fn tr_intern_def_id(&self, did: ast::DefId) -> ast::DefId { assert_eq!(did.krate, ast::LOCAL_CRATE); ast::DefId { krate: ast::LOCAL_CRATE, node: self.tr_id(did.node) } } @@ -1780,43 +1765,40 @@ fn read_unboxed_closure<'a, 'b>(&mut self, dcx: &DecodeContext<'a, 'b, 'tcx>) } } + /// Converts a def-id that appears in a type. The correct + /// translation will depend on what kind of def-id this is. + /// This is a subtle point: type definitions are not + /// inlined into the current crate, so if the def-id names + /// a nominal type or type alias, then it should be + /// translated to refer to the source crate. + /// + /// However, *type parameters* are cloned along with the function + /// they are attached to. So we should translate those def-ids + /// to refer to the new, cloned copy of the type parameter. + /// We only see references to free type parameters in the body of + /// an inlined function. In such cases, we need the def-id to + /// be a local id so that the TypeContents code is able to lookup + /// the relevant info in the ty_param_defs table. + /// + /// *Region parameters*, unfortunately, are another kettle of fish. + /// In such cases, def_id's can appear in types to distinguish + /// shadowed bound regions and so forth. It doesn't actually + /// matter so much what we do to these, since regions are erased + /// at trans time, but it's good to keep them consistent just in + /// case. We translate them with `tr_def_id()` which will map + /// the crate numbers back to the original source crate. + /// + /// Unboxed closures are cloned along with the function being + /// inlined, and all side tables use interned node IDs, so we + /// translate their def IDs accordingly. + /// + /// It'd be really nice to refactor the type repr to not include + /// def-ids so that all these distinctions were unnecessary. fn convert_def_id(&mut self, dcx: &DecodeContext, source: tydecode::DefIdSource, did: ast::DefId) -> ast::DefId { - /*! - * Converts a def-id that appears in a type. 
The correct
-         * translation will depend on what kind of def-id this is.
-         * This is a subtle point: type definitions are not
-         * inlined into the current crate, so if the def-id names
-         * a nominal type or type alias, then it should be
-         * translated to refer to the source crate.
-         *
-         * However, *type parameters* are cloned along with the function
-         * they are attached to. So we should translate those def-ids
-         * to refer to the new, cloned copy of the type parameter.
-         * We only see references to free type parameters in the body of
-         * an inlined function. In such cases, we need the def-id to
-         * be a local id so that the TypeContents code is able to lookup
-         * the relevant info in the ty_param_defs table.
-         *
-         * *Region parameters*, unfortunately, are another kettle of fish.
-         * In such cases, def_id's can appear in types to distinguish
-         * shadowed bound regions and so forth. It doesn't actually
-         * matter so much what we do to these, since regions are erased
-         * at trans time, but it's good to keep them consistent just in
-         * case. We translate them with `tr_def_id()` which will map
-         * the crate numbers back to the original source crate.
-         *
-         * Unboxed closures are cloned along with the function being
-         * inlined, and all side tables use interned node IDs, so we
-         * translate their def IDs accordingly.
-         *
-         * It'd be really nice to refactor the type repr to not include
-         * def-ids so that all these distinctions were unnecessary.
-         */
-
         let r = match source {
             NominalType | TypeWithId | RegionParameter => dcx.tr_def_id(did),
             TypeParameter | UnboxedClosureSource => dcx.tr_intern_def_id(did)
diff --git a/src/librustc/middle/borrowck/check_loans.rs b/src/librustc/middle/borrowck/check_loans.rs
index afcc533ffb81c90e1a502e694230a929c4546c22..9a27abbe8322dc107a3a90f4eb4c785d48509ce8 100644
--- a/src/librustc/middle/borrowck/check_loans.rs
+++ b/src/librustc/middle/borrowck/check_loans.rs
@@ -684,16 +684,13 @@ pub fn analyze_restrictions_on_use(&self,
         return ret;
     }

+    /// Reports an error if `expr` (which should be a path)
+    /// is using a moved/uninitialized value
     fn check_if_path_is_moved(&self,
                               id: ast::NodeId,
                               span: Span,
                               use_kind: MovedValueUseKind,
                               lp: &Rc<LoanPath<'tcx>>) {
-        /*!
-         * Reports an error if `expr` (which should be a path)
-         * is using a moved/uninitialized value
-         */
-
         debug!("check_if_path_is_moved(id={}, use_kind={}, lp={})",
                id, use_kind, lp.repr(self.bccx.tcx));
         let base_lp = owned_ptr_base_path_rc(lp);
@@ -708,30 +705,29 @@ fn check_if_path_is_moved(&self,
         });
     }

+    /// Reports an error if assigning to `lp` will use a
+    /// moved/uninitialized value. Mainly this is concerned with
+    /// detecting derefs of uninitialized pointers.
+    ///
+    /// For example:
+    ///
+    /// ```
+    /// let a: int;
+    /// a = 10; // ok, even though a is uninitialized
+    ///
+    /// struct Point { x: uint, y: uint }
+    /// let p: Point;
+    /// p.x = 22; // ok, even though `p` is uninitialized
+    ///
+    /// let p: ~Point;
+    /// (*p).x = 22; // not ok, p is uninitialized, can't deref
+    /// ```
    fn check_if_assigned_path_is_moved(&self,
                                       id: ast::NodeId,
                                       span: Span,
                                       use_kind: MovedValueUseKind,
                                       lp: &Rc<LoanPath<'tcx>>) {
-        /*!
-         * Reports an error if assigning to `lp` will use a
-         * moved/uninitialized value. Mainly this is concerned with
-         * detecting derefs of uninitialized pointers. 
- * - * For example: - * - * let a: int; - * a = 10; // ok, even though a is uninitialized - * - * struct Point { x: uint, y: uint } - * let p: Point; - * p.x = 22; // ok, even though `p` is uninitialized - * - * let p: ~Point; - * (*p).x = 22; // not ok, p is uninitialized, can't deref - */ - match lp.kind { LpVar(_) | LpUpvar(_) => { // assigning to `x` does not require that `x` is initialized diff --git a/src/librustc/middle/borrowck/doc.rs b/src/librustc/middle/borrowck/doc.rs index 5b70d97b40276dd7c06e9b1835ba92520250a337..c6db5340f0f511d9e6865516368908a347b68185 100644 --- a/src/librustc/middle/borrowck/doc.rs +++ b/src/librustc/middle/borrowck/doc.rs @@ -8,1219 +8,1215 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -# The Borrow Checker - -This pass has the job of enforcing memory safety. This is a subtle -topic. This docs aim to explain both the practice and the theory -behind the borrow checker. They start with a high-level overview of -how it works, and then proceed to dive into the theoretical -background. Finally, they go into detail on some of the more subtle -aspects. - -# Table of contents - -These docs are long. Search for the section you are interested in. - -- Overview -- Formal model -- Borrowing and loans -- Moves and initialization -- Drop flags and structural fragments -- Future work - -# Overview - -The borrow checker checks one function at a time. It operates in two -passes. The first pass, called `gather_loans`, walks over the function -and identifies all of the places where borrows (e.g., `&` expressions -and `ref` bindings) and moves (copies or captures of a linear value) -occur. It also tracks initialization sites. For each borrow and move, -it checks various basic safety conditions at this time (for example, -that the lifetime of the borrow doesn't exceed the lifetime of the -value being borrowed, or that there is no move out of an `&T` -referent). - -It then uses the dataflow module to propagate which of those borrows -may be in scope at each point in the procedure. A loan is considered -to come into scope at the expression that caused it and to go out of -scope when the lifetime of the resulting reference expires. - -Once the in-scope loans are known for each point in the program, the -borrow checker walks the IR again in a second pass called -`check_loans`. This pass examines each statement and makes sure that -it is safe with respect to the in-scope loans. - -# Formal model - -Throughout the docs we'll consider a simple subset of Rust in which -you can only borrow from lvalues, defined like so: - -```text -LV = x | LV.f | *LV -``` - -Here `x` represents some variable, `LV.f` is a field reference, -and `*LV` is a pointer dereference. There is no auto-deref or other -niceties. This means that if you have a type like: - -```text -struct S { f: uint } -``` - -and a variable `a: Box`, then the rust expression `a.f` would correspond -to an `LV` of `(*a).f`. - -Here is the formal grammar for the types we'll consider: - -```text -TY = () | S<'LT...> | Box | & 'LT MQ TY -MQ = mut | imm | const -``` - -Most of these types should be pretty self explanatory. Here `S` is a -struct name and we assume structs are declared like so: - -```text -SD = struct S<'LT...> { (f: TY)... } -``` - -# Borrowing and loans - -## An intuitive explanation - -### Issuing loans - -Now, imagine we had a program like this: - -```text -struct Foo { f: uint, g: uint } -... 
-'a: { - let mut x: Box = ...; - let y = &mut (*x).f; - x = ...; -} -``` - -This is of course dangerous because mutating `x` will free the old -value and hence invalidate `y`. The borrow checker aims to prevent -this sort of thing. - -#### Loans and restrictions - -The way the borrow checker works is that it analyzes each borrow -expression (in our simple model, that's stuff like `&LV`, though in -real life there are a few other cases to consider). For each borrow -expression, it computes a `Loan`, which is a data structure that -records (1) the value being borrowed, (2) the mutability and scope of -the borrow, and (3) a set of restrictions. In the code, `Loan` is a -struct defined in `middle::borrowck`. Formally, we define `LOAN` as -follows: - -```text -LOAN = (LV, LT, MQ, RESTRICTION*) -RESTRICTION = (LV, ACTION*) -ACTION = MUTATE | CLAIM | FREEZE -``` - -Here the `LOAN` tuple defines the lvalue `LV` being borrowed; the -lifetime `LT` of that borrow; the mutability `MQ` of the borrow; and a -list of restrictions. The restrictions indicate actions which, if -taken, could invalidate the loan and lead to type safety violations. - -Each `RESTRICTION` is a pair of a restrictive lvalue `LV` (which will -either be the path that was borrowed or some prefix of the path that -was borrowed) and a set of restricted actions. There are three kinds -of actions that may be restricted for the path `LV`: - -- `MUTATE` means that `LV` cannot be assigned to; -- `CLAIM` means that the `LV` cannot be borrowed mutably; -- `FREEZE` means that the `LV` cannot be borrowed immutably; - -Finally, it is never possible to move from an lvalue that appears in a -restriction. This implies that the "empty restriction" `(LV, [])`, -which contains an empty set of actions, still has a purpose---it -prevents moves from `LV`. I chose not to make `MOVE` a fourth kind of -action because that would imply that sometimes moves are permitted -from restrictived values, which is not the case. - -#### Example - -To give you a better feeling for what kind of restrictions derived -from a loan, let's look at the loan `L` that would be issued as a -result of the borrow `&mut (*x).f` in the example above: - -```text -L = ((*x).f, 'a, mut, RS) where - RS = [((*x).f, [MUTATE, CLAIM, FREEZE]), - (*x, [MUTATE, CLAIM, FREEZE]), - (x, [MUTATE, CLAIM, FREEZE])] -``` - -The loan states that the expression `(*x).f` has been loaned as -mutable for the lifetime `'a`. Because the loan is mutable, that means -that the value `(*x).f` may be mutated via the newly created reference -(and *only* via that pointer). This is reflected in the -restrictions `RS` that accompany the loan. - -The first restriction `((*x).f, [MUTATE, CLAIM, FREEZE])` states that -the lender may not mutate, freeze, nor alias `(*x).f`. Mutation is -illegal because `(*x).f` is only supposed to be mutated via the new -reference, not by mutating the original path `(*x).f`. Freezing is -illegal because the path now has an `&mut` alias; so even if we the -lender were to consider `(*x).f` to be immutable, it might be mutated -via this alias. They will be enforced for the lifetime `'a` of the -loan. After the loan expires, the restrictions no longer apply. - -The second restriction on `*x` is interesting because it does not -apply to the path that was lent (`(*x).f`) but rather to a prefix of -the borrowed path. 
-if the user were to assign to (or freeze) `*x`, they would indirectly
-overwrite (or freeze) `(*x).f`, and thus invalidate the reference
-that was created. In general it holds that when a path is
-lent, restrictions are issued for all the owning prefixes of that
-path. In this case, the path `*x` owns the path `(*x).f` and,
-because `x` is an owned pointer, the path `x` owns the path `*x`.
-Therefore, borrowing `(*x).f` yields restrictions on both
-`*x` and `x`.
-
-### Checking for illegal assignments, moves, and reborrows
-
-Once we have computed the loans introduced by each borrow, the borrow
-checker uses a dataflow propagation to compute the full set of loans
-in scope at each expression and then uses that set to decide whether
-that expression is legal. Remember that the scope of a loan is defined
-by its lifetime LT. We sometimes say that a loan which is in-scope at
-a particular point is an "outstanding loan", and we refer to the set
-of restrictions included in those loans as the "outstanding
-restrictions".
-
-The kinds of expressions which in-scope loans can render illegal are:
-- *assignments* (`lv = v`): illegal if there is an in-scope restriction
-  against mutating `lv`;
-- *moves*: illegal if there is any in-scope restriction on `lv` at all;
-- *mutable borrows* (`&mut lv`): illegal if there is an in-scope restriction
-  against claiming `lv`;
-- *immutable borrows* (`&lv`): illegal if there is an in-scope restriction
-  against freezing `lv`.
-
-## Formal rules
-
-Now that we hopefully have some kind of intuitive feeling for how the
-borrow checker works, let's look a bit more closely at the precise
-conditions that it uses. For simplicity I will ignore const loans.
-
-I will present the rules in a modified form of standard inference
-rules, which looks as follows:
-
-```text
-PREDICATE(X, Y, Z)                  // Rule-Name
-  Condition 1
-  Condition 2
-  Condition 3
-```
-
-The initial line states the predicate that is to be satisfied. The
-indented lines indicate the conditions that must be met for the
-predicate to be satisfied. The right-justified comment states the name
-of this rule: there are comments in the borrowck source referencing
-these names, so that you can cross reference to find the actual code
-that corresponds to the formal rule.
-
-### Invariants
-
-I want to collect, at a high-level, the invariants the borrow checker
-maintains. I will give them names and refer to them throughout the
-text. Together these invariants are crucial for the overall soundness
-of the system.
-
-**Mutability requires uniqueness.** To mutate a path, that path must
-be unique: there must be no other usable path to the same memory.
-
-**Unique mutability.** There is only one *usable* mutable path to any
-given memory at any given time. This implies that when claiming memory
-with an expression like `p = &mut x`, the compiler must guarantee that
-the borrowed value `x` can no longer be mutated so long as `p` is
-live. (This is done via restrictions, read on.)
-
-### The `gather_loans` pass
-
-We start with the `gather_loans` pass, which walks the AST looking for
-borrows. For each borrow, there are three bits of information: the
-lvalue `LV` being borrowed and the mutability `MQ` and lifetime `LT`
-of the resulting pointer. Given those, `gather_loans` applies four
-validity tests:
-
-1. `MUTABILITY(LV, MQ)`: The mutability of the reference is
-compatible with the mutability of `LV` (i.e., not borrowing immutable
-data as mutable).
-
-2. `ALIASABLE(LV, MQ)`: The aliasability of the reference is
-compatible with the aliasability of `LV`. The goal is to prevent
-`&mut` borrows of aliasable data.
-
-3. `LIFETIME(LV, LT, MQ)`: The lifetime of the borrow does not exceed
-the lifetime of the value being borrowed.
-
-4. `RESTRICTIONS(LV, LT, ACTIONS) = RS`: This check computes the
-restrictions required to maintain memory safety. These are the
-restrictions that will go into the final loan. We'll discuss them in
-more detail below.
-
-## Checking mutability
-
-Checking mutability is fairly straightforward. We just want to prevent
-immutable data from being borrowed as mutable. Note that it is ok to
-borrow mutable data as immutable, since that is simply a
-freeze. Formally we define a predicate `MUTABILITY(LV, MQ)` which, if
-defined, means that "borrowing `LV` with mutability `MQ` is ok". The
-Rust code corresponding to this predicate is the function
-`check_mutability` in `middle::borrowck::gather_loans`.
-
-### Checking mutability of variables
-
-*Code pointer:* Function `check_mutability()` in `gather_loans/mod.rs`,
-but also the code in `mem_categorization`.
-
-Let's begin with the rules for variables, which state that if a
-variable is declared as mutable, it may be borrowed any which way, but
-otherwise the variable must be borrowed as immutable or const:
-
-```text
-MUTABILITY(X, MQ)                   // M-Var-Mut
-  DECL(X) = mut
-
-MUTABILITY(X, MQ)                   // M-Var-Imm
-  DECL(X) = imm
-  MQ = imm | const
-```
-
-### Checking mutability of owned content
-
-Fields and owned pointers inherit their mutability from
-their base expressions, so both of their rules basically
-delegate the check to the base expression `LV`:
-
-```text
-MUTABILITY(LV.f, MQ)                // M-Field
-  MUTABILITY(LV, MQ)
-
-MUTABILITY(*LV, MQ)                 // M-Deref-Unique
-  TYPE(LV) = Box<Ty>
-  MUTABILITY(LV, MQ)
-```
-
-### Checking mutability of immutable pointer types
-
-Immutable pointer types like `&T` can only
-be borrowed if MQ is immutable or const:
-
-```text
-MUTABILITY(*LV, MQ)                 // M-Deref-Borrowed-Imm
-  TYPE(LV) = &Ty
-  MQ = imm | const
-```
-
-### Checking mutability of mutable pointer types
-
-`&mut T` can be frozen, so it is acceptable to borrow it as either imm or mut:
-
-```text
-MUTABILITY(*LV, MQ)                 // M-Deref-Borrowed-Mut
-  TYPE(LV) = &mut Ty
-```
-
-## Checking aliasability
-
-The goal of the aliasability check is to ensure that we never permit
-`&mut` borrows of aliasable data. Formally we define a predicate
-`ALIASABLE(LV, MQ)` which, if defined, means that
-"borrowing `LV` with mutability `MQ` is ok". The
-Rust code corresponding to this predicate is the function
-`check_aliasability()` in `middle::borrowck::gather_loans`.
-
-### Checking aliasability of variables
-
-Local variables are never aliasable as they are accessible only within
-the stack frame.
-
-```text
-ALIASABLE(X, MQ)                    // M-Var-Mut
-```
-
-### Checking aliasability of owned content
-
-Owned content is aliasable if it is found in an aliasable location:
-
-```text
-ALIASABLE(LV.f, MQ)                 // M-Field
-  ALIASABLE(LV, MQ)
-
-ALIASABLE(*LV, MQ)                  // M-Deref-Unique
-  ALIASABLE(LV, MQ)
-```
-
-### Checking aliasability of immutable pointer types
-
-Immutable pointer types like `&T` are aliasable, and hence can only be
-borrowed immutably:
-
-```text
-ALIASABLE(*LV, imm)                 // M-Deref-Borrowed-Imm
-  TYPE(LV) = &Ty
-```
-
-### Checking aliasability of mutable pointer types
-
-`&mut T` can be frozen, so it is acceptable to borrow it as either imm or mut:
-
-```text
-ALIASABLE(*LV, MQ)                  // M-Deref-Borrowed-Mut
-  TYPE(LV) = &mut Ty
-```
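-
-To make the aliasability check concrete, here is a minimal sketch of
-the kind of code it rejects (the exact error wording is illustrative):
-
-```
-fn foo(p: & &mut int) {
-    // `*p` is an `&mut int` sitting in an aliasable location, so we
-    // may not claim its referent:
-    let q = &mut **p; // ERROR: cannot borrow aliasable data as mutable
-}
-```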
-
-## Checking lifetime
-
-These rules aim to ensure that no data is borrowed for a scope that exceeds
-its lifetime. The two computations involved (the lifetime of the borrow
-and the scope of the borrowed data) wind up being intimately related.
-Formally, we define a predicate `LIFETIME(LV, LT, MQ)`, which states that
-"the lvalue `LV` can be safely borrowed for the lifetime `LT` with mutability
-`MQ`". The Rust code corresponding to this predicate is the module
-`middle::borrowck::gather_loans::lifetime`.
-
-### The Scope function
-
-Several of the rules refer to a helper function `SCOPE(LV)=LT`.
-`SCOPE(LV)` yields the lifetime `LT` for which the lvalue `LV` is
-guaranteed to exist, presuming that no mutations occur.
-
-The scope of a local variable is the block where it is declared:
-
-```text
-  SCOPE(X) = block where X is declared
-```
-
-The scope of a field is the scope of the struct:
-
-```text
-  SCOPE(LV.f) = SCOPE(LV)
-```
-
-The scope of a unique referent is the scope of the pointer, since
-(barring mutation or moves) the pointer will not be freed until
-the pointer itself `LV` goes out of scope:
-
-```text
-  SCOPE(*LV) = SCOPE(LV) if LV has type Box<Ty>
-```
-
-The scope of a borrowed referent is the scope associated with the
-pointer. This is a conservative approximation, since the data that
-the pointer points at may actually live longer:
-
-```text
-  SCOPE(*LV) = LT if LV has type &'LT T or &'LT mut T
-```
-
-### Checking lifetime of variables
-
-The rule for variables states that a variable can only be borrowed for
-a lifetime `LT` that is a subregion of the variable's scope:
-
-```text
-LIFETIME(X, LT, MQ)                 // L-Local
-  LT <= SCOPE(X)
-```
-
-### Checking lifetime for owned content
-
-The lifetime of a field or owned pointer is the same as the lifetime
-of its owner:
-
-```text
-LIFETIME(LV.f, LT, MQ)              // L-Field
-  LIFETIME(LV, LT, MQ)
-
-LIFETIME(*LV, LT, MQ)               // L-Deref-Send
-  TYPE(LV) = Box<Ty>
-  LIFETIME(LV, LT, MQ)
-```
-
-### Checking lifetime for derefs of references
-
-References have a lifetime `LT'` associated with them. The
-data they point at has been guaranteed to be valid for at least this
-lifetime. Therefore, the borrow is valid so long as the lifetime `LT`
-of the borrow does not exceed the lifetime `LT'` of the pointer
-itself:
-
-```text
-LIFETIME(*LV, LT, MQ)               // L-Deref-Borrowed
-  TYPE(LV) = &LT' Ty OR &LT' mut Ty
-  LT <= LT'
-```
-
-## Computing the restrictions
-
-The final rules govern the computation of *restrictions*, meaning that
-we compute the set of actions that will be illegal for the life of the
-loan. The predicate is written `RESTRICTIONS(LV, LT, ACTIONS) =
-RESTRICTION*`, which can be read "in order to prevent `ACTIONS` from
-occurring on `LV`, the restrictions `RESTRICTION*` must be respected
-for the lifetime of the loan".
-
-Note that there is an initial set of restrictions: these restrictions
-are computed based on the kind of borrow:
-
-```text
-&mut LV =>   RESTRICTIONS(LV, LT, MUTATE|CLAIM|FREEZE)
-&LV =>       RESTRICTIONS(LV, LT, MUTATE|CLAIM)
-&const LV => RESTRICTIONS(LV, LT, [])
-```
-
-The reasoning here is that a mutable borrow must be the only writer,
-therefore it prevents other writes (`MUTATE`), mutable borrows
-(`CLAIM`), and immutable borrows (`FREEZE`). An immutable borrow
-permits other immutable borrows but forbids writes and mutable borrows.
-Finally, a const borrow just wants to be sure that the value is not
-moved out from under it, so no actions are forbidden.
-
-### Restrictions for loans of a local variable
-
-The simplest case is a borrow of a local variable `X`:
-
-```text
-RESTRICTIONS(X, LT, ACTIONS) = (X, ACTIONS)            // R-Variable
-```
-
-In such cases we just record the actions that are not permitted.
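-
-For instance, here is a minimal sketch of R-Variable in action (the
-exact error wording is illustrative):
-
-```
-fn main() {
-    let mut x: int = 5;
-    let p = &mut x; // loan of `x` with restriction (x, [MUTATE, CLAIM, FREEZE])
-    x = 10;         // ERROR: violates the MUTATE restriction on `x`
-    *p += 1;        // mutating through the reference itself is fine
-}
-```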
-
-### Restrictions for loans of fields
-
-Restricting a field is the same as restricting the owner of that
-field:
-
-```text
-RESTRICTIONS(LV.f, LT, ACTIONS) = RS, (LV.f, ACTIONS)  // R-Field
-  RESTRICTIONS(LV, LT, ACTIONS) = RS
-```
-
-The reasoning here is as follows. If the field must not be mutated,
-then you must not mutate the owner of the field either, since that
-would indirectly modify the field. Similarly, if the field cannot be
-frozen or aliased, we cannot allow the owner to be frozen or aliased,
-since doing so indirectly freezes/aliases the field. This is the
-origin of inherited mutability.
-
-### Restrictions for loans of owned referents
-
-Because the mutability of owned referents is inherited, restricting an
-owned referent is similar to restricting a field, in that it implies
-restrictions on the pointer. However, owned pointers have an important
-twist: if the owner `LV` is mutated, that causes the owned referent
-`*LV` to be freed! So whenever an owned referent `*LV` is borrowed, we
-must prevent the owned pointer `LV` from being mutated, which means
-that we always add `MUTATE` and `CLAIM` to the restriction set imposed
-on `LV`:
-
-```text
-RESTRICTIONS(*LV, LT, ACTIONS) = RS, (*LV, ACTIONS)    // R-Deref-Send-Pointer
-  TYPE(LV) = Box<Ty>
-  RESTRICTIONS(LV, LT, ACTIONS|MUTATE|CLAIM) = RS
-```
-
-### Restrictions for loans of immutable borrowed referents
-
-Immutable borrowed referents are freely aliasable, meaning that
-the compiler does not prevent you from copying the pointer. This
-implies that issuing restrictions is useless. We might prevent the
-user from acting on `*LV` itself, but there could be another path
-`*LV1` that refers to the exact same memory, and we would not be
-restricting that path. Therefore, the rule for `&Ty` pointers
-always returns an empty set of restrictions, and it only permits
-restricting `MUTATE` and `CLAIM` actions:
-
-```text
-RESTRICTIONS(*LV, LT, ACTIONS) = []                    // R-Deref-Imm-Borrowed
-  TYPE(LV) = &LT' Ty
-  LT <= LT'                                            // (1)
-  ACTIONS subset of [MUTATE, CLAIM]
-```
-
-The reason that we can restrict `MUTATE` and `CLAIM` actions even
-without a restrictions list is that it is never legal to mutate nor to
-borrow mutably the contents of a `&Ty` pointer. In other words,
-those restrictions are already inherent in the type.
-
-Clause (1) in the rule for `&Ty` deserves mention. Here I
-specify that the lifetime of the loan must be less than the lifetime
-of the `&Ty` pointer. In simple cases, this clause is redundant, since
-the `LIFETIME()` function will already enforce the required rule:
-
-```
-fn foo<'a>(point: &'a Point) -> &'static f32 {
-    &point.x // Error
-}
-```
-
-The above example fails to compile both because of clause (1) above
-and because of the basic `LIFETIME()` check. However, in more advanced
-examples involving multiple nested pointers, clause (1) is needed:
-
-```
-fn foo<'a,'b>(point: &'a &'b mut Point) -> &'b f32 {
-    &point.x // Error
-}
-```
-
-The `LIFETIME` rule here would accept `'b` because, in fact, the
-*memory is* guaranteed to remain valid (i.e., not be freed) for the
-lifetime `'b`, since the `&mut` pointer is valid for `'b`. However, we
-are returning an immutable reference, so we need the memory to be both
-valid and immutable. Even though `point.x` is referenced by an `&mut`
-pointer, it can still be considered immutable so long as that `&mut`
-pointer is found in an aliased location. That means the memory is
-guaranteed to be *immutable* for the lifetime of the `&` pointer,
-which is only `'a`, not `'b`. Hence this example yields an error.
-
-As a final twist, consider the case of two nested *immutable*
-pointers, rather than a mutable pointer within an immutable one:
-
-```
-fn foo<'a,'b>(point: &'a &'b Point) -> &'b f32 {
-    &point.x // OK
-}
-```
-
-This function is legal. The reason for this is that the inner pointer
-(`*point : &'b Point`) is enough to guarantee the memory is immutable
-and valid for the lifetime `'b`. This is reflected in
-`RESTRICTIONS()` by the fact that we do not recurse (i.e., we impose
-no restrictions on `LV`, which in this particular case is the pointer
-`point : &'a &'b Point`).
-
-#### Why both `LIFETIME()` and `RESTRICTIONS()`?
-
-Given the previous text, it might seem that `LIFETIME` and
-`RESTRICTIONS` should be folded together into one check, but there is
-a reason that they are separated. They answer separate concerns.
-The rules pertaining to `LIFETIME` exist to ensure that we don't
-create a borrowed pointer that outlives the memory it points at. So
-`LIFETIME` prevents a function like this:
-
-```
-fn get_1<'a>() -> &'a int {
-    let x = 1;
-    &x
-}
-```
-
-Here we would be returning a pointer into the stack. Clearly bad.
-
-However, the `RESTRICTIONS` rules are more concerned with how memory
-is used. The example above doesn't generate an error according to
-`RESTRICTIONS` because, for local variables, we don't require that the
-loan lifetime be a subset of the local variable lifetime. The idea
-here is that we *can* guarantee that `x` is not (e.g.) mutated for the
-lifetime `'a`, even though `'a` exceeds the function body and thus
-involves unknown code in the caller -- after all, `x` ceases to exist
-after we return and hence the remaining code in `'a` cannot possibly
-mutate it. This distinction is important for type checking functions
-like this one:
-
-```
-fn inc_and_get<'a>(p: &'a mut Point) -> &'a int {
-    p.x += 1;
-    &p.x
-}
-```
-
-In this case, we take in a `&mut` and return a frozen borrowed pointer
-with the same lifetime. So long as the lifetime of the returned value
-doesn't exceed the lifetime of the `&mut` we receive as input, this is
-fine, though it may seem surprising at first (it surprised me when I
-first worked it through). After all, we're guaranteeing that `*p`
-won't be mutated for the lifetime `'a`, even though we can't "see" the
-entirety of the code during that lifetime, since some of it occurs in
-our caller. But we *do* know that nobody can mutate `*p` except
-through `p`. So if we don't mutate `*p` and we don't return `p`, then
-we know that the right to mutate `*p` has been lost to our caller --
-in terms of capability, the caller passed in the ability to mutate
-`*p`, and we never gave it back. (Note that we can't return `p` while
-`*p` is borrowed since that would be a move of `p`, as `&mut` pointers
-are affine.)
-
-### Restrictions for loans of const aliasable referents
-
-Freeze pointers are read-only. There may be `&mut` or `&` aliases, and
-we cannot prevent *anything* but moves in that case. So the
-`RESTRICTIONS` function is only defined if `ACTIONS` is the empty set.
-Because moves from a `&const` lvalue are never legal, it is not
-necessary to add any restrictions at all to the final result.
-
-```text
-RESTRICTIONS(*LV, LT, []) = []                         // R-Deref-Freeze-Borrowed
-  TYPE(LV) = &const Ty
-```
-
-### Restrictions for loans of mutable borrowed referents
-
-Mutable borrowed pointers are guaranteed to be the only way to mutate
-their referent. This permits us to take greater license with them; for
-example, the referent can be frozen simply by ensuring that we do not
-use the original pointer to perform mutation. Similarly, we can allow
-the referent to be claimed, so long as the original pointer is unused
-while the new claimant is live.
-
-The rule for mutable borrowed pointers is as follows:
-
-```text
-RESTRICTIONS(*LV, LT, ACTIONS) = RS, (*LV, ACTIONS)    // R-Deref-Mut-Borrowed
-  TYPE(LV) = &LT' mut Ty
-  LT <= LT'                                            // (1)
-  RESTRICTIONS(LV, LT, ACTIONS) = RS                   // (2)
-```
-
-Let's examine the two numbered clauses:
-
-Clause (1) specifies that the lifetime of the loan (`LT`) cannot
-exceed the lifetime of the `&mut` pointer (`LT'`). The reason for this
-is that the `&mut` pointer is guaranteed to be the only legal way to
-mutate its referent -- but only for the lifetime `LT'`. After that
-lifetime, the loan on the referent expires and hence the data may be
-modified by its owner again. This implies that we are only able to
-guarantee that the referent will not be modified or aliased for a
-maximum of `LT'`.
-
-Here is a concrete example of a bug this rule prevents:
-
-```
-// Test region-reborrow-from-shorter-mut-ref.rs:
-fn copy_borrowed_ptr<'a,'b,T>(p: &'a mut &'b mut T) -> &'b mut T {
-    &mut **p // ERROR due to clause (1)
-}
-fn main() {
-    let mut x = 1;
-    let mut y = &mut x; // <-'b-----------------------------+
-    //      +-'a--------------------+                       |
-    //      v                       v                       |
-    let z = copy_borrowed_ptr(&mut y); // y is lent         |
-    *y += 1; // Here y==z, so both should not be usable...  |
-    *z += 1; // ...and yet they would be, but for clause 1. |
-} // <------------------------------------------------------+
-```
-
-Clause (2) propagates the restrictions on the referent to the pointer
-itself. This is the same as with an owned pointer, though the
-reasoning is mildly different. The basic goal in all cases is to
-prevent the user from establishing another route to the same data. To
-see what I mean, let's examine various cases of what can go wrong and
-show how it is prevented.
-
-**Example danger 1: Moving the base pointer.** One of the simplest
-ways to violate the rules is to move the base pointer to a new name
-and access it via that new name, thus bypassing the restrictions on
-the old name. Here is an example:
-
-```
-// src/test/compile-fail/borrowck-move-mut-base-ptr.rs
-fn foo(t0: &mut int) {
-    let p: &int = &*t0; // Freezes `*t0`
-    let t1 = t0;        //~ ERROR cannot move out of `t0`
-    *t1 = 22;           // OK, not a write through `*t0`
-}
-```
-
-Remember that `&mut` pointers are linear, and hence `let t1 = t0` is a
-move of `t0` -- or would be, if it were legal. Instead, we get an
-error, because clause (2) imposes restrictions on `LV` (`t0`, here),
-and any restrictions on a path make it impossible to move from that
-path.
-
-**Example danger 2: Claiming the base pointer.** Another possible
-danger is to mutably borrow the base path. This can lead to two bad
-scenarios. The most obvious is that the mutable borrow itself becomes
-another path to access the same data, as shown here:
-
-```
-// src/test/compile-fail/borrowck-mut-borrow-of-mut-base-ptr.rs
-fn foo<'a>(mut t0: &'a mut int,
-           mut t1: &'a mut int) {
-    let p: &int = &*t0;   // Freezes `*t0`
-    let mut t2 = &mut t0; //~ ERROR cannot borrow `t0`
-    **t2 += 1;            // Mutates `*t0`
-}
-```
-
-In this example, `**t2` is the same memory as `*t0`. Because `t2` is
-an `&mut` pointer, `**t2` is a unique path and hence it would be
-possible to mutate `**t2` even though that memory was supposed to be
-frozen by the creation of `p`. However, an error is reported -- the
-reason is that the freeze `&*t0` will restrict claims and mutation
-against `*t0` which, by clause 2, in turn prevents claims and mutation
-of `t0`. Hence the claim `&mut t0` is illegal.
-
-Another danger with an `&mut` pointer is that we could swap the `t0`
-value away to create a new path:
-
-```
-// src/test/compile-fail/borrowck-swap-mut-base-ptr.rs
-fn foo<'a>(mut t0: &'a mut int,
-           mut t1: &'a mut int) {
-    let p: &int = &*t0;     // Freezes `*t0`
-    swap(&mut t0, &mut t1); //~ ERROR cannot borrow `t0`
-    *t1 = 22;
-}
-```
-
-This is illegal for the same reason as above. Note that if we added
-back a swap operator -- as we used to have -- we would want to be very
-careful to ensure this example is still illegal.
-
-**Example danger 3: Freeze the base pointer.** In the case where the
-referent is claimed, even freezing the base pointer can be dangerous,
-as shown in the following example:
-
-```
-// src/test/compile-fail/borrowck-borrow-of-mut-base-ptr.rs
-fn foo<'a>(mut t0: &'a mut int,
-           mut t1: &'a mut int) {
-    let p: &mut int = &mut *t0; // Claims `*t0`
-    let mut t2 = &t0;           //~ ERROR cannot borrow `t0`
-    let q: &int = &*t2;         // Freezes `*t0` but not through `*p`
-    *p += 1;                    // violates type of `*q`
-}
-```
-
-Here the problem is that `*t0` is claimed by `p`, and hence `p` wants
-to be the controlling pointer through which mutation or freezes occur.
-But `t2` would -- if it were legal -- have the type `& &mut int`, and
-hence would be a mutable pointer in an aliasable location, which is
-considered frozen (since no one can write to `**t2` as it is not a
-unique path). Therefore, we could reasonably create a frozen `&int`
-pointer pointing at `*t0` that coexists with the mutable pointer `p`,
-which is clearly unsound.
-
-However, it is not always unsafe to freeze the base pointer. In
-particular, if the referent is frozen, there is no harm in it:
-
-```
-// src/test/run-pass/borrowck-borrow-of-mut-base-ptr-safe.rs
-fn foo<'a>(mut t0: &'a mut int,
-           mut t1: &'a mut int) {
-    let p: &int = &*t0; // Freezes `*t0`
-    let mut t2 = &t0;
-    let q: &int = &*t2; // Freezes `*t0`, but that's ok...
-    let r: &int = &*t0; // ...after all, could do same thing directly.
-}
-```
-
-In this case, creating the alias `t2` of `t0` is safe because the only
-thing `t2` can be used for is to further freeze `*t0`, which is
-already frozen. In particular, we cannot assign to `*t0` through the
-new alias `t2`, as demonstrated in this test case:
-
-```
-// src/test/run-pass/borrowck-borrow-mut-base-ptr-in-aliasable-loc.rs
-fn foo(t0: & &mut int) {
-    let t1 = t0;
-    let p: &int = &**t0;
-    **t1 = 22; //~ ERROR cannot assign
-}
-```
-
-This distinction is reflected in the rules. When doing an `&mut`
-borrow -- as in the first example -- the set `ACTIONS` will be
-`CLAIM|MUTATE|FREEZE`, because claiming the referent implies that it
-cannot be claimed, mutated, or frozen by anyone else. These
-restrictions are propagated back to the base path and hence the base
-path is considered unfreezable.
-
-In contrast, when the referent is merely frozen -- as in the second
-example -- the set `ACTIONS` will be `CLAIM|MUTATE`, because freezing
-the referent implies that it cannot be claimed or mutated but permits
-others to freeze. Hence when these restrictions are propagated back to
-the base path, it will still be considered freezable.
-
-**FIXME #10520: Restrictions against mutating the base pointer.** When
-an `&mut` pointer is frozen or claimed, we currently pass along the
-restriction against MUTATE to the base pointer. I do not believe this
-restriction is needed. It dates from the days when we had a way to
-mutate that preserved the value being mutated (i.e., swap). Nowadays
-the only form of mutation is assignment, which destroys the pointer
-being mutated -- therefore, a mutation cannot create a new path to the
-same data. Rather, it removes an existing path. This implies that not
-only can we permit mutation, we can have mutation kill restrictions in
-the dataflow sense.
-
-**WARNING:** We do not currently have `const` borrows in the
-language. If they are added back in, we must ensure that they are
-consistent with all of these examples. The crucial question will be
-what sorts of actions are permitted with a `&const &mut` pointer. I
-would suggest that an `&mut` referent found in an `&const` location be
-prohibited from both freezes and claims. This would avoid the need to
-prevent `const` borrows of the base pointer when the referent is
-borrowed.
-
-# Moves and initialization
-
-The borrow checker is also in charge of ensuring that:
-
-- all memory which is accessed is initialized
-- immutable local variables are assigned at most once.
-
-These are two separate dataflow analyses built on the same
-framework. Let's look at checking that memory is initialized first;
-the checking of immutable local variable assignments works in a very
-similar way.
-
-To track the initialization of memory, we actually track all the
-points in the program that *create uninitialized memory*, meaning
-moves and the declaration of uninitialized variables. For each of
-these points, we create a bit in the dataflow set. Assignments to a
-variable `x` or path `a.b.c` kill the move/uninitialization bits for
-those paths and any subpaths (e.g., `x`, `x.y`, `a.b.c`, `*a.b.c`).
-Bits are unioned when two control-flow paths join. Thus, the
-presence of a bit indicates that the move may have occurred without an
-intervening assignment to the same memory. At each use of a variable,
-we examine the bits in scope, and check that none of them are
-moves/uninitializations of the variable that is being used.
-
-Let's look at a simple example:
-
-```
-fn foo(a: Box<int>) {
-    let b: Box<int>; // Gen bit 0.
-
-    if cond {        // Bits: 0
-        use(&*a);
-        b = a;       // Gen bit 1, kill bit 0.
-        use(&*b);
-    } else {
-                     // Bits: 0
-    }
-                     // Bits: 0,1
-    use(&*a); // Error.
-    use(&*b); // Error.
-}
-
-fn use(a: &int) { }
-```
-
-In this example, the variable `b` is created uninitialized. In one
-branch of an `if`, we then move the variable `a` into `b`. Once we
-exit the `if`, therefore, it is an error to use `a` or `b` since both
-are only conditionally initialized. I have annotated the dataflow
-state using comments. There are two dataflow bits, with bit 0
-corresponding to the creation of `b` without an initializer, and bit 1
-corresponding to the move of `a`. The assignment `b = a` both
-generates bit 1, because it is a move of `a`, and kills bit 0, because
-`b` is now initialized. On the else branch, though, `b` is never
-initialized, and so bit 0 remains untouched. When the two flows of
-control join, we union the bits from both sides, resulting in both
-bits 0 and 1 being set. Thus any attempt to use `a` uncovers the bit 1
-from the "then" branch, showing that `a` may be moved, and any attempt
-to use `b` uncovers bit 0, from the "else" branch, showing that `b`
-may not be initialized.
-
-## Initialization of immutable variables
-
-Initialization of immutable variables works in a very similar way,
-except that:
-
-1. we generate bits for each assignment to a variable;
-2. the bits are never killed except when the variable goes out of scope.
-
-Thus the presence of an assignment bit indicates that the assignment
-may have occurred. Note that assignments are only killed when the
-variable goes out of scope, as it is not relevant whether or not there
-has been a move in the meantime. Using these bits, we can declare that
-an assignment to an immutable variable is legal iff there is no other
-assignment bit to that same variable in scope.
-
-## Why is the design made this way?
-
-It may seem surprising that we assign dataflow bits to *each move*
-rather than *each path being moved*. This is somewhat less efficient,
-since on each use, we must iterate through all moves and check whether
-any of them correspond to the path in question. Similar concerns apply
-to the analysis for double assignments to immutable variables. The
-main reason to do it this way is that it allows us to print better
-error messages, because when a use occurs, we can print out the
-precise move that may be in scope, rather than simply having to say
-"the variable may not be initialized".
-
-## Data structures used in the move analysis
-
-The move analysis maintains several data structures that enable it to
-cross-reference moves and assignments to determine when they may be
-moving/assigning the same memory. These are all collected into the
-`MoveData` and `FlowedMoveData` structs. The former represents the set
-of move paths, moves, and assignments, and the latter adds in the
-results of a dataflow computation.
-
-### Move paths
-
-The `MovePath` tree tracks every path that is moved or assigned to.
-These paths have the same form as the `LoanPath` data structure, which
-in turn is the "real world" version of the lvalues `LV` that we
-introduced earlier. The difference between a `MovePath` and a `LoanPath`
-is that move paths are:
-
-1. Canonicalized, so that we have exactly one copy of each, and
-   we can refer to move paths by index;
-2. Cross-referenced with other paths into a tree, so that given a move
-   path we can efficiently find all parent move paths and all
-   extensions (e.g., given the `a.b` move path, we can easily find the
-   move path `a` and also the move paths `a.b.c`);
-3. Cross-referenced with moves and assignments, so that we can
-   easily find all moves and assignments to a given path.
-
-The mechanism that we use is to create a `MovePath` record for each
-move path. These are arranged in an array and are referenced using
-`MovePathIndex` values, which are newtype'd indices. The `MovePath`
-structs are arranged into a tree, represented using the standard Knuth
-encoding in which each node has a child 'pointer' and a "next
-sibling" 'pointer'. In addition, each `MovePath` has a parent
-'pointer'. In this case, the 'pointers' are just `MovePathIndex`
-values.
-
-In this way, if we want to find all base paths of a given move path,
-we can just iterate up the parent pointers (see `each_base_path()` in
-the `move_data` module). If we want to find all extensions, we can
-iterate through the subtree (see `each_extending_path()`).
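-
-In code, the encoding looks roughly like this (a simplified sketch,
-not the literal definitions from the `move_data` module):
-
-```
-struct MovePathIndex(uint);
-
-struct MovePath {
-    loan_path: LoanPath,                 // the path this node represents
-    parent: Option<MovePathIndex>,       // owning prefix, if any
-    first_child: Option<MovePathIndex>,  // first extension of this path
-    next_sibling: Option<MovePathIndex>, // next path under the same parent
-}
-```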
-
-### Moves and assignments
-
-There are structs to represent moves (`Move`) and assignments
-(`Assignment`), and these are also placed into arrays and referenced
-by index. All moves of a particular path are arranged into a linked
-list, beginning with `MovePath.first_move` and continuing through
-`Move.next_move`.
-
-We distinguish between "var" assignments, which are assignments to a
-variable like `x = foo`, and "path" assignments (`x.f = foo`). This
-is because we need to assign dataflow bits to the former, but not the
-latter, so as to check for double initialization of immutable
-variables.
-
-### Gathering and checking moves
-
-Like loans, we distinguish two phases. The first, gathering, is where
-we uncover all the moves and assignments. As with loans, we do some
-basic sanity checking in this phase, so we'll report errors if you
-attempt to move out of a borrowed pointer, etc. Then we do the
-dataflow (see `FlowedMoveData::new`). Finally, in the `check_loans.rs`
-code, we walk back over, identify all uses, assignments, and captures,
-and check that they are legal given the set of dataflow bits we have
-computed for that program point.
-
-# Drop flags and structural fragments
-
-In addition to the job of enforcing memory safety, the borrow checker
-code is also responsible for identifying the *structural fragments* of
-data in the function, to support out-of-band dynamic drop flags
-allocated on the stack. (For background, see [RFC PR #320].)
-
-[RFC PR #320]: https://github.com/rust-lang/rfcs/pull/320
-
-Semantically, each piece of data that has a destructor may need a
-boolean flag to indicate whether or not its destructor has been run
-yet. However, in many cases there is no need to actually maintain such
-a flag: It can be apparent from the code itself that a given path is
-always initialized (or always deinitialized) when control reaches the
-end of its owner's scope, and thus we can unconditionally emit (or
-not) the destructor invocation for that path.
-
-A simple example of this is the following:
-
-```rust
-struct D { p: int }
-impl D { fn new(x: int) -> D { ... } }
-impl Drop for D { ... }
-
-fn foo(a: D, b: D, t: || -> bool) {
-    let c: D;
-    let d: D;
-    if t() { c = b; }
-}
-```
-
-At the end of the body of `foo`, the compiler knows that `a` is
-initialized, introducing a drop obligation (running `D`'s destructor)
-for the end of `a`'s scope that is run unconditionally.
-Likewise the compiler knows that `d` is not initialized, and thus it
-leaves out the drop code for `d`.
-
-The compiler cannot statically know the drop-state of `b` nor `c` at
-the end of their scope, since that depends on the value of
-`t`. Therefore, we need to insert boolean flags to track whether we
-need to drop `b` and `c`.
-
-However, the matter is not as simple as just mapping local variables
-to their corresponding drop flags when necessary. In particular, in
-addition to being able to move data out of local variables, Rust
-allows one to move values in and out of structured data.
-
-Consider the following:
-
-```rust
-struct S { x: D, y: D, z: D }
-
-fn foo(a: S, mut b: S, t: || -> bool) {
-    let mut c: S;
-    let d: S;
-    let e: S = a.clone();
-    if t() {
-        c = b;
-        b.x = e.y;
-    }
-    if t() { c.y = D::new(4); }
-}
-```
-
-As before, the drop obligations of `a` and `d` can be statically
-determined, and again the state of `b` and `c` depends on dynamic
-state. But additionally, the dynamic drop obligations introduced by
-`b` and `c` are not just per-local boolean flags. For example, if the
-first call to `t` returns `false` and the second call `true`, then at
-the end of their scope, `b` will be completely initialized, but only
-`c.y` in `c` will be initialized. If both calls to `t` return `true`,
-then at the end of their scope, `c` will be completely initialized,
-but only `b.x` will be initialized in `b`, and only `e.x` and `e.z`
-will be initialized in `e`.
-
-Note that we need to cover the `z` field in each case in some way,
-since it may (or may not) need to be dropped, even though `z` is never
-directly mentioned in the body of the `foo` function. We call a path
-like `b.z` a *fragment sibling* of `b.x`, since the field `z` comes
-from the same structure `S` that declared the field `x` in `b.x`.
-
-In general we need to maintain boolean flags that match the
-`S`-structure of both `b` and `c`. In addition, we need to consult
-such a flag when doing an assignment (such as `c.y = D::new(4);`
-above), in order to know whether or not there is a previous value that
-needs to be dropped before we do the assignment.
-
-So for any given function, we need to determine what flags are needed
-to track its drop obligations. Our strategy for determining the set of
-flags is to represent the fragmentation of the structure explicitly:
-by starting from the paths that are explicitly mentioned in
-moves and assignments (such as `b.x` and `c.y` above), and then
-traversing the structure of the path's type to identify leftover
-*unmoved fragments*: assigning into `c.y` means that `c.x` and `c.z`
-are leftover unmoved fragments. Each fragment represents a drop
-obligation that may need to be tracked. Paths that are only moved or
-assigned in their entirety (like `a` and `d`) are treated as a single
-drop obligation.
-
-The fragment construction process works by piggy-backing on the
-existing `move_data` module. We already have callbacks that visit each
-direct move and assignment; these form the basis for the sets of
-`moved_leaf_paths` and `assigned_leaf_paths`. From these leaves, we
-can walk up their parent chain to identify all of their parent paths.
-We need to identify the parents because of cases like the following:
-
-```rust
-struct Pair<X,Y> { x: X, y: Y }
-fn foo(dd_d_d: Pair<Pair<D,D>,D>) {
-    other_function(dd_d_d.x.y);
-}
-```
-
-In this code, the move of the path `dd_d_d.x.y` leaves behind not only
-the fragment drop-obligation `dd_d_d.x.x` but also `dd_d_d.y` as well.
-
-Once we have identified the directly-referenced leaves and their
-parents, we compute the left-over fragments, in the function
-`fragments::add_fragment_siblings`. As of this writing this works by
-looking at each directly-moved or assigned path P, and blindly
-gathering all sibling fields of P (as well as siblings for the parents
-of P, etc). After accumulating all such siblings, we filter out the
-entries added as siblings of P that turned out to be
-directly-referenced paths (or parents of directly referenced paths)
-themselves, thus leaving the never-referenced "left-overs" as the only
-thing left from the gathering step.
-
-## Array structural fragments
-
-A special case of the structural fragments discussed above are
-the elements of an array that has been passed by value, such as
-the following:
-
-```rust
-fn foo(a: [D, ..10], i: uint) -> D {
-    a[i]
-}
-```
-
-The above code moves a single element out of the input array `a`.
-The remainder of the array still needs to be dropped; i.e., it
-is a structural fragment. Note that after performing such a move,
-it is not legal to read from the array `a`. There are a number of
-ways to deal with this, but the important thing to note is that
-the semantics needs to distinguish in some manner between a
-fragment that is the *entire* array versus a fragment that represents
-all-but-one element of the array. A place where that distinction
-would arise is the following:
-
-```rust
-fn foo(a: [D, ..10], b: [D, ..10], i: uint, t: bool) -> D {
-    if t {
-        a[i]
-    } else {
-        b[i]
-    }
-
-    // When control exits, we will need either to drop all of `a`
-    // and all-but-one of `b`, or to drop all of `b` and all-but-one
-    // of `a`.
-}
-```
-
-There are a number of ways that the trans backend could choose to
-compile this (e.g. a `[bool, ..10]` array for each such moved array;
-or an `Option<uint>` for each moved array). From the viewpoint of the
-borrow-checker, the important thing is to record what kind of fragment
-is implied by the relevant moves.
-
-# Future work
-
-While writing up these docs, I encountered some rules I believe to be
-stricter than necessary:
-
-- I think restricting the `&mut` LV against moves and `ALIAS` is
-  sufficient; `MUTATE` and `CLAIM` are overkill. `MUTATE` was necessary
-  when swap was a built-in operator, but as it is not, it is implied by
-  `CLAIM`, and `CLAIM` is implied by `ALIAS`. The only net effect of
-  this is an extra error message in some cases, though.
-- I have not described how closures interact. Current code is unsound.
-  I am working on describing and implementing the fix.
-- If we wish, we can easily extend the move checking to allow finer-grained
-  tracking of what is initialized and what is not, enabling code like
-  this:
-
-      a = x.f.g; // x.f.g is now uninitialized
-      // here, x and x.f are not usable, but x.f.h *is*
-      x.f.g = b; // x.f.g is now initialized
-      // now x, x.f, x.f.g, x.f.h are all usable
-
-  What needs to change here, most likely, is that the `moves` module
-  should record not only what paths are moved, but what expressions
-  are actual *uses*. For example, the reference to `x` in `x.f.g = b`
-  is not a true *use* in the sense that it requires `x` to be fully
-  initialized. This is in fact why the above code produces an error
-  today: the reference to `x` in `x.f.g = b` is considered illegal
-  because `x` is not fully initialized.
-
-There are also some possible refactorings:
-
-- It might be nice to replace all loan paths with the `MovePath`
-  mechanism, since they allow lightweight comparison using an integer.
-
-*/
+//! # The Borrow Checker
+//!
+//! This pass has the job of enforcing memory safety. This is a subtle
+//! topic. These docs aim to explain both the practice and the theory
+//! behind the borrow checker. They start with a high-level overview of
+//! how it works, and then proceed to dive into the theoretical
+//! background. Finally, they go into detail on some of the more subtle
+//! aspects.
+//!
+//! # Table of contents
+//!
+//! These docs are long. Search for the section you are interested in.
+//!
+//! - Overview
+//! - Formal model
+//! - Borrowing and loans
+//! - Moves and initialization
+//! - Drop flags and structural fragments
+//! - Future work
+//!
+//! # Overview
+//!
+//! The borrow checker checks one function at a time. It operates in two
+//! passes. The first pass, called `gather_loans`, walks over the function
+//! and identifies all of the places where borrows (e.g., `&` expressions
+//! and `ref` bindings) and moves (copies or captures of a linear value)
+//! occur. It also tracks initialization sites. For each borrow and move,
+//! it checks various basic safety conditions at this time (for example,
+//! that the lifetime of the borrow doesn't exceed the lifetime of the
+//! value being borrowed, or that there is no move out of an `&T`
+//! referent).
+//!
+//! It then uses the dataflow module to propagate which of those borrows
+//! may be in scope at each point in the procedure. A loan is considered
+//! to come into scope at the expression that caused it and to go out of
+//! scope when the lifetime of the resulting reference expires.
+//!
+//! Once the in-scope loans are known for each point in the program, the
+//! borrow checker walks the IR again in a second pass called
+//! `check_loans`. This pass examines each statement and makes sure that
+//! it is safe with respect to the in-scope loans.
+//!
+//! # Formal model
+//!
+//! Throughout the docs we'll consider a simple subset of Rust in which
+//! you can only borrow from lvalues, defined like so:
+//!
+//! ```text
+//! LV = x | LV.f | *LV
+//! ```
+//!
+//! Here `x` represents some variable, `LV.f` is a field reference,
+//! and `*LV` is a pointer dereference. There are no auto-derefs or other
+//! niceties. This means that if you have a type like:
+//!
+//! ```text
+//! struct S { f: uint }
+//! ```
+//!
+//! and a variable `a: Box<S>`, then the Rust expression `a.f` would correspond
+//! to an `LV` of `(*a).f`.
+//!
+//! Here is the formal grammar for the types we'll consider:
+//!
+//! ```text
+//! TY = () | S<'LT...> | Box<TY> | & 'LT MQ TY
+//! MQ = mut | imm | const
+//! ```
+//!
+//! Most of these types should be pretty self-explanatory. Here `S` is a
+//! struct name and we assume structs are declared like so:
+//!
+//! ```text
+//! SD = struct S<'LT...> { (f: TY)... }
+//! ```
+//!
+//! # Borrowing and loans
+//!
+//! ## An intuitive explanation
+//!
+//! ### Issuing loans
+//!
+//! Now, imagine we had a program like this:
+//!
+//! ```text
+//! struct Foo { f: uint, g: uint }
+//! ...
+//! 'a: {
+//!     let mut x: Box<Foo> = ...;
+//!     let y = &mut (*x).f;
+//!     x = ...;
+//! }
+//! ```
+//!
+//! This is of course dangerous because mutating `x` will free the old
+//! value and hence invalidate `y`. The borrow checker aims to prevent
+//! this sort of thing.
+//!
+//! #### Loans and restrictions
+//!
+//! The way the borrow checker works is that it analyzes each borrow
+//! expression (in our simple model, that's stuff like `&LV`, though in
+//! real life there are a few other cases to consider). For each borrow
+//! expression, it computes a `Loan`, which is a data structure that
+//! records (1) the value being borrowed, (2) the mutability and scope of
+//! the borrow, and (3) a set of restrictions. In the code, `Loan` is a
+//! struct defined in `middle::borrowck`. Formally, we define `LOAN` as
+//! follows:
+//!
+//! ```text
+//! LOAN = (LV, LT, MQ, RESTRICTION*)
+//! RESTRICTION = (LV, ACTION*)
+//! ACTION = MUTATE | CLAIM | FREEZE
+//! ```
+//!
+//! Here the `LOAN` tuple defines the lvalue `LV` being borrowed; the
+//! lifetime `LT` of that borrow; the mutability `MQ` of the borrow; and a
+//! list of restrictions. The restrictions indicate actions which, if
+//! taken, could invalidate the loan and lead to type safety violations.
+//!
+//! Each `RESTRICTION` is a pair of a restrictive lvalue `LV` (which will
+//! either be the path that was borrowed or some prefix of the path that
+//! was borrowed) and a set of restricted actions. There are three kinds
+//! of actions that may be restricted for the path `LV`:
+//!
+//! - `MUTATE` means that `LV` cannot be assigned to;
+//! - `CLAIM` means that `LV` cannot be borrowed mutably;
+//! - `FREEZE` means that `LV` cannot be borrowed immutably.
+//!
+//! Finally, it is never possible to move from an lvalue that appears in a
+//! restriction. This implies that the "empty restriction" `(LV, [])`,
+//! which contains an empty set of actions, still has a purpose---it
+//! prevents moves from `LV`. I chose not to make `MOVE` a fourth kind of
+//! action because that would imply that sometimes moves are permitted
+//! from restricted values, which is not the case.
+//!
+//! #### Example
+//!
+//! To give you a better feeling for what kind of restrictions are
+//! derived from a loan, let's look at the loan `L` that would be issued
+//! as a result of the borrow `&mut (*x).f` in the example above:
+//!
+//! ```text
+//! L = ((*x).f, 'a, mut, RS) where
+//!     RS = [((*x).f, [MUTATE, CLAIM, FREEZE]),
+//!           (*x, [MUTATE, CLAIM, FREEZE]),
+//!           (x, [MUTATE, CLAIM, FREEZE])]
+//! ```
+//!
+//! The loan states that the expression `(*x).f` has been loaned as
+//! mutable for the lifetime `'a`. Because the loan is mutable, that means
+//! that the value `(*x).f` may be mutated via the newly created reference
+//! (and *only* via that pointer). This is reflected in the
+//! restrictions `RS` that accompany the loan.
+//!
+//! The first restriction `((*x).f, [MUTATE, CLAIM, FREEZE])` states that
+//! the lender may not mutate, freeze, nor alias `(*x).f`. Mutation is
+//! illegal because `(*x).f` is only supposed to be mutated via the new
+//! reference, not by mutating the original path `(*x).f`. Freezing is
+//! illegal because the path now has an `&mut` alias; so even if we, the
+//! lender, were to consider `(*x).f` to be immutable, it might be mutated
+//! via this alias. These restrictions will be enforced for the lifetime
+//! `'a` of the loan. After the loan expires, the restrictions no longer
+//! apply.
+//!
+//! The second restriction on `*x` is interesting because it does not
+//! apply to the path that was lent (`(*x).f`) but rather to a prefix of
+//! the borrowed path. This is due to the rules of inherited mutability:
+//! if the user were to assign to (or freeze) `*x`, they would indirectly
+//! overwrite (or freeze) `(*x).f`, and thus invalidate the reference
+//! that was created. In general it holds that when a path is
+//! lent, restrictions are issued for all the owning prefixes of that
+//! path. In this case, the path `*x` owns the path `(*x).f` and,
+//! because `x` is an owned pointer, the path `x` owns the path `*x`.
+//! Therefore, borrowing `(*x).f` yields restrictions on both
+//! `*x` and `x`.
+//!
+//! ### Checking for illegal assignments, moves, and reborrows
+//!
+//! Once we have computed the loans introduced by each borrow, the borrow
+//! checker uses a dataflow propagation to compute the full set of loans
+//! in scope at each expression and then uses that set to decide whether
+//! that expression is legal. Remember that the scope of a loan is defined
+//! by its lifetime LT. We sometimes say that a loan which is in-scope at
+//! a particular point is an "outstanding loan", and we refer to the set
+//! of restrictions included in those loans as the "outstanding
+//! restrictions".
+//!
+//! The kinds of expressions which in-scope loans can render illegal are:
+//! - *assignments* (`lv = v`): illegal if there is an in-scope restriction
+//!   against mutating `lv`;
+//! - *moves*: illegal if there is any in-scope restriction on `lv` at all;
+//! - *mutable borrows* (`&mut lv`): illegal if there is an in-scope restriction
+//!   against claiming `lv`;
+//! - *immutable borrows* (`&lv`): illegal if there is an in-scope restriction
+//!   against freezing `lv`.
+//!
+//! ## Formal rules
+//!
+//! Now that we hopefully have some kind of intuitive feeling for how the
+//! borrow checker works, let's look a bit more closely at the precise
+//! conditions that it uses. For simplicity I will ignore const loans.
+//!
+//! I will present the rules in a modified form of standard inference
+//! rules, which looks as follows:
+//!
+//! ```text
+//! PREDICATE(X, Y, Z)                  // Rule-Name
+//!   Condition 1
+//!   Condition 2
+//!   Condition 3
+//! ```
+//!
+//! The initial line states the predicate that is to be satisfied. The
+//! indented lines indicate the conditions that must be met for the
+//! predicate to be satisfied. The right-justified comment states the name
+//! of this rule: there are comments in the borrowck source referencing
+//! these names, so that you can cross reference to find the actual code
+//! that corresponds to the formal rule.
+//!
+//! ### Invariants
+//!
+//! I want to collect, at a high-level, the invariants the borrow checker
+//! maintains. I will give them names and refer to them throughout the
+//! text. Together these invariants are crucial for the overall soundness
+//! of the system.
+//!
+//! **Mutability requires uniqueness.** To mutate a path, that path must
+//! be unique: there must be no other usable path to the same memory.
+//!
+//! **Unique mutability.** There is only one *usable* mutable path to any
+//! given memory at any given time. This implies that when claiming memory
+//! with an expression like `p = &mut x`, the compiler must guarantee that
+//! the borrowed value `x` can no longer be mutated so long as `p` is
+//! live. (This is done via restrictions, read on.)
+//!
+//! ### The `gather_loans` pass
+//!
+//! We start with the `gather_loans` pass, which walks the AST looking for
+//! borrows. For each borrow, there are three bits of information: the
+//! lvalue `LV` being borrowed and the mutability `MQ` and lifetime `LT`
+//! of the resulting pointer. Given those, `gather_loans` applies four
+//! validity tests:
+//!
+//! 1. `MUTABILITY(LV, MQ)`: The mutability of the reference is
+//! compatible with the mutability of `LV` (i.e., not borrowing immutable
+//! data as mutable).
+//!
+//! 2. `ALIASABLE(LV, MQ)`: The aliasability of the reference is
+//! compatible with the aliasability of `LV`. The goal is to prevent
+//! `&mut` borrows of aliasable data.
+//!
+//! 3. `LIFETIME(LV, LT, MQ)`: The lifetime of the borrow does not exceed
+//! the lifetime of the value being borrowed.
+//!
+//! 4. `RESTRICTIONS(LV, LT, ACTIONS) = RS`: This check computes the
+//! restrictions required to maintain memory safety. These are the
+//! restrictions that will go into the final loan. We'll discuss them in
+//! more detail below.
+//!
+//! ## Checking mutability
+//!
+//! Checking mutability is fairly straightforward. We just want to prevent
+//! immutable data from being borrowed as mutable. Note that it is ok to
+//! borrow mutable data as immutable, since that is simply a
+//! freeze. Formally we define a predicate `MUTABILITY(LV, MQ)` which, if
+//! defined, means that "borrowing `LV` with mutability `MQ` is ok". The
+//! Rust code corresponding to this predicate is the function
+//! `check_mutability` in `middle::borrowck::gather_loans`.
+//!
+//! ### Checking mutability of variables
+//!
+//! *Code pointer:* Function `check_mutability()` in `gather_loans/mod.rs`,
+//! but also the code in `mem_categorization`.
+//!
+//! Let's begin with the rules for variables, which state that if a
+//! variable is declared as mutable, it may be borrowed any which way, but
+//! otherwise the variable must be borrowed as immutable or const:
+//!
+//! ```text
+//! MUTABILITY(X, MQ)                   // M-Var-Mut
+//!   DECL(X) = mut
+//!
+//! MUTABILITY(X, MQ)                   // M-Var-Imm
+//!   DECL(X) = imm
+//!   MQ = imm | const
+//! ```
+//!
+//! ### Checking mutability of owned content
+//!
+//! Fields and owned pointers inherit their mutability from
+//! their base expressions, so both of their rules basically
+//! delegate the check to the base expression `LV`:
+//!
+//! ```text
+//! MUTABILITY(LV.f, MQ)                // M-Field
+//!   MUTABILITY(LV, MQ)
+//!
+//! MUTABILITY(*LV, MQ)                 // M-Deref-Unique
+//!   TYPE(LV) = Box<Ty>
+//!   MUTABILITY(LV, MQ)
+//! ```
+//!
+//! ### Checking mutability of immutable pointer types
+//!
+//! Immutable pointer types like `&T` can only
+//! be borrowed if MQ is immutable or const:
+//!
+//! ```text
+//! MUTABILITY(*LV, MQ)                 // M-Deref-Borrowed-Imm
+//!   TYPE(LV) = &Ty
+//!   MQ = imm | const
+//! ```
+//!
+//! ### Checking mutability of mutable pointer types
+//!
+//! `&mut T` can be frozen, so it is acceptable to borrow it as either imm or mut:
+//!
+//! ```text
+//! MUTABILITY(*LV, MQ)                 // M-Deref-Borrowed-Mut
+//!   TYPE(LV) = &mut Ty
+//! ```
+//!
+//! ## Checking aliasability
+//!
+//! The goal of the aliasability check is to ensure that we never permit
+//! `&mut` borrows of aliasable data. Formally we define a predicate
+//! `ALIASABLE(LV, MQ)` which, if defined, means that
+//! "borrowing `LV` with mutability `MQ` is ok". The
+//! Rust code corresponding to this predicate is the function
+//! `check_aliasability()` in `middle::borrowck::gather_loans`.
+//!
+//! ### Checking aliasability of variables
+//!
+//! Local variables are never aliasable as they are accessible only within
+//! the stack frame.
+//!
+//! ```text
+//! ALIASABLE(X, MQ)                    // M-Var-Mut
+//! ```
+//!
+//! ### Checking aliasability of owned content
+//!
+//! Owned content is aliasable if it is found in an aliasable location:
+//!
+//! ```text
+//! ALIASABLE(LV.f, MQ)                 // M-Field
+//!   ALIASABLE(LV, MQ)
+//!
+//! ALIASABLE(*LV, MQ)                  // M-Deref-Unique
+//!   ALIASABLE(LV, MQ)
+//! ```
+//!
+//! ### Checking aliasability of immutable pointer types
+//!
+//! Immutable pointer types like `&T` are aliasable, and hence can only be
+//! borrowed immutably:
+//!
+//! ```text
+//! ALIASABLE(*LV, imm)                 // M-Deref-Borrowed-Imm
+//!   TYPE(LV) = &Ty
+//! ```
+//!
+//! ### Checking aliasability of mutable pointer types
+//!
+//! `&mut T` can be frozen, so it is acceptable to borrow it as either imm or mut:
+//!
+//! ```text
+//! ALIASABLE(*LV, MQ)                  // M-Deref-Borrowed-Mut
+//!   TYPE(LV) = &mut Ty
+//! ```
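+//!
+//! To make the aliasability check concrete, here is a minimal sketch of
+//! the kind of code it rejects (the exact error wording is illustrative):
+//!
+//! ```
+//! fn foo(p: & &mut int) {
+//!     // `*p` is an `&mut int` sitting in an aliasable location, so we
+//!     // may not claim its referent:
+//!     let q = &mut **p; // ERROR: cannot borrow aliasable data as mutable
+//! }
+//! ```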
+//!
+//! ## Checking lifetime
+//!
+//! These rules aim to ensure that no data is borrowed for a scope that exceeds
+//! its lifetime. The two computations involved (the lifetime of the borrow
+//! and the scope of the borrowed data) wind up being intimately related.
+//! Formally, we define a predicate `LIFETIME(LV, LT, MQ)`, which states that
+//! "the lvalue `LV` can be safely borrowed for the lifetime `LT` with mutability
+//! `MQ`". The Rust code corresponding to this predicate is the module
+//! `middle::borrowck::gather_loans::lifetime`.
+//!
+//! ### The Scope function
+//!
+//! Several of the rules refer to a helper function `SCOPE(LV)=LT`.
+//! `SCOPE(LV)` yields the lifetime `LT` for which the lvalue `LV` is
+//! guaranteed to exist, presuming that no mutations occur.
+//!
+//! The scope of a local variable is the block where it is declared:
+//!
+//! ```text
+//!   SCOPE(X) = block where X is declared
+//! ```
+//!
+//! The scope of a field is the scope of the struct:
+//!
+//! ```text
+//!   SCOPE(LV.f) = SCOPE(LV)
+//! ```
+//!
+//! The scope of a unique referent is the scope of the pointer, since
+//! (barring mutation or moves) the pointer will not be freed until
+//! the pointer itself `LV` goes out of scope:
+//!
+//! ```text
+//!   SCOPE(*LV) = SCOPE(LV) if LV has type Box<Ty>
+//! ```
+//!
+//! The scope of a borrowed referent is the scope associated with the
+//! pointer. This is a conservative approximation, since the data that
+//! the pointer points at may actually live longer:
+//!
+//! ```text
+//!   SCOPE(*LV) = LT if LV has type &'LT T or &'LT mut T
+//! ```
+//!
+//! ### Checking lifetime of variables
+//!
+//! The rule for variables states that a variable can only be borrowed for
+//! a lifetime `LT` that is a subregion of the variable's scope:
+//!
+//! ```text
+//! LIFETIME(X, LT, MQ)                 // L-Local
+//!   LT <= SCOPE(X)
+//! ```
+//!
+//! ### Checking lifetime for owned content
+//!
+//! The lifetime of a field or owned pointer is the same as the lifetime
+//! of its owner:
+//!
+//! ```text
+//! LIFETIME(LV.f, LT, MQ)              // L-Field
+//!   LIFETIME(LV, LT, MQ)
+//!
+//! LIFETIME(*LV, LT, MQ)               // L-Deref-Send
+//!   TYPE(LV) = Box<Ty>
+//!   LIFETIME(LV, LT, MQ)
+//! ```
+//!
+//! ### Checking lifetime for derefs of references
+//!
+//! References have a lifetime `LT'` associated with them. The
+//! data they point at has been guaranteed to be valid for at least this
+//! lifetime. Therefore, the borrow is valid so long as the lifetime `LT`
+//! of the borrow does not exceed the lifetime `LT'` of the pointer
+//! itself:
+//!
+//! ```text
+//! LIFETIME(*LV, LT, MQ)               // L-Deref-Borrowed
+//!   TYPE(LV) = &LT' Ty OR &LT' mut Ty
+//!   LT <= LT'
+//! ```
+//!
+//! ## Computing the restrictions
+//!
+//! The final rules govern the computation of *restrictions*, meaning that
+//! we compute the set of actions that will be illegal for the life of the
+//! loan. The predicate is written `RESTRICTIONS(LV, LT, ACTIONS) =
+//! RESTRICTION*`, which can be read "in order to prevent `ACTIONS` from
+//! occurring on `LV`, the restrictions `RESTRICTION*` must be respected
+//! for the lifetime of the loan".
+//!
+//! Note that there is an initial set of restrictions: these restrictions
+//! are computed based on the kind of borrow:
+//!
+//! ```text
+//! &mut LV =>   RESTRICTIONS(LV, LT, MUTATE|CLAIM|FREEZE)
+//! &LV =>       RESTRICTIONS(LV, LT, MUTATE|CLAIM)
+//! &const LV => RESTRICTIONS(LV, LT, [])
+//! ```
+//!
+//! The reasoning here is that a mutable borrow must be the only writer,
+//! therefore it prevents other writes (`MUTATE`), mutable borrows
+//! (`CLAIM`), and immutable borrows (`FREEZE`). An immutable borrow
+//! permits other immutable borrows but forbids writes and mutable borrows.
+//! Finally, a const borrow just wants to be sure that the value is not
+//! moved out from under it, so no actions are forbidden.
+//!
+//! ### Restrictions for loans of a local variable
+//!
+//! The simplest case is a borrow of a local variable `X`:
+//!
+//! ```text
+//! RESTRICTIONS(X, LT, ACTIONS) = (X, ACTIONS)            // R-Variable
+//! ```
+//!
+//! In such cases we just record the actions that are not permitted.
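+//!
+//! For instance, here is a minimal sketch of R-Variable in action (the
+//! exact error wording is illustrative):
+//!
+//! ```
+//! fn main() {
+//!     let mut x: int = 5;
+//!     let p = &mut x; // loan of `x` with restriction (x, [MUTATE, CLAIM, FREEZE])
+//!     x = 10;         // ERROR: violates the MUTATE restriction on `x`
+//!     *p += 1;        // mutating through the reference itself is fine
+//! }
+//! ```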
+//! field:
+//!
+//! ```text
+//! RESTRICTIONS(LV.f, LT, ACTIONS) = RS, (LV.f, ACTIONS)   // R-Field
+//!   RESTRICTIONS(LV, LT, ACTIONS) = RS
+//! ```
+//!
+//! The reasoning here is as follows. If the field must not be mutated,
+//! then you must not mutate the owner of the field either, since that
+//! would indirectly modify the field. Similarly, if the field cannot be
+//! frozen or aliased, we cannot allow the owner to be frozen or aliased,
+//! since doing so indirectly freezes/aliases the field. This is the
+//! origin of inherited mutability.
+//!
+//! ### Restrictions for loans of owned referents
+//!
+//! Because the mutability of owned referents is inherited, restricting an
+//! owned referent is similar to restricting a field, in that it implies
+//! restrictions on the pointer. However, owned pointers have an important
+//! twist: if the owner `LV` is mutated, that causes the owned referent
+//! `*LV` to be freed! So whenever an owned referent `*LV` is borrowed, we
+//! must prevent the owned pointer `LV` from being mutated, which means
+//! that we always add `MUTATE` and `CLAIM` to the restriction set imposed
+//! on `LV`:
+//!
+//! ```text
+//! RESTRICTIONS(*LV, LT, ACTIONS) = RS, (*LV, ACTIONS)     // R-Deref-Send-Pointer
+//!   TYPE(LV) = Box<Ty>
+//!   RESTRICTIONS(LV, LT, ACTIONS|MUTATE|CLAIM) = RS
+//! ```
+//!
+//! ### Restrictions for loans of immutable borrowed referents
+//!
+//! Immutable borrowed referents are freely aliasable, meaning that
+//! the compiler does not prevent you from copying the pointer. This
+//! implies that issuing restrictions is useless. We might prevent the
+//! user from acting on `*LV` itself, but there could be another path
+//! `*LV1` that refers to the exact same memory, and we would not be
+//! restricting that path. Therefore, the rule for `&Ty` pointers
+//! always returns an empty set of restrictions, and it only permits
+//! restricting `MUTATE` and `CLAIM` actions:
+//!
+//! ```text
+//! RESTRICTIONS(*LV, LT, ACTIONS) = []                     // R-Deref-Imm-Borrowed
+//!   TYPE(LV) = &LT' Ty
+//!   LT <= LT'                                             // (1)
+//!   ACTIONS subset of [MUTATE, CLAIM]
+//! ```
+//!
+//! The reason that we can restrict `MUTATE` and `CLAIM` actions even
+//! without a restrictions list is that it is never legal to mutate nor to
+//! borrow mutably the contents of a `&Ty` pointer. In other words,
+//! those restrictions are already inherent in the type.
+//!
+//! Clause (1) in the rule for `&Ty` deserves mention. Here I
+//! specify that the lifetime of the loan must be less than the lifetime
+//! of the `&Ty` pointer. In simple cases, this clause is redundant, since
+//! the `LIFETIME()` function will already enforce the required rule:
+//!
+//! ```
+//! fn foo<'a>(point: &'a Point) -> &'static f32 {
+//!     &point.x // Error
+//! }
+//! ```
+//!
+//! The above example fails to compile both because of clause (1) above
+//! and because of the basic `LIFETIME()` check. However, in more advanced
+//! examples involving multiple nested pointers, clause (1) is needed:
+//!
+//! ```
+//! fn foo<'a, 'b>(point: &'a &'b mut Point) -> &'b f32 {
+//!     &point.x // Error
+//! }
+//! ```
+//!
+//! The `LIFETIME` rule here would accept `'b` because, in fact, the
+//! memory *is* guaranteed to remain valid (i.e., not be freed) for the
+//! lifetime `'b`, since the `&mut` pointer is valid for `'b`. However, we
+//! are returning an immutable reference, so we need the memory to be both
+//! valid and immutable. Even though `point.x` is referenced by an `&mut`
+//! pointer, it can still be considered immutable so long as that `&mut`
+//! pointer is found in an aliasable location. That means the memory is
+//! guaranteed to be *immutable* for the lifetime of the `&` pointer,
+//! which is only `'a`, not `'b`. Hence this example yields an error.
+//!
+//! As a final twist, consider the case of two nested *immutable*
+//! pointers, rather than a mutable pointer within an immutable one:
+//!
+//! ```
+//! fn foo<'a, 'b>(point: &'a &'b Point) -> &'b f32 {
+//!     &point.x // OK
+//! }
+//! ```
+//!
+//! This function is legal. The reason for this is that the inner pointer
+//! (`*point : &'b Point`) is enough to guarantee the memory is immutable
+//! and valid for the lifetime `'b`. This is reflected in
+//! `RESTRICTIONS()` by the fact that we do not recurse (i.e., we impose
+//! no restrictions on `LV`, which in this particular case is the pointer
+//! `point : &'a &'b Point`).
+//!
+//! #### Why both `LIFETIME()` and `RESTRICTIONS()`?
+//!
+//! Given the previous text, it might seem that `LIFETIME` and
+//! `RESTRICTIONS` should be folded together into one check, but there is
+//! a reason that they are separated. They answer separate concerns.
+//! The rules pertaining to `LIFETIME` exist to ensure that we don't
+//! create a borrowed pointer that outlives the memory it points at. So
+//! `LIFETIME` prevents a function like this:
+//!
+//! ```
+//! fn get_1<'a>() -> &'a int {
+//!     let x = 1;
+//!     &x
+//! }
+//! ```
+//!
+//! Here we would be returning a pointer into the stack. Clearly bad.
+//!
+//! However, the `RESTRICTIONS` rules are more concerned with how memory
+//! is used. The example above doesn't generate an error according to
+//! `RESTRICTIONS` because, for local variables, we don't require that the
+//! loan lifetime be a subset of the local variable lifetime. The idea
+//! here is that we *can* guarantee that `x` is not (e.g.) mutated for the
+//! lifetime `'a`, even though `'a` exceeds the function body and thus
+//! involves unknown code in the caller -- after all, `x` ceases to exist
+//! after we return and hence the remaining code in `'a` cannot possibly
+//! mutate it. This distinction is important for type checking functions
+//! like this one:
+//!
+//! ```
+//! fn inc_and_get<'a>(p: &'a mut Point) -> &'a int {
+//!     p.x += 1;
+//!     &p.x
+//! }
+//! ```
+//!
+//! In this case, we take in a `&mut` and return a frozen borrowed pointer
+//! with the same lifetime. So long as the lifetime of the returned value
+//! doesn't exceed the lifetime of the `&mut` we receive as input, this is
+//! fine, though it may seem surprising at first (it surprised me when I
+//! first worked it through). After all, we're guaranteeing that `*p`
+//! won't be mutated for the lifetime `'a`, even though we can't "see" the
+//! entirety of the code during that lifetime, since some of it occurs in
+//! our caller. But we *do* know that nobody can mutate `*p` except
+//! through `p`. So if we don't mutate `*p` and we don't return `p`, then
+//! we know that the right to mutate `*p` has been lost to our caller --
+//! in terms of capability, the caller passed in the ability to mutate
+//! `*p`, and we never gave it back. (Note that we can't return `p` while
+//! `*p` is borrowed since that would be a move of `p`, as `&mut` pointers
+//! are affine.)
+//!
+//! ### Restrictions for loans of const aliasable referents
+//!
+//! Freeze pointers are read-only. There may be `&mut` or `&` aliases, and
+//! we cannot prevent *anything* but moves in that case. So the
+//! `RESTRICTIONS` function is only defined if `ACTIONS` is the empty set.
+//! Because moves from a `&const` lvalue are never legal, it is not
+//! necessary to add any restrictions at all to the final result.
+//!
+//! ```text
+//! RESTRICTIONS(*LV, LT, []) = []                          // R-Deref-Freeze-Borrowed
+//!   TYPE(LV) = &const Ty
+//! ```
+//!
+//! ### Restrictions for loans of mutable borrowed referents
+//!
+//! Mutable borrowed pointers are guaranteed to be the only way to mutate
+//! their referent. This permits us to take greater license with them; for
+//! example, the referent can be frozen simply by ensuring that we do not
+//! use the original pointer to perform a mutation. Similarly, we can allow
+//! the referent to be claimed, so long as the original pointer is unused
+//! while the new claimant is live.
+//!
+//! The rule for mutable borrowed pointers is as follows:
+//!
+//! ```text
+//! RESTRICTIONS(*LV, LT, ACTIONS) = RS, (*LV, ACTIONS)     // R-Deref-Mut-Borrowed
+//!   TYPE(LV) = &LT' mut Ty
+//!   LT <= LT'                                             // (1)
+//!   RESTRICTIONS(LV, LT, ACTIONS) = RS                    // (2)
+//! ```
+//!
+//! Let's examine the two numbered clauses:
+//!
+//! Clause (1) specifies that the lifetime of the loan (`LT`) cannot
+//! exceed the lifetime of the `&mut` pointer (`LT'`). The reason for this
+//! is that the `&mut` pointer is guaranteed to be the only legal way to
+//! mutate its referent -- but only for the lifetime `LT'`. After that
+//! lifetime, the loan on the referent expires and hence the data may be
+//! modified by its owner again. This implies that we are only able to
+//! guarantee that the referent will not be modified or aliased for a
+//! maximum of `LT'`.
+//!
+//! Here is a concrete example of a bug this rule prevents:
+//!
+//! ```
+//! // Test region-reborrow-from-shorter-mut-ref.rs:
+//! fn copy_borrowed_ptr<'a,'b,T>(p: &'a mut &'b mut T) -> &'b mut T {
+//!     &mut **p // ERROR due to clause (1)
+//! }
+//! fn main() {
+//!     let mut x = 1;
+//!     let mut y = &mut x; // <-'b-----------------------------+
+//!     //      +-'a--------------------+                       |
+//!     //      v                       v                       |
+//!     let z = copy_borrowed_ptr(&mut y); // y is lent         |
+//!     *y += 1; // Here y==z, so both should not be usable...  |
+//!     *z += 1; // ...and yet they would be, but for clause 1. |
+//! } // <------------------------------------------------------+
+//! ```
+//!
+//! Clause (2) propagates the restrictions on the referent to the pointer
+//! itself. This is the same as with an owned pointer, though the
+//! reasoning is mildly different. The basic goal in all cases is to
+//! prevent the user from establishing another route to the same data. To
+//! see what I mean, let's examine various cases of what can go wrong and
+//! show how it is prevented.
+//!
+//! **Example danger 1: Moving the base pointer.** One of the simplest
+//! ways to violate the rules is to move the base pointer to a new name
+//! and access it via that new name, thus bypassing the restrictions on
+//! the old name. Here is an example:
+//!
+//! ```
+//! // src/test/compile-fail/borrowck-move-mut-base-ptr.rs
+//! fn foo(t0: &mut int) {
+//!     let p: &int = &*t0; // Freezes `*t0`
+//!     let t1 = t0;        //~ ERROR cannot move out of `t0`
+//!     *t1 = 22;           // OK, not a write through `*t0`
+//! }
+//! ```
+//!
+//! Remember that `&mut` pointers are linear, and hence `let t1 = t0` is a
+//! move of `t0` -- or would be, if it were legal. Instead, we get an
+//! error, because clause (2) imposes restrictions on `LV` (`t0`, here),
+//! and any restrictions on a path make it impossible to move from that
+//! path.
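+//!
+//! To see where that restriction comes from, it may help to trace the
+//! rules for the freeze `&*t0` in the example above (this derivation is
+//! our own, but it follows directly from the rules given so far):
+//!
+//! ```text
+//! &*t0 => RESTRICTIONS(*t0, LT, MUTATE|CLAIM)      // initial set for `&`
+//!      => RS, (*t0, MUTATE|CLAIM)                  // R-Deref-Mut-Borrowed
+//!         where RESTRICTIONS(t0, LT, MUTATE|CLAIM) = RS
+//!      => (t0, MUTATE|CLAIM), (*t0, MUTATE|CLAIM)  // R-Variable
+//! ```
+//!
+//! The entry `(t0, MUTATE|CLAIM)` contributed by clause (2) is precisely
+//! the restriction that forbids the move `let t1 = t0`.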
+//!
+//! **Example danger 2: Claiming the base pointer.** Another possible
+//! danger is to mutably borrow the base path. This can lead to two bad
+//! scenarios. The most obvious is that the mutable borrow itself becomes
+//! another path to access the same data, as shown here:
+//!
+//! ```
+//! // src/test/compile-fail/borrowck-mut-borrow-of-mut-base-ptr.rs
+//! fn foo<'a>(mut t0: &'a mut int,
+//!            mut t1: &'a mut int) {
+//!     let p: &int = &*t0;   // Freezes `*t0`
+//!     let mut t2 = &mut t0; //~ ERROR cannot borrow `t0`
+//!     **t2 += 1;            // Mutates `*t0`
+//! }
+//! ```
+//!
+//! In this example, `**t2` is the same memory as `*t0`. Because `t2` is
+//! an `&mut` pointer, `**t2` is a unique path and hence it would be
+//! possible to mutate `**t2` even though that memory was supposed to be
+//! frozen by the creation of `p`. However, an error is reported -- the
+//! reason is that the freeze `&*t0` will restrict claims and mutation
+//! against `*t0` which, by clause 2, in turn prevents claims and mutation
+//! of `t0`. Hence the claim `&mut t0` is illegal.
+//!
+//! Another danger with an `&mut` pointer is that we could swap the `t0`
+//! value away to create a new path:
+//!
+//! ```
+//! // src/test/compile-fail/borrowck-swap-mut-base-ptr.rs
+//! fn foo<'a>(mut t0: &'a mut int,
+//!            mut t1: &'a mut int) {
+//!     let p: &int = &*t0;     // Freezes `*t0`
+//!     swap(&mut t0, &mut t1); //~ ERROR cannot borrow `t0`
+//!     *t1 = 22;
+//! }
+//! ```
+//!
+//! This is illegal for the same reason as above. Note that if we added
+//! back a swap operator -- as we used to have -- we would want to be very
+//! careful to ensure this example is still illegal.
+//!
+//! **Example danger 3: Freeze the base pointer.** In the case where the
+//! referent is claimed, even freezing the base pointer can be dangerous,
+//! as shown in the following example:
+//!
+//! ```
+//! // src/test/compile-fail/borrowck-borrow-of-mut-base-ptr.rs
+//! fn foo<'a>(mut t0: &'a mut int,
+//!            mut t1: &'a mut int) {
+//!     let p: &mut int = &mut *t0; // Claims `*t0`
+//!     let mut t2 = &t0;           //~ ERROR cannot borrow `t0`
+//!     let q: &int = &**t2;        // Freezes `*t0` but not through `*p`
+//!     *p += 1;                    // violates type of `*q`
+//! }
+//! ```
+//!
+//! Here the problem is that `*t0` is claimed by `p`, and hence `p` wants
+//! to be the controlling pointer through which mutation or freezes occur.
+//! But `t2` would -- if it were legal -- have the type `& &mut int`, and
+//! hence would be a mutable pointer in an aliasable location, which is
+//! considered frozen (since no one can write to `**t2` as it is not a
+//! unique path). Therefore, we could reasonably create a frozen `&int`
+//! pointer pointing at `*t0` that coexists with the mutable pointer `p`,
+//! which is clearly unsound.
+//!
+//! However, it is not always unsafe to freeze the base pointer. In
+//! particular, if the referent is frozen, there is no harm in it:
+//!
+//! ```
+//! // src/test/run-pass/borrowck-borrow-of-mut-base-ptr-safe.rs
+//! fn foo<'a>(mut t0: &'a mut int,
+//!            mut t1: &'a mut int) {
+//!     let p: &int = &*t0;  // Freezes `*t0`
+//!     let mut t2 = &t0;
+//!     let q: &int = &**t2; // Freezes `*t0`, but that's ok...
+//!     let r: &int = &*t0;  // ...after all, could do same thing directly.
+//! }
+//! ```
+//!
+//! In this case, creating the alias `t2` of `t0` is safe because the only
+//! thing `t2` can be used for is to further freeze `*t0`, which is
+//! already frozen. In particular, we cannot assign to `*t0` through the
+//! new alias `t2`, as demonstrated in this test case:
+//!
+//! ```
+//! // src/test/compile-fail/borrowck-borrow-mut-base-ptr-in-aliasable-loc.rs
+//! fn foo(t0: & &mut int) {
+//!     let t1 = t0;
+//!     let p: &int = &**t0;
+//!     **t1 = 22; //~ ERROR cannot assign
+//! }
+//! ```
+//!
+//! This distinction is reflected in the rules. When doing an `&mut`
+//! borrow -- as in the first example -- the set `ACTIONS` will be
+//! `CLAIM|MUTATE|FREEZE`, because claiming the referent implies that it
+//! cannot be claimed, mutated, or frozen by anyone else. These
+//! restrictions are propagated back to the base path and hence the base
+//! path is considered unfreezable.
+//!
+//! In contrast, when the referent is merely frozen -- as in the second
+//! example -- the set `ACTIONS` will be `CLAIM|MUTATE`, because freezing
+//! the referent implies that it cannot be claimed or mutated but permits
+//! others to freeze. Hence when these restrictions are propagated back to
+//! the base path, it will still be considered freezable.
+//!
+//! **FIXME #10520: Restrictions against mutating the base pointer.** When
+//! an `&mut` pointer is frozen or claimed, we currently pass along the
+//! restriction against MUTATE to the base pointer. I do not believe this
+//! restriction is needed. It dates from the days when we had a way to
+//! mutate that preserved the value being mutated (i.e., swap). Nowadays
+//! the only form of mutation is assignment, which destroys the pointer
+//! being mutated -- therefore, a mutation cannot create a new path to the
+//! same data. Rather, it removes an existing path. This implies that not
+//! only can we permit mutation, we can have mutation kill restrictions in
+//! the dataflow sense.
+//!
+//! **WARNING:** We do not currently have `const` borrows in the
+//! language. If they are added back in, we must ensure that they are
+//! consistent with all of these examples. The crucial question will be
+//! what sorts of actions are permitted with a `&const &mut` pointer. I
+//! would suggest that an `&mut` referent found in an `&const` location be
+//! prohibited from both freezes and claims. This would avoid the need to
+//! prevent `const` borrows of the base pointer when the referent is
+//! borrowed.
+//!
+//! # Moves and initialization
+//!
+//! The borrow checker is also in charge of ensuring that:
+//!
+//! - all memory which is accessed is initialized
+//! - immutable local variables are assigned at most once.
+//!
+//! These are two separate dataflow analyses built on the same
+//! framework. Let's look at checking that memory is initialized first;
+//! the checking of immutable local variable assignments works in a very
+//! similar way.
+//!
+//! To track the initialization of memory, we actually track all the
+//! points in the program that *create uninitialized memory*, meaning
+//! moves and the declaration of uninitialized variables. For each of
+//! these points, we create a bit in the dataflow set. Assignments to a
+//! variable `x` or path `a.b.c` kill the move/uninitialization bits for
+//! those paths and any subpaths (e.g., `x`, `x.y`, `a.b.c`, `*a.b.c`).
+//! Bits are unioned when two control-flow paths join. Thus, the
+//! presence of a bit indicates that the move may have occurred without an
+//! intervening assignment to the same memory. At each use of a variable,
+//! we examine the bits in scope, and check that none of them are
+//! moves/uninitializations of the variable that is being used.
+//!
+//! Let's look at a simple example:
+//!
+//! ```
+//! fn foo(a: Box<int>) {
+//!     let b: Box<int>; // Gen bit 0.
+//!
+//!     if cond {        // Bits: 0
+//!         use(&*a);
+//!         b = a;       // Gen bit 1, kill bit 0.
+//!         use(&*b);
+//!     } else {
+//!                      // Bits: 0
+//!     }
+//!                      // Bits: 0,1
+//!     use(&*a);        // Error.
+//!     use(&*b);        // Error.
+//! }
+//!
+//! fn use(a: &int) { }
+//! ```
+//!
+//! In this example, the variable `b` is created uninitialized. In one
+//! branch of an `if`, we then move the variable `a` into `b`. Once we
+//! exit the `if`, therefore, it is an error to use `a` or `b` since both
+//! are only conditionally initialized. I have annotated the dataflow
+//! state using comments. There are two dataflow bits, with bit 0
+//! corresponding to the creation of `b` without an initializer, and bit 1
+//! corresponding to the move of `a`. The assignment `b = a` both
+//! generates bit 1, because it is a move of `a`, and kills bit 0, because
+//! `b` is now initialized. On the else branch, though, `b` is never
+//! initialized, and so bit 0 remains untouched. When the two flows of
+//! control join, we union the bits from both sides, resulting in both
+//! bits 0 and 1 being set. Thus any attempt to use `a` uncovers bit 1
+//! from the "then" branch, showing that `a` may be moved, and any attempt
+//! to use `b` uncovers bit 0 from the "else" branch, showing that `b`
+//! may not be initialized.
+//!
+//! ## Initialization of immutable variables
+//!
+//! Initialization of immutable variables works in a very similar way,
+//! except that:
+//!
+//! 1. we generate bits for each assignment to a variable;
+//! 2. the bits are never killed except when the variable goes out of scope.
+//!
+//! Thus the presence of an assignment bit indicates that the assignment
+//! may have occurred. Note that assignments are only killed when the
+//! variable goes out of scope, as it is not relevant whether or not there
+//! has been a move in the meantime. Using these bits, we can declare that
+//! an assignment to an immutable variable is legal iff there is no other
+//! assignment bit to that same variable in scope.
+//!
+//! ## Why is the design made this way?
+//!
+//! It may seem surprising that we assign dataflow bits to *each move*
+//! rather than *each path being moved*. This is somewhat less efficient,
+//! since on each use, we must iterate through all moves and check whether
+//! any of them correspond to the path in question. Similar concerns apply
+//! to the analysis for double assignments to immutable variables. The
+//! main reason to do it this way is that it allows us to print better
+//! error messages, because when a use occurs, we can print out the
+//! precise move that may be in scope, rather than simply having to say
+//! "the variable may not be initialized".
+//!
+//! ## Data structures used in the move analysis
+//!
+//! The move analysis maintains several data structures that enable it to
+//! cross-reference moves and assignments to determine when they may be
+//! moving/assigning the same memory. These are all collected into the
+//! `MoveData` and `FlowedMoveData` structs. The former represents the set
+//! of move paths, moves, and assignments, and the latter adds in the
+//! results of a dataflow computation.
+//!
+//! ### Move paths
+//!
+//! The `MovePath` tree tracks every path that is moved or assigned to.
+//! These paths have the same form as the `LoanPath` data structure, which
+//! in turn is the "real world version" of the lvalues `LV` that we
+//! introduced earlier. The difference between a `MovePath` and a `LoanPath`
+//! is that move paths are:
+//!
+//! 1. Canonicalized, so that we have exactly one copy of each, and
+//!    we can refer to move paths by index;
+//! 2. Cross-referenced with other paths into a tree, so that given a move
+//!    path we can efficiently find all parent move paths and all
+//!    extensions (e.g., given the `a.b` move path, we can easily find the
+//!    move path `a` and also the move paths `a.b.c`);
+//! 3. Cross-referenced with moves and assignments, so that we can
+//!    easily find all moves and assignments to a given path.
+//!
+//! The mechanism that we use is to create a `MovePath` record for each
+//! move path. These are arranged in an array and are referenced using
+//! `MovePathIndex` values, which are newtype'd indices. The `MovePath`
+//! structs are arranged into a tree, represented using the standard Knuth
+//! representation, where each node has a child 'pointer' and a "next
+//! sibling" 'pointer'. In addition, each `MovePath` has a parent
+//! 'pointer'. In this case, the 'pointers' are just `MovePathIndex`
+//! values.
+//!
+//! In this way, if we want to find all base paths of a given move path,
+//! we can just iterate up the parent pointers (see `each_base_path()` in
+//! the `move_data` module). If we want to find all extensions, we can
+//! iterate through the subtree (see `each_extending_path()`).
+//!
+//! ### Moves and assignments
+//!
+//! There are structs to represent moves (`Move`) and assignments
+//! (`Assignment`), and these are also placed into arrays and referenced
+//! by index. All moves of a particular path are arranged into a linked
+//! list, beginning with `MovePath.first_move` and continuing through
+//! `Move.next_move`.
+//!
+//! We distinguish between "var" assignments, which are assignments to a
+//! variable like `x = foo`, and "path" assignments (`x.f = foo`). This
+//! is because we need to assign dataflow bits to the former, but not the
+//! latter, so as to check for double initialization of immutable
+//! variables.
+//!
+//! ### Gathering and checking moves
+//!
+//! Like loans, we distinguish two phases. The first, gathering, is where
+//! we uncover all the moves and assignments. As with loans, we do some
+//! basic sanity checking in this phase, so we'll report errors if you
+//! attempt to move out of a borrowed pointer, etc. Then we do the dataflow
+//! (see `FlowedMoveData::new`). Finally, in the `check_loans.rs` code, we
+//! walk back over, identify all uses, assignments, and captures, and
+//! check that they are legal given the set of dataflow bits we have
+//! computed for that program point.
+//!
+//! # Drop flags and structural fragments
+//!
+//! In addition to the job of enforcing memory safety, the borrow checker
+//! code is also responsible for identifying the *structural fragments* of
+//! data in the function, to support out-of-band dynamic drop flags
+//! allocated on the stack. (For background, see [RFC PR #320].)
+//!
+//! [RFC PR #320]: https://github.com/rust-lang/rfcs/pull/320
+//!
+//! Semantically, each piece of data that has a destructor may need a
+//! boolean flag to indicate whether or not its destructor has been run
+//! yet. However, in many cases there is no need to actually maintain such
+//! a flag: It can be apparent from the code itself that a given path is
+//! always initialized (or always deinitialized) when control reaches the
+//! end of its owner's scope, and thus we can unconditionally emit (or
+//! not) the destructor invocation for that path.
+//!
+//! A simple example of this is the following:
+//!
+//! ```rust
+//! struct D { p: int }
+//! impl D { fn new(x: int) -> D { ... } }
+//! impl Drop for D { ... }
+//!
+//! fn foo(a: D, b: D, t: || -> bool) {
+//!     let c: D;
+//!     let d: D;
+//!     if t() { c = b; }
+//! }
+//! ```
+//!
+//! At the end of the body of `foo`, the compiler knows that `a` is
+//! initialized, introducing a drop obligation (running `D`'s destructor)
+//! for the end of `a`'s scope that is run unconditionally.
+//! Likewise the compiler knows that `d` is not initialized, and thus it
+//! leaves out the drop code for `d`.
+//!
+//! The compiler cannot statically know the drop-state of `b` or `c` at
+//! the end of their scope, since that depends on the values returned by
+//! `t`. Therefore, we need to insert boolean flags to track whether we
+//! need to drop `b` and `c`.
+//!
+//! However, the matter is not as simple as just mapping local variables
+//! to their corresponding drop flags when necessary. In particular, in
+//! addition to being able to move data out of local variables, Rust
+//! allows one to move values in and out of structured data.
+//!
+//! Consider the following:
+//!
+//! ```rust
+//! struct S { x: D, y: D, z: D }
+//!
+//! fn foo(a: S, mut b: S, t: || -> bool) {
+//!     let mut c: S;
+//!     let d: S;
+//!     let e: S = a.clone();
+//!     if t() {
+//!         c = b;
+//!         b.x = e.y;
+//!     }
+//!     if t() { c.y = D::new(4); }
+//! }
+//! ```
+//!
+//! As before, the drop obligations of `a` and `d` can be statically
+//! determined, and again the state of `b` and `c` depends on dynamic
+//! state. But additionally, the dynamic drop obligations introduced by
+//! `b` and `c` are not just per-local boolean flags. For example, if the
+//! first call to `t` returns `false` and the second call `true`, then at
+//! the end of their scope, `b` will be completely initialized, but only
+//! `c.y` in `c` will be initialized. If both calls to `t` return `true`,
+//! then at the end of their scope, `c` will be completely initialized,
+//! but only `b.x` will be initialized in `b`, and only `e.x` and `e.z`
+//! will be initialized in `e`.
+//!
+//! Note that we need to cover the `z` field in each case in some way,
+//! since it may (or may not) need to be dropped, even though `z` is never
+//! directly mentioned in the body of the `foo` function. We call a path
+//! like `b.z` a *fragment sibling* of `b.x`, since the field `z` comes
+//! from the same structure `S` that declared the field `x` in `b.x`.
+//!
+//! In general we need to maintain boolean flags that match the
+//! `S`-structure of both `b` and `c`. In addition, we need to consult
+//! such a flag when doing an assignment (such as `c.y = D::new(4);`
+//! above), in order to know whether or not there is a previous value that
+//! needs to be dropped before we do the assignment.
+//!
+//! So for any given function, we need to determine what flags are needed
+//! to track its drop obligations. Our strategy for determining the set of
+//! flags is to represent the fragmentation of the structure explicitly:
+//! by starting initially from the paths that are explicitly mentioned in
+//! moves and assignments (such as `b.x` and `c.y` above), and then
+//! traversing the structure of the path's type to identify leftover
+//! *unmoved fragments*: assigning into `c.y` means that `c.x` and `c.z`
+//! are leftover unmoved fragments. Each fragment represents a drop
+//! obligation that may need to be tracked. Paths that are only moved or
+//! assigned in their entirety (like `a` and `d`) are treated as a single
+//! drop obligation.
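+//!
+//! As a worked illustration (our own tally, obtained by applying the
+//! strategy above to the second `foo` example), the paths and fragments
+//! involved would be:
+//!
+//! ```text
+//! directly moved:     b (by `c = b`), e.y (by `b.x = e.y`)
+//! directly assigned:  c, b.x, c.y
+//! unmoved fragments:  b.y, b.z, c.x, c.z, e.x, e.z
+//! ```
+//!
+//! Each of the six unmoved fragments is a drop obligation that must be
+//! tracked dynamically, while `a` and `d` are handled statically.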
+//!
+//! The fragment construction process works by piggy-backing on the
+//! existing `move_data` module. We already have callbacks that visit each
+//! direct move and assignment; these form the basis for the sets of
+//! `moved_leaf_paths` and `assigned_leaf_paths`. From these leaves, we can
+//! walk up their parent chain to identify all of their parent paths.
+//! We need to identify the parents because of cases like the following:
+//!
+//! ```rust
+//! struct Pair<X,Y>{ x: X, y: Y }
+//! fn foo(dd_d_d: Pair<Pair<Pair<D, D>, D>, D>) {
+//!     other_function(dd_d_d.x.y);
+//! }
+//! ```
+//!
+//! In this code, the move of the path `dd_d_d.x.y` leaves behind not only
+//! the fragment drop-obligation `dd_d_d.x.x` but also `dd_d_d.y` as well.
+//!
+//! Once we have identified the directly-referenced leaves and their
+//! parents, we compute the left-over fragments, in the function
+//! `fragments::add_fragment_siblings`. As of this writing, this works by
+//! looking at each directly-moved or assigned path `P`, and blindly
+//! gathering all sibling fields of `P` (as well as siblings for the parents
+//! of `P`, etc.). After accumulating all such siblings, we filter out the
+//! entries added as siblings of `P` that turned out to be
+//! directly-referenced paths (or parents of directly referenced paths)
+//! themselves, thus leaving the never-referenced "left-overs" as the only
+//! thing left from the gathering step.
+//!
+//! ## Array structural fragments
+//!
+//! A special case of the structural fragments discussed above is
+//! the elements of an array that has been passed by value, such as
+//! the following:
+//!
+//! ```rust
+//! fn foo(a: [D, ..10], i: uint) -> D {
+//!     a[i]
+//! }
+//! ```
+//!
+//! The above code moves a single element out of the input array `a`.
+//! The remainder of the array still needs to be dropped; i.e., it
+//! is a structural fragment. Note that after performing such a move,
+//! it is not legal to read from the array `a`. There are a number of
+//! ways to deal with this, but the important thing to note is that
+//! the semantics needs to distinguish in some manner between a
+//! fragment that is the *entire* array versus a fragment that represents
+//! all-but-one element of the array. A place where that distinction
+//! would arise is the following:
+//!
+//! ```rust
+//! fn foo(a: [D, ..10], b: [D, ..10], i: uint, t: bool) -> D {
+//!     if t {
+//!         a[i]
+//!     } else {
+//!         b[i]
+//!     }
+//!
+//!     // When control exits, we will need either to drop all of `a`
+//!     // and all-but-one of `b`, or to drop all of `b` and all-but-one
+//!     // of `a`.
+//! }
+//! ```
+//!
+//! There are a number of ways that the trans backend could choose to
+//! compile this (e.g. a `[bool, ..10]` array for each such moved array;
+//! or an `Option<uint>` for each moved array). From the viewpoint of the
+//! borrow-checker, the important thing is to record what kind of fragment
+//! is implied by the relevant moves.
+//!
+//! # Future work
+//!
+//! While writing up these docs, I encountered some rules I believe to be
+//! stricter than necessary:
+//!
+//! - I think restricting the `&mut` LV against moves and `ALIAS` is
+//!   sufficient; `MUTATE` and `CLAIM` are overkill. `MUTATE` was necessary
+//!   when swap was a built-in operator, but as it is not, it is implied by
+//!   `CLAIM`, and `CLAIM` is implied by `ALIAS`. The only net effect of
+//!   this is an extra error message in some cases, though.
+//! - I have not described how closures interact. Current code is unsound.
+//!   I am working on describing and implementing the fix.
+//! - If we wish, we can easily extend the move checking to allow finer-grained
+//!   tracking of what is initialized and what is not, enabling code like
+//!   this:
+//!
+//!       a = x.f.g; // x.f.g is now uninitialized
+//!       // here, x and x.f are not usable, but x.f.h *is*
+//!       x.f.g = b; // x.f.g is now initialized
+//!       // now x, x.f, x.f.g, x.f.h are all usable
+//!
+//!   What needs to change here, most likely, is that the `moves` module
+//!   should record not only what paths are moved, but what expressions
+//!   are actual *uses*. For example, the reference to `x` in `x.f.g = b`
+//!   is not a true *use* in the sense that it requires `x` to be fully
+//!   initialized. This is in fact why the above code produces an error
+//!   today: the reference to `x` in `x.f.g = b` is considered illegal
+//!   because `x` is not fully initialized.
+//!
+//! There are also some possible refactorings:
+//!
+//! - It might be nice to replace all loan paths with the `MovePath`
+//!   mechanism, since move paths allow lightweight comparison using an
+//!   integer index.
diff --git a/src/librustc/middle/borrowck/fragments.rs b/src/librustc/middle/borrowck/fragments.rs
index 7e766e9138e35f6e49a6cac4adedae312af08d58..dddc326df35724ca70ce4ab9a977e399a1197e48 100644
--- a/src/librustc/middle/borrowck/fragments.rs
+++ b/src/librustc/middle/borrowck/fragments.rs
@@ -8,13 +8,10 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
+//! Helper routines used for fragmenting structural paths due to moves for
+//! tracking drop obligations. Please see the extensive comments in the
+//! section "Structural fragments" in `doc.rs`.
 
-Helper routines used for fragmenting structural paths due to moves for
-tracking drop obligations. Please see the extensive comments in the
-section "Structural fragments" in `doc.rs`.
-
-*/
 use self::Fragment::*;
 use session::config;
@@ -176,16 +173,12 @@ pub fn instrument_move_fragments<'tcx>(this: &MoveData<'tcx>,
     instrument_all_paths("assigned_leaf_path", &fragments.assigned_leaf_paths);
 }
 
+/// Normalizes the fragment sets in `this`; i.e., removes duplicate entries, constructs the set of
+/// parents, and constructs the left-over fragments.
+///
+/// Note: "left-over fragments" means paths that were not directly referenced in moves nor
+/// assignments, but must nonetheless be tracked as potential drop obligations.
 pub fn fixup_fragment_sets<'tcx>(this: &MoveData<'tcx>, tcx: &ty::ctxt<'tcx>) {
-    /*!
-     * Normalizes the fragment sets in `this`; i.e., removes
-     * duplicate entries, constructs the set of parents, and
-     * constructs the left-over fragments.
-     *
-     * Note: "left-over fragments" means paths that were not
-     * directly referenced in moves nor assignments, but must
-     * nonetheless be tracked as potential drop obligations.
-     */
 
     let mut fragments = this.fragments.borrow_mut();
@@ -283,18 +276,14 @@ fn non_member(elem: MovePathIndex, set: &[MovePathIndex]) -> bool {
     }
 }
 
+/// Adds all of the precisely-tracked siblings of `lp` as potential move paths of interest. For
+/// example, if `lp` represents `s.x.j`, then adds move paths for `s.x.i` and `s.x.k`, the
+/// siblings of `s.x.j`.
 fn add_fragment_siblings<'tcx>(this: &MoveData<'tcx>,
                                tcx: &ty::ctxt<'tcx>,
                                gathered_fragments: &mut Vec<Fragment>,
                                lp: Rc<LoanPath<'tcx>>,
                                origin_id: Option<ast::NodeId>) {
-    /*!
-     * Adds all of the precisely-tracked siblings of `lp` as
-     * potential move paths of interest. For example, if `lp`
-     * represents `s.x.j`, then adds move paths for `s.x.i` and
-     * `s.x.k`, the siblings of `s.x.j`.
-     */
-
     match lp.kind {
         LpVar(_) | LpUpvar(..) => {} // Local variables have no siblings.
@@ -343,6 +332,8 @@ fn add_fragment_siblings<'tcx>(this: &MoveData<'tcx>,
     }
 }
 
+/// We have determined that `origin_lp` destructures to LpExtend(parent, original_field_name).
+/// Based on this, add move paths for all of the siblings of `origin_lp`.
 fn add_fragment_siblings_for_extension<'tcx>(this: &MoveData<'tcx>,
                                              tcx: &ty::ctxt<'tcx>,
                                              gathered_fragments: &mut Vec<Fragment>,
@@ -353,12 +344,6 @@ fn add_fragment_siblings_for_extension<'tcx>(this: &MoveData<'tcx>,
                                              origin_id: Option<ast::NodeId>,
                                              enum_variant_info: Option<(ast::DefId, Rc<LoanPath<'tcx>>)>) {
-    /*!
-     * We have determined that `origin_lp` destructures to
-     * LpExtend(parent, original_field_name). Based on this,
-     * add move paths for all of the siblings of `origin_lp`.
-     */
-
     let parent_ty = parent_lp.to_type();
 
     let add_fragment_sibling_local = |field_name| {
@@ -454,6 +439,8 @@ fn add_fragment_siblings_for_extension<'tcx>(this: &MoveData<'tcx>,
     }
 }
 
+/// Adds the single sibling `LpExtend(parent, new_field_name)` of `origin_lp` (the original
+/// loan-path).
 fn add_fragment_sibling_core<'tcx>(this: &MoveData<'tcx>,
                                    tcx: &ty::ctxt<'tcx>,
                                    gathered_fragments: &mut Vec<Fragment>,
@@ -461,10 +448,6 @@ fn add_fragment_sibling_core<'tcx>(this: &MoveData<'tcx>,
                                    mc: mc::MutabilityCategory,
                                    new_field_name: mc::FieldName,
                                    origin_lp: &Rc<LoanPath<'tcx>>) -> MovePathIndex {
-    /*!
-     * Adds the single sibling `LpExtend(parent, new_field_name)`
-     * of `origin_lp` (the original loan-path).
-     */
 
     let opt_variant_did = match parent.kind {
         LpDowncast(_, variant_did) => Some(variant_did),
         LpVar(..) | LpUpvar(..) | LpExtend(..) => None,
     };
diff --git a/src/librustc/middle/borrowck/gather_loans/gather_moves.rs b/src/librustc/middle/borrowck/gather_loans/gather_moves.rs
index 1d0b0558bb16c2ae94482dcd3938217734ebe06c..651141605042782ff943c3bbfa8181264216276e 100644
--- a/src/librustc/middle/borrowck/gather_loans/gather_moves.rs
+++ b/src/librustc/middle/borrowck/gather_loans/gather_moves.rs
@@ -8,9 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * Computes moves.
- */
+//! Computes moves.
 
 use middle::borrowck::*;
 use middle::borrowck::LoanPathKind::*;
diff --git a/src/librustc/middle/borrowck/gather_loans/lifetime.rs b/src/librustc/middle/borrowck/gather_loans/lifetime.rs
index 7a7ed3e75d20edac25793480156aeec8dfac0eb3..e6a7c150df8f41d82ae460e1ad8f447a12e33d86 100644
--- a/src/librustc/middle/borrowck/gather_loans/lifetime.rs
+++ b/src/librustc/middle/borrowck/gather_loans/lifetime.rs
@@ -8,10 +8,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * This module implements the check that the lifetime of a borrow
- * does not exceed the lifetime of the value being borrowed.
- */
+//! This module implements the check that the lifetime of a borrow
+//! does not exceed the lifetime of the value being borrowed.
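+//!
+//! For example, this is the kind of error the check reports (an
+//! illustrative sketch, not a test from the actual suite):
+//!
+//! ```
+//! fn borrowed_too_long<'a>() -> &'a int {
+//!     let x = 1;
+//!     &x // error: `x` does not live long enough
+//! }
+//! ```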
 use middle::borrowck::*;
 use middle::expr_use_visitor as euv;
diff --git a/src/librustc/middle/borrowck/gather_loans/mod.rs b/src/librustc/middle/borrowck/gather_loans/mod.rs
index 088b62a12cf98ddad27e608d40dc4ecdba39176d..4f7ecc99c8938e22d54d6485e95d46efdbd3a533 100644
--- a/src/librustc/middle/borrowck/gather_loans/mod.rs
+++ b/src/librustc/middle/borrowck/gather_loans/mod.rs
@@ -225,6 +225,9 @@ fn check_aliasability<'a, 'tcx>(bccx: &BorrowckCtxt<'a, 'tcx>,
 impl<'a, 'tcx> GatherLoanCtxt<'a, 'tcx> {
     pub fn tcx(&self) -> &'a ty::ctxt<'tcx> { self.bccx.tcx }
 
+    /// Guarantees that `addr_of(cmt)` will be valid for the duration of `static_scope_r`, or
+    /// reports an error. This may entail taking out loans, which will be added to the
+    /// `req_loan_map`.
     fn guarantee_valid(&mut self,
                        borrow_id: ast::NodeId,
                        borrow_span: Span,
@@ -232,12 +235,6 @@ fn guarantee_valid(&mut self,
                        req_kind: ty::BorrowKind,
                        loan_region: ty::Region,
                        cause: euv::LoanCause) {
-        /*!
-         * Guarantees that `addr_of(cmt)` will be valid for the duration of
-         * `static_scope_r`, or reports an error. This may entail taking
-         * out loans, which will be added to the `req_loan_map`.
-         */
-
         debug!("guarantee_valid(borrow_id={}, cmt={}, \
                 req_mutbl={}, loan_region={})",
                borrow_id,
diff --git a/src/librustc/middle/borrowck/gather_loans/restrictions.rs b/src/librustc/middle/borrowck/gather_loans/restrictions.rs
index adae34b49dca2d1dc386c27f14a0f6f656ef3164..bd9cf8f84b6431a1913291628956d748a1d20514 100644
--- a/src/librustc/middle/borrowck/gather_loans/restrictions.rs
+++ b/src/librustc/middle/borrowck/gather_loans/restrictions.rs
@@ -8,9 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * Computes the restrictions that result from a borrow.
- */
+//! Computes the restrictions that result from a borrow.
 
 pub use self::RestrictionResult::*;
diff --git a/src/librustc/middle/borrowck/mod.rs b/src/librustc/middle/borrowck/mod.rs
index 45040cd7b102e93d18a7fda4c24805e40dd76d72..0bbcdfe61bb46c86d5688ad0f81c0e16c177ecab 100644
--- a/src/librustc/middle/borrowck/mod.rs
+++ b/src/librustc/middle/borrowck/mod.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*! See doc.rs for a thorough explanation of the borrow checker */
+//! See doc.rs for a thorough explanation of the borrow checker
 
 #![allow(non_camel_case_types)]
diff --git a/src/librustc/middle/borrowck/move_data.rs b/src/librustc/middle/borrowck/move_data.rs
index dc9516ccc5da2d352c9b3938b9782ae5dd29122e..7bf3458f0ae3da3613273ea5e9ca6afa1c791574 100644
--- a/src/librustc/middle/borrowck/move_data.rs
+++ b/src/librustc/middle/borrowck/move_data.rs
@@ -8,12 +8,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
-
-Data structures used for tracking moves. Please see the extensive
-comments in the section "Moves and initialization" in `doc.rs`.
-
-*/
+//! Data structures used for tracking moves. Please see the extensive
+//! comments in the section "Moves and initialization" in `doc.rs`.
 
 pub use self::MoveKind::*;
@@ -297,15 +293,11 @@ fn is_var_path(&self, index: MovePathIndex) -> bool {
         self.path_parent(index) == InvalidMovePathIndex
     }
 
+    /// Returns the existing move path index for `lp`, if any, and otherwise adds a new index for
+    /// `lp` and any of its base paths that do not yet have an index.
     pub fn move_path(&self,
                      tcx: &ty::ctxt<'tcx>,
                      lp: Rc<LoanPath<'tcx>>) -> MovePathIndex {
-        /*!
-         * Returns the existing move path index for `lp`, if any,
-         * and otherwise adds a new index for `lp` and any of its
-         * base paths that do not yet have an index.
-         */
-
         match self.path_map.borrow().get(&lp) {
             Some(&index) => {
                 return index;
             }
@@ -370,13 +362,10 @@ fn existing_base_paths(&self, lp: &Rc<LoanPath<'tcx>>)
         result
     }
 
+    /// Adds any existing move path indices for `lp` and any base paths of `lp` to `result`, but
+    /// does not add new move paths
    fn add_existing_base_paths(&self, lp: &Rc<LoanPath<'tcx>>,
                                result: &mut Vec<MovePathIndex>) {
-        /*!
-         * Adds any existing move path indices for `lp` and any base
-         * paths of `lp` to `result`, but does not add new move paths
-         */
-
         match self.path_map.borrow().get(lp).cloned() {
             Some(index) => {
                 self.each_base_path(index, |p| {
@@ -397,16 +386,12 @@ fn add_existing_base_paths(&self, lp: &Rc<LoanPath<'tcx>>,
         }
     }
 
+    /// Adds a new move entry for a move of `lp` that occurs at location `id` with kind `kind`.
     pub fn add_move(&self,
                     tcx: &ty::ctxt<'tcx>,
                     lp: Rc<LoanPath<'tcx>>,
                     id: ast::NodeId,
                     kind: MoveKind) {
-        /*!
-         * Adds a new move entry for a move of `lp` that occurs at
-         * location `id` with kind `kind`.
-         */
-
         debug!("add_move(lp={}, id={}, kind={})",
                lp.repr(tcx),
                id,
@@ -428,6 +413,8 @@ pub fn add_move(&self,
         });
     }
 
+    /// Adds a new record for an assignment to `lp` that occurs at location `id` with the given
+    /// `span`.
     pub fn add_assignment(&self,
                           tcx: &ty::ctxt<'tcx>,
                           lp: Rc<LoanPath<'tcx>>,
@@ -435,11 +422,6 @@ pub fn add_assignment(&self,
                           span: Span,
                           assignee_id: ast::NodeId,
                           mode: euv::MutateMode) {
-        /*!
-         * Adds a new record for an assignment to `lp` that occurs at
-         * location `id` with the given `span`.
-         */
-
         debug!("add_assignment(lp={}, assign_id={}, assignee_id={}",
                lp.repr(tcx), assign_id, assignee_id);
@@ -473,18 +455,16 @@ pub fn add_assignment(&self,
         }
     }
 
+    /// Adds a new record for a match of `base_lp`, downcast to
+    /// variant `lp`, that occurs at location `pattern_id`. (One
+    /// should be able to recover the span info from the
+    /// `pattern_id` and the ast_map, I think.)
     pub fn add_variant_match(&self,
                              tcx: &ty::ctxt<'tcx>,
                              lp: Rc<LoanPath<'tcx>>,
                              pattern_id: ast::NodeId,
                              base_lp: Rc<LoanPath<'tcx>>,
                              mode: euv::MatchMode) {
-        /*!
-         * Adds a new record for a match of `base_lp`, downcast to
-         * variant `lp`, that occurs at location `pattern_id`. (One
-         * should be able to recover the span info from the
-         * `pattern_id` and the ast_map, I think.)
-         */
 
         debug!("add_variant_match(lp={}, pattern_id={})",
                lp.repr(tcx), pattern_id);
@@ -507,18 +487,15 @@ fn fixup_fragment_sets(&self, tcx: &ty::ctxt<'tcx>) {
         fragments::fixup_fragment_sets(self, tcx)
     }
 
+    /// Adds the gen/kills for the various moves and
+    /// assignments into the provided data flow contexts.
+    /// Moves are generated by moves and killed by assignments and
+    /// scoping. Assignments are generated by assignment to variables and
+    /// killed by scoping. See `doc.rs` for more details.
     fn add_gen_kills(&self,
                      tcx: &ty::ctxt<'tcx>,
                      dfcx_moves: &mut MoveDataFlow,
                      dfcx_assign: &mut AssignDataFlow) {
-        /*!
-         * Adds the gen/kills for the various moves and
-         * assignments into the provided data flow contexts.
-         * Moves are generated by moves and killed by assignments and
-         * scoping. Assignments are generated by assignment to variables and
-         * killed by scoping. See `doc.rs` for more details.
-         */
-
         for (i, the_move) in self.moves.borrow().iter().enumerate() {
             dfcx_moves.add_gen(the_move.id, i);
         }
@@ -695,18 +672,14 @@ pub fn kind_of_move_of_path(&self,
         ret
     }
 
+    /// Iterates through each move of `loan_path` (or some base path of `loan_path`) that *may*
+    /// have occurred on entry to `id` without an intervening assignment. In other words, any moves
+    /// that would invalidate a reference to `loan_path` at location `id`.
    pub fn each_move_of(&self,
                         id: ast::NodeId,
                         loan_path: &Rc<LoanPath<'tcx>>,
                         f: |&Move, &LoanPath<'tcx>| -> bool)
                         -> bool {
-        /*!
-         * Iterates through each move of `loan_path` (or some base path
-         * of `loan_path`) that *may* have occurred on entry to `id` without
-         * an intervening assignment. In other words, any moves that
-         * would invalidate a reference to `loan_path` at location `id`.
-         */
-
         // Bad scenarios:
         //
         // 1. Move of `a.b.c`, use of `a.b.c`
@@ -755,17 +728,13 @@ pub fn each_move_of(&self,
         })
     }
 
+    /// Iterates through every assignment to `loan_path` that may have occurred on entry to `id`.
+    /// `loan_path` must be a single variable.
    pub fn each_assignment_of(&self,
                               id: ast::NodeId,
                               loan_path: &Rc<LoanPath<'tcx>>,
                               f: |&Assignment| -> bool)
                               -> bool {
-        /*!
-         * Iterates through every assignment to `loan_path` that
-         * may have occurred on entry to `id`. `loan_path` must be
-         * a single variable.
-         */
-
         let loan_path_index = {
             match self.move_data.existing_move_path(loan_path) {
                 Some(i) => i,
diff --git a/src/librustc/middle/cfg/mod.rs b/src/librustc/middle/cfg/mod.rs
index bb758ec7c38b73082254fa654dafd375419ac1f0..a2e8ba8d65c3321e782a147b6512973a74c8c71d 100644
--- a/src/librustc/middle/cfg/mod.rs
+++ b/src/librustc/middle/cfg/mod.rs
@@ -8,12 +8,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
-
-Module that constructs a control-flow graph representing an item.
-Uses `Graph` as the underlying representation.
-
-*/
+//! Module that constructs a control-flow graph representing an item.
+//! Uses `Graph` as the underlying representation.
 
 use middle::graph;
 use middle::ty;
diff --git a/src/librustc/middle/dataflow.rs b/src/librustc/middle/dataflow.rs
index 141504cb6f7d50e530cd288a6986ef9001e0a258..53fea8ffc86c65e1f7519d5880fbf11cf0ed7f58 100644
--- a/src/librustc/middle/dataflow.rs
+++ b/src/librustc/middle/dataflow.rs
@@ -9,12 +9,10 @@
 // except according to those terms.
 
-/*!
- * A module for propagating forward dataflow information. The analysis
- * assumes that the items to be propagated can be represented as bits
- * and thus uses bitvectors. Your job is simply to specify the so-called
- * GEN and KILL bits for each expression.
- */
+//! A module for propagating forward dataflow information. The analysis
+//! assumes that the items to be propagated can be represented as bits
+//! and thus uses bitvectors. Your job is simply to specify the so-called
+//! GEN and KILL bits for each expression.
 
 pub use self::EntryOrExit::*;
diff --git a/src/librustc/middle/expr_use_visitor.rs b/src/librustc/middle/expr_use_visitor.rs
index 656feb51a1d3c9e5931ff37d2f38cd80cdb7ab41..9bb5a6f9a2447da225bdcfac599742feeb6e4b6e 100644
--- a/src/librustc/middle/expr_use_visitor.rs
+++ b/src/librustc/middle/expr_use_visitor.rs
@@ -8,11 +8,9 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * A different sort of visitor for walking fn bodies. Unlike the
- * normal visitor, which just walks the entire body in one shot, the
- * `ExprUseVisitor` determines how expressions are being used.
- */
+//! A different sort of visitor for walking fn bodies. Unlike the
+//! normal visitor, which just walks the entire body in one shot, the
+//! `ExprUseVisitor` determines how expressions are being used.
 
 pub use self::MutateMode::*;
 pub use self::LoanCause::*;
@@ -716,12 +714,9 @@ fn walk_local(&mut self, local: &ast::Local) {
         }
     }
 
+    /// Indicates that the value of `blk` will be consumed, meaning either copied or moved
+    /// depending on its type.
     fn walk_block(&mut self, blk: &ast::Block) {
-        /*!
-         * Indicates that the value of `blk` will be consumed,
-         * meaning either copied or moved depending on its type.
-         */
-
         debug!("walk_block(blk.id={})", blk.id);
 
         for stmt in blk.stmts.iter() {
@@ -821,16 +816,12 @@ fn walk_adjustment(&mut self, expr: &ast::Expr) {
         }
     }
 
+    /// Autoderefs for overloaded Deref calls in fact reference their receiver. That is, if we have
+    /// `(*x)` where `x` is of type `Rc<T>`, then this in fact is equivalent to `x.deref()`. Since
+    /// `deref()` is declared with `&self`, this is an autoref of `x`.
     fn walk_autoderefs(&mut self,
                        expr: &ast::Expr,
                        autoderefs: uint) {
-        /*!
-         * Autoderefs for overloaded Deref calls in fact reference
-         * their receiver. That is, if we have `(*x)` where `x` is of
-         * type `Rc<T>`, then this in fact is equivalent to
-         * `x.deref()`. Since `deref()` is declared with `&self`, this
-         * is an autoref of `x`.
-         */
        debug!("walk_autoderefs expr={} autoderefs={}", expr.repr(self.tcx()), autoderefs);
 
         for i in range(0, autoderefs) {
diff --git a/src/librustc/middle/fast_reject.rs b/src/librustc/middle/fast_reject.rs
index 7514a63c7fa58f5bfd1186da8c6fde92e521eed0..da467c3d0d555a3736fd97ea77e66585ae8d47ba 100644
--- a/src/librustc/middle/fast_reject.rs
+++ b/src/librustc/middle/fast_reject.rs
@@ -33,26 +33,20 @@ pub enum SimplifiedType {
     ParameterSimplifiedType,
 }
 
+/// Tries to simplify a type by dropping type parameters, deref'ing away any reference types, etc.
+/// The idea is to get something simple that we can use to quickly decide if two types could unify
+/// during method lookup.
+///
+/// If `can_simplify_params` is false, then we will fail to simplify type parameters entirely. This
+/// is useful when those type parameters would be instantiated with fresh type variables, since
+/// then we can't say much about whether two types would unify. Put another way,
+/// `can_simplify_params` should be true if type parameters appear free in `ty` and `false` if they
+/// are to be considered bound.
 pub fn simplify_type(tcx: &ty::ctxt, ty: Ty, can_simplify_params: bool) -> Option<SimplifiedType> {
-    /*!
-     * Tries to simplify a type by dropping type parameters, deref'ing
-     * away any reference types, etc. The idea is to get something
-     * simple that we can use to quickly decide if two types could
-     * unify during method lookup.
-     *
-     * If `can_simplify_params` is false, then we will fail to
-     * simplify type parameters entirely. This is useful when those
-     * type parameters would be instantiated with fresh type
-     * variables, since then we can't say much about whether two types
-     * would unify. Put another way, `can_simplify_params` should be
-     * true if type parameters appear free in `ty` and `false` if they
-     * are to be considered bound.
-     */
-
     match ty.sty {
         ty::ty_bool => Some(BoolSimplifiedType),
         ty::ty_char => Some(CharSimplifiedType),
diff --git a/src/librustc/middle/graph.rs b/src/librustc/middle/graph.rs
index ac132477b8772d4ab4899c35ac8bc53dac8e903d..2f50a96402302a5354aa540095006dcde08c53f2 100644
--- a/src/librustc/middle/graph.rs
+++ b/src/librustc/middle/graph.rs
@@ -8,31 +8,27 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
-
-A graph module for use in dataflow, region resolution, and elsewhere.
-
-# Interface details
-
-You customize the graph by specifying a "node data" type `N` and an
-"edge data" type `E`. You can then later gain access (mutable or
-immutable) to these "user-data" bits. Currently, you can only add
-nodes or edges to the graph. You cannot remove or modify them once
-added. This could be changed if we have a need.
-
-# Implementation details
-
-The main tricky thing about this code is the way that edges are
-stored. The edges are stored in a central array, but they are also
-threaded onto two linked lists for each node, one for incoming edges
-and one for outgoing edges. Note that every edge is a member of some
-incoming list and some outgoing list. Basically you can load the
-first index of the linked list from the node data structures (the
-field `first_edge`) and then, for each edge, load the next index from
-the field `next_edge`. Each of those fields is an array that should
-be indexed by the direction (see the type `Direction`).
-
-*/
+//! A graph module for use in dataflow, region resolution, and elsewhere.
+//!
+//! # Interface details
+//!
+//! You customize the graph by specifying a "node data" type `N` and an
+//! "edge data" type `E`. You can then later gain access (mutable or
+//! immutable) to these "user-data" bits. Currently, you can only add
+//! nodes or edges to the graph. You cannot remove or modify them once
+//! added. This could be changed if we have a need.
+//!
+//! # Implementation details
+//!
+//! The main tricky thing about this code is the way that edges are
+//! stored. The edges are stored in a central array, but they are also
+//! threaded onto two linked lists for each node, one for incoming edges
+//! and one for outgoing edges. Note that every edge is a member of some
+//! incoming list and some outgoing list. Basically you can load the
+//! first index of the linked list from the node data structures (the
+//! field `first_edge`) and then, for each edge, load the next index from
+//! the field `next_edge`. Each of those fields is an array that should
+//! be indexed by the direction (see the type `Direction`).
 
 #![allow(dead_code)] // still WIP
diff --git a/src/librustc/middle/liveness.rs b/src/librustc/middle/liveness.rs
index 15d9e87a9d5a34678475a3ef7dd5c4de7ee78aeb..a09ceac11a53dae7513e0b37e30fb2dd353f237a 100644
--- a/src/librustc/middle/liveness.rs
+++ b/src/librustc/middle/liveness.rs
@@ -8,105 +8,103 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * A classic liveness analysis based on dataflow over the AST. Computes,
- * for each local variable in a function, whether that variable is live
- * at a given point. Program execution points are identified by their
- * id.
- *
- * # Basic idea
- *
- * The basic model is that each local variable is assigned an index. We
- * represent sets of local variables using a vector indexed by this
- * index. 
The value in the vector is either 0, indicating the variable - * is dead, or the id of an expression that uses the variable. - * - * We conceptually walk over the AST in reverse execution order. If we - * find a use of a variable, we add it to the set of live variables. If - * we find an assignment to a variable, we remove it from the set of live - * variables. When we have to merge two flows, we take the union of - * those two flows---if the variable is live on both paths, we simply - * pick one id. In the event of loops, we continue doing this until a - * fixed point is reached. - * - * ## Checking initialization - * - * At the function entry point, all variables must be dead. If this is - * not the case, we can report an error using the id found in the set of - * live variables, which identifies a use of the variable which is not - * dominated by an assignment. - * - * ## Checking moves - * - * After each explicit move, the variable must be dead. - * - * ## Computing last uses - * - * Any use of the variable where the variable is dead afterwards is a - * last use. - * - * # Implementation details - * - * The actual implementation contains two (nested) walks over the AST. - * The outer walk has the job of building up the ir_maps instance for the - * enclosing function. On the way down the tree, it identifies those AST - * nodes and variable IDs that will be needed for the liveness analysis - * and assigns them contiguous IDs. The liveness id for an AST node is - * called a `live_node` (it's a newtype'd uint) and the id for a variable - * is called a `variable` (another newtype'd uint). - * - * On the way back up the tree, as we are about to exit from a function - * declaration we allocate a `liveness` instance. Now that we know - * precisely how many nodes and variables we need, we can allocate all - * the various arrays that we will need to precisely the right size. We then - * perform the actual propagation on the `liveness` instance. - * - * This propagation is encoded in the various `propagate_through_*()` - * methods. It effectively does a reverse walk of the AST; whenever we - * reach a loop node, we iterate until a fixed point is reached. - * - * ## The `Users` struct - * - * At each live node `N`, we track three pieces of information for each - * variable `V` (these are encapsulated in the `Users` struct): - * - * - `reader`: the `LiveNode` ID of some node which will read the value - * that `V` holds on entry to `N`. Formally: a node `M` such - * that there exists a path `P` from `N` to `M` where `P` does not - * write `V`. If the `reader` is `invalid_node()`, then the current - * value will never be read (the variable is dead, essentially). - * - * - `writer`: the `LiveNode` ID of some node which will write the - * variable `V` and which is reachable from `N`. Formally: a node `M` - * such that there exists a path `P` from `N` to `M` and `M` writes - * `V`. If the `writer` is `invalid_node()`, then there is no writer - * of `V` that follows `N`. - * - * - `used`: a boolean value indicating whether `V` is *used*. We - * distinguish a *read* from a *use* in that a *use* is some read that - * is not just used to generate a new value. For example, `x += 1` is - * a read but not a use. This is used to generate better warnings. - * - * ## Special Variables - * - * We generate various special variables for various, well, special purposes. 
- * These are described in the `specials` struct: - * - * - `exit_ln`: a live node that is generated to represent every 'exit' from - * the function, whether it be by explicit return, panic, or other means. - * - * - `fallthrough_ln`: a live node that represents a fallthrough - * - * - `no_ret_var`: a synthetic variable that is only 'read' from, the - * fallthrough node. This allows us to detect functions where we fail - * to return explicitly. - * - `clean_exit_var`: a synthetic variable that is only 'read' from the - * fallthrough node. It is only live if the function could converge - * via means other than an explicit `return` expression. That is, it is - * only dead if the end of the function's block can never be reached. - * It is the responsibility of typeck to ensure that there are no - * `return` expressions in a function declared as diverging. - */ +//! A classic liveness analysis based on dataflow over the AST. Computes, +//! for each local variable in a function, whether that variable is live +//! at a given point. Program execution points are identified by their +//! id. +//! +//! # Basic idea +//! +//! The basic model is that each local variable is assigned an index. We +//! represent sets of local variables using a vector indexed by this +//! index. The value in the vector is either 0, indicating the variable +//! is dead, or the id of an expression that uses the variable. +//! +//! We conceptually walk over the AST in reverse execution order. If we +//! find a use of a variable, we add it to the set of live variables. If +//! we find an assignment to a variable, we remove it from the set of live +//! variables. When we have to merge two flows, we take the union of +//! those two flows---if the variable is live on both paths, we simply +//! pick one id. In the event of loops, we continue doing this until a +//! fixed point is reached. +//! +//! ## Checking initialization +//! +//! At the function entry point, all variables must be dead. If this is +//! not the case, we can report an error using the id found in the set of +//! live variables, which identifies a use of the variable which is not +//! dominated by an assignment. +//! +//! ## Checking moves +//! +//! After each explicit move, the variable must be dead. +//! +//! ## Computing last uses +//! +//! Any use of the variable where the variable is dead afterwards is a +//! last use. +//! +//! # Implementation details +//! +//! The actual implementation contains two (nested) walks over the AST. +//! The outer walk has the job of building up the ir_maps instance for the +//! enclosing function. On the way down the tree, it identifies those AST +//! nodes and variable IDs that will be needed for the liveness analysis +//! and assigns them contiguous IDs. The liveness id for an AST node is +//! called a `live_node` (it's a newtype'd uint) and the id for a variable +//! is called a `variable` (another newtype'd uint). +//! +//! On the way back up the tree, as we are about to exit from a function +//! declaration we allocate a `liveness` instance. Now that we know +//! precisely how many nodes and variables we need, we can allocate all +//! the various arrays that we will need to precisely the right size. We then +//! perform the actual propagation on the `liveness` instance. +//! +//! This propagation is encoded in the various `propagate_through_*()` +//! methods. It effectively does a reverse walk of the AST; whenever we +//! reach a loop node, we iterate until a fixed point is reached. +//! +//! ## The `Users` struct +//! 
+//! At each live node `N`, we track three pieces of information for each
+//! variable `V` (these are encapsulated in the `Users` struct):
+//!
+//! - `reader`: the `LiveNode` ID of some node which will read the value
+//!   that `V` holds on entry to `N`. Formally: a node `M` such
+//!   that there exists a path `P` from `N` to `M` where `P` does not
+//!   write `V`. If the `reader` is `invalid_node()`, then the current
+//!   value will never be read (the variable is dead, essentially).
+//!
+//! - `writer`: the `LiveNode` ID of some node which will write the
+//!   variable `V` and which is reachable from `N`. Formally: a node `M`
+//!   such that there exists a path `P` from `N` to `M` and `M` writes
+//!   `V`. If the `writer` is `invalid_node()`, then there is no writer
+//!   of `V` that follows `N`.
+//!
+//! - `used`: a boolean value indicating whether `V` is *used*. We
+//!   distinguish a *read* from a *use* in that a *use* is some read that
+//!   is not just used to generate a new value. For example, `x += 1` is
+//!   a read but not a use. This is used to generate better warnings.
+//!
+//! ## Special Variables
+//!
+//! We generate various special variables for various, well, special purposes.
+//! These are described in the `specials` struct:
+//!
+//! - `exit_ln`: a live node that is generated to represent every 'exit' from
+//!   the function, whether it be by explicit return, panic, or other means.
+//!
+//! - `fallthrough_ln`: a live node that represents a fallthrough
+//!
+//! - `no_ret_var`: a synthetic variable that is only 'read' from the
+//!   fallthrough node. This allows us to detect functions where we fail
+//!   to return explicitly.
+//!
+//! - `clean_exit_var`: a synthetic variable that is only 'read' from the
+//!   fallthrough node. It is only live if the function could converge
+//!   via means other than an explicit `return` expression. That is, it is
+//!   only dead if the end of the function's block can never be reached.
+//!   It is the responsibility of typeck to ensure that there are no
+//!   `return` expressions in a function declared as diverging.

 use self::LoopKind::*;
 use self::LiveNodeKind::*;
 use self::VarKind::*;
diff --git a/src/librustc/middle/mem_categorization.rs b/src/librustc/middle/mem_categorization.rs
index 046ab162cfcb05d4c4b3df425cdd788dfb6b14df..53a5ac7a09342f7f1fd8204e07f365ca3829d4b2 100644
--- a/src/librustc/middle/mem_categorization.rs
+++ b/src/librustc/middle/mem_categorization.rs
@@ -8,57 +8,55 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
- * # Categorization
- *
- * The job of the categorization module is to analyze an expression to
- * determine what kind of memory is used in evaluating it (for example,
- * where dereferences occur and what kind of pointer is dereferenced;
- * whether the memory is mutable; etc)
- *
- * Categorization effectively transforms all of our expressions into
- * expressions of the following forms (the actual enum has many more
- * possibilities, naturally, but they are all variants of these base
- * forms):
- *
- *     E = rvalue    // some computed rvalue
- *       | x         // address of a local variable or argument
- *       | *E        // deref of a ptr
- *       | E.comp    // access to an interior component
- *
- * Imagine a routine ToAddr(Expr) that evaluates an expression and returns an
- * address where the result is to be found. If Expr is an lvalue, then this
- * is the address of the lvalue. If Expr is an rvalue, this is the address of
- * some temporary spot in memory where the result is stored.
- * - * Now, cat_expr() classifies the expression Expr and the address A=ToAddr(Expr) - * as follows: - * - * - cat: what kind of expression was this? This is a subset of the - * full expression forms which only includes those that we care about - * for the purpose of the analysis. - * - mutbl: mutability of the address A - * - ty: the type of data found at the address A - * - * The resulting categorization tree differs somewhat from the expressions - * themselves. For example, auto-derefs are explicit. Also, an index a[b] is - * decomposed into two operations: a dereference to reach the array data and - * then an index to jump forward to the relevant item. - * - * ## By-reference upvars - * - * One part of the translation which may be non-obvious is that we translate - * closure upvars into the dereference of a borrowed pointer; this more closely - * resembles the runtime translation. So, for example, if we had: - * - * let mut x = 3; - * let y = 5; - * let inc = || x += y; - * - * Then when we categorize `x` (*within* the closure) we would yield a - * result of `*x'`, effectively, where `x'` is a `cat_upvar` reference - * tied to `x`. The type of `x'` will be a borrowed pointer. - */ +//! # Categorization +//! +//! The job of the categorization module is to analyze an expression to +//! determine what kind of memory is used in evaluating it (for example, +//! where dereferences occur and what kind of pointer is dereferenced; +//! whether the memory is mutable; etc) +//! +//! Categorization effectively transforms all of our expressions into +//! expressions of the following forms (the actual enum has many more +//! possibilities, naturally, but they are all variants of these base +//! forms): +//! +//! E = rvalue // some computed rvalue +//! | x // address of a local variable or argument +//! | *E // deref of a ptr +//! | E.comp // access to an interior component +//! +//! Imagine a routine ToAddr(Expr) that evaluates an expression and returns an +//! address where the result is to be found. If Expr is an lvalue, then this +//! is the address of the lvalue. If Expr is an rvalue, this is the address of +//! some temporary spot in memory where the result is stored. +//! +//! Now, cat_expr() classifies the expression Expr and the address A=ToAddr(Expr) +//! as follows: +//! +//! - cat: what kind of expression was this? This is a subset of the +//! full expression forms which only includes those that we care about +//! for the purpose of the analysis. +//! - mutbl: mutability of the address A +//! - ty: the type of data found at the address A +//! +//! The resulting categorization tree differs somewhat from the expressions +//! themselves. For example, auto-derefs are explicit. Also, an index a[b] is +//! decomposed into two operations: a dereference to reach the array data and +//! then an index to jump forward to the relevant item. +//! +//! ## By-reference upvars +//! +//! One part of the translation which may be non-obvious is that we translate +//! closure upvars into the dereference of a borrowed pointer; this more closely +//! resembles the runtime translation. So, for example, if we had: +//! +//! let mut x = 3; +//! let y = 5; +//! let inc = || x += y; +//! +//! Then when we categorize `x` (*within* the closure) we would yield a +//! result of `*x'`, effectively, where `x'` is a `cat_upvar` reference +//! tied to `x`. The type of `x'` will be a borrowed pointer. 
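+//!
+//! To make the base forms above concrete, here is a purely illustrative
+//! sketch -- hypothetical notation, not actual compiler output -- of how an
+//! expression like `(*x).f` decomposes, reading from the outside in:
+//!
+//!     E.comp    // access the interior field `f` ...
+//!     *E        // ... of the value obtained by deref'ing ...
+//!     x         // ... the local variable `x` itself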
#![allow(non_camel_case_types)] @@ -1058,20 +1056,17 @@ fn deref_vec(&self, } } + /// Given a pattern P like: `[_, ..Q, _]`, where `vec_cmt` is the cmt for `P`, `slice_pat` is + /// the pattern `Q`, returns: + /// + /// * a cmt for `Q` + /// * the mutability and region of the slice `Q` + /// + /// These last two bits of info happen to be things that borrowck needs. pub fn cat_slice_pattern(&self, vec_cmt: cmt<'tcx>, slice_pat: &ast::Pat) -> McResult<(cmt<'tcx>, ast::Mutability, ty::Region)> { - /*! - * Given a pattern P like: `[_, ..Q, _]`, where `vec_cmt` is - * the cmt for `P`, `slice_pat` is the pattern `Q`, returns: - * - a cmt for `Q` - * - the mutability and region of the slice `Q` - * - * These last two bits of info happen to be things that - * borrowck needs. - */ - let slice_ty = if_ok!(self.node_ty(slice_pat.id)); let (slice_mutbl, slice_r) = vec_slice_info(self.tcx(), slice_pat, @@ -1079,17 +1074,13 @@ pub fn cat_slice_pattern(&self, let cmt_slice = self.cat_index(slice_pat, self.deref_vec(slice_pat, vec_cmt)); return Ok((cmt_slice, slice_mutbl, slice_r)); + /// In a pattern like [a, b, ..c], normally `c` has slice type, but if you have [a, b, + /// ..ref c], then the type of `ref c` will be `&&[]`, so to extract the slice details we + /// have to recurse through rptrs. fn vec_slice_info(tcx: &ty::ctxt, pat: &ast::Pat, slice_ty: Ty) -> (ast::Mutability, ty::Region) { - /*! - * In a pattern like [a, b, ..c], normally `c` has slice type, - * but if you have [a, b, ..ref c], then the type of `ref c` - * will be `&&[]`, so to extract the slice details we have - * to recurse through rptrs. - */ - match slice_ty.sty { ty::ty_rptr(r, ref mt) => match mt.ty.sty { ty::ty_vec(_, None) => (mt.mutbl, r), @@ -1428,13 +1419,9 @@ pub fn guarantor(&self) -> cmt<'tcx> { } } + /// Returns `Some(_)` if this lvalue represents a freely aliasable pointer type. pub fn freely_aliasable(&self, ctxt: &ty::ctxt<'tcx>) -> Option { - /*! - * Returns `Some(_)` if this lvalue represents a freely aliasable - * pointer type. - */ - // Maybe non-obvious: copied upvars can only be considered // non-aliasable in once closures, since any other kind can be // aliased and eventually recused. diff --git a/src/librustc/middle/region.rs b/src/librustc/middle/region.rs index c5511f995bc689397e7d5e5c74e51f02e0ae58de..20be98ca977d1025ee344cf08a02c2c2567461ea 100644 --- a/src/librustc/middle/region.rs +++ b/src/librustc/middle/region.rs @@ -8,18 +8,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -This file actually contains two passes related to regions. The first -pass builds up the `scope_map`, which describes the parent links in -the region hierarchy. The second pass infers which types must be -region parameterized. - -Most of the documentation on regions can be found in -`middle/typeck/infer/region_inference.rs` - -*/ - +//! This file actually contains two passes related to regions. The first +//! pass builds up the `scope_map`, which describes the parent links in +//! the region hierarchy. The second pass infers which types must be +//! region parameterized. +//! +//! Most of the documentation on regions can be found in +//! `middle/typeck/infer/region_inference.rs` use session::Session; use middle::ty::{mod, Ty, FreeRegion}; @@ -171,14 +166,10 @@ pub fn record_rvalue_scope(&self, var: ast::NodeId, lifetime: CodeExtent) { self.rvalue_scopes.borrow_mut().insert(var, lifetime); } + /// Records that a scope is a TERMINATING SCOPE. 
Whenever we create automatic temporaries -- + /// e.g. by an expression like `a().f` -- they will be freed within the innermost terminating + /// scope. pub fn mark_as_terminating_scope(&self, scope_id: CodeExtent) { - /*! - * Records that a scope is a TERMINATING SCOPE. Whenever we - * create automatic temporaries -- e.g. by an - * expression like `a().f` -- they will be freed within - * the innermost terminating scope. - */ - debug!("record_terminating_scope(scope_id={})", scope_id); self.terminating_scopes.borrow_mut().insert(scope_id); } @@ -197,10 +188,8 @@ pub fn encl_scope(&self, id: CodeExtent) -> CodeExtent { } } + /// Returns the lifetime of the local variable `var_id` pub fn var_scope(&self, var_id: ast::NodeId) -> CodeExtent { - /*! - * Returns the lifetime of the local variable `var_id` - */ match self.var_map.borrow().get(&var_id) { Some(&r) => r, None => { panic!("no enclosing scope for id {}", var_id); } @@ -257,15 +246,12 @@ pub fn scopes_intersect(&self, scope1: CodeExtent, scope2: CodeExtent) self.is_subscope_of(scope2, scope1) } + /// Returns true if `subscope` is equal to or is lexically nested inside `superscope` and false + /// otherwise. pub fn is_subscope_of(&self, subscope: CodeExtent, superscope: CodeExtent) -> bool { - /*! - * Returns true if `subscope` is equal to or is lexically - * nested inside `superscope` and false otherwise. - */ - let mut s = subscope; while superscope != s { match self.scope_map.borrow().get(&s) { @@ -285,27 +271,20 @@ pub fn is_subscope_of(&self, return true; } + /// Determines whether two free regions have a subregion relationship + /// by walking the graph encoded in `free_region_map`. Note that + /// it is possible that `sub != sup` and `sub <= sup` and `sup <= sub` + /// (that is, the user can give two different names to the same lifetime). pub fn sub_free_region(&self, sub: FreeRegion, sup: FreeRegion) -> bool { - /*! - * Determines whether two free regions have a subregion relationship - * by walking the graph encoded in `free_region_map`. Note that - * it is possible that `sub != sup` and `sub <= sup` and `sup <= sub` - * (that is, the user can give two different names to the same lifetime). - */ - can_reach(&*self.free_region_map.borrow(), sub, sup) } + /// Determines whether one region is a subregion of another. This is intended to run *after + /// inference* and sadly the logic is somewhat duplicated with the code in infer.rs. pub fn is_subregion_of(&self, sub_region: ty::Region, super_region: ty::Region) -> bool { - /*! - * Determines whether one region is a subregion of another. This is - * intended to run *after inference* and sadly the logic is somewhat - * duplicated with the code in infer.rs. - */ - debug!("is_subregion_of(sub_region={}, super_region={})", sub_region, super_region); @@ -345,16 +324,12 @@ pub fn is_subregion_of(&self, } } + /// Finds the nearest common ancestor (if any) of two scopes. That is, finds the smallest + /// scope which is greater than or equal to both `scope_a` and `scope_b`. pub fn nearest_common_ancestor(&self, scope_a: CodeExtent, scope_b: CodeExtent) -> Option { - /*! - * Finds the nearest common ancestor (if any) of two scopes. That - * is, finds the smallest scope which is greater than or equal to - * both `scope_a` and `scope_b`. 
-         */
-
         if scope_a == scope_b { return Some(scope_a); }

         let a_ancestors = ancestors_of(self, scope_a);
@@ -681,18 +656,15 @@ fn resolve_local(visitor: &mut RegionResolutionVisitor, local: &ast::Local) {

     visit::walk_local(visitor, local);

+    /// True if `pat` matches the `P&` nonterminal:
+    ///
+    ///     P& = ref X
+    ///        | StructName { ..., P&, ... }
+    ///        | VariantName(..., P&, ...)
+    ///        | [ ..., P&, ... ]
+    ///        | ( ..., P&, ... )
+    ///        | box P&
    fn is_binding_pat(pat: &ast::Pat) -> bool {
-        /*!
-         * True if `pat` match the `P&` nonterminal:
-         *
-         *     P& = ref X
-         *        | StructName { ..., P&, ... }
-         *        | VariantName(..., P&, ...)
-         *        | [ ..., P&, ... ]
-         *        | ( ..., P&, ... )
-         *        | box P&
-         */
-
        match pat.node {
            ast::PatIdent(ast::BindByRef(_), _, _) => true,

@@ -719,35 +691,27 @@ fn is_binding_pat(pat: &ast::Pat) -> bool {
        }
    }

+    /// True if `ty` is a borrowed pointer type like `&int` or `&[...]`.
    fn is_borrowed_ty(ty: &ast::Ty) -> bool {
-        /*!
-         * True if `ty` is a borrowed pointer type
-         * like `&int` or `&[...]`.
-         */
-
        match ty.node {
            ast::TyRptr(..) => true,
            _ => false
        }
    }

+    /// If `expr` matches the `E&` grammar, then records an extended rvalue scope as appropriate:
+    ///
+    ///     E& = & ET
+    ///        | StructName { ..., f: E&, ... }
+    ///        | [ ..., E&, ... ]
+    ///        | ( ..., E&, ... )
+    ///        | {...; E&}
+    ///        | box E&
+    ///        | E& as ...
+    ///        | ( E& )
    fn record_rvalue_scope_if_borrow_expr(visitor: &mut RegionResolutionVisitor,
                                          expr: &ast::Expr,
                                          blk_id: CodeExtent) {
-        /*!
-         * If `expr` matches the `E&` grammar, then records an extended
-         * rvalue scope as appropriate:
-         *
-         *     E& = & ET
-         *        | StructName { ..., f: E&, ... }
-         *        | [ ..., E&, ... ]
-         *        | ( ..., E&, ... )
-         *        | {...; E&}
-         *        | box E&
-         *        | E& as ...
-         *        | ( E& )
-         */
-
        match expr.node {
            ast::ExprAddrOf(_, ref subexpr) => {
                record_rvalue_scope_if_borrow_expr(visitor, &**subexpr, blk_id);
@@ -787,29 +751,24 @@ fn record_rvalue_scope_if_borrow_expr(visitor: &mut RegionResolutionVisitor,
        }
    }

+    /// Applied to an expression `expr` if `expr` -- or something owned or partially owned by
+    /// `expr` -- is going to be indirectly referenced by a variable in a let statement. In that
+    /// case, the "temporary lifetime" of `expr` is extended to be the block enclosing the `let`
+    /// statement.
+    ///
+    /// More formally, if `expr` matches the grammar `ET`, record the rvalue scope of the matching
+    /// `<rvalue>` as `blk_id`:
+    ///
+    ///     ET = *ET
+    ///        | ET[...]
+    ///        | ET.f
+    ///        | (ET)
+    ///        | <rvalue>
+    ///
+    /// Note: ET is intended to match "rvalues or lvalues based on rvalues".
    fn record_rvalue_scope<'a>(visitor: &mut RegionResolutionVisitor,
                               expr: &'a ast::Expr,
                               blk_scope: CodeExtent) {
-        /*!
-         * Applied to an expression `expr` if `expr` -- or something
-         * owned or partially owned by `expr` -- is going to be
-         * indirectly referenced by a variable in a let statement. In
-         * that case, the "temporary lifetime" or `expr` is extended
-         * to be the block enclosing the `let` statement.
-         *
-         * More formally, if `expr` matches the grammar `ET`, record
-         * the rvalue scope of the matching `<rvalue>` as `blk_id`:
-         *
-         *     ET = *ET
-         *        | ET[...]
-         *        | ET.f
-         *        | (ET)
-         *        | <rvalue>
-         *
-         * Note: ET is intended to match "rvalues or
-         * lvalues based on rvalues".
- */ - let mut expr = expr; loop { // Note: give all the expressions matching `ET` with the diff --git a/src/librustc/middle/resolve_lifetime.rs b/src/librustc/middle/resolve_lifetime.rs index fae64ff924274c41ead2ebec5de88ab53b8bf9e1..9c32410ecbfaf187ccb2d0ae4eee0868127293fe 100644 --- a/src/librustc/middle/resolve_lifetime.rs +++ b/src/librustc/middle/resolve_lifetime.rs @@ -8,14 +8,12 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * Name resolution for lifetimes. - * - * Name resolution for lifetimes follows MUCH simpler rules than the - * full resolve. For example, lifetime names are never exported or - * used between functions, and they operate in a purely top-down - * way. Therefore we break lifetime name resolution into a separate pass. - */ +//! Name resolution for lifetimes. +//! +//! Name resolution for lifetimes follows MUCH simpler rules than the +//! full resolve. For example, lifetime names are never exported or +//! used between functions, and they operate in a purely top-down +//! way. Therefore we break lifetime name resolution into a separate pass. pub use self::DefRegion::*; use self::ScopeChain::*; @@ -254,34 +252,27 @@ fn with(&mut self, wrap_scope: ScopeChain, f: |&mut LifetimeContext|) { } /// Visits self by adding a scope and handling recursive walk over the contents with `walk`. + /// + /// Handles visiting fns and methods. These are a bit complicated because we must distinguish + /// early- vs late-bound lifetime parameters. We do this by checking which lifetimes appear + /// within type bounds; those are early bound lifetimes, and the rest are late bound. + /// + /// For example: + /// + /// fn foo<'a,'b,'c,T:Trait<'b>>(...) + /// + /// Here `'a` and `'c` are late bound but `'b` is early bound. Note that early- and late-bound + /// lifetimes may be interspersed together. + /// + /// If early bound lifetimes are present, we separate them into their own list (and likewise + /// for late bound). They will be numbered sequentially, starting from the lowest index that is + /// already in scope (for a fn item, that will be 0, but for a method it might not be). Late + /// bound lifetimes are resolved by name and associated with a binder id (`binder_id`), so the + /// ordering is not important there. fn visit_early_late(&mut self, early_space: subst::ParamSpace, generics: &ast::Generics, walk: |&mut LifetimeContext|) { - /*! - * Handles visiting fns and methods. These are a bit - * complicated because we must distinguish early- vs late-bound - * lifetime parameters. We do this by checking which lifetimes - * appear within type bounds; those are early bound lifetimes, - * and the rest are late bound. - * - * For example: - * - * fn foo<'a,'b,'c,T:Trait<'b>>(...) - * - * Here `'a` and `'c` are late bound but `'b` is early - * bound. Note that early- and late-bound lifetimes may be - * interspersed together. - * - * If early bound lifetimes are present, we separate them into - * their own list (and likewise for late bound). They will be - * numbered sequentially, starting from the lowest index that - * is already in scope (for a fn item, that will be 0, but for - * a method it might not be). Late bound lifetimes are - * resolved by name and associated with a binder id (`binder_id`), so - * the ordering is not important there. 
- */ - let referenced_idents = early_bound_lifetime_names(generics); debug!("visit_early_late: referenced_idents={}", @@ -479,13 +470,9 @@ pub fn early_bound_lifetimes<'a>(generics: &'a ast::Generics) -> Vec Vec { - /*! - * Given a set of generic declarations, returns a list of names - * containing all early bound lifetime names for those - * generics. (In fact, this list may also contain other names.) - */ - // Create two lists, dividing the lifetimes into early/late bound. // Initially, all of them are considered late, but we will move // things from late into early as we go if we find references to diff --git a/src/librustc/middle/subst.rs b/src/librustc/middle/subst.rs index b030867fc841c5fbe6670ef76c2417d2c7e687ca..365c2ed39dbc006c9925f4183dec3e1ce5a74a45 100644 --- a/src/librustc/middle/subst.rs +++ b/src/librustc/middle/subst.rs @@ -131,26 +131,18 @@ pub fn erase_regions(self) -> Substs<'tcx> { Substs { types: types, regions: ErasedRegions } } + /// Since ErasedRegions are only to be used in trans, most of the compiler can use this method + /// to easily access the set of region substitutions. pub fn regions<'a>(&'a self) -> &'a VecPerParamSpace { - /*! - * Since ErasedRegions are only to be used in trans, most of - * the compiler can use this method to easily access the set - * of region substitutions. - */ - match self.regions { ErasedRegions => panic!("Erased regions only expected in trans"), NonerasedRegions(ref r) => r } } + /// Since ErasedRegions are only to be used in trans, most of the compiler can use this method + /// to easily access the set of region substitutions. pub fn mut_regions<'a>(&'a mut self) -> &'a mut VecPerParamSpace { - /*! - * Since ErasedRegions are only to be used in trans, most of - * the compiler can use this method to easily access the set - * of region substitutions. - */ - match self.regions { ErasedRegions => panic!("Erased regions only expected in trans"), NonerasedRegions(ref mut r) => r @@ -688,59 +680,49 @@ fn ty_for_param(&self, p: ty::ParamTy, source_ty: Ty<'tcx>) -> Ty<'tcx> { self.shift_regions_through_binders(ty) } + /// It is sometimes necessary to adjust the debruijn indices during substitution. This occurs + /// when we are substituting a type with escaping regions into a context where we have passed + /// through region binders. That's quite a mouthful. Let's see an example: + /// + /// ``` + /// type Func = fn(A); + /// type MetaFunc = for<'a> fn(Func<&'a int>) + /// ``` + /// + /// The type `MetaFunc`, when fully expanded, will be + /// + /// for<'a> fn(fn(&'a int)) + /// ^~ ^~ ^~~ + /// | | | + /// | | DebruijnIndex of 2 + /// Binders + /// + /// Here the `'a` lifetime is bound in the outer function, but appears as an argument of the + /// inner one. Therefore, that appearance will have a DebruijnIndex of 2, because we must skip + /// over the inner binder (remember that we count Debruijn indices from 1). However, in the + /// definition of `MetaFunc`, the binder is not visible, so the type `&'a int` will have a + /// debruijn index of 1. It's only during the substitution that we can see we must increase the + /// depth by 1 to account for the binder that we passed through. 
+ /// + /// As a second example, consider this twist: + /// + /// ``` + /// type FuncTuple = (A,fn(A)); + /// type MetaFuncTuple = for<'a> fn(FuncTuple<&'a int>) + /// ``` + /// + /// Here the final type will be: + /// + /// for<'a> fn((&'a int, fn(&'a int))) + /// ^~~ ^~~ + /// | | + /// DebruijnIndex of 1 | + /// DebruijnIndex of 2 + /// + /// As indicated in the diagram, here the same type `&'a int` is substituted once, but in the + /// first case we do not increase the Debruijn index and in the second case we do. The reason + /// is that only in the second case have we passed through a fn binder. fn shift_regions_through_binders(&self, ty: Ty<'tcx>) -> Ty<'tcx> { - /*! - * It is sometimes necessary to adjust the debruijn indices - * during substitution. This occurs when we are substituting a - * type with escaping regions into a context where we have - * passed through region binders. That's quite a - * mouthful. Let's see an example: - * - * ``` - * type Func = fn(A); - * type MetaFunc = for<'a> fn(Func<&'a int>) - * ``` - * - * The type `MetaFunc`, when fully expanded, will be - * - * for<'a> fn(fn(&'a int)) - * ^~ ^~ ^~~ - * | | | - * | | DebruijnIndex of 2 - * Binders - * - * Here the `'a` lifetime is bound in the outer function, but - * appears as an argument of the inner one. Therefore, that - * appearance will have a DebruijnIndex of 2, because we must - * skip over the inner binder (remember that we count Debruijn - * indices from 1). However, in the definition of `MetaFunc`, - * the binder is not visible, so the type `&'a int` will have - * a debruijn index of 1. It's only during the substitution - * that we can see we must increase the depth by 1 to account - * for the binder that we passed through. - * - * As a second example, consider this twist: - * - * ``` - * type FuncTuple = (A,fn(A)); - * type MetaFuncTuple = for<'a> fn(FuncTuple<&'a int>) - * ``` - * - * Here the final type will be: - * - * for<'a> fn((&'a int, fn(&'a int))) - * ^~~ ^~~ - * | | - * DebruijnIndex of 1 | - * DebruijnIndex of 2 - * - * As indicated in the diagram, here the same type `&'a int` - * is substituted once, but in the first case we do not - * increase the Debruijn index and in the second case we - * do. The reason is that only in the second case have we - * passed through a fn binder. - */ - debug!("shift_regions(ty={}, region_binders_passed={}, type_has_escaping_regions={})", ty.repr(self.tcx()), self.region_binders_passed, ty::type_has_escaping_regions(ty)); diff --git a/src/librustc/middle/traits/coherence.rs b/src/librustc/middle/traits/coherence.rs index c84a2a0d11e6b3b01a3c20a67899e21cb945ee39..048f394224cf0b1d2e8e2133a63d4083b72c5145 100644 --- a/src/librustc/middle/traits/coherence.rs +++ b/src/librustc/middle/traits/coherence.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! See `doc.rs` for high-level documentation */ +//! See `doc.rs` for high-level documentation use super::SelectionContext; use super::Obligation; diff --git a/src/librustc/middle/traits/doc.rs b/src/librustc/middle/traits/doc.rs index c014bc0c164f206dce9b7c1180382c215b0d846f..62246b77ee9409b0c168806403ec4b1b85e8e71e 100644 --- a/src/librustc/middle/traits/doc.rs +++ b/src/librustc/middle/traits/doc.rs @@ -8,403 +8,399 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -# TRAIT RESOLUTION - -This document describes the general process and points out some non-obvious -things. 
- -## Major concepts - -Trait resolution is the process of pairing up an impl with each -reference to a trait. So, for example, if there is a generic function like: - - fn clone_slice(x: &[T]) -> Vec { ... } - -and then a call to that function: - - let v: Vec = clone_slice([1, 2, 3].as_slice()) - -it is the job of trait resolution to figure out (in which case) -whether there exists an impl of `int : Clone` - -Note that in some cases, like generic functions, we may not be able to -find a specific impl, but we can figure out that the caller must -provide an impl. To see what I mean, consider the body of `clone_slice`: - - fn clone_slice(x: &[T]) -> Vec { - let mut v = Vec::new(); - for e in x.iter() { - v.push((*e).clone()); // (*) - } - } - -The line marked `(*)` is only legal if `T` (the type of `*e`) -implements the `Clone` trait. Naturally, since we don't know what `T` -is, we can't find the specific impl; but based on the bound `T:Clone`, -we can say that there exists an impl which the caller must provide. - -We use the term *obligation* to refer to a trait reference in need of -an impl. - -## Overview - -Trait resolution consists of three major parts: - -- SELECTION: Deciding how to resolve a specific obligation. For - example, selection might decide that a specific obligation can be - resolved by employing an impl which matches the self type, or by - using a parameter bound. In the case of an impl, Selecting one - obligation can create *nested obligations* because of where clauses - on the impl itself. It may also require evaluating those nested - obligations to resolve ambiguities. - -- FULFILLMENT: The fulfillment code is what tracks that obligations - are completely fulfilled. Basically it is a worklist of obligations - to be selected: once selection is successful, the obligation is - removed from the worklist and any nested obligations are enqueued. - -- COHERENCE: The coherence checks are intended to ensure that there - are never overlapping impls, where two impls could be used with - equal precedence. - -## Selection - -Selection is the process of deciding whether an obligation can be -resolved and, if so, how it is to be resolved (via impl, where clause, etc). -The main interface is the `select()` function, which takes an obligation -and returns a `SelectionResult`. There are three possible outcomes: - -- `Ok(Some(selection))` -- yes, the obligation can be resolved, and - `selection` indicates how. If the impl was resolved via an impl, - then `selection` may also indicate nested obligations that are required - by the impl. - -- `Ok(None)` -- we are not yet sure whether the obligation can be - resolved or not. This happens most commonly when the obligation - contains unbound type variables. - -- `Err(err)` -- the obligation definitely cannot be resolved due to a - type error, or because there are no impls that could possibly apply, - etc. - -The basic algorithm for selection is broken into two big phases: -candidate assembly and confirmation. - -### Candidate assembly - -Searches for impls/where-clauses/etc that might -possibly be used to satisfy the obligation. Each of those is called -a candidate. To avoid ambiguity, we want to find exactly one -candidate that is definitively applicable. In some cases, we may not -know whether an impl/where-clause applies or not -- this occurs when -the obligation contains unbound inference variables. - -The basic idea for candidate assembly is to do a first pass in which -we identify all possible candidates. 
During this pass, all that we do -is try and unify the type parameters. (In particular, we ignore any -nested where clauses.) Presuming that this unification succeeds, the -impl is added as a candidate. - -Once this first pass is done, we can examine the set of candidates. If -it is a singleton set, then we are done: this is the only impl in -scope that could possibly apply. Otherwise, we can winnow down the set -of candidates by using where clauses and other conditions. If this -reduced set yields a single, unambiguous entry, we're good to go, -otherwise the result is considered ambiguous. - -#### The basic process: Inferring based on the impls we see - -This process is easier if we work through some examples. Consider -the following trait: - -``` -trait Convert { - fn convert(&self) -> Target; -} -``` - -This trait just has one method. It's about as simple as it gets. It -converts from the (implicit) `Self` type to the `Target` type. If we -wanted to permit conversion between `int` and `uint`, we might -implement `Convert` like so: - -```rust -impl Convert for int { ... } // int -> uint -impl Convert for uint { ... } // uint -> uint -``` - -Now imagine there is some code like the following: - -```rust -let x: int = ...; -let y = x.convert(); -``` - -The call to convert will generate a trait reference `Convert<$Y> for -int`, where `$Y` is the type variable representing the type of -`y`. When we match this against the two impls we can see, we will find -that only one remains: `Convert for int`. Therefore, we can -select this impl, which will cause the type of `$Y` to be unified to -`uint`. (Note that while assembling candidates, we do the initial -unifications in a transaction, so that they don't affect one another.) - -There are tests to this effect in src/test/run-pass: - - traits-multidispatch-infer-convert-source-and-target.rs - traits-multidispatch-infer-convert-target.rs - -#### Winnowing: Resolving ambiguities - -But what happens if there are multiple impls where all the types -unify? Consider this example: - -```rust -trait Get { - fn get(&self) -> Self; -} - -impl Get for T { - fn get(&self) -> T { *self } -} - -impl Get for Box { - fn get(&self) -> Box { box get_it(&**self) } -} -``` - -What happens when we invoke `get_it(&box 1_u16)`, for example? In this -case, the `Self` type is `Box` -- that unifies with both impls, -because the first applies to all types, and the second to all -boxes. In the olden days we'd have called this ambiguous. But what we -do now is do a second *winnowing* pass that considers where clauses -and attempts to remove candidates -- in this case, the first impl only -applies if `Box : Copy`, which doesn't hold. After winnowing, -then, we are left with just one candidate, so we can proceed. There is -a test of this in `src/test/run-pass/traits-conditional-dispatch.rs`. - -#### Matching - -The subroutines that decide whether a particular impl/where-clause/etc -applies to a particular obligation. At the moment, this amounts to -unifying the self types, but in the future we may also recursively -consider some of the nested obligations, in the case of an impl. - -#### Lifetimes and selection - -Because of how that lifetime inference works, it is not possible to -give back immediate feedback as to whether a unification or subtype -relationship between lifetimes holds or not. Therefore, lifetime -matching is *not* considered during selection. This is reflected in -the fact that subregion assignment is infallible. 
This may yield -lifetime constraints that will later be found to be in error (in -contrast, the non-lifetime-constraints have already been checked -during selection and can never cause an error, though naturally they -may lead to other errors downstream). - -#### Where clauses - -Besides an impl, the other major way to resolve an obligation is via a -where clause. The selection process is always given a *parameter -environment* which contains a list of where clauses, which are -basically obligations that can assume are satisfiable. We will iterate -over that list and check whether our current obligation can be found -in that list, and if so it is considered satisfied. More precisely, we -want to check whether there is a where-clause obligation that is for -the same trait (or some subtrait) and for which the self types match, -using the definition of *matching* given above. - -Consider this simple example: - - trait A1 { ... } - trait A2 : A1 { ... } - - trait B { ... } - - fn foo { ... } - -Clearly we can use methods offered by `A1`, `A2`, or `B` within the -body of `foo`. In each case, that will incur an obligation like `X : -A1` or `X : A2`. The parameter environment will contain two -where-clauses, `X : A2` and `X : B`. For each obligation, then, we -search this list of where-clauses. To resolve an obligation `X:A1`, -we would note that `X:A2` implies that `X:A1`. - -### Confirmation - -Confirmation unifies the output type parameters of the trait with the -values found in the obligation, possibly yielding a type error. If we -return to our example of the `Convert` trait from the previous -section, confirmation is where an error would be reported, because the -impl specified that `T` would be `uint`, but the obligation reported -`char`. Hence the result of selection would be an error. - -### Selection during translation - -During type checking, we do not store the results of trait selection. -We simply wish to verify that trait selection will succeed. Then -later, at trans time, when we have all concrete types available, we -can repeat the trait selection. In this case, we do not consider any -where-clauses to be in scope. We know that therefore each resolution -will resolve to a particular impl. - -One interesting twist has to do with nested obligations. In general, in trans, -we only need to do a "shallow" selection for an obligation. That is, we wish to -identify which impl applies, but we do not (yet) need to decide how to select -any nested obligations. Nonetheless, we *do* currently do a complete resolution, -and that is because it can sometimes inform the results of type inference. That is, -we do not have the full substitutions in terms of the type varibales of the impl available -to us, so we must run trait selection to figure everything out. - -Here is an example: - - trait Foo { ... } - impl> Foo for Vec { ... } - - impl Bar for int { ... } - -After one shallow round of selection for an obligation like `Vec -: Foo`, we would know which impl we want, and we would know that -`T=int`, but we do not know the type of `U`. We must select the -nested obligation `int : Bar` to find out that `U=uint`. - -It would be good to only do *just as much* nested resolution as -necessary. Currently, though, we just do a full resolution. - -## Method matching - -Method dispach follows a slightly different path than normal trait -selection. This is because it must account for the transformed self -type of the receiver and various other complications. 
The procedure is -described in `select.rs` in the "METHOD MATCHING" section. - -# Caching and subtle considerations therewith - -In general we attempt to cache the results of trait selection. This -is a somewhat complex process. Part of the reason for this is that we -want to be able to cache results even when all the types in the trait -reference are not fully known. In that case, it may happen that the -trait selection process is also influencing type variables, so we have -to be able to not only cache the *result* of the selection process, -but *replay* its effects on the type variables. - -## An example - -The high-level idea of how the cache works is that we first replace -all unbound inference variables with skolemized versions. Therefore, -if we had a trait reference `uint : Foo<$1>`, where `$n` is an unbound -inference variable, we might replace it with `uint : Foo<%0>`, where -`%n` is a skolemized type. We would then look this up in the cache. -If we found a hit, the hit would tell us the immediate next step to -take in the selection process: i.e., apply impl #22, or apply where -clause `X : Foo`. Let's say in this case there is no hit. -Therefore, we search through impls and where clauses and so forth, and -we come to the conclusion that the only possible impl is this one, -with def-id 22: - - impl Foo for uint { ... } // Impl #22 - -We would then record in the cache `uint : Foo<%0> ==> -ImplCandidate(22)`. Next we would confirm `ImplCandidate(22)`, which -would (as a side-effect) unify `$1` with `int`. - -Now, at some later time, we might come along and see a `uint : -Foo<$3>`. When skolemized, this would yield `uint : Foo<%0>`, just as -before, and hence the cache lookup would succeed, yielding -`ImplCandidate(22)`. We would confirm `ImplCandidate(22)` which would -(as a side-effect) unify `$3` with `int`. - -## Where clauses and the local vs global cache - -One subtle interaction is that the results of trait lookup will vary -depending on what where clauses are in scope. Therefore, we actually -have *two* caches, a local and a global cache. The local cache is -attached to the `ParameterEnvironment` and the global cache attached -to the `tcx`. We use the local cache whenever the result might depend -on the where clauses that are in scope. The determination of which -cache to use is done by the method `pick_candidate_cache` in -`select.rs`. - -There are two cases where we currently use the local cache. The -current rules are probably more conservative than necessary. - -### Trait references that involve parameter types - -The most obvious case where you need the local environment is -when the trait reference includes parameter types. For example, -consider the following function: - - impl Vec { - fn foo(x: T) - where T : Foo - { ... } - - fn bar(x: T) - { ... } - } - -If there is an obligation `T : Foo`, or `int : Bar`, or whatever, -clearly the results from `foo` and `bar` are potentially different, -since the set of where clauses in scope are different. - -### Trait references with unbound variables when where clauses are in scope - -There is another less obvious interaction which involves unbound variables -where *only* where clauses are in scope (no impls). This manifested as -issue #18209 (`run-pass/trait-cache-issue-18209.rs`). Consider -this snippet: - -``` -pub trait Foo { - fn load_from() -> Box; - fn load() -> Box { - Foo::load_from() - } -} -``` - -The default method will incur an obligation `$0 : Foo` from the call -to `load_from`. 
If there are no impls, this can be eagerly resolved to
-`VtableParam(Self : Foo)` and cached. Because the trait reference
-doesn't involve any parameters types (only the resolution does), this
-result was stored in the global cache, causing later calls to
-`Foo::load_from()` to get nonsense.
-
-To fix this, we always use the local cache if there are unbound
-variables and where clauses in scope. This is more conservative than
-necessary as far as I can tell. However, it still seems to be a simple
-rule and I observe ~99% hit rate on rustc, so it doesn't seem to hurt
-us in particular.
-
-Here is an example of the kind of subtle case that I would be worried
-about with a more complex rule (although this particular case works
-out ok). Imagine the trait reference doesn't directly reference a
-where clause, but the where clause plays a role in the winnowing
-phase. Something like this:
-
-```
-pub trait Foo { ... }
-pub trait Bar { ... }
-impl Foo for T { ... } // Impl A
-impl Foo for uint { ... } // Impl B
-```
-
-Now, in some function, we have no where clauses in scope, and we have
-an obligation `$1 : Foo<$0>`. We might then conclude that `$0=char`
-and `$1=uint`: this is because for impl A to apply, `uint:Bar` would
-have to hold, and we know it does not or else the coherence check
-would have failed. So we might enter into our global cache: `$1 :
-Foo<$0> => Impl B`. Then we come along in a different scope, where a
-generic type `A` is around with the bound `A:Bar`. Now suddenly the
-impl is viable.
-
-The flaw in this imaginary DOOMSDAY SCENARIO is that we would not
-currently conclude that `$1 : Foo<$0>` implies that `$0 == uint` and
-`$1 == char`, even though it is true that (absent type parameters)
-there is no other type the user could enter. However, it is not
-*completely* implausible that we *could* draw this conclusion in the
-future; we wouldn't have to guess types, in particular, we could be
-led by the impls.
-
-*/
+//! # TRAIT RESOLUTION
+//!
+//! This document describes the general process and points out some non-obvious
+//! things.
+//!
+//! ## Major concepts
+//!
+//! Trait resolution is the process of pairing up an impl with each
+//! reference to a trait. So, for example, if there is a generic function like:
+//!
+//!     fn clone_slice<T:Clone>(x: &[T]) -> Vec<T> { ... }
+//!
+//! and then a call to that function:
+//!
+//!     let v: Vec<int> = clone_slice([1, 2, 3].as_slice())
+//!
+//! it is the job of trait resolution to figure out (in this case)
+//! whether there exists an impl of `int : Clone`.
+//!
+//! Note that in some cases, like generic functions, we may not be able to
+//! find a specific impl, but we can figure out that the caller must
+//! provide an impl. To see what I mean, consider the body of `clone_slice`:
+//!
+//!     fn clone_slice<T:Clone>(x: &[T]) -> Vec<T> {
+//!         let mut v = Vec::new();
+//!         for e in x.iter() {
+//!             v.push((*e).clone()); // (*)
+//!         }
+//!         v
+//!     }
+//!
+//! The line marked `(*)` is only legal if `T` (the type of `*e`)
+//! implements the `Clone` trait. Naturally, since we don't know what `T`
+//! is, we can't find the specific impl; but based on the bound `T:Clone`,
+//! we can say that there exists an impl which the caller must provide.
+//!
+//! We use the term *obligation* to refer to a trait reference in need of
+//! an impl.
+//!
+//! ## Overview
+//!
+//! Trait resolution consists of three major parts:
+//!
+//! - SELECTION: Deciding how to resolve a specific obligation. For
+//!   example, selection might decide that a specific obligation can be
+//!   resolved by employing an impl which matches the self type, or by
+//!   using a parameter bound. In the case of an impl, selecting one
+//!   obligation can create *nested obligations* because of where clauses
+//!   on the impl itself. It may also require evaluating those nested
+//!   obligations to resolve ambiguities.
+//!
+//! - FULFILLMENT: The fulfillment code is what tracks that obligations
+//!   are completely fulfilled. Basically it is a worklist of obligations
+//!   to be selected: once selection is successful, the obligation is
+//!   removed from the worklist and any nested obligations are enqueued.
+//!
+//! - COHERENCE: The coherence checks are intended to ensure that there
+//!   are never overlapping impls, where two impls could be used with
+//!   equal precedence.
+//!
+//! ## Selection
+//!
+//! Selection is the process of deciding whether an obligation can be
+//! resolved and, if so, how it is to be resolved (via impl, where clause, etc.).
+//! The main interface is the `select()` function, which takes an obligation
+//! and returns a `SelectionResult`. There are three possible outcomes:
+//!
+//! - `Ok(Some(selection))` -- yes, the obligation can be resolved, and
+//!   `selection` indicates how. If the obligation was resolved via an impl,
+//!   then `selection` may also indicate nested obligations that are required
+//!   by the impl.
+//!
+//! - `Ok(None)` -- we are not yet sure whether the obligation can be
+//!   resolved or not. This happens most commonly when the obligation
+//!   contains unbound type variables.
+//!
+//! - `Err(err)` -- the obligation definitely cannot be resolved due to a
+//!   type error, or because there are no impls that could possibly apply,
+//!   etc.
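+//!
+//! Shape-wise, these outcomes amount to a result type along the lines of
+//! the following sketch (a simplified, hypothetical rendering -- the
+//! compiler's actual definition carries more information):
+//!
+//! ```rust
+//! enum SelectionError { Unimplemented, /* ... */ }
+//!
+//! // Ok(Some(..)) = resolved, Ok(None) = still ambiguous, Err(..) = failed.
+//! type SelectionResult<Selection> = Result<Option<Selection>, SelectionError>;
+//! ```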
+//!
+//! The basic algorithm for selection is broken into two big phases:
+//! candidate assembly and confirmation.
+//!
+//! ### Candidate assembly
+//!
+//! Searches for impls/where-clauses/etc that might
+//! possibly be used to satisfy the obligation. Each of those is called
+//! a candidate. To avoid ambiguity, we want to find exactly one
+//! candidate that is definitively applicable. In some cases, we may not
+//! know whether an impl/where-clause applies or not -- this occurs when
+//! the obligation contains unbound inference variables.
+//!
+//! The basic idea for candidate assembly is to do a first pass in which
+//! we identify all possible candidates. During this pass, all that we do
+//! is try and unify the type parameters. (In particular, we ignore any
+//! nested where clauses.) Presuming that this unification succeeds, the
+//! impl is added as a candidate.
+//!
+//! Once this first pass is done, we can examine the set of candidates. If
+//! it is a singleton set, then we are done: this is the only impl in
+//! scope that could possibly apply. Otherwise, we can winnow down the set
+//! of candidates by using where clauses and other conditions. If this
+//! reduced set yields a single, unambiguous entry, we're good to go,
+//! otherwise the result is considered ambiguous.
+//!
+//! #### The basic process: Inferring based on the impls we see
+//!
+//! This process is easier if we work through some examples. Consider
+//! the following trait:
+//!
+//! ```
+//! trait Convert<Target> {
+//!     fn convert(&self) -> Target;
+//! }
+//! ```
+//!
+//! This trait just has one method. It's about as simple as it gets. It
+//! converts from the (implicit) `Self` type to the `Target` type. If we
+//! wanted to permit conversion between `int` and `uint`, we might
+//! implement `Convert` like so:
+//!
+//! ```rust
+//! impl Convert<uint> for int { ... } // int -> uint
+//! impl Convert<int> for uint { ... } // uint -> int
+//! ```
+//!
+//! Now imagine there is some code like the following:
+//!
+//! ```rust
+//! let x: int = ...;
+//! let y = x.convert();
+//! ```
+//!
+//! The call to convert will generate a trait reference `Convert<$Y> for
+//! int`, where `$Y` is the type variable representing the type of
+//! `y`. When we match this against the two impls we can see, we will find
+//! that only one remains: `Convert<uint> for int`. Therefore, we can
+//! select this impl, which will cause the type of `$Y` to be unified to
+//! `uint`. (Note that while assembling candidates, we do the initial
+//! unifications in a transaction, so that they don't affect one another.)
+//!
+//! There are tests to this effect in src/test/run-pass:
+//!
+//!     traits-multidispatch-infer-convert-source-and-target.rs
+//!     traits-multidispatch-infer-convert-target.rs
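+//!
+//! The same inference can be seen in miniature in this self-contained
+//! sketch (hypothetical code using explicit integer types so that it
+//! stands alone; it is not one of the tests above):
+//!
+//! ```rust
+//! trait Convert<Target> { fn convert(&self) -> Target; }
+//!
+//! // With only this one impl in scope, selecting it forces `Target`.
+//! impl Convert<u64> for i32 { fn convert(&self) -> u64 { *self as u64 } }
+//!
+//! fn main() {
+//!     let x: i32 = 5;
+//!     let y = x.convert(); // `y` is inferred to have type `u64`
+//!     assert_eq!(y, 5u64);
+//! }
+//! ```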
+//!
+//! #### Winnowing: Resolving ambiguities
+//!
+//! But what happens if there are multiple impls where all the types
+//! unify? Consider this example:
+//!
+//! ```rust
+//! trait Get {
+//!     fn get(&self) -> Self;
+//! }
+//!
+//! impl<T:Copy> Get for T {
+//!     fn get(&self) -> T { *self }
+//! }
+//!
+//! impl<T:Get> Get for Box<T> {
+//!     fn get(&self) -> Box<T> { box get_it(&**self) }
+//! }
+//! ```
+//!
+//! What happens when we invoke `get_it(&box 1_u16)`, for example? In this
+//! case, the `Self` type is `Box<u16>` -- that unifies with both impls,
+//! because the first applies to all types, and the second to all
+//! boxes. In the olden days we'd have called this ambiguous. But what we
+//! do now is do a second *winnowing* pass that considers where clauses
+//! and attempts to remove candidates -- in this case, the first impl only
+//! applies if `Box<u16> : Copy`, which doesn't hold. After winnowing,
+//! then, we are left with just one candidate, so we can proceed. There is
+//! a test of this in `src/test/run-pass/traits-conditional-dispatch.rs`.
+//!
+//! #### Matching
+//!
+//! The subroutines that decide whether a particular impl/where-clause/etc
+//! applies to a particular obligation. At the moment, this amounts to
+//! unifying the self types, but in the future we may also recursively
+//! consider some of the nested obligations, in the case of an impl.
+//!
+//! #### Lifetimes and selection
+//!
+//! Because of how lifetime inference works, it is not possible to
+//! give back immediate feedback as to whether a unification or subtype
+//! relationship between lifetimes holds or not. Therefore, lifetime
+//! matching is *not* considered during selection. This is reflected in
+//! the fact that subregion assignment is infallible. This may yield
+//! lifetime constraints that will later be found to be in error (in
+//! contrast, the non-lifetime-constraints have already been checked
+//! during selection and can never cause an error, though naturally they
+//! may lead to other errors downstream).
+//!
+//! #### Where clauses
+//!
+//! Besides an impl, the other major way to resolve an obligation is via a
+//! where clause. The selection process is always given a *parameter
+//! environment* which contains a list of where clauses, which are
+//! basically obligations that we can assume are satisfiable. We will iterate
+//! over that list and check whether our current obligation can be found
+//! in that list, and if so it is considered satisfied. More precisely, we
+//! want to check whether there is a where-clause obligation that is for
+//! the same trait (or some subtrait) and for which the self types match,
+//! using the definition of *matching* given above.
+//!
+//! Consider this simple example:
+//!
+//!     trait A1 { ... }
+//!     trait A2 : A1 { ... }
+//!
+//!     trait B { ... }
+//!
+//!     fn foo<X:A2+B> { ... }
+//!
+//! Clearly we can use methods offered by `A1`, `A2`, or `B` within the
+//! body of `foo`. In each case, that will incur an obligation like `X :
+//! A1` or `X : A2`. The parameter environment will contain two
+//! where-clauses, `X : A2` and `X : B`. For each obligation, then, we
+//! search this list of where-clauses. To resolve an obligation `X:A1`,
+//! we would note that `X:A2` implies that `X:A1`.
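+//!
+//! A toy model of that search (hypothetical code -- the compiler's
+//! parameter environments are much richer than string pairs) might be:
+//!
+//! ```rust
+//! // Each where-clause is recorded as (type parameter, trait); assume
+//! // elaboration has already added supertraits, e.g. `X : A1` from `X : A2`.
+//! fn obligation_satisfied(env: &[(&str, &str)], param: &str, tr: &str) -> bool {
+//!     env.iter().any(|&(p, t)| p == param && t == tr)
+//! }
+//!
+//! fn main() {
+//!     let env = [("X", "A2"), ("X", "A1"), ("X", "B")]; // from `fn foo<X:A2+B>`
+//!     assert!(obligation_satisfied(&env, "X", "A1"));
+//! }
+//! ```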
+//!
+//! ### Confirmation
+//!
+//! Confirmation unifies the output type parameters of the trait with the
+//! values found in the obligation, possibly yielding a type error. If we
+//! return to our example of the `Convert` trait from the previous
+//! section, confirmation is where an error would be reported, because the
+//! impl specified that `T` would be `uint`, but the obligation reported
+//! `char`. Hence the result of selection would be an error.
+//!
+//! ### Selection during translation
+//!
+//! During type checking, we do not store the results of trait selection.
+//! We simply wish to verify that trait selection will succeed. Then
+//! later, at trans time, when we have all concrete types available, we
+//! can repeat the trait selection. In this case, we do not consider any
+//! where-clauses to be in scope. Therefore, we know that each resolution
+//! will resolve to a particular impl.
+//!
+//! One interesting twist has to do with nested obligations. In general, in trans,
+//! we only need to do a "shallow" selection for an obligation. That is, we wish to
+//! identify which impl applies, but we do not (yet) need to decide how to select
+//! any nested obligations. Nonetheless, we *do* currently do a complete resolution,
+//! and that is because it can sometimes inform the results of type inference. That is,
+//! we do not have the full substitutions in terms of the type variables of the impl available
+//! to us, so we must run trait selection to figure everything out.
+//!
+//! Here is an example:
+//!
+//!     trait Foo { ... }
+//!     impl<T:Bar<U>> Foo for Vec<T> { ... }
+//!
+//!     impl Bar<uint> for int { ... }
+//!
+//! After one shallow round of selection for an obligation like `Vec<int>
+//! : Foo`, we would know which impl we want, and we would know that
+//! `T=int`, but we do not know the type of `U`. We must select the
+//! nested obligation `int : Bar<U>` to find out that `U=uint`.
+//!
+//! It would be good to only do *just as much* nested resolution as
+//! necessary. Currently, though, we just do a full resolution.
+//!
+//! ## Method matching
+//!
+//! Method dispatch follows a slightly different path than normal trait
+//! selection. This is because it must account for the transformed self
+//! type of the receiver and various other complications. The procedure is
+//! described in `select.rs` in the "METHOD MATCHING" section.
+//!
+//! # Caching and subtle considerations therewith
+//!
+//! In general we attempt to cache the results of trait selection. This
+//! is a somewhat complex process. Part of the reason for this is that we
+//! want to be able to cache results even when all the types in the trait
+//! reference are not fully known. In that case, it may happen that the
+//! trait selection process is also influencing type variables, so we have
+//! to be able to not only cache the *result* of the selection process,
+//! but *replay* its effects on the type variables.
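+//!
+//! As a taste of the first half of that problem -- computing a cache key
+//! that is stable across different inference variables -- here is a sketch
+//! (hypothetical code; the compiler of course operates on types, not
+//! strings) of the variable replacement described in the example below:
+//!
+//! ```rust
+//! use std::collections::HashMap;
+//!
+//! // Rewrite inference variables (`$1`, `$3`, ...) into placeholders
+//! // (`%0`, `%1`, ...) numbered by order of first appearance, so that
+//! // `uint : Foo<$1>` and `uint : Foo<$3>` yield the same cache key.
+//! fn skolemize(vars: &[&str]) -> Vec<String> {
+//!     let mut seen = HashMap::new();
+//!     vars.iter()
+//!         .map(|v| {
+//!             let next = seen.len();
+//!             format!("%{}", seen.entry(*v).or_insert(next))
+//!         })
+//!         .collect()
+//! }
+//!
+//! fn main() {
+//!     assert_eq!(skolemize(&["$1"]), ["%0"]);
+//!     assert_eq!(skolemize(&["$3"]), ["%0"]); // same key as for `$1`
+//! }
+//! ```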
+//!
+//! ## An example
+//!
+//! The high-level idea of how the cache works is that we first replace
+//! all unbound inference variables with skolemized versions. Therefore,
+//! if we had a trait reference `uint : Foo<$1>`, where `$n` is an unbound
+//! inference variable, we might replace it with `uint : Foo<%0>`, where
+//! `%n` is a skolemized type. We would then look this up in the cache.
+//! If we found a hit, the hit would tell us the immediate next step to
+//! take in the selection process: i.e., apply impl #22, or apply where
+//! clause `X : Foo`. Let's say in this case there is no hit.
+//! Therefore, we search through impls and where clauses and so forth, and
+//! we come to the conclusion that the only possible impl is this one,
+//! with def-id 22:
+//!
+//!     impl Foo<int> for uint { ... } // Impl #22
+//!
+//! We would then record in the cache `uint : Foo<%0> ==>
+//! ImplCandidate(22)`. Next we would confirm `ImplCandidate(22)`, which
+//! would (as a side-effect) unify `$1` with `int`.
+//!
+//! Now, at some later time, we might come along and see a `uint :
+//! Foo<$3>`. When skolemized, this would yield `uint : Foo<%0>`, just as
+//! before, and hence the cache lookup would succeed, yielding
+//! `ImplCandidate(22)`. We would confirm `ImplCandidate(22)` which would
+//! (as a side-effect) unify `$3` with `int`.
+//!
+//! ## Where clauses and the local vs global cache
+//!
+//! One subtle interaction is that the results of trait lookup will vary
+//! depending on what where clauses are in scope. Therefore, we actually
+//! have *two* caches, a local and a global cache. The local cache is
+//! attached to the `ParameterEnvironment` and the global cache attached
+//! to the `tcx`. We use the local cache whenever the result might depend
+//! on the where clauses that are in scope. The determination of which
+//! cache to use is done by the method `pick_candidate_cache` in
+//! `select.rs`.
+//!
+//! There are two cases where we currently use the local cache. The
+//! current rules are probably more conservative than necessary.
+//!
+//! ### Trait references that involve parameter types
+//!
+//! The most obvious case where you need the local environment is
+//! when the trait reference includes parameter types. For example,
+//! consider the following function:
+//!
+//!     impl<T> Vec<T> {
+//!         fn foo(x: T)
+//!             where T : Foo
+//!         { ... }
+//!
+//!         fn bar(x: T)
+//!         { ... }
+//!     }
+//!
+//! If there is an obligation `T : Foo`, or `int : Bar`, or whatever,
+//! clearly the results from `foo` and `bar` are potentially different,
+//! since the set of where clauses in scope are different.
+//!
+//! ### Trait references with unbound variables when where clauses are in scope
+//!
+//! There is another less obvious interaction which involves unbound variables
+//! where *only* where clauses are in scope (no impls). This manifested as
+//! issue #18209 (`run-pass/trait-cache-issue-18209.rs`). Consider
+//! this snippet:
+//!
+//! ```
+//! pub trait Foo {
+//!     fn load_from() -> Box<Self>;
+//!     fn load() -> Box<Self> {
+//!         Foo::load_from()
+//!     }
+//! }
+//! ```
+//!
+//! The default method will incur an obligation `$0 : Foo` from the call
+//! to `load_from`. If there are no impls, this can be eagerly resolved to
+//! `VtableParam(Self : Foo)` and cached. Because the trait reference
+//! doesn't involve any parameter types (only the resolution does), this
+//! result was stored in the global cache, causing later calls to
+//! `Foo::load_from()` to get nonsense.
+//!
+//! To fix this, we always use the local cache if there are unbound
+//! variables and where clauses in scope. This is more conservative than
+//! necessary as far as I can tell. However, it still seems to be a simple
+//! rule and I observe ~99% hit rate on rustc, so it doesn't seem to hurt
+//! us in particular.
+//!
+//! Here is an example of the kind of subtle case that I would be worried
+//! about with a more complex rule (although this particular case works
+//! out ok). Imagine the trait reference doesn't directly reference a
+//! where clause, but the where clause plays a role in the winnowing
+//! phase. Something like this:
+//!
+//! ```
+//! pub trait Foo<T> { ... }
+//! pub trait Bar { ... }
+//! impl<T:Bar> Foo<char> for T { ... } // Impl A
+//! impl Foo<char> for uint { ... }     // Impl B
+//! ```
+//!
+//! Now, in some function, we have no where clauses in scope, and we have
+//! an obligation `$1 : Foo<$0>`. We might then conclude that `$0=char`
+//! and `$1=uint`: this is because for impl A to apply, `uint:Bar` would
+//! have to hold, and we know it does not or else the coherence check
+//! would have failed. So we might enter into our global cache: `$1 :
+//! Foo<$0> => Impl B`. Then we come along in a different scope, where a
+//! generic type `A` is around with the bound `A:Bar`. Now suddenly the
+//! impl is viable.
+//!
+//! The flaw in this imaginary DOOMSDAY SCENARIO is that we would not
+//! currently conclude that `$1 : Foo<$0>` implies that `$0 == char` and
+//! `$1 == uint`, even though it is true that (absent type parameters)
+//! there is no other type the user could enter. However, it is not
+//! *completely* implausible that we *could* draw this conclusion in the
+//! future; we wouldn't have to guess types, in particular, we could be
+//! led by the impls.
diff --git a/src/librustc/middle/traits/fulfill.rs b/src/librustc/middle/traits/fulfill.rs
index 62382ac386fcd8719411f6199cb9bd40698e981a..a22eba486e8b9f649f15bbc235d24096b5fe984a 100644
--- a/src/librustc/middle/traits/fulfill.rs
+++ b/src/librustc/middle/traits/fulfill.rs
@@ -81,20 +81,16 @@ pub fn select_all_or_error<'a>(&mut self,
         }
     }
 
+    /// Attempts to select obligations that were registered since the call to a selection routine.
+    /// This is used by the type checker to eagerly attempt to resolve obligations in hopes of
+    /// gaining type information. It'd be equally valid to use `select_where_possible` but it
+    /// results in `O(n^2)` performance (#18208).
     pub fn select_new_obligations<'a>(&mut self,
                                       infcx: &InferCtxt<'a,'tcx>,
                                       param_env: &ty::ParameterEnvironment<'tcx>,
                                       typer: &Typer<'tcx>)
                                       -> Result<(),Vec<FulfillmentError<'tcx>>>
     {
-        /*!
-         * Attempts to select obligations that were registered since
-         * the call to a selection routine. This is used by the type checker
-         * to eagerly attempt to resolve obligations in hopes of gaining
-         * type information. It'd be equally valid to use `select_where_possible`
-         * but it results in `O(n^2)` performance (#18208).
-         */
-
         let mut selcx = SelectionContext::new(infcx, param_env, typer);
         self.select(&mut selcx, true)
     }
@@ -113,16 +109,13 @@ pub fn pending_trait_obligations(&self) -> &[Obligation<'tcx>] {
         self.trait_obligations[]
     }
 
+    /// Attempts to select obligations using `selcx`. If `only_new_obligations` is true, then it
+    /// only attempts to select obligations that haven't been seen before.
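+    ///
+    /// For illustration (an editorial note, not part of the original patch):
+    /// the two public entry points funnel into this method and differ only in
+    /// the flag -- `select_new_obligations` calls `self.select(&mut selcx, true)`
+    /// as shown above, while `select_where_possible` (not shown in this hunk)
+    /// presumably passes `false` and so retries every pending obligation.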
    fn select<'a>(&mut self,
                  selcx: &mut SelectionContext<'a, 'tcx>,
                  only_new_obligations: bool)
                  -> Result<(),Vec<FulfillmentError<'tcx>>>
    {
-        /*!
-         * Attempts to select obligations using `selcx`. If
-         * `only_new_obligations` is true, then it only attempts to
-         * select obligations that haven't been seen before.
-         */
        debug!("select({} obligations, only_new_obligations={}) start",
               self.trait_obligations.len(),
               only_new_obligations);
diff --git a/src/librustc/middle/traits/mod.rs b/src/librustc/middle/traits/mod.rs
index 0a47d647890381d27ca17d249f10e1f3649ca03d..c4eeff8caf64aeca0fdfbd3361da04d87bd3c990 100644
--- a/src/librustc/middle/traits/mod.rs
+++ b/src/librustc/middle/traits/mod.rs
@@ -8,9 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * Trait Resolution. See doc.rs.
- */
+//! Trait Resolution. See doc.rs.
 
 pub use self::SelectionError::*;
 pub use self::FulfillmentErrorCode::*;
@@ -226,6 +224,10 @@ pub struct VtableParamData<'tcx> {
     pub bound: Rc<ty::TraitRef<'tcx>>,
 }
 
+/// Matches the self type of the inherent impl `impl_def_id`
+/// against `self_ty` and returns the resulting resolution. This
+/// routine may modify the surrounding type context (for example,
+/// it may unify variables).
 pub fn select_inherent_impl<'a,'tcx>(infcx: &InferCtxt<'a,'tcx>,
                                      param_env: &ty::ParameterEnvironment<'tcx>,
                                      typer: &Typer<'tcx>,
@@ -235,13 +237,6 @@ pub fn select_inherent_impl<'a,'tcx>(infcx: &InferCtxt<'a,'tcx>,
                                     -> SelectionResult<'tcx, VtableImplData<'tcx, Obligation<'tcx>>>
 {
-    /*!
-     * Matches the self type of the inherent impl `impl_def_id`
-     * against `self_ty` and returns the resulting resolution. This
-     * routine may modify the surrounding type context (for example,
-     * it may unify variables).
-     */
-
     // This routine is only suitable for inherent impls. This is
     // because it does not attempt to unify the output type parameters
     // from the trait ref against the values from the obligation.
@@ -256,53 +251,41 @@ pub fn select_inherent_impl<'a,'tcx>(infcx: &InferCtxt<'a,'tcx>,
     selcx.select_inherent_impl(impl_def_id, cause, self_ty)
 }
 
+/// True if neither the trait nor self type is local. Note that `impl_def_id` must refer to an impl
+/// of a trait, not an inherent impl.
 pub fn is_orphan_impl(tcx: &ty::ctxt,
                       impl_def_id: ast::DefId)
                       -> bool
 {
-    /*!
-     * True if neither the trait nor self type is local. Note that
-     * `impl_def_id` must refer to an impl of a trait, not an inherent
-     * impl.
-     */
-
     !coherence::impl_is_local(tcx, impl_def_id)
 }
 
+/// True if there exist types that satisfy both of the two given impls.
 pub fn overlapping_impls(infcx: &InferCtxt,
                          impl1_def_id: ast::DefId,
                          impl2_def_id: ast::DefId)
                         -> bool
 {
-    /*!
-     * True if there exist types that satisfy both of the two given impls.
-     */
-
    coherence::impl_can_satisfy(infcx, impl1_def_id, impl2_def_id) &&
    coherence::impl_can_satisfy(infcx, impl2_def_id, impl1_def_id)
 }
 
+/// Given generic bounds from an impl like:
+///
+///    impl<A:Foo, B:Bar+Qux> ...
+///
+/// along with the bindings for the types `A` and `B` (e.g., `<A=A0, B=B0>`), yields a result like
+///
+///    [[Foo for A0, Bar for B0, Qux for B0], [], []]
+///
+/// Expects that `generic_bounds` have already been fully substituted, late-bound regions liberated
+/// and so forth, so that they are in the same namespace as `type_substs`.
 pub fn obligations_for_generics<'tcx>(tcx: &ty::ctxt<'tcx>,
                                       cause: ObligationCause<'tcx>,
                                       generic_bounds: &ty::GenericBounds<'tcx>,
                                       type_substs: &subst::VecPerParamSpace<Ty<'tcx>>)
                                       -> subst::VecPerParamSpace<Obligation<'tcx>>
 {
-    /*!
-     * Given generic bounds from an impl like:
-     *
-     *    impl<A:Foo, B:Bar+Qux> ...
-     *
-     * along with the bindings for the types `A` and `B` (e.g.,
-     * `<A=A0, B=B0>`), yields a result like
-     *
-     *    [[Foo for A0, Bar for B0, Qux for B0], [], []]
-     *
-     * Expects that `generic_bounds` have already been fully
-     * substituted, late-bound regions liberated and so forth,
-     * so that they are in the same namespace as `type_substs`.
-     */
-
     util::obligations_for_generics(tcx, cause, 0, generic_bounds, type_substs)
 }
diff --git a/src/librustc/middle/traits/select.rs b/src/librustc/middle/traits/select.rs
index d1cc851c41f20efdeaeb84ddb90bced2d6a66d5a..f49cd2dd19f7f0692f0c05f7f50f4df7d8b3284e 100644
--- a/src/librustc/middle/traits/select.rs
+++ b/src/librustc/middle/traits/select.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*! See `doc.rs` for high-level documentation */
+//! See `doc.rs` for high-level documentation
 #![allow(dead_code)] // FIXME -- just temporarily
 
 pub use self::MethodMatchResult::*;
@@ -201,15 +201,11 @@ pub fn tcx(&self) -> &'cx ty::ctxt<'tcx> {
     // is `Vec:Iterable`, but the impl specifies
     // `impl Iterable for Vec`, then an error would result.
 
+    /// Evaluates whether the obligation can be satisfied. Returns an indication of whether the
+    /// obligation can be satisfied and, if so, by what means. Never affects surrounding typing
+    /// environment.
     pub fn select(&mut self, obligation: &Obligation<'tcx>)
                   -> SelectionResult<'tcx, Selection<'tcx>> {
-        /*!
-         * Evaluates whether the obligation can be satisfied. Returns
-         * an indication of whether the obligation can be satisfied
-         * and, if so, by what means. Never affects surrounding typing
-         * environment.
-         */
-
         debug!("select({})", obligation.repr(self.tcx()));
         assert!(!obligation.trait_ref.has_escaping_regions());
@@ -253,15 +249,11 @@ pub fn select_inherent_impl(&mut self,
     // The result is "true" if the obligation *may* hold and "false" if
     // we can be sure it does not.
 
+    /// Evaluates whether the obligation `obligation` can be satisfied (by any means).
     pub fn evaluate_obligation(&mut self,
                                obligation: &Obligation<'tcx>)
                                -> bool
     {
-        /*!
-         * Evaluates whether the obligation `obligation` can be
-         * satisfied (by any means).
-         */
-
         debug!("evaluate_obligation({})",
                obligation.repr(self.tcx()));
         assert!(!obligation.trait_ref.has_escaping_regions());
@@ -387,17 +379,13 @@ fn evaluate_stack<'o>(&mut self,
         }
     }
 
+    /// Evaluates whether the impl with id `impl_def_id` could be applied to the self type
+    /// `obligation_self_ty`. This can be used either for trait or inherent impls.
     pub fn evaluate_impl(&mut self,
                          impl_def_id: ast::DefId,
                          obligation: &Obligation<'tcx>)
                         -> bool
    {
-        /*!
-         * Evaluates whether the impl with id `impl_def_id` could be
-         * applied to the self type `obligation_self_ty`. This can be
-         * used either for trait or inherent impls.
-         */
-
        debug!("evaluate_impl(impl_def_id={}, obligation={})",
               impl_def_id.repr(self.tcx()),
               obligation.repr(self.tcx()));
@@ -435,23 +423,20 @@ pub fn evaluate_impl(&mut self,
    // the body of `evaluate_method_obligation()` for more details on
    // the algorithm.
 
+    /// Determine whether a trait-method is applicable to a receiver of
+    /// type `rcvr_ty`. *Does not affect the inference state.*
+    ///
+    /// - `rcvr_ty` -- type of the receiver
+    /// - `xform_self_ty` -- transformed self type declared on the method, with `Self`
+    ///   replaced by a fresh type variable
+    /// - `obligation` -- a reference to the trait where the method is declared, with
+    ///   the input types on the trait replaced with fresh type variables
    pub fn evaluate_method_obligation(&mut self,
                                      rcvr_ty: Ty<'tcx>,
                                      xform_self_ty: Ty<'tcx>,
                                      obligation: &Obligation<'tcx>)
                                      -> MethodMatchResult
    {
-        /*!
-         * Determine whether a trait-method is applicable to a receiver of
-         * type `rcvr_ty`. *Does not affect the inference state.*
-         *
-         * - `rcvr_ty` -- type of the receiver
-         * - `xform_self_ty` -- transformed self type declared on the method, with `Self`
-         *   to a fresh type variable
-         * - `obligation` -- a reference to the trait where the method is declared, with
-         *   the input types on the trait replaced with fresh type variables
-         */
-
        // Here is the situation. We have a trait method declared (say) like so:
        //
        //     trait TheTrait {
@@ -563,19 +548,15 @@ pub fn evaluate_method_obligation(&mut self,
        }
    }
 
+    /// Given the successful result of a method match, this function "confirms" the result, which
+    /// basically repeats the various matching operations, but outside of any snapshot so that
+    /// their effects are committed into the inference state.
    pub fn confirm_method_match(&mut self,
                                rcvr_ty: Ty<'tcx>,
                                xform_self_ty: Ty<'tcx>,
                                obligation: &Obligation<'tcx>,
                                data: MethodMatchedData)
    {
-        /*!
-         * Given the successful result of a method match, this
-         * function "confirms" the result, which basically repeats the
-         * various matching operations, but outside of any snapshot so
-         * that their effects are committed into the inference state.
-         */
-
        let is_ok = match data {
            PreciseMethodMatch => {
                self.match_method_precise(rcvr_ty, xform_self_ty, obligation).is_ok()
@@ -597,17 +578,14 @@ pub fn confirm_method_match(&mut self,
        }
    }
 
+    /// Implements the *precise method match* procedure described in
+    /// `evaluate_method_obligation()`.
    fn match_method_precise(&mut self,
                            rcvr_ty: Ty<'tcx>,
                            xform_self_ty: Ty<'tcx>,
                            obligation: &Obligation<'tcx>)
                            -> Result<(),()>
    {
-        /*!
-         * Implements the *precise method match* procedure described in
-         * `evaluate_method_obligation()`.
-         */
-
        self.infcx.commit_if_ok(|| {
            match self.infcx.sub_types(false, infer::RelateSelfType(obligation.cause.span),
                                       rcvr_ty, xform_self_ty) {
@@ -623,18 +601,14 @@ fn match_method_precise(&mut self,
        })
    }
 
+    /// Assembles a list of potentially applicable impls using the *coercive match* procedure
+    /// described in `evaluate_method_obligation()`.
    fn assemble_method_candidates_from_impls(&mut self,
                                             rcvr_ty: Ty<'tcx>,
                                             xform_self_ty: Ty<'tcx>,
                                             obligation: &Obligation<'tcx>)
                                             -> Vec
    {
-        /*!
-         * Assembles a list of potentially applicable impls using the
-         * *coercive match* procedure described in
-         * `evaluate_method_obligation()`.
-         */
-
        let mut candidates = Vec::new();
 
        let all_impls = self.all_impls(obligation.trait_ref.def_id);
@@ -650,6 +624,8 @@ fn assemble_method_candidates_from_impls(&mut self,
        candidates
    }
 
+    /// Applies the *coercive match* procedure described in `evaluate_method_obligation()` to a
+    /// particular impl.
    fn match_method_coerce(&mut self,
                           impl_def_id: ast::DefId,
                           rcvr_ty: Ty<'tcx>,
@@ -657,11 +633,6 @@ fn match_method_coerce(&mut self,
                           obligation: &Obligation<'tcx>)
                           -> Result<Substs<'tcx>, ()>
    {
-        /*!
-         * Applies the *coercive match* procedure described in
-         * `evaluate_method_obligation()` to a particular impl.
-         */
-
        // This is almost always expected to succeed. It
        // causes the impl's self-type etc to be unified with
        // the type variable that is shared between
@@ -683,6 +654,8 @@ fn match_method_coerce(&mut self,
        Ok(substs)
    }
 
+    /// A version of `winnow_impl` applicable to coercive method matching. This is basically the
+    /// same as `winnow_impl` but it uses the method matching procedure and is specific to impls.
    fn winnow_method_impl(&mut self,
                          impl_def_id: ast::DefId,
                          rcvr_ty: Ty<'tcx>,
@@ -690,13 +663,6 @@ fn winnow_method_impl(&mut self,
                          obligation: &Obligation<'tcx>)
                          -> bool
    {
-        /*!
-         * A version of `winnow_impl` applicable to coerice method
-         * matching. This is basically the same as `winnow_impl` but
-         * it uses the method matching procedure and is specific to
-         * impls.
-         */
-
        debug!("winnow_method_impl: impl_def_id={} rcvr_ty={} xform_self_ty={} obligation={}",
               impl_def_id.repr(self.tcx()),
               rcvr_ty.repr(self.tcx()),
@@ -962,19 +928,15 @@ fn assemble_candidates<'o>(&mut self,
        Ok(candidates)
    }
 
+    /// Given an obligation like ``, search the obligations that the caller
+    /// supplied to find out whether it is listed among them.
+    ///
+    /// Never affects inference environment.
    fn assemble_candidates_from_caller_bounds(&mut self,
                                              obligation: &Obligation<'tcx>,
                                              candidates: &mut CandidateSet<'tcx>)
                                              -> Result<(),SelectionError<'tcx>>
    {
-        /*!
-         * Given an obligation like ``, search the obligations
-         * that the caller supplied to find out whether it is listed among
-         * them.
-         *
-         * Never affects inference environment.
-         */
-
        debug!("assemble_candidates_from_caller_bounds({})",
               obligation.repr(self.tcx()));
@@ -1002,22 +964,17 @@ fn assemble_candidates_from_caller_bounds(&mut self,
        Ok(())
    }
 
+    /// Check for the artificial impl that the compiler will create for an obligation like `X :
+    /// FnMut<..>` where `X` is an unboxed closure type.
+    ///
+    /// Note: the type parameters on an unboxed closure candidate are modeled as *output* type
+    /// parameters and hence do not affect whether this trait is a match or not. They will be
+    /// unified during the confirmation step.
    fn assemble_unboxed_candidates(&mut self,
                                   obligation: &Obligation<'tcx>,
                                   candidates: &mut CandidateSet<'tcx>)
                                   -> Result<(),SelectionError<'tcx>>
    {
-        /*!
-         * Check for the artificial impl that the compiler will create
-         * for an obligation like `X : FnMut<..>` where `X` is an
-         * unboxed closure type.
-         *
-         * Note: the type parameters on an unboxed closure candidate
-         * are modeled as *output* type parameters and hence do not
-         * affect whether this trait is a match or not. They will be
-         * unified during the confirmation step.
-         */
-
        let tcx = self.tcx();
        let kind = if Some(obligation.trait_ref.def_id) == tcx.lang_items.fn_trait() {
            ty::FnUnboxedClosureKind
@@ -1060,15 +1017,12 @@ fn assemble_unboxed_candidates(&mut self,
        Ok(())
    }
 
+    /// Search for impls that might apply to `obligation`.
    fn assemble_candidates_from_impls(&mut self,
                                      obligation: &Obligation<'tcx>,
                                      candidates: &mut CandidateSet<'tcx>)
                                      -> Result<(), SelectionError<'tcx>>
    {
-        /*!
-         * Search for impls that might apply to `obligation`.
-         */
-
        let all_impls = self.all_impls(obligation.trait_ref.def_id);
        for &impl_def_id in all_impls.iter() {
            self.infcx.probe(|| {
@@ -1092,17 +1046,14 @@ fn assemble_candidates_from_impls(&mut self,
    // attempt to evaluate recursive bounds to see if they are
    // satisfied.
 
+    /// Further evaluate `candidate` to decide whether all type parameters match and whether nested
+    /// obligations are met. Returns true if `candidate` remains viable after this further
+    /// scrutiny.
    fn winnow_candidate<'o>(&mut self,
                            stack: &ObligationStack<'o, 'tcx>,
                            candidate: &Candidate<'tcx>)
                            -> EvaluationResult
    {
-        /*!
-         * Further evaluate `candidate` to decide whether all type parameters match
-         * and whether nested obligations are met. Returns true if `candidate` remains
-         * viable after this further scrutiny.
-         */
-
        debug!("winnow_candidate: candidate={}", candidate.repr(self.tcx()));
        self.infcx.probe(|| {
            let candidate = (*candidate).clone();
@@ -1129,37 +1080,35 @@ fn winnow_selection<'o>(&mut self,
            result
    }
 
+    /// Returns true if `candidate_i` should be dropped in favor of `candidate_j`.
+    ///
+    /// This is generally true if either:
+    /// - candidate i and candidate j are equivalent; or,
+    /// - candidate i is a concrete impl and candidate j is a where clause bound,
+    ///   and the concrete impl is applicable to the types in the where clause bound.
+    ///
+    /// The last case refers to cases where there are blanket impls (often conditional
+    /// blanket impls) as well as a where clause. This can come down to one of two cases:
+    ///
+    /// - The impl is truly unconditional (it has no where clauses
+    ///   of its own), in which case the where clause is
+    ///   unnecessary, because coherence requires that we would
+    ///   pick that particular impl anyhow (at least so long as we
+    ///   don't have specialization).
+    ///
+    /// - The impl is conditional, in which case we may not have winnowed it out
+    ///   because we don't know if the conditions apply, but the where clause is basically
+    ///   telling us that there is some impl, though not necessarily the one we see.
+    ///
+    /// In both cases we prefer to take the where clause, which is
+    /// essentially harmless. See issue #18453 for more details of
+    /// a case where doing the opposite caused us harm.
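+    ///
+    /// As a concrete illustration (an editorial sketch, not part of the original
+    /// patch -- the trait and function names are hypothetical), the #18453-style
+    /// shape is roughly:
+    ///
+    /// ```ignore
+    /// trait Foo { fn foo(&self); }
+    /// impl<T:Clone> Foo for T { /* conditional blanket impl */ }
+    ///
+    /// fn use_it<T:Foo>(t: T) { t.foo() }  // where clause `T : Foo` in scope
+    /// ```
+    ///
+    /// Inside `use_it`, both the blanket impl and the where clause are candidates
+    /// for the obligation `T : Foo`; per the rules above, the where clause wins.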
    fn candidate_should_be_dropped_in_favor_of<'o>(&mut self,
                                                   stack: &ObligationStack<'o, 'tcx>,
                                                   candidate_i: &Candidate<'tcx>,
                                                   candidate_j: &Candidate<'tcx>)
                                                   -> bool
    {
-        /*!
-         * Returns true if `candidate_i` should be dropped in favor of `candidate_j`.
-         * This is generally true if either:
-         * - candidate i and candidate j are equivalent; or,
-         * - candidate i is a conrete impl and candidate j is a where clause bound,
-         *   and the concrete impl is applicable to the types in the where clause bound.
-         *
-         * The last case refers to cases where there are blanket impls (often conditional
-         * blanket impls) as well as a where clause. This can come down to one of two cases:
-         *
-         * - The impl is truly unconditional (it has no where clauses
-         *   of its own), in which case the where clause is
-         *   unnecessary, because coherence requires that we would
-         *   pick that particular impl anyhow (at least so long as we
-         *   don't have specialization).
-         *
-         * - The impl is conditional, in which case we may not have winnowed it out
-         *   because we don't know if the conditions apply, but the where clause is basically
-         *   telling us taht there is some impl, though not necessarily the one we see.
-         *
-         * In both cases we prefer to take the where clause, which is
-         * essentially harmless. See issue #18453 for more details of
-         * a case where doing the opposite caused us harm.
-         */
-
        match (candidate_i, candidate_j) {
            (&ImplCandidate(impl_def_id), &ParamCandidate(ref vt)) => {
                debug!("Considering whether to drop param {} in favor of impl {}",
@@ -1848,26 +1797,23 @@ fn match_trait_refs(&mut self,
        }
    }
 
+    /// Determines whether the self type declared against
+    /// `impl_def_id` matches `obligation_self_ty`. If successful,
+    /// returns the substitutions used to make them match. See
+    /// `match_impl()`. For example, if `impl_def_id` is declared
+    /// as:
+    ///
+    ///    impl<T> Foo for ~T { ... }
+    ///
+    /// and `obligation_self_ty` is `int`, we'd get back an `Err(_)`
+    /// result. But if `obligation_self_ty` were `~int`, we'd get
+    /// back `Ok(T=int)`.
    fn match_inherent_impl(&mut self,
                           impl_def_id: ast::DefId,
                           obligation_cause: ObligationCause,
                           obligation_self_ty: Ty<'tcx>)
                           -> Result<Substs<'tcx>,()>
    {
-        /*!
-         * Determines whether the self type declared against
-         * `impl_def_id` matches `obligation_self_ty`. If successful,
-         * returns the substitutions used to make them match. See
-         * `match_impl()`. For example, if `impl_def_id` is declared
-         * as:
-         *
-         *    impl<T> Foo for ~T { ... }
-         *
-         * and `obligation_self_ty` is `int`, we'd back an `Err(_)`
-         * result. But if `obligation_self_ty` were `~int`, we'd get
-         * back `Ok(T=int)`.
-         */
-
        // Create fresh type variables for each type parameter declared
        // on the impl etc.
        let impl_substs = util::fresh_substs_for_impl(self.infcx,
@@ -1928,6 +1874,19 @@ fn match_self_types(&mut self,
    // the output type parameters from the obligation with those found
    // on the impl/bound, which may yield type errors.
 
+    /// Relates the output type parameters from an impl to the
+    /// trait. This may lead to type errors. The confirmation step
+    /// is separated from the main match procedure because these
+    /// type errors do not cause us to select another impl.
+    ///
+    /// As an example, consider matching the obligation
+    /// `Iterator<char> for Elems<int>` using the following impl:
+    ///
+    ///    impl<T> Iterator<T> for Elems<T> { ... }
+    ///
+    /// The match phase will succeed with substitution `T=int`.
+    /// The confirm step will then try to unify `int` and `char`
+    /// and yield an error.
    fn confirm_impl_vtable(&mut self,
                           impl_def_id: ast::DefId,
                           obligation_cause: ObligationCause<'tcx>,
@@ -1935,22 +1894,6 @@ fn confirm_impl_vtable(&mut self,
                           substs: &Substs<'tcx>)
                           -> Result<(), SelectionError<'tcx>>
    {
-        /*!
-         * Relates the output type parameters from an impl to the
-         * trait. This may lead to type errors. The confirmation step
-         * is separated from the main match procedure because these
-         * type errors do not cause us to select another impl.
-         *
-         * As an example, consider matching the obligation
-         * `Iterator<char> for Elems<int>` using the following impl:
-         *
-         *    impl<T> Iterator<T> for Elems<T> { ... }
-         *
-         * The match phase will succeed with substitution `T=int`.
-         * The confirm step will then try to unify `int` and `char`
-         * and yield an error.
-         */
-
        let impl_trait_ref = ty::impl_trait_ref(self.tcx(),
                                                impl_def_id).unwrap();
        let impl_trait_ref = impl_trait_ref.subst(self.tcx(),
@@ -1958,38 +1901,30 @@ fn confirm_impl_vtable(&mut self,
        self.confirm(obligation_cause, obligation_trait_ref, impl_trait_ref)
    }
 
+    /// After we have determined which impl applies, and with what substitutions, there is one last
+    /// step. We have to go back and relate the "output" type parameters from the obligation to the
+    /// types that are specified in the impl.
+    ///
+    /// For example, imagine we have:
+    ///
+    ///    impl<T> Iterator<T> for Vec<T> { ... }
+    ///
+    /// and our obligation is `Iterator<char> for Vec<int>` (note the mismatch in the obligation
+    /// types). Up until this step, no error would be reported: the self type is `Vec<int>`, and
+    /// that matches `Vec<T>` with the substitution `T=int`. At this stage, we could then go and
+    /// check that the type parameters to the `Iterator` trait match. (In terms of the parameters,
+    /// the `expected_trait_ref` here would be `Iterator<int> for Vec<int>`, and the
+    /// `obligation_trait_ref` would be `Iterator<char> for Vec<int>`.)
+    ///
+    /// Note that this checking occurs *after* the impl has been selected, because these output
+    /// type parameters should not affect the selection of the impl. Therefore, if there is a
+    /// mismatch, we report an error to the user.
    fn confirm(&mut self,
               obligation_cause: ObligationCause,
               obligation_trait_ref: Rc<ty::TraitRef<'tcx>>,
               expected_trait_ref: Rc<ty::TraitRef<'tcx>>)
               -> Result<(), SelectionError<'tcx>>
    {
-        /*!
-         * After we have determined which impl applies, and with what
-         * substitutions, there is one last step. We have to go back
-         * and relate the "output" type parameters from the obligation
-         * to the types that are specified in the impl.
-         *
-         * For example, imagine we have:
-         *
-         *     impl<T> Iterator<T> for Vec<T> { ... }
-         *
-         * and our obligation is `Iterator<char> for Vec<int>` (note
-         * the mismatch in the obligation types). Up until this step,
-         * no error would be reported: the self type is `Vec<int>`,
-         * and that matches `Vec<T>` with the substitution `T=int`.
-         * At this stage, we could then go and check that the type
-         * parameters to the `Iterator` trait match.
-         * (In terms of the parameters, the `expected_trait_ref`
-         * here would be `Iterator<int> for Vec<int>`, and the
-         * `obligation_trait_ref` would be `Iterator<char> for Vec<int>`.
-         *
-         * Note that this checking occurs *after* the impl has
-         * selected, because these output type parameters should not
-         * affect the selection of the impl. Therefore, if there is a
-         * mismatch, we report an error to the user.
-         */
-
        let origin = infer::RelateOutputImplTypes(obligation_cause.span);
 
        let obligation_trait_ref = obligation_trait_ref.clone();
@@ -2019,11 +1954,8 @@ fn push_stack<'o,'s:'o>(&mut self,
        }
    }
 
+    /// Returns set of all impls for a given trait.
    fn all_impls(&self, trait_def_id: ast::DefId) -> Vec<ast::DefId> {
-        /*!
-         * Returns set of all impls for a given trait.
-         */
-
        ty::populate_implementations_for_trait_if_necessary(self.tcx(), trait_def_id);
        match self.tcx().trait_impls.borrow().get(&trait_def_id) {
diff --git a/src/librustc/middle/traits/util.rs b/src/librustc/middle/traits/util.rs
index ec49d5010562b8d966a00e63bca3083794030a78..b9e694ff4e2b2780ee11a64d7f5df732139ddf72 100644
--- a/src/librustc/middle/traits/util.rs
+++ b/src/librustc/middle/traits/util.rs
@@ -42,22 +42,18 @@ pub fn supertraits<'cx, 'tcx>(tcx: &'cx ty::ctxt<'tcx>,
                              trait_ref: Rc<ty::TraitRef<'tcx>>)
                              -> Supertraits<'cx, 'tcx>
{
-    /*!
-     * Returns an iterator over the trait reference `T` and all of its
-     * supertrait references. May contain duplicates. In general
-     * the ordering is not defined.
-     *
-     * Example:
-     *
-     * ```
-     * trait Foo { ... }
-     * trait Bar : Foo { ... }
-     * trait Baz : Bar+Foo { ... }
-     * ```
-     *
-     * `supertraits(Baz)` yields `[Baz, Bar, Foo, Foo]` in some order.
-     */
-
+    /// Returns an iterator over the trait reference `T` and all of its supertrait references. May
+    /// contain duplicates. In general the ordering is not defined.
+    ///
+    /// Example:
+    ///
+    /// ```
+    /// trait Foo { ... }
+    /// trait Bar : Foo { ... }
+    /// trait Baz : Bar+Foo { ... }
+    /// ```
+    ///
+    /// `supertraits(Baz)` yields `[Baz, Bar, Foo, Foo]` in some order.
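+    /// (`Foo` appears twice here because it is reachable both directly from
+    /// `Baz` and again via `Bar`.)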
transitive_bounds(tcx, &[trait_ref]) } @@ -97,12 +93,8 @@ fn push(&mut self, trait_ref: &ty::TraitRef<'tcx>) { self.stack.push(entry); } + /// Returns the path taken through the trait supertraits to reach the current point. pub fn indices(&self) -> Vec { - /*! - * Returns the path taken through the trait supertraits to - * reach the current point. - */ - self.stack.iter().map(|e| e.position).collect() } } @@ -171,6 +163,7 @@ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { } } +/// See `super::obligations_for_generics` pub fn obligations_for_generics<'tcx>(tcx: &ty::ctxt<'tcx>, cause: ObligationCause<'tcx>, recursion_depth: uint, @@ -178,7 +171,6 @@ pub fn obligations_for_generics<'tcx>(tcx: &ty::ctxt<'tcx>, type_substs: &VecPerParamSpace>) -> VecPerParamSpace> { - /*! See `super::obligations_for_generics` */ debug!("obligations_for_generics(generic_bounds={}, type_substs={})", generic_bounds.repr(tcx), type_substs.repr(tcx)); @@ -272,20 +264,15 @@ pub fn obligation_for_builtin_bound<'tcx>( } } +/// Starting from a caller obligation `caller_bound` (which has coordinates `space`/`i` in the list +/// of caller obligations), search through the trait and supertraits to find one where `test(d)` is +/// true, where `d` is the def-id of the trait/supertrait. If any is found, return `Some(p)` where +/// `p` is the path to that trait/supertrait. Else `None`. pub fn search_trait_and_supertraits_from_bound<'tcx>(tcx: &ty::ctxt<'tcx>, caller_bound: Rc>, test: |ast::DefId| -> bool) -> Option> { - /*! - * Starting from a caller obligation `caller_bound` (which has - * coordinates `space`/`i` in the list of caller obligations), - * search through the trait and supertraits to find one where - * `test(d)` is true, where `d` is the def-id of the - * trait/supertrait. If any is found, return `Some(p)` where `p` - * is the path to that trait/supertrait. Else `None`. - */ - for bound in transitive_bounds(tcx, &[caller_bound]) { if test(bound.def_id) { let vtable_param = VtableParamData { bound: bound }; diff --git a/src/librustc/middle/ty.rs b/src/librustc/middle/ty.rs index 2c8465e62d7c3257a3cdb1207e7d416e5a2c9360..b79bce62f0b77ed3a0fe182b69d1eb607e897a33 100644 --- a/src/librustc/middle/ty.rs +++ b/src/librustc/middle/ty.rs @@ -671,39 +671,29 @@ pub fn type_has_late_bound_regions(ty: Ty) -> bool { ty.flags.intersects(HAS_RE_LATE_BOUND) } +/// An "escaping region" is a bound region whose binder is not part of `t`. +/// +/// So, for example, consider a type like the following, which has two binders: +/// +/// for<'a> fn(x: for<'b> fn(&'a int, &'b int)) +/// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ outer scope +/// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner scope +/// +/// This type has *bound regions* (`'a`, `'b`), but it does not have escaping regions, because the +/// binders of both `'a` and `'b` are part of the type itself. However, if we consider the *inner +/// fn type*, that type has an escaping region: `'a`. +/// +/// Note that what I'm calling an "escaping region" is often just called a "free region". However, +/// we already use the term "free region". It refers to the regions that we use to represent bound +/// regions on a fn definition while we are typechecking its body. +/// +/// To clarify, conceptually there is no particular difference between an "escaping" region and a +/// "free" region. However, there is a big difference in practice. 
Basically, when "entering" a
+/// binding level, one is generally required to do some sort of processing to a bound region, such
+/// as replacing it with a fresh/skolemized region, or making an entry in the environment to
+/// represent the scope to which it is attached, etc. An escaping region represents a bound region
+/// for which this processing has not yet been done.
 pub fn type_has_escaping_regions(ty: Ty) -> bool {
-    /*!
-     * An "escaping region" is a bound region whose binder is not part of `t`.
-     *
-     * So, for example, consider a type like the following, which has two
-     * binders:
-     *
-     *   for<'a> fn(x: for<'b> fn(&'a int, &'b int))
-     *   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ outer scope
-     *                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner scope
-     *
-     * This type has *bound regions* (`'a`, `'b`), but it does not
-     * have escaping regions, because the binders of both `'a` and
-     * `'b` are part of the type itself. However, if we consider the
-     * *inner fn type*, that type has an escaping region: `'a`.
-     *
-     * Note that what I'm calling an "escaping region" is often just
-     * called a "free region". However, we already use the term "free
-     * region". It refers to the regions that we use to represent
-     * bound regions on a fn definition while we are typechecking its
-     * body.
-     *
-     * To clarify, conceptually there is no particular difference
-     * between an "escaping" region and a "free" region. However,
-     * there is a big difference in practice. Basically, when
-     * "entering" a binding level, one is generally required to do
-     * some sort of processing to a bound region, such as replacing it
-     * with a fresh/skolemized region, or making an entry in the
-     * environment to represent the scope to which it is attached,
-     * etc. An escaping region represents a bound region for which
-     * this processing has not yet been done.
-     */
-
    type_escapes_depth(ty, 0)
}
@@ -1248,11 +1238,8 @@ pub fn all_builtin_bounds() -> BuiltinBounds {
    set
}
+/// An existential bound that does not implement any traits.
pub fn region_existential_bound(r: ty::Region) -> ExistentialBounds {
-    /*!
-     * An existential bound that does not implement any traits.
-     */
-
    ty::ExistentialBounds { region_bound: r, builtin_bounds: empty_builtin_bounds() }
}
@@ -1834,12 +1821,9 @@ fn add_depth(&mut self, depth: uint) {
    }
}
+    /// Adds the flags/depth from a set of types that appear within the current type, but within a
+    /// region binder.
    fn add_bound_computation(&mut self, computation: &FlagComputation) {
-        /*!
-         * Adds the flags/depth from a set of types that appear within
-         * the current type, but within a region binder.
-         */
-
        self.add_flags(computation.flags);
        // The types that contributed to `computation` occurred within
- * Includes only those bits that still apply - * when indirected through a reference (`&`) - */ bits | ( *self & TC::ReachesAll) } + /// Includes only those bits that still apply when indirected through a managed pointer (`@`) pub fn managed_pointer(&self) -> TypeContents { - /*! - * Includes only those bits that still apply - * when indirected through a managed pointer (`@`) - */ TC::Managed | ( *self & TC::ReachesAll) } + /// Includes only those bits that still apply when indirected through an unsafe pointer (`*`) pub fn unsafe_pointer(&self) -> TypeContents { - /*! - * Includes only those bits that still apply - * when indirected through an unsafe pointer (`*`) - */ *self & TC::ReachesAll } @@ -2883,14 +2855,10 @@ fn apply_lang_items(cx: &ctxt, } } + /// Type contents due to containing a reference with the region `region` and borrow kind `bk` fn borrowed_contents(region: ty::Region, mutbl: ast::Mutability) -> TypeContents { - /*! - * Type contents due to containing a reference - * with the region `region` and borrow kind `bk` - */ - let b = match mutbl { ast::MutMutable => TC::ReachesMutable | TC::OwnsAffine, ast::MutImmutable => TC::None, @@ -3648,20 +3616,16 @@ pub fn expr_ty_opt<'tcx>(cx: &ctxt<'tcx>, expr: &ast::Expr) -> Option> return node_id_to_type_opt(cx, expr.id); } +/// Returns the type of `expr`, considering any `AutoAdjustment` +/// entry recorded for that expression. +/// +/// It would almost certainly be better to store the adjusted ty in with +/// the `AutoAdjustment`, but I opted not to do this because it would +/// require serializing and deserializing the type and, although that's not +/// hard to do, I just hate that code so much I didn't want to touch it +/// unless it was to fix it properly, which seemed a distraction from the +/// task at hand! -nmatsakis pub fn expr_ty_adjusted<'tcx>(cx: &ctxt<'tcx>, expr: &ast::Expr) -> Ty<'tcx> { - /*! - * - * Returns the type of `expr`, considering any `AutoAdjustment` - * entry recorded for that expression. - * - * It would almost certainly be better to store the adjusted ty in with - * the `AutoAdjustment`, but I opted not to do this because it would - * require serializing and deserializing the type and, although that's not - * hard to do, I just hate that code so much I didn't want to touch it - * unless it was to fix it properly, which seemed a distraction from the - * task at hand! -nmatsakis - */ - adjust_ty(cx, expr.span, expr.id, expr_ty(cx, expr), cx.adjustments.borrow().get(&expr.id), |method_call| cx.method_map.borrow().get(&method_call).map(|method| method.ty)) @@ -3707,6 +3671,7 @@ pub fn local_var_name_str(cx: &ctxt, id: NodeId) -> InternedString { } } +/// See `expr_ty_adjusted` pub fn adjust_ty<'tcx>(cx: &ctxt<'tcx>, span: Span, expr_id: ast::NodeId, @@ -3714,7 +3679,6 @@ pub fn adjust_ty<'tcx>(cx: &ctxt<'tcx>, adjustment: Option<&AutoAdjustment<'tcx>>, method_type: |typeck::MethodCall| -> Option>) -> Ty<'tcx> { - /*! See `expr_ty_adjusted` */ match unadjusted_ty.sty { ty_err => return unadjusted_ty, @@ -4128,16 +4092,11 @@ pub fn ty_sort_string<'tcx>(cx: &ctxt<'tcx>, ty: Ty<'tcx>) -> String { } } +/// Explains the source of a type err in a short, human readable way. This is meant to be placed +/// in parentheses after some larger message. You should also invoke `note_and_explain_type_err()` +/// afterwards to present additional details, particularly when it comes to lifetime-related +/// errors. pub fn type_err_to_str<'tcx>(cx: &ctxt<'tcx>, err: &type_err<'tcx>) -> String { - /*! 
- * - * Explains the source of a type err in a short, - * human readable way. This is meant to be placed in - * parentheses after some larger message. You should - * also invoke `note_and_explain_type_err()` afterwards - * to present additional details, particularly when - * it comes to lifetime-related errors. */ - fn tstore_to_closure(s: &TraitStore) -> String { match s { &UniqTraitStore => "proc".to_string(), @@ -4352,21 +4311,16 @@ pub fn provided_trait_methods<'tcx>(cx: &ctxt<'tcx>, id: ast::DefId) } } +/// Helper for looking things up in the various maps that are populated during typeck::collect +/// (e.g., `cx.impl_or_trait_items`, `cx.tcache`, etc). All of these share the pattern that if the +/// id is local, it should have been loaded into the map by the `typeck::collect` phase. If the +/// def-id is external, then we have to go consult the crate loading code (and cache the result for +/// the future). fn lookup_locally_or_in_crate_store( descr: &str, def_id: ast::DefId, map: &mut DefIdMap, load_external: || -> V) -> V { - /*! - * Helper for looking things up in the various maps - * that are populated during typeck::collect (e.g., - * `cx.impl_or_trait_items`, `cx.tcache`, etc). All of these share - * the pattern that if the id is local, it should have - * been loaded into the map by the `typeck::collect` phase. - * If the def-id is external, then we have to go consult - * the crate loading code (and cache the result for the future). - */ - match map.get(&def_id).cloned() { Some(v) => { return v; } None => { } @@ -5238,19 +5192,16 @@ pub fn each_bound_trait_and_supertraits<'tcx>(tcx: &ctxt<'tcx>, return true; } +/// Given a type which must meet the builtin bounds and trait bounds, returns a set of lifetimes +/// which the type must outlive. +/// +/// Requires that trait definitions have been processed. pub fn required_region_bounds<'tcx>(tcx: &ctxt<'tcx>, region_bounds: &[ty::Region], builtin_bounds: BuiltinBounds, trait_bounds: &[Rc>]) -> Vec { - /*! - * Given a type which must meet the builtin bounds and trait - * bounds, returns a set of lifetimes which the type must outlive. - * - * Requires that trait definitions have been processed. - */ - let mut all_bounds = Vec::new(); debug!("required_region_bounds(builtin_bounds={}, trait_bounds={})", @@ -5636,13 +5587,9 @@ pub fn to_string(self) -> &'static str { } } +/// Construct a parameter environment suitable for static contexts or other contexts where there +/// are no free type/lifetime parameters in scope. pub fn empty_parameter_environment<'tcx>() -> ParameterEnvironment<'tcx> { - /*! - * Construct a parameter environment suitable for static contexts - * or other contexts where there are no free type/lifetime - * parameters in scope. - */ - ty::ParameterEnvironment { free_substs: Substs::empty(), bounds: VecPerParamSpace::empty(), caller_obligations: VecPerParamSpace::empty(), @@ -5650,6 +5597,7 @@ pub fn empty_parameter_environment<'tcx>() -> ParameterEnvironment<'tcx> { selection_cache: traits::SelectionCache::new(), } } +/// See `ParameterEnvironment` struct def'n for details pub fn construct_parameter_environment<'tcx>( tcx: &ctxt<'tcx>, span: Span, @@ -5657,7 +5605,6 @@ pub fn construct_parameter_environment<'tcx>( free_id: ast::NodeId) -> ParameterEnvironment<'tcx> { - /*! See `ParameterEnvironment` struct def'n for details */ // // Construct the free substs. 
@@ -5786,15 +5733,11 @@ pub fn from_mutbl(m: ast::Mutability) -> BorrowKind { } } + /// Returns a mutability `m` such that an `&m T` pointer could be used to obtain this borrow + /// kind. Because borrow kinds are richer than mutabilities, we sometimes have to pick a + /// mutability that is stronger than necessary so that it at least *would permit* the borrow in + /// question. pub fn to_mutbl_lossy(self) -> ast::Mutability { - /*! - * Returns a mutability `m` such that an `&m T` pointer could - * be used to obtain this borrow kind. Because borrow kinds - * are richer than mutabilities, we sometimes have to pick a - * mutability that is stronger than necessary so that it at - * least *would permit* the borrow in question. - */ - match self { MutBorrow => ast::MutMutable, ImmBorrow => ast::MutImmutable, @@ -5959,6 +5902,8 @@ pub fn is_identity(&self) -> bool { } } +/// Replace any late-bound regions bound in `value` with free variants attached to scope-id +/// `scope_id`. pub fn liberate_late_bound_regions<'tcx, HR>( tcx: &ty::ctxt<'tcx>, scope: region::CodeExtent, @@ -5966,31 +5911,23 @@ pub fn liberate_late_bound_regions<'tcx, HR>( -> HR where HR : HigherRankedFoldable<'tcx> { - /*! - * Replace any late-bound regions bound in `value` with free variants - * attached to scope-id `scope_id`. - */ - replace_late_bound_regions( tcx, value, |br, _| ty::ReFree(ty::FreeRegion{scope: scope, bound_region: br})).0 } +/// Replace any late-bound regions bound in `value` with `'static`. Useful in trans but also +/// method lookup and a few other places where precise region relationships are not required. pub fn erase_late_bound_regions<'tcx, HR>( tcx: &ty::ctxt<'tcx>, value: &HR) -> HR where HR : HigherRankedFoldable<'tcx> { - /*! - * Replace any late-bound regions bound in `value` with `'static`. - * Useful in trans but also method lookup and a few other places - * where precise region relationships are not required. - */ - replace_late_bound_regions(tcx, value, |_, _| ty::ReStatic).0 } +/// Replaces the late-bound-regions in `value` that are bound by `value`. pub fn replace_late_bound_regions<'tcx, HR>( tcx: &ty::ctxt<'tcx>, value: &HR, @@ -5998,10 +5935,6 @@ pub fn replace_late_bound_regions<'tcx, HR>( -> (HR, FnvHashMap) where HR : HigherRankedFoldable<'tcx> { - /*! - * Replaces the late-bound-regions in `value` that are bound by `value`. - */ - debug!("replace_late_bound_regions({})", value.repr(tcx)); let mut map = FnvHashMap::new(); diff --git a/src/librustc/middle/ty_fold.rs b/src/librustc/middle/ty_fold.rs index 913919fe774f341f0a7320347904dc2d72565d8d..0d7b9b99c57e6fea1990f4da780095b03d6b948f 100644 --- a/src/librustc/middle/ty_fold.rs +++ b/src/librustc/middle/ty_fold.rs @@ -8,33 +8,31 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * Generalized type folding mechanism. The setup is a bit convoluted - * but allows for convenient usage. Let T be an instance of some - * "foldable type" (one which implements `TypeFoldable`) and F be an - * instance of a "folder" (a type which implements `TypeFolder`). Then - * the setup is intended to be: - * - * T.fold_with(F) --calls--> F.fold_T(T) --calls--> super_fold_T(F, T) - * - * This way, when you define a new folder F, you can override - * `fold_T()` to customize the behavior, and invoke `super_fold_T()` - * to get the original behavior. Meanwhile, to actually fold - * something, you can just write `T.fold_with(F)`, which is - * convenient. 
(Note that `fold_with` will also transparently handle
- * things like a `Vec<T>` where `T` is foldable and so on.)
- *
- * In this ideal setup, the only function that actually *does*
- * anything is `super_fold_T`, which traverses the type `T`. Moreover,
- * `super_fold_T` should only ever call `T.fold_with()`.
- *
- * In some cases, we follow a degenerate pattern where we do not have
- * a `fold_T` nor `super_fold_T` method. Instead, `T.fold_with`
- * traverses the structure directly. This is suboptimal because the
- * behavior cannot be overriden, but it's much less work to implement.
- * If you ever *do* need an override that doesn't exist, it's not hard
- * to convert the degenerate pattern into the proper thing.
- */
+//! Generalized type folding mechanism. The setup is a bit convoluted
+//! but allows for convenient usage. Let T be an instance of some
+//! "foldable type" (one which implements `TypeFoldable`) and F be an
+//! instance of a "folder" (a type which implements `TypeFolder`). Then
+//! the setup is intended to be:
+//!
+//!     T.fold_with(F) --calls--> F.fold_T(T) --calls--> super_fold_T(F, T)
+//!
+//! This way, when you define a new folder F, you can override
+//! `fold_T()` to customize the behavior, and invoke `super_fold_T()`
+//! to get the original behavior. Meanwhile, to actually fold
+//! something, you can just write `T.fold_with(F)`, which is
+//! convenient. (Note that `fold_with` will also transparently handle
+//! things like a `Vec<T>` where `T` is foldable and so on.)
+//!
+//! In this ideal setup, the only function that actually *does*
+//! anything is `super_fold_T`, which traverses the type `T`. Moreover,
+//! `super_fold_T` should only ever call `T.fold_with()`.
+//!
+//! In some cases, we follow a degenerate pattern where we do not have
+//! a `fold_T` nor `super_fold_T` method. Instead, `T.fold_with`
+//! traverses the structure directly. This is suboptimal because the
+//! behavior cannot be overridden, but it's much less work to implement.
+//! If you ever *do* need an override that doesn't exist, it's not hard
+//! to convert the degenerate pattern into the proper thing.
 
 use middle::subst;
 use middle::subst::VecPerParamSpace;
diff --git a/src/librustc/middle/typeck/astconv.rs b/src/librustc/middle/typeck/astconv.rs
index fd5b1bd4793b5094b0694e5fa0e0cc929e7ef33d..5dfe3fc3a58cd921aaadf4fffdfc8f3b867b1f50 100644
--- a/src/librustc/middle/typeck/astconv.rs
+++ b/src/librustc/middle/typeck/astconv.rs
@@ -8,46 +8,44 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * Conversion from AST representation of types to the ty.rs
- * representation. The main routine here is `ast_ty_to_ty()`: each use
- * is parameterized by an instance of `AstConv` and a `RegionScope`.
- *
- * The parameterization of `ast_ty_to_ty()` is because it behaves
- * somewhat differently during the collect and check phases,
- * particularly with respect to looking up the types of top-level
- * items. In the collect phase, the crate context is used as the
- * `AstConv` instance; in this phase, the `get_item_ty()` function
- * triggers a recursive call to `ty_of_item()` (note that
- * `ast_ty_to_ty()` will detect recursive types and report an error).
- * In the check phase, when the FnCtxt is used as the `AstConv`,
- * `get_item_ty()` just looks up the item type in `tcx.tcache`.
- * - * The `RegionScope` trait controls what happens when the user does - * not specify a region in some location where a region is required - * (e.g., if the user writes `&Foo` as a type rather than `&'a Foo`). - * See the `rscope` module for more details. - * - * Unlike the `AstConv` trait, the region scope can change as we descend - * the type. This is to accommodate the fact that (a) fn types are binding - * scopes and (b) the default region may change. To understand case (a), - * consider something like: - * - * type foo = { x: &a.int, y: |&a.int| } - * - * The type of `x` is an error because there is no region `a` in scope. - * In the type of `y`, however, region `a` is considered a bound region - * as it does not already appear in scope. - * - * Case (b) says that if you have a type: - * type foo<'a> = ...; - * type bar = fn(&foo, &a.foo) - * The fully expanded version of type bar is: - * type bar = fn(&'foo &, &a.foo<'a>) - * Note that the self region for the `foo` defaulted to `&` in the first - * case but `&a` in the second. Basically, defaults that appear inside - * an rptr (`&r.T`) use the region `r` that appears in the rptr. - */ +//! Conversion from AST representation of types to the ty.rs +//! representation. The main routine here is `ast_ty_to_ty()`: each use +//! is parameterized by an instance of `AstConv` and a `RegionScope`. +//! +//! The parameterization of `ast_ty_to_ty()` is because it behaves +//! somewhat differently during the collect and check phases, +//! particularly with respect to looking up the types of top-level +//! items. In the collect phase, the crate context is used as the +//! `AstConv` instance; in this phase, the `get_item_ty()` function +//! triggers a recursive call to `ty_of_item()` (note that +//! `ast_ty_to_ty()` will detect recursive types and report an error). +//! In the check phase, when the FnCtxt is used as the `AstConv`, +//! `get_item_ty()` just looks up the item type in `tcx.tcache`. +//! +//! The `RegionScope` trait controls what happens when the user does +//! not specify a region in some location where a region is required +//! (e.g., if the user writes `&Foo` as a type rather than `&'a Foo`). +//! See the `rscope` module for more details. +//! +//! Unlike the `AstConv` trait, the region scope can change as we descend +//! the type. This is to accommodate the fact that (a) fn types are binding +//! scopes and (b) the default region may change. To understand case (a), +//! consider something like: +//! +//! type foo = { x: &a.int, y: |&a.int| } +//! +//! The type of `x` is an error because there is no region `a` in scope. +//! In the type of `y`, however, region `a` is considered a bound region +//! as it does not already appear in scope. +//! +//! Case (b) says that if you have a type: +//! type foo<'a> = ...; +//! type bar = fn(&foo, &a.foo) +//! The fully expanded version of type bar is: +//! type bar = fn(&'foo &, &a.foo<'a>) +//! Note that the self region for the `foo` defaulted to `&` in the first +//! case but `&a` in the second. Basically, defaults that appear inside +//! an rptr (`&r.T`) use the region `r` that appears in the rptr. use middle::const_eval; use middle::def; use middle::resolve_lifetime as rl; @@ -201,6 +199,8 @@ pub fn opt_ast_region_to_region<'tcx, AC: AstConv<'tcx>, RS: RegionScope>( r } +/// Given a path `path` that refers to an item `I` with the declared generics `decl_generics`, +/// returns an appropriate set of substitutions for this particular reference to `I`. 
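+///
+/// For example (an editorial sketch, not part of the original patch): for a
+/// hypothetical item `struct Foo<'a, T> { ... }` referenced as `Foo<'static, int>`,
+/// the returned substitutions would map the declared `'a` to `'static` and the
+/// declared `T` to `int`.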
fn ast_path_substs_for_ty<'tcx,AC,RS>(
    this: &AC,
    rscope: &RS,
@@ -211,12 +211,6 @@ fn ast_path_substs_for_ty<'tcx,AC,RS>(
    -> Substs<'tcx>
    where AC: AstConv<'tcx>, RS: RegionScope
{
-    /*!
-     * Given a path `path` that refers to an item `I` with the
-     * declared generics `decl_generics`, returns an appropriate
-     * set of substitutions for this particular reference to `I`.
-     */
-
    let tcx = this.tcx();
 
    // ast_path_substs() is only called to convert paths that are
@@ -422,6 +416,9 @@ pub fn instantiate_poly_trait_ref<'tcx,AC,RS>(
    instantiate_trait_ref(this, rscope, &ast_trait_ref.trait_ref, self_ty)
}
+/// Instantiates the path for the given trait reference, assuming that it's bound to a valid trait
+/// type. Returns the def_id for the defining trait. Fails if the type is a type other than a trait
+/// type.
pub fn instantiate_trait_ref<'tcx,AC,RS>(this: &AC,
                                         rscope: &RS,
                                         ast_trait_ref: &ast::TraitRef,
@@ -430,12 +427,6 @@ pub fn instantiate_trait_ref<'tcx,AC,RS>(this: &AC,
    where AC: AstConv<'tcx>, RS: RegionScope
{
-    /*!
-     * Instantiates the path for the given trait reference, assuming that
-     * it's bound to a valid trait type. Returns the def_id for the defining
-     * trait. Fails if the type is a type other than a trait type.
-     */
-
    match lookup_def_tcx(this.tcx(),
                         ast_trait_ref.path.span,
                         ast_trait_ref.ref_id) {
@@ -1318,6 +1309,10 @@ pub fn ty_of_closure<'tcx, AC: AstConv<'tcx>>(
    }
}
+/// Given an existential type like `Foo+'a+Bar`, this routine converts the `'a` and `Bar` into an
+/// `ExistentialBounds` struct. The `main_trait_refs` argument specifies the `Foo` -- it is absent
+/// for closures. Eventually this should all be normalized, I think, so that there is no "main
+/// trait ref" and instead we just have a flat list of bounds as the existential type.
pub fn conv_existential_bounds<'tcx, AC: AstConv<'tcx>, RS:RegionScope>(
    this: &AC,
    rscope: &RS,
@@ -1326,16 +1321,6 @@ pub fn conv_existential_bounds<'tcx, AC: AstConv<'tcx>, RS:RegionScope>(
    ast_bounds: &[ast::TyParamBound])
    -> ty::ExistentialBounds
{
-    /*!
-     * Given an existential type like `Foo+'a+Bar`, this routine
-     * converts the `'a` and `Bar` intos an `ExistentialBounds`
-     * struct. The `main_trait_refs` argument specifies the `Foo` --
-     * it is absent for closures. Eventually this should all be
-     * normalized, I think, so that there is no "main trait ref" and
-     * instead we just have a flat list of bounds as the existential
-     * type.
-     */
-
    let ast_bound_refs: Vec<&ast::TyParamBound> =
        ast_bounds.iter().collect();
@@ -1432,6 +1417,10 @@ pub fn conv_existential_bounds_from_partitioned_bounds<'tcx, AC, RS>(
    }
}
+/// Given the bounds on a type parameter / existential type, determines what single region bound
+/// (if any) we can use to summarize this type. The basic idea is that we will use the bound the
+/// user provided, if they provided one, and otherwise search the supertypes of trait bounds for
+/// region bounds. It may be that we can derive no bound at all, in which case we return `None`.
pub fn compute_opt_region_bound<'tcx>(tcx: &ty::ctxt<'tcx>,
                                      span: Span,
                                      builtin_bounds: ty::BuiltinBounds,
@@ -1439,16 +1428,6 @@ pub fn compute_opt_region_bound<'tcx>(tcx: &ty::ctxt<'tcx>,
                                      trait_bounds: &[Rc<ty::TraitRef<'tcx>>])
                                      -> Option<ty::Region>
{
-    /*!
-     * Given the bounds on a type parameter / existential type,
-     * determines what single region bound (if any) we can use to
-     * summarize this type.
 pub fn compute_opt_region_bound<'tcx>(tcx: &ty::ctxt<'tcx>,
                                       span: Span,
                                       builtin_bounds: ty::BuiltinBounds,
@@ -1439,16 +1428,6 @@ pub fn compute_opt_region_bound<'tcx>(tcx: &ty::ctxt<'tcx>,
                                       trait_bounds: &[Rc<ty::TraitRef<'tcx>>])
                                       -> Option<ty::Region>
 {
-    /*!
-     * Given the bounds on a type parameter / existential type,
-     * determines what single region bound (if any) we can use to
-     * summarize this type. The basic idea is that we will use the
-     * bound the user provided, if they provided one, and otherwise
-     * search the supertypes of trait bounds for region bounds. It may
-     * be that we can derive no bound at all, in which case we return
-     * `None`.
-     */
-
     if region_bounds.len() > 1 {
         tcx.sess.span_err(
             region_bounds[1].span,
@@ -1495,6 +1474,9 @@ pub fn compute_opt_region_bound<'tcx>(tcx: &ty::ctxt<'tcx>,
     return Some(r);
 }

+/// A version of `compute_opt_region_bound` for use where some region bound is required
+/// (existential types, basically). Reports an error if no region bound can be derived and we are
+/// in an `rscope` that does not provide a default.
 fn compute_region_bound<'tcx, AC: AstConv<'tcx>, RS:RegionScope>(
     this: &AC,
     rscope: &RS,
@@ -1504,13 +1486,6 @@ fn compute_region_bound<'tcx, AC: AstConv<'tcx>, RS:RegionScope>(
     trait_bounds: &[Rc<ty::TraitRef<'tcx>>])
     -> ty::Region
 {
-    /*!
-     * A version of `compute_opt_region_bound` for use where some
-     * region bound is required (existential types,
-     * basically). Reports an error if no region bound can be derived
-     * and we are in an `rscope` that does not provide a default.
-     */
-
     match compute_opt_region_bound(this.tcx(), span, builtin_bounds,
                                    region_bounds, trait_bounds) {
         Some(r) => r,
@@ -1534,17 +1509,13 @@ pub struct PartitionedBounds<'a> {
     pub region_bounds: Vec<&'a ast::Lifetime>,
 }

+/// Divides a list of bounds from the AST into three groups: builtin bounds (Copy, Sized etc),
+/// general trait bounds, and region bounds.
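+///
+/// For example (an illustrative sketch, not from the surrounding code): bounds written as
+/// `Copy + Show + 'a` would be split into
+///
+///     builtin bounds: {Copy}
+///     trait bounds:   [Show]
+///     region bounds:  ['a]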
 pub fn partition_bounds<'a>(tcx: &ty::ctxt,
                             _span: Span,
                             ast_bounds: &'a [&ast::TyParamBound])
                             -> PartitionedBounds<'a>
 {
-    /*!
-     * Divides a list of bounds from the AST into three groups:
-     * builtin bounds (Copy, Sized etc), general trait bounds,
-     * and region bounds.
-     */
-
     let mut builtin_bounds = ty::empty_builtin_bounds();
     let mut region_bounds = Vec::new();
     let mut trait_bounds = Vec::new();
diff --git a/src/librustc/middle/typeck/check/closure.rs b/src/librustc/middle/typeck/check/closure.rs
index 51636f00c391ad2171fb9022e2f5d85e9ed05f53..0a93b3a5ec7dc93326698f4b733542f9c8ac7881 100644
--- a/src/librustc/middle/typeck/check/closure.rs
+++ b/src/librustc/middle/typeck/check/closure.rs
@@ -8,9 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
- * Code for type-checking closure expressions.
- */
+//! Code for type-checking closure expressions.

 use super::check_fn;
 use super::{Expectation, ExpectCastableToType, ExpectHasType, NoExpectation};
diff --git a/src/librustc/middle/typeck/check/method/confirm.rs b/src/librustc/middle/typeck/check/method/confirm.rs
index 5bcd96e66efc2da544ba00a13ba8f09b09c858d9..e866627be3d29ad1537423dc03d6057fa55a5d12 100644
--- a/src/librustc/middle/typeck/check/method/confirm.rs
+++ b/src/librustc/middle/typeck/check/method/confirm.rs
@@ -189,22 +189,17 @@ fn create_ty_adjustment(&mut self,
     ///////////////////////////////////////////////////////////////////////////
     //

+    /// Returns a set of substitutions for the method *receiver* where all type and region
+    /// parameters are instantiated with fresh variables. This substitution does not include any
+    /// parameters declared on the method itself.
+    ///
+    /// Note that this substitution may include late-bound regions from the impl level. If so,
+    /// these are instantiated later in the `instantiate_method_sig` routine.
     fn fresh_receiver_substs(&mut self,
                              self_ty: Ty<'tcx>,
                              pick: &probe::Pick<'tcx>)
                              -> (subst::Substs<'tcx>, MethodOrigin<'tcx>)
     {
-        /*!
-         * Returns a set of substitutions for the method *receiver*
-         * where all type and region parameters are instantiated with
-         * fresh variables. This substitution does not include any
-         * parameters declared on the method itself.
-         *
-         * Note that this substitution may include late-bound regions
-         * from the impl level. If so, these are instantiated later in
-         * the `instantiate_method_sig` routine.
-         */
-
         match pick.kind {
             probe::InherentImplPick(impl_def_id) => {
                 assert!(ty::impl_trait_ref(self.tcx(), impl_def_id).is_none(),
@@ -478,14 +473,11 @@ fn add_obligations(&mut self,
     ///////////////////////////////////////////////////////////////////////////
     // RECONCILIATION

+    /// When we select a method with an `&mut self` receiver, we have to convert any
+    /// auto-derefs, indices, etc from `Deref` and `Index` into `DerefMut` and `IndexMut`
+    /// respectively.
     fn fixup_derefs_on_method_receiver_if_necessary(&self,
                                                     method_callee: &MethodCallee) {
-        /*!
-         * When we select a method with an `&mut self` receiver, we have to go
-         * convert any auto-derefs, indices, etc from `Deref` and `Index` into
-         * `DerefMut` and `IndexMut` respectively.
-         */
-
         let sig = match method_callee.ty.sty {
             ty::ty_bare_fn(ref f) => f.sig.clone(),
             ty::ty_closure(ref f) => f.sig.clone(),
diff --git a/src/librustc/middle/typeck/check/method/doc.rs b/src/librustc/middle/typeck/check/method/doc.rs
index 8c691e02ca9d053f12bd5c195ea05b931ebf7a5d..6129e38e39c12f9e6e0810dfd0101fcd98605996 100644
--- a/src/librustc/middle/typeck/check/method/doc.rs
+++ b/src/librustc/middle/typeck/check/method/doc.rs
@@ -8,119 +8,114 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-
-# Method lookup
-
-Method lookup can be rather complex due to the interaction of a number
-of factors, such as self types, autoderef, trait lookup, etc. This
-file provides an overview of the process. More detailed notes are in
-the code itself, naturally.
-
-One way to think of method lookup is that we convert an expression of
-the form:
-
-    receiver.method(...)
-
-into a more explicit UFCS form:
-
-    Trait::method(ADJ(receiver), ...) // for a trait call
-    ReceiverType::method(ADJ(receiver), ...) // for an inherent method call
-
-Here `ADJ` is some kind of adjustment, which is typically a series of
-autoderefs and then possibly an autoref (e.g., `&**receiver`). However
-we sometimes do other adjustments and coercions along the way, in
-particular unsizing (e.g., converting from `[T, ..n]` to `[T]`).
-
-## The Two Phases
-
-Method lookup is divided into two major phases: probing (`probe.rs`)
-and confirmation (`confirm.rs`). The probe phase is when we decide
-what method to call and how to adjust the receiver. The confirmation
-phase "applies" this selection, updating the side-tables, unifying
-type variables, and otherwise doing side-effectful things.
-
-One reason for this division is to be more amenable to caching. The
-probe phase produces a "pick" (`probe::Pick`), which is designed to be
-cacheable across method-call sites. Therefore, it does not include
-inference variables or other information.
-
-## Probe phase
-
-The probe phase (`probe.rs`) decides what method is being called and
-how to adjust the receiver.
-
-### Steps
-
-The first thing that the probe phase does is to create a series of
-*steps*. This is done by progressively dereferencing the receiver type
-until it cannot be deref'd anymore, as well as applying an optional
-"unsize" step.
-So if the receiver has type `Rc<Box<[T, ..3]>>`, this
-might yield:
-
-    Rc<Box<[T, ..3]>>
-    Box<[T, ..3]>
-    [T, ..3]
-    [T]
-
-### Candidate assembly
-
-We then search along those steps to create a list of *candidates*. A
-`Candidate` is a method item that might plausibly be the method being
-invoked. For each candidate, we'll derive a "transformed self type"
-that takes into account explicit self.
-
-Candidates are grouped into two kinds, inherent and extension.
-
-**Inherent candidates** are those that are derived from the
-type of the receiver itself. So, if you have a receiver of some
-nominal type `Foo` (e.g., a struct), any methods defined within an
-impl like `impl Foo` are inherent methods. Nothing needs to be
-imported to use an inherent method, they are associated with the type
-itself (note that inherent impls can only be defined in the same
-module as the type itself).
-
-FIXME: Inherent candidates are not always derived from impls. If you
-have a trait object, such as a value of type `Box<ToString>`, then the
-trait methods (`to_string()`, in this case) are inherently associated
-with it. Another case is type parameters, in which case the methods of
-their bounds are inherent. However, this part of the rules is subject
-to change: when DST's "impl Trait for Trait" is complete, trait object
-dispatch could be subsumed into trait matching, and the type parameter
-behavior should be reconsidered in light of where clauses.
-
-**Extension candidates** are derived from imported traits. If I have
-the trait `ToString` imported, and I call `to_string()` on a value of
-type `T`, then we will go off to find out whether there is an impl of
-`ToString` for `T`. These kinds of method calls are called "extension
-methods". They can be defined in any module, not only the one that
-defined `T`. Furthermore, you must import the trait to call such a
-method.
-
-So, let's continue our example. Imagine that we were calling a method
-`foo` with the receiver `Rc<Box<[T, ..3]>>` and there is a trait `Foo`
-that defines it with `&self` for the type `Rc<U>` as well as a method
-on the type `Box` that defines `Foo` but with `&mut self`. Then we
-might have two candidates:
-
-    &Rc<Box<[T, ..3]>> from the impl of `Foo` for `Rc<U>` where `U=Box<[T, ..3]>
-    &mut Box<[T, ..3]>> from the inherent impl on `Box<U>` where `U=[T, ..3]`
-
-### Candidate search
-
-Finally, to actually pick the method, we will search down the steps,
-trying to match the receiver type against the candidate types. At
-each step, we also consider an auto-ref and auto-mut-ref to see whether
-that makes any of the candidates match. We pick the first step where
-we find a match.
-
-In the case of our example, the first step is `Rc<Box<[T, ..3]>>`,
-which does not itself match any candidate. But when we autoref it, we
-get the type `&Rc<Box<[T, ..3]>>` which does match. We would then
-recursively consider all where-clauses that appear on the impl: if
-those match (or we cannot rule out that they do), then this is the
-method we would pick. Otherwise, we would continue down the series of
-steps.
-
-*/
-
+//! # Method lookup
+//!
+//! Method lookup can be rather complex due to the interaction of a number
+//! of factors, such as self types, autoderef, trait lookup, etc. This
+//! file provides an overview of the process. More detailed notes are in
+//! the code itself, naturally.
+//!
+//! One way to think of method lookup is that we convert an expression of
+//! the form:
+//!
+//!     receiver.method(...)
+//!
+//! into a more explicit UFCS form:
+//!
+//!     Trait::method(ADJ(receiver), ...)        // for a trait call
+//!     ReceiverType::method(ADJ(receiver), ...) // for an inherent method call
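+//!
+//! For instance (an illustrative sketch, not code from this module):
+//!
+//! ```
+//! let v = vec![1i, 2, 3];
+//! let n = v.len(); // receiver.method(...): the receiver is adjusted to `&v` (one autoref)
+//! assert_eq!(n, 3);
+//! ```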
+//!
+//! Here `ADJ` is some kind of adjustment, which is typically a series of
+//! autoderefs and then possibly an autoref (e.g., `&**receiver`). However
+//! we sometimes do other adjustments and coercions along the way, in
+//! particular unsizing (e.g., converting from `[T, ..n]` to `[T]`).
+//!
+//! ## The Two Phases
+//!
+//! Method lookup is divided into two major phases: probing (`probe.rs`)
+//! and confirmation (`confirm.rs`). The probe phase is when we decide
+//! what method to call and how to adjust the receiver. The confirmation
+//! phase "applies" this selection, updating the side-tables, unifying
+//! type variables, and otherwise doing side-effectful things.
+//!
+//! One reason for this division is to be more amenable to caching. The
+//! probe phase produces a "pick" (`probe::Pick`), which is designed to be
+//! cacheable across method-call sites. Therefore, it does not include
+//! inference variables or other information.
+//!
+//! ## Probe phase
+//!
+//! The probe phase (`probe.rs`) decides what method is being called and
+//! how to adjust the receiver.
+//!
+//! ### Steps
+//!
+//! The first thing that the probe phase does is to create a series of
+//! *steps*. This is done by progressively dereferencing the receiver type
+//! until it cannot be deref'd anymore, as well as applying an optional
+//! "unsize" step. So if the receiver has type `Rc<Box<[T, ..3]>>`, this
+//! might yield:
+//!
+//!     Rc<Box<[T, ..3]>>
+//!     Box<[T, ..3]>
+//!     [T, ..3]
+//!     [T]
+//!
+//! ### Candidate assembly
+//!
+//! We then search along those steps to create a list of *candidates*. A
+//! `Candidate` is a method item that might plausibly be the method being
+//! invoked. For each candidate, we'll derive a "transformed self type"
+//! that takes into account explicit self.
+//!
+//! Candidates are grouped into two kinds, inherent and extension.
+//!
+//! **Inherent candidates** are those that are derived from the
+//! type of the receiver itself. So, if you have a receiver of some
+//! nominal type `Foo` (e.g., a struct), any methods defined within an
+//! impl like `impl Foo` are inherent methods. Nothing needs to be
+//! imported to use an inherent method, they are associated with the type
+//! itself (note that inherent impls can only be defined in the same
+//! module as the type itself).
+//!
+//! FIXME: Inherent candidates are not always derived from impls. If you
+//! have a trait object, such as a value of type `Box<ToString>`, then the
+//! trait methods (`to_string()`, in this case) are inherently associated
+//! with it. Another case is type parameters, in which case the methods of
+//! their bounds are inherent. However, this part of the rules is subject
+//! to change: when DST's "impl Trait for Trait" is complete, trait object
+//! dispatch could be subsumed into trait matching, and the type parameter
+//! behavior should be reconsidered in light of where clauses.
+//!
+//! **Extension candidates** are derived from imported traits. If I have
+//! the trait `ToString` imported, and I call `to_string()` on a value of
+//! type `T`, then we will go off to find out whether there is an impl of
+//! `ToString` for `T`. These kinds of method calls are called "extension
+//! methods". They can be defined in any module, not only the one that
+//! defined `T`. Furthermore, you must import the trait to call such a
+//! method.
+//!
+//! So, let's continue our example. Imagine that we were calling a method
+//! `foo` with the receiver `Rc<Box<[T, ..3]>>` and there is a trait `Foo`
+//! that defines it with `&self` for the type `Rc<U>` as well as a method
+//! on the type `Box` that defines `Foo` but with `&mut self`. Then we
+//! might have two candidates:
+//!
+//!     &Rc<Box<[T, ..3]>> from the impl of `Foo` for `Rc<U>` where `U=Box<[T, ..3]>`
+//!     &mut Box<[T, ..3]> from the inherent impl on `Box<U>` where `U=[T, ..3]`
+//!
+//! ### Candidate search
+//!
+//! Finally, to actually pick the method, we will search down the steps,
+//! trying to match the receiver type against the candidate types. At
+//! each step, we also consider an auto-ref and auto-mut-ref to see whether
+//! that makes any of the candidates match. We pick the first step where
+//! we find a match.
+//!
+//! In the case of our example, the first step is `Rc<Box<[T, ..3]>>`,
+//! which does not itself match any candidate. But when we autoref it, we
+//! get the type `&Rc<Box<[T, ..3]>>` which does match. We would then
+//! recursively consider all where-clauses that appear on the impl: if
+//! those match (or we cannot rule out that they do), then this is the
+//! method we would pick. Otherwise, we would continue down the series of
+//! steps.
diff --git a/src/librustc/middle/typeck/check/method/mod.rs b/src/librustc/middle/typeck/check/method/mod.rs
index 0f4152644adafb80b968de958dcf64b25406a4fd..34c3292f8cd69b60e515228bb1283a3729113440 100644
--- a/src/librustc/middle/typeck/check/method/mod.rs
+++ b/src/librustc/middle/typeck/check/method/mod.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*! Method lookup: the secret sauce of Rust. See `doc.rs`. */
+//! Method lookup: the secret sauce of Rust. See `doc.rs`.

 use middle::subst;
 use middle::subst::{Subst};
@@ -56,6 +56,7 @@ pub enum CandidateSource {

 type MethodIndex = uint; // just for doc purposes

+/// Determines whether the type `self_ty` supports a method name `method_name` or not.
 pub fn exists<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                         span: Span,
                         method_name: ast::Name,
@@ -63,10 +64,6 @@ pub fn exists<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                         call_expr_id: ast::NodeId)
                         -> bool
 {
-    /*!
-     * Determines whether the type `self_ty` supports a method name `method_name` or not.
-     */
-
     match probe::probe(fcx, span, method_name, self_ty, call_expr_id) {
         Ok(_) => true,
         Err(NoMatch(_)) => false,
@@ -74,6 +71,20 @@
     }
 }

+/// Performs method lookup. If lookup is successful, it will return the callee and store an
+/// appropriate adjustment for the self-expr. In some cases it may report an error (e.g., invoking
+/// the `drop` method).
+///
+/// # Arguments
+///
+/// Given a method call like `foo.bar::<T1,...Tn>(...)`:
+///
+/// * `fcx`: the surrounding `FnCtxt` (!)
+/// * `span`: the span for the method call
+/// * `method_name`: the name of the method being called (`bar`)
+/// * `self_ty`: the (unadjusted) type of the self expression (`foo`)
+/// * `supplied_method_types`: the explicit method type parameters, if any (`T1..Tn`)
+/// * `self_expr`: the self expression (`foo`)
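+///
+/// For example (hypothetical code, not from this module):
+///
+/// ```
+/// let foo = vec![1u, 2];
+/// foo.iter(); // method_name is `iter`, self_expr is `foo`, self_ty is `Vec<uint>`
+/// ```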
 pub fn lookup<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                         span: Span,
                         method_name: ast::Name,
@@ -83,23 +94,6 @@ pub fn lookup<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                         self_expr: &ast::Expr)
                         -> Result<MethodCallee<'tcx>, MethodError>
 {
-    /*!
-     * Performs method lookup. If lookup is successful, it will return the callee
-     * and store an appropriate adjustment for the self-expr. In some cases it may
-     * report an error (e.g., invoking the `drop` method).
-     *
-     * # Arguments
-     *
-     * Given a method call like `foo.bar::<T1,...Tn>(...)`:
-     *
-     * - `fcx`: the surrounding `FnCtxt` (!)
-     * - `span`: the span for the method call
-     * - `method_name`: the name of the method being called (`bar`)
-     * - `self_ty`: the (unadjusted) type of the self expression (`foo`)
-     * - `supplied_method_types`: the explicit method type parameters, if any (`T1..Tn`)
-     * - `self_expr`: the self expression (`foo`)
-     */
-
     debug!("lookup(method_name={}, self_ty={}, call_expr={}, self_expr={})",
            method_name.repr(fcx.tcx()),
            self_ty.repr(fcx.tcx()),
@@ -124,6 +118,15 @@ pub fn lookup_in_trait<'a, 'tcx>(fcx: &'a FnCtxt<'a, 'tcx>,
                     self_ty, opt_input_types)
 }

+/// `lookup_in_trait_adjusted` is used for overloaded operators. It does a very narrow slice of
+/// what the normal probe/confirm path does. In particular, it doesn't really do any probing: it
+/// simply constructs an obligation for a particular trait with the given self-type and checks
+/// whether that trait is implemented.
+///
+/// FIXME(#18741) -- It seems likely that we can consolidate some of this code with the other
+/// method-lookup code. In particular, autoderef on index is basically identical to autoderef with
+/// normal probes, except that the test also looks for built-in indexing. Also, the second half of
+/// this method is basically the same as confirmation.
 pub fn lookup_in_trait_adjusted<'a, 'tcx>(fcx: &'a FnCtxt<'a, 'tcx>,
                                           span: Span,
                                           self_expr: Option<&'a ast::Expr>,
@@ -134,21 +137,6 @@ pub fn lookup_in_trait_adjusted<'a, 'tcx>(fcx: &'a FnCtxt<'a, 'tcx>,
                                           opt_input_types: Option<Vec<Ty<'tcx>>>)
                                           -> Option<MethodCallee<'tcx>>
 {
-    /*!
-     * `lookup_in_trait_adjusted` is used for overloaded operators. It
-     * does a very narrow slice of what the normal probe/confirm path
-     * does. In particular, it doesn't really do any probing: it
-     * simply constructs an obligation for a particular trait with the
-     * given self-type and checks whether that trait is implemented.
-     *
-     * FIXME(#18741) -- It seems likely that we can consolidate some of this
-     * code with the other method-lookup code. In particular,
-     * autoderef on index is basically identical to autoderef with
-     * normal probes, except that the test also looks for built-in
-     * indexing. Also, the second half of this method is basically
-     * the same as confirmation.
-     */
-
     debug!("lookup_in_trait_adjusted(self_ty={}, self_expr={}, m_name={}, trait_def_id={})",
            self_ty.repr(fcx.tcx()),
            self_expr.repr(fcx.tcx()),
@@ -408,16 +396,13 @@ fn report_candidates(fcx: &FnCtxt,
     }
 }

+/// Find method with name `method_name` defined in `trait_def_id` and return it, along with its
+/// index (or `None`, if no such method).
 fn trait_method<'tcx>(tcx: &ty::ctxt<'tcx>,
                       trait_def_id: ast::DefId,
                       method_name: ast::Name)
                       -> Option<(uint, Rc<ty::Method<'tcx>>)>
 {
-    /*!
-     * Find method with name `method_name` defined in `trait_def_id` and return it,
-     * along with its index (or `None`, if no such method).
-     */
-
     let trait_items = ty::trait_items(tcx, trait_def_id);
     trait_items
         .iter()
diff --git a/src/librustc/middle/typeck/check/method/probe.rs b/src/librustc/middle/typeck/check/method/probe.rs
index a98b4cf011d97e38f1a9b4eb414a813c2349e455..484d72130e61d94182df0c71e57e88a4677db01f 100644
--- a/src/librustc/middle/typeck/check/method/probe.rs
+++ b/src/librustc/middle/typeck/check/method/probe.rs
@@ -807,33 +807,26 @@ fn consider_probe(&self, self_ty: Ty<'tcx>, probe: &Candidate<'tcx>) -> bool {
         })
     }

+    /// Sometimes we get in a situation where we have multiple probes that are all impls of the
+    /// same trait, but we don't know which impl to use.
+    /// In this case, since in all cases the
+    /// external interface of the method can be determined from the trait, it's ok not to decide.
+    /// We can basically just collapse all of the probes for various impls into one where-clause
+    /// probe. This will result in a pending obligation so when more type-info is available we can
+    /// make the final decision.
+    ///
+    /// Example (`src/test/run-pass/method-two-trait-defer-resolution-1.rs`):
+    ///
+    /// ```
+    /// trait Foo { ... }
+    /// impl Foo for Vec<int> { ... }
+    /// impl Foo for Vec<uint> { ... }
+    /// ```
+    ///
+    /// Now imagine the receiver is `Vec<_>`. It doesn't really matter at this time which impl we
+    /// use, so it's ok to just commit to "using the method from the trait Foo".
     fn collapse_candidates_to_trait_pick(&self,
                                          probes: &[&Candidate<'tcx>])
                                          -> Option<Pick<'tcx>> {
-        /*!
-         * Sometimes we get in a situation where we have multiple
-         * probes that are all impls of the same trait, but we don't
-         * know which impl to use. In this case, since in all cases
-         * the external interface of the method can be determined from
-         * the trait, it's ok not to decide. We can basically just
-         * collapse all of the probes for various impls into one
-         * where-clause probe. This will result in a pending
-         * obligation so when more type-info is available we can make
-         * the final decision.
-         *
-         * Example (`src/test/run-pass/method-two-trait-defer-resolution-1.rs`):
-         *
-         * ```
-         * trait Foo { ... }
-         * impl Foo for Vec<int> { ... }
-         * impl Foo for Vec<uint> { ... }
-         * ```
-         *
-         * Now imagine the receiver is `Vec<_>`. It doesn't really
-         * matter at this time which impl we use, so it's ok to just
-         * commit to "using the method from the trait Foo".
-         */
-
         // Do all probes correspond to the same trait?
         let trait_data = match probes[0].to_trait_data() {
             Some(data) => data,
@@ -952,36 +945,27 @@ fn impl_substs(&self,
         subst::Substs::new(type_vars, region_placeholders)
     }

+    /// Replace late-bound-regions bound by `value` with `'static` using
+    /// `ty::erase_late_bound_regions`.
+    ///
+    /// This is only a reasonable thing to do during the *probe* phase, not the *confirm* phase, of
+    /// method matching. It is reasonable during the probe phase because we don't consider region
+    /// relationships at all. Therefore, we can just replace all the region variables with 'static
+    /// rather than creating fresh region variables. This is nice for two reasons:
+    ///
+    /// 1. Because the numbers of the region variables would otherwise be fairly unique to this
+    ///    particular method call, it winds up creating fewer types overall, which helps for memory
+    ///    usage. (Admittedly, this is a rather small effect, though measurable.)
+    ///
+    /// 2. It makes it easier to deal with higher-ranked trait bounds, because we can replace any
+    ///    late-bound regions with 'static. Otherwise, if we were going to replace late-bound
+    ///    regions with actual region variables as is proper, we'd have to ensure that the same
+    ///    region got replaced with the same variable, which requires a bit more coordination
+    ///    and/or tracking the substitution and
+    ///    so forth.
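+    ///
+    /// For example (an illustrative sketch, not real compiler output): a candidate whose
+    /// signature mentions a late-bound region, say
+    ///
+    ///     fn(&'a int) -> &'a int
+    ///
+    /// is simply treated during probing as
+    ///
+    ///     fn(&'static int) -> &'static int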
     fn erase_late_bound_regions<T>(&self, value: &T) -> T
         where T : HigherRankedFoldable<'tcx>
     {
-        /*!
-         * Replace late-bound-regions bound by `value` with `'static`
-         * using `ty::erase_late_bound_regions`.
-         *
-         * This is only a reasonable thing to do during the *probe*
-         * phase, not the *confirm* phase, of method matching. It is
-         * reasonable during the probe phase because we don't consider
-         * region relationships at all. Therefore, we can just replace
-         * all the region variables with 'static rather than creating
-         * fresh region variables. This is nice for two reasons:
-         *
-         * 1. Because the numbers of the region variables would
-         *    otherwise be fairly unique to this particular method
-         *    call, it winds up creating fewer types overall, which
-         *    helps for memory usage. (Admittedly, this is a rather
-         *    small effect, though measureable.)
-         *
-         * 2. It makes it easier to deal with higher-ranked trait
-         *    bounds, because we can replace any late-bound regions
-         *    with 'static. Otherwise, if we were going to replace
-         *    late-bound regions with actual region variables as is
-         *    proper, we'd have to ensure that the same region got
-         *    replaced with the same variable, which requires a bit
-         *    more coordination and/or tracking the substitution and
-         *    so forth.
-         */
-
         ty::erase_late_bound_regions(self.tcx(), value)
     }
 }
@@ -1000,16 +984,13 @@ fn impl_method<'tcx>(tcx: &ty::ctxt<'tcx>,
         .and_then(|item| item.as_opt_method())
 }

+/// Find method with name `method_name` defined in `trait_def_id` and return it, along with its
+/// index (or `None`, if no such method).
 fn trait_method<'tcx>(tcx: &ty::ctxt<'tcx>,
                       trait_def_id: ast::DefId,
                       method_name: ast::Name)
                       -> Option<(uint, Rc<ty::Method<'tcx>>)>
 {
-    /*!
-     * Find method with name `method_name` defined in `trait_def_id` and return it,
-     * along with its index (or `None`, if no such method).
-     */
-
     let trait_items = ty::trait_items(tcx, trait_def_id);
     trait_items
         .iter()
diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs
index 754bdc8c8ea013172526fe87b45fd5106045c973..b33ce04f5ebe74b96ba1df1347298dc04058cdac 100644
--- a/src/librustc/middle/typeck/check/mod.rs
+++ b/src/librustc/middle/typeck/check/mod.rs
@@ -486,6 +486,12 @@ fn visit_item(&mut self, _: &ast::Item) { }
 }

+/// Helper used by check_bare_fn and check_expr_fn. Does the grungy work of checking a function
+/// body and returns the function context used for that purpose, since in the case of a fn item
+/// there is still a bit more to do.
+///
+/// * ...
+/// * inherited: other fields inherited from the enclosing fn (if any)
 fn check_fn<'a, 'tcx>(ccx: &'a CrateCtxt<'a, 'tcx>,
                       fn_style: ast::FnStyle,
                       fn_style_id: ast::NodeId,
@@ -495,16 +501,6 @@ fn check_fn<'a, 'tcx>(ccx: &'a CrateCtxt<'a, 'tcx>,
                       body: &ast::Block,
                       inherited: &'a Inherited<'a, 'tcx>)
                       -> FnCtxt<'a, 'tcx>
 {
-    /*!
-     * Helper used by check_bare_fn and check_expr_fn. Does the
-     * grungy work of checking a function body and returns the
-     * function context used for that purpose, since in the case of a
-     * fn item there is still a bit more to do.
-     *
-     * - ...
-     * - inherited: other fields inherited from the enclosing fn (if any)
-     */
-
     let tcx = ccx.tcx;
     let err_count_on_creation = tcx.sess.err_count();
@@ -701,19 +697,17 @@ pub fn check_item(ccx: &CrateCtxt, it: &ast::Item) {
     }
 }

+/// Type checks a method body.
+///
+/// # Parameters
+///
+/// * `item_generics`: generics defined on the impl/trait that contains
+///   the method
+/// * `self_bound`: bound for the `Self` type parameter, if any
+/// * `method`: the method definition
 fn check_method_body<'a, 'tcx>(ccx: &CrateCtxt<'a, 'tcx>,
                                item_generics: &ty::Generics<'tcx>,
                                method: &ast::Method) {
-    /*!
-     * Type checks a method body.
-     *
-     * # Parameters
-     * - `item_generics`: generics defined on the impl/trait that contains
-     *   the method
-     * - `self_bound`: bound for the `Self` type parameter, if any
-     * - `method`: the method definition
-     */
-
     debug!("check_method_body(item_generics={}, method.id={})",
            item_generics.repr(ccx.tcx), method.id);
@@ -1222,6 +1216,33 @@ fn compare_impl_method<'tcx>(tcx: &ty::ctxt<'tcx>,
     // parameters.
     infcx.resolve_regions_and_report_errors();

+    /// Check that region bounds on impl method are the same as those on the trait. In principle,
+    /// it could be ok for there to be fewer region bounds on the impl method, but this leads to an
+    /// annoying corner case that is painful to handle (described below), so for now we can just
+    /// forbid it.
+    ///
+    /// Example (see `src/test/compile-fail/regions-bound-missing-bound-in-impl.rs`):
+    ///
+    /// ```
+    /// trait Foo<'a> {
+    ///     fn method1<'b>();
+    ///     fn method2<'b:'a>();
+    /// }
+    ///
+    /// impl<'a> Foo<'a> for ... {
+    ///     fn method1<'b:'a>() { .. case 1, definitely bad .. }
+    ///     fn method2<'b>() { .. case 2, could be ok .. }
+    /// }
+    /// ```
+    ///
+    /// The "definitely bad" case is case #1. Here, the impl adds an extra constraint not present
+    /// in the trait.
+    ///
+    /// The "maybe bad" case is case #2. Here, the impl adds an extra constraint not present in the
+    /// trait. We could in principle allow this, but it interacts in a complex way with early/late
+    /// bound resolution of lifetimes. Basically the presence or absence of a lifetime bound
+    /// affects whether the lifetime is early/late bound, and right now the code breaks if the
+    /// trait has an early bound lifetime parameter and the method does not.
     fn check_region_bounds_on_impl_method<'tcx>(tcx: &ty::ctxt<'tcx>,
                                                 span: Span,
                                                 impl_m: &ty::Method<'tcx>,
@@ -1232,37 +1253,6 @@ fn check_region_bounds_on_impl_method<'tcx>(tcx: &ty::ctxt<'tcx>,
                                                 impl_to_skol_substs: &Substs<'tcx>)
                                                 -> bool
     {
-        /*!
-
-        Check that region bounds on impl method are the same as those
-        on the trait. In principle, it could be ok for there to be
-        fewer region bounds on the impl method, but this leads to an
-        annoying corner case that is painful to handle (described
-        below), so for now we can just forbid it.
-
-        Example (see
-        `src/test/compile-fail/regions-bound-missing-bound-in-impl.rs`):
-
-        trait Foo<'a> {
-            fn method1<'b>();
-            fn method2<'b:'a>();
-        }
-
-        impl<'a> Foo<'a> for ... {
-            fn method1<'b:'a>() { .. case 1, definitely bad .. }
-            fn method2<'b>() { .. case 2, could be ok .. }
-        }
-
-        The "definitely bad" case is case #1. Here, the impl adds an
-        extra constraint not present in the trait.
-
-        The "maybe bad" case is case #2. Here, the impl adds an extra
-        constraint not present in the trait. We could in principle
-        allow this, but it interacts in a complex way with early/late
-        bound resolution of lifetimes. Basically the presence or
-        absence of a lifetime bound affects whether the lifetime is
-        early/late bound, and right now the code breaks if the trait
-        has an early bound lifetime parameter and the method does not. */

@@ -1770,23 +1760,17 @@ fn register_unsize_obligations(&self,
         }
     }

+    /// Returns the type of `def_id` with all generics replaced by fresh type/region variables.
+    /// Also returns the substitution from the type parameters on `def_id` to the fresh variables.
+    /// Registers any trait obligations specified on `def_id` at the same time.
+    ///
+    /// Note that this function is only intended to be used with types (notably, not impls).
+    /// This is because it doesn't do any instantiation of late-bound regions.
     pub fn instantiate_type(&self,
                             span: Span,
                             def_id: ast::DefId)
                             -> TypeAndSubsts<'tcx>
     {
-        /*!
-         * Returns the type of `def_id` with all generics replaced by
-         * by fresh type/region variables. Also returns the
-         * substitution from the type parameters on `def_id` to the
-         * fresh variables. Registers any trait obligations specified
-         * on `def_id` at the same time.
-         *
-         * Note that function is only intended to be used with types
-         * (notably, not impls). This is because it doesn't do any
-         * instantiation of late-bound regions.
-         */
-
         let polytype =
             ty::lookup_item_type(self.tcx(), def_id);
         let substs =
@@ -1886,26 +1870,19 @@ pub fn expr_ty(&self, ex: &ast::Expr) -> Ty<'tcx> {
         }
     }

+    /// Fetch type of `expr` after applying adjustments that have been recorded in the fcx.
     pub fn expr_ty_adjusted(&self, expr: &ast::Expr) -> Ty<'tcx> {
-        /*!
-         * Fetch type of `expr` after applying adjustments that
-         * have been recorded in the fcx.
-         */
-
         let adjustments = self.inh.adjustments.borrow();
         let adjustment = adjustments.get(&expr.id);
         self.adjust_expr_ty(expr, adjustment)
     }

+    /// Apply `adjustment` to the type of `expr`
     pub fn adjust_expr_ty(&self,
                           expr: &ast::Expr,
                           adjustment: Option<&ty::AutoAdjustment<'tcx>>)
                           -> Ty<'tcx>
     {
-        /*!
-         * Apply `adjustment` to the type of `expr`
-         */
-
         let raw_ty = self.expr_ty(expr);
         let raw_ty = self.infcx().shallow_resolve(raw_ty);
         ty::adjust_ty(self.tcx(),
@@ -2013,16 +1990,13 @@ pub fn report_mismatched_types(&self,
         self.infcx().report_mismatched_types(sp, e, a, err)
     }

+    /// Registers an obligation for checking later, during regionck, that the type `ty` must
+    /// outlive the region `r`.
     pub fn register_region_obligation(&self,
                                       origin: infer::SubregionOrigin<'tcx>,
                                       ty: Ty<'tcx>,
                                       r: ty::Region)
     {
-        /*!
-         * Registers an obligation for checking later, during
-         * regionck, that the type `ty` must outlive the region `r`.
-         */
-
         let mut region_obligations = self.inh.region_obligations.borrow_mut();
         let region_obligation = RegionObligation { sub_region: r,
                                                    sup_type: ty,
@@ -2045,31 +2019,29 @@ pub fn add_default_region_param_bounds(&self,
         }
     }

+    /// Given a fully substituted set of bounds (`generic_bounds`), and the values with which each
+    /// type/region parameter was instantiated (`substs`), creates and registers suitable
+    /// trait/region obligations.
+    ///
+    /// For example, if there is a function:
+    ///
+    /// ```
+    /// fn foo<'a,T:'a>(...)
+    /// ```
+    ///
+    /// and a reference:
+    ///
+    /// ```
+    /// let f = foo;
+    /// ```
+    ///
+    /// Then we will create a fresh region variable `'$0` and a fresh type variable `$1` for `'a`
+    /// and `T`. This routine will add a region obligation `$1:'$0` and register it locally.
     pub fn add_obligations_for_parameters(&self,
                                           cause: traits::ObligationCause<'tcx>,
                                           substs: &Substs<'tcx>,
                                           generic_bounds: &ty::GenericBounds<'tcx>)
     {
-        /*!
-         * Given a fully substituted set of bounds (`generic_bounds`),
-         * and the values with which each type/region parameter was
-         * instantiated (`substs`), creates and registers suitable
-         * trait/region obligations.
-         *
-         * For example, if there is a function:
-         *
-         *    fn foo<'a,T:'a>(...)
-         *
-         * and a reference:
-         *
-         *    let f = foo;
-         *
-         * Then we will create a fresh region variable `'$0` and a
-         * fresh type variable `$1` for `'a` and `T`. This routine
-         * will add a region obligation `$1:'$0` and register it locally.
-         */
-
         assert!(!generic_bounds.has_escaping_regions());

         debug!("add_obligations_for_parameters(substs={}, generic_bounds={})",
@@ -2160,22 +2132,17 @@ pub enum LvaluePreference {
     NoPreference
 }

+/// Executes an autoderef loop for the type `t`. At each step, invokes `should_stop` to decide
+/// whether to terminate the loop. Returns the final type and number of derefs that it performed.
+///
+/// Note: this method does not modify the adjustments table. The caller is responsible for
+/// inserting an AutoAdjustment record into the `fcx` using one of the suitable methods.
 pub fn autoderef<'a, 'tcx, T>(fcx: &FnCtxt<'a, 'tcx>, sp: Span,
                               base_ty: Ty<'tcx>,
                               expr_id: Option<ast::NodeId>,
                               mut lvalue_pref: LvaluePreference,
                               should_stop: |Ty<'tcx>, uint| -> Option<T>)
                               -> (Ty<'tcx>, uint, Option<T>)
 {
-    /*!
-     * Executes an autoderef loop for the type `t`. At each step, invokes
-     * `should_stop` to decide whether to terminate the loop. Returns
-     * the final type and number of derefs that it performed.
-     *
-     * Note: this method does not modify the adjustments table. The caller is
-     * responsible for inserting an AutoAdjustment record into the `fcx`
-     * using one of the suitable methods.
-     */
-
     let mut t = base_ty;
     for autoderefs in range(0, fcx.tcx().sess.recursion_limit.get()) {
         let resolved_t = structurally_resolved_type(fcx, sp, t);
@@ -2306,19 +2273,14 @@ fn try_overloaded_deref<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
     make_overloaded_lvalue_return_type(fcx, method_call, method)
 }

+/// For the overloaded lvalue expressions (`*x`, `x[3]`), the trait returns a type of `&T`, but the
+/// actual type we assign to the *expression* is `T`. So this function just peels off the return
+/// type by one layer to yield `T`. It also inserts the `method-callee` into the method map.
 fn make_overloaded_lvalue_return_type<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                                 method_call: Option<MethodCall>,
                                                 method: Option<MethodCallee<'tcx>>)
                                                 -> Option<Ty<'tcx>>
 {
-    /*!
-     * For the overloaded lvalue expressions (`*x`, `x[3]`), the trait
-     * returns a type of `&T`, but the actual type we assign to the
-     * *expression* is `T`. So this function just peels off the return
-     * type by one layer to yield `T`. It also inserts the
-     * `method-callee` into the method map.
-     */
-
     match method {
         Some(method) => {
             let ref_ty = ty::ty_fn_ret(method.ty);
@@ -2380,6 +2342,8 @@ fn autoderef_for_index<'a, 'tcx, T>(fcx: &FnCtxt<'a, 'tcx>,
     }
 }

+/// Autoderefs `base_expr`, looking for a `Slice` impl. If it finds one, installs the relevant
+/// method info and returns the result type (else None).
 fn try_overloaded_slice<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                   method_call: MethodCall,
                                   expr: &ast::Expr,
@@ -2390,12 +2354,6 @@ fn try_overloaded_slice<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                   mutbl: ast::Mutability)
                                   -> Option<Ty<'tcx>> // return type is result of slice
 {
-    /*!
-     * Autoderefs `base_expr`, looking for a `Slice` impl. If it
-     * finds one, installs the relevant method info and returns the
-     * result type (else None).
-     */
-
     let lvalue_pref = match mutbl {
         ast::MutMutable => PreferMutLvalue,
         ast::MutImmutable => NoPreference
@@ -2436,6 +2394,8 @@ fn try_overloaded_slice<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
     })
 }

+/// Checks for a `Slice` (or `SliceMut`) impl at the relevant level of autoderef. If it finds one,
+/// installs method info and returns type of method (else None).
 fn try_overloaded_slice_step<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                        method_call: MethodCall,
                                        expr: &ast::Expr,
@@ -2448,12 +2408,6 @@ fn try_overloaded_slice_step<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                        // result type is type of method being called
                                        -> Option<Ty<'tcx>>
 {
-    /*!
-     * Checks for a `Slice` (or `SliceMut`) impl at the relevant level
-     * of autoderef. If it finds one, installs method info and returns
-     * type of method (else None).
-     */
-
     let method = if mutbl == ast::MutMutable {
         // Try `SliceMut` first, if preferred.
         match fcx.tcx().lang_items.slice_mut_trait() {
@@ -2510,6 +2464,10 @@ fn try_overloaded_slice_step<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
     })
 }

+/// To type-check `base_expr[index_expr]`, we progressively autoderef (and otherwise adjust)
+/// `base_expr`, looking for a type which either supports builtin indexing or overloaded indexing.
+/// This loop implements one step in that search; the autoderef loop is implemented by
+/// `autoderef_for_index`.
 fn try_index_step<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                             method_call: MethodCall,
                             expr: &ast::Expr,
@@ -2519,13 +2477,6 @@ fn try_index_step<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                             lvalue_pref: LvaluePreference)
                             -> Option<(/*index type*/ Ty<'tcx>, /*element type*/ Ty<'tcx>)>
 {
-    /*!
-     * To type-check `base_expr[index_expr]`, we progressively autoderef (and otherwise adjust)
-     * `base_expr`, looking for a type which either supports builtin indexing or overloaded
-     * indexing. This loop implements one step in that search; the autoderef loop is implemented
-     * by `autoderef_for_index`.
-     */
-
     debug!("try_index_step(expr={}, base_expr.id={}, adjusted_ty={}, adjustment={})",
            expr.repr(fcx.tcx()),
            base_expr.repr(fcx.tcx()),
@@ -2712,6 +2663,8 @@ fn check_method_argument_types<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
     }
 }

+/// Generic function that factors out common logic from function calls, method calls and overloaded
+/// operators.
 fn check_argument_types<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                   sp: Span,
                                   fn_inputs: &[Ty<'tcx>],
@@ -2720,12 +2673,6 @@ fn check_argument_types<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                   deref_args: DerefArgs,
                                   variadic: bool,
                                   tuple_arguments: TupleArgumentsFlag) {
-    /*!
-     *
-     * Generic function that factors out common logic from
-     * function calls, method calls and overloaded operators.
-     */
-
     let tcx = fcx.ccx.tcx;

     // Grab the argument types, supplying fresh type variables
@@ -5289,6 +5236,15 @@ fn report_error_if_segment_contains_type_parameters(
         }
     }

+    /// Finds the parameters that the user provided and adds them to `substs`. If too many
+    /// parameters are provided, then reports an error and clears the output vector.
+    ///
+    /// We clear the output vector because that will cause the `adjust_XXX_parameters()` later to
+    /// use inference variables. This seems less likely to lead to derived errors.
+    ///
+    /// Note that we *do not* check for *too few* parameters here. Due to the presence of defaults
+    /// etc that is more complicated. I wanted however to do the reporting of *too many* parameters
+    /// here because we can easily use the precise span of the N+1'th parameter.
     fn push_explicit_parameters_from_segment_to_substs<'a, 'tcx>(
         fcx: &FnCtxt<'a, 'tcx>,
         space: subst::ParamSpace,
@@ -5298,23 +5254,6 @@ fn push_explicit_parameters_from_segment_to_substs<'a, 'tcx>(
         segment: &ast::PathSegment,
         substs: &mut Substs<'tcx>)
     {
-        /*!
-         * Finds the parameters that the user provided and adds them
-         * to `substs`. If too many parameters are provided, then
-         * reports an error and clears the output vector.
-         *
-         * We clear the output vector because that will cause the
-         * `adjust_XXX_parameters()` later to use inference
-         * variables. This seems less likely to lead to derived
-         * errors.
-         *
-         * Note that we *do not* check for *too few* parameters here.
-         * Due to the presence of defaults etc that is more
-         * complicated. I wanted however to do the reporting of *too
-         * many* parameters here because we can easily use the precise
-         * span of the N+1'th parameter.
-         */
-
         match segment.parameters {
             ast::AngleBracketedParameters(ref data) => {
                 push_explicit_angle_bracketed_parameters_from_segment_to_substs(
@@ -5373,6 +5312,12 @@ fn push_explicit_angle_bracketed_parameters_from_segment_to_substs<'a, 'tcx>(
         }
     }

+    /// As with
+    /// `push_explicit_angle_bracketed_parameters_from_segment_to_substs`,
+    /// but intended for `Foo(A,B) -> C` form. This expands to
+    /// roughly the same thing as `Foo<(A,B),C>`. One important
+    /// difference has to do with the treatment of anonymous
+    /// regions, which are translated into bound regions (NYI).
     fn push_explicit_parenthesized_parameters_from_segment_to_substs<'a, 'tcx>(
         fcx: &FnCtxt<'a, 'tcx>,
         space: subst::ParamSpace,
@@ -5381,15 +5326,6 @@ fn push_explicit_parenthesized_parameters_from_segment_to_substs<'a, 'tcx>(
         data: &ast::ParenthesizedParameterData,
         substs: &mut Substs<'tcx>)
     {
-        /*!
-         * As with
-         * `push_explicit_angle_bracketed_parameters_from_segment_to_substs`,
-         * but intended for `Foo(A,B) -> C` form. This expands to
-         * roughly the same thing as `Foo<(A,B),C>`. One important
-         * difference has to do with the treatment of anonymous
-         * regions, which are translated into bound regions (NYI).
-         */
-
         let type_count = type_defs.len(space);
         if type_count < 2 {
             span_err!(fcx.tcx().sess, span, E0167,
diff --git a/src/librustc/middle/typeck/check/regionck.rs b/src/librustc/middle/typeck/check/regionck.rs
index f12b5cdad9886ffae2b25d2eb3057da5264cd535..bc6e7d9d87ffed20635b46cf075e3a000b15aa23 100644
--- a/src/librustc/middle/typeck/check/regionck.rs
+++ b/src/librustc/middle/typeck/check/regionck.rs
@@ -8,115 +8,111 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-
-The region check is a final pass that runs over the AST after we have
-inferred the type constraints but before we have actually finalized
-the types. Its purpose is to embed a variety of region constraints.
-Inserting these constraints as a separate pass is good because (1) it
-localizes the code that has to do with region inference and (2) often
-we cannot know what constraints are needed until the basic types have
-been inferred.
-
-### Interaction with the borrow checker
-
-In general, the job of the borrowck module (which runs later) is to
-check that all soundness criteria are met, given a particular set of
-regions. The job of *this* module is to anticipate the needs of the
-borrow checker and infer regions that will satisfy its requirements.
-It is generally true that the inference doesn't need to be sound,
-meaning that if there is a bug and we inferred bad regions, the borrow
-checker should catch it. This is not entirely true though; for
-example, the borrow checker doesn't check subtyping, and it doesn't
-check that region pointers are always live when they are used. It
-might be worthwhile to fix this so that borrowck serves as a kind of
-verification step -- that would add confidence in the overall
-correctness of the compiler, at the cost of duplicating some type
-checks and effort.
-
-### Inferring the duration of borrows, automatic and otherwise
-
-Whenever we introduce a borrowed pointer, for example as the result of
-a borrow expression `let x = &data`, the lifetime of the pointer `x`
-is always specified as a region inference variable.
-`regionck` has the
-job of adding constraints such that this inference variable is as
-narrow as possible while still accommodating all uses (that is, every
-dereference of the resulting pointer must be within the lifetime).
-
-#### Reborrows
-
-Generally speaking, `regionck` does NOT try to ensure that the data
-`data` will outlive the pointer `x`. That is the job of borrowck. The
-one exception is when "re-borrowing" the contents of another borrowed
-pointer. For example, imagine you have a borrowed pointer `b` with
-lifetime L1 and you have an expression `&*b`. The result of this
-expression will be another borrowed pointer with lifetime L2 (which is
-an inference variable). The borrow checker is going to enforce the
-constraint that L2 < L1, because otherwise you are re-borrowing data
-for a lifetime larger than the original loan. However, without the
-routines in this module, the region inferencer would not know of this
-dependency and thus it might infer the lifetime of L2 to be greater
-than L1 (issue #3148).
-
-There are a number of troublesome scenarios in the tests
-`region-dependent-*.rs`, but here is one example:
-
-    struct Foo { i: int }
-    struct Bar { foo: Foo }
-    fn get_i(x: &'a Bar) -> &'a int {
-       let foo = &x.foo; // Lifetime L1
-       &foo.i // Lifetime L2
-    }
-
-Note that this comes up either with `&` expressions, `ref`
-bindings, and `autorefs`, which are the three ways to introduce
-a borrow.
-
-The key point here is that when you are borrowing a value that
-is "guaranteed" by a borrowed pointer, you must link the
-lifetime of that borrowed pointer (L1, here) to the lifetime of
-the borrow itself (L2). What do I mean by "guaranteed" by a
-borrowed pointer? I mean any data that is reached by first
-dereferencing a borrowed pointer and then either traversing
-interior offsets or owned pointers. We say that the guarantor
-of such data it the region of the borrowed pointer that was
-traversed. This is essentially the same as the ownership
-relation, except that a borrowed pointer never owns its
-contents.
-
-### Inferring borrow kinds for upvars
-
-Whenever there is a closure expression, we need to determine how each
-upvar is used. We do this by initially assigning each upvar an
-immutable "borrow kind" (see `ty::BorrowKind` for details) and then
-"escalating" the kind as needed. The borrow kind proceeds according to
-the following lattice:
-
-    ty::ImmBorrow -> ty::UniqueImmBorrow -> ty::MutBorrow
-
-So, for example, if we see an assignment `x = 5` to an upvar `x`, we
-will promote its borrow kind to mutable borrow. If we see an `&mut x`
-we'll do the same. Naturally, this applies not just to the upvar, but
-to everything owned by `x`, so the result is the same for something
-like `x.f = 5` and so on (presuming `x` is not a borrowed pointer to a
-struct). These adjustments are performed in
-`adjust_upvar_borrow_kind()` (you can trace backwards through the code
-from there).
-
-The fact that we are inferring borrow kinds as we go results in a
-semi-hacky interaction with mem-categorization. In particular,
-mem-categorization will query the current borrow kind as it
-categorizes, and we'll return the *current* value, but this may get
-adjusted later. Therefore, in this module, we generally ignore the
-borrow kind (and derived mutabilities) that are returned from
-mem-categorization, since they may be inaccurate.
-(Another option
-would be to use a unification scheme, where instead of returning a
-concrete borrow kind like `ty::ImmBorrow`, we return a
-`ty::InferBorrow(upvar_id)` or something like that, but this would
-then mean that all later passes would have to check for these figments
-and report an error, and it just seems like more mess in the end.)
-
-*/
+//! The region check is a final pass that runs over the AST after we have
+//! inferred the type constraints but before we have actually finalized
+//! the types. Its purpose is to embed a variety of region constraints.
+//! Inserting these constraints as a separate pass is good because (1) it
+//! localizes the code that has to do with region inference and (2) often
+//! we cannot know what constraints are needed until the basic types have
+//! been inferred.
+//!
+//! ### Interaction with the borrow checker
+//!
+//! In general, the job of the borrowck module (which runs later) is to
+//! check that all soundness criteria are met, given a particular set of
+//! regions. The job of *this* module is to anticipate the needs of the
+//! borrow checker and infer regions that will satisfy its requirements.
+//! It is generally true that the inference doesn't need to be sound,
+//! meaning that if there is a bug and we inferred bad regions, the borrow
+//! checker should catch it. This is not entirely true though; for
+//! example, the borrow checker doesn't check subtyping, and it doesn't
+//! check that region pointers are always live when they are used. It
+//! might be worthwhile to fix this so that borrowck serves as a kind of
+//! verification step -- that would add confidence in the overall
+//! correctness of the compiler, at the cost of duplicating some type
+//! checks and effort.
+//!
+//! ### Inferring the duration of borrows, automatic and otherwise
+//!
+//! Whenever we introduce a borrowed pointer, for example as the result of
+//! a borrow expression `let x = &data`, the lifetime of the pointer `x`
+//! is always specified as a region inference variable. `regionck` has the
+//! job of adding constraints such that this inference variable is as
+//! narrow as possible while still accommodating all uses (that is, every
+//! dereference of the resulting pointer must be within the lifetime).
+//!
+//! #### Reborrows
+//!
+//! Generally speaking, `regionck` does NOT try to ensure that the data
+//! `data` will outlive the pointer `x`. That is the job of borrowck. The
+//! one exception is when "re-borrowing" the contents of another borrowed
+//! pointer. For example, imagine you have a borrowed pointer `b` with
+//! lifetime L1 and you have an expression `&*b`. The result of this
+//! expression will be another borrowed pointer with lifetime L2 (which is
+//! an inference variable). The borrow checker is going to enforce the
+//! constraint that L2 < L1, because otherwise you are re-borrowing data
+//! for a lifetime larger than the original loan. However, without the
+//! routines in this module, the region inferencer would not know of this
+//! dependency and thus it might infer the lifetime of L2 to be greater
+//! than L1 (issue #3148).
+//!
+//! There are a number of troublesome scenarios in the tests
+//! `region-dependent-*.rs`, but here is one example:
+//!
+//!     struct Foo { i: int }
+//!     struct Bar { foo: Foo }
+//!     fn get_i(x: &'a Bar) -> &'a int {
+//!         let foo = &x.foo; // Lifetime L1
+//!         &foo.i // Lifetime L2
+//!     }
+//!
+//! Note that this comes up with `&` expressions, `ref` bindings, and
+//! `autorefs`, which are the three ways to introduce a borrow.
+//!
+//! The key point here is that when you are borrowing a value that
+//! is "guaranteed" by a borrowed pointer, you must link the
+//! lifetime of that borrowed pointer (L1, here) to the lifetime of
+//! the borrow itself (L2). What do I mean by "guaranteed" by a
+//! borrowed pointer? I mean any data that is reached by first
+//! dereferencing a borrowed pointer and then either traversing
+//! interior offsets or owned pointers. We say that the guarantor
+//! of such data is the region of the borrowed pointer that was
+//! traversed. This is essentially the same as the ownership
+//! relation, except that a borrowed pointer never owns its
+//! contents.
+//!
+//! ### Inferring borrow kinds for upvars
+//!
+//! Whenever there is a closure expression, we need to determine how each
+//! upvar is used. We do this by initially assigning each upvar an
+//! immutable "borrow kind" (see `ty::BorrowKind` for details) and then
+//! "escalating" the kind as needed. The borrow kind proceeds according to
+//! the following lattice:
+//!
+//!     ty::ImmBorrow -> ty::UniqueImmBorrow -> ty::MutBorrow
+//!
+//! So, for example, if we see an assignment `x = 5` to an upvar `x`, we
+//! will promote its borrow kind to mutable borrow. If we see an `&mut x`
+//! we'll do the same. Naturally, this applies not just to the upvar, but
+//! to everything owned by `x`, so the result is the same for something
+//! like `x.f = 5` and so on (presuming `x` is not a borrowed pointer to a
+//! struct). These adjustments are performed in
+//! `adjust_upvar_borrow_kind()` (you can trace backwards through the code
+//! from there).
+//!
+//! The fact that we are inferring borrow kinds as we go results in a
+//! semi-hacky interaction with mem-categorization. In particular,
+//! mem-categorization will query the current borrow kind as it
+//! categorizes, and we'll return the *current* value, but this may get
+//! adjusted later. Therefore, in this module, we generally ignore the
+//! borrow kind (and derived mutabilities) that are returned from
+//! mem-categorization, since they may be inaccurate. (Another option
+//! would be to use a unification scheme, where instead of returning a
+//! concrete borrow kind like `ty::ImmBorrow`, we return a
+//! `ty::InferBorrow(upvar_id)` or something like that, but this would
+//! then mean that all later passes would have to check for these figments
+//! and report an error, and it just seems like more mess in the end.)

 use middle::def;
 use middle::mem_categorization as mc;
@@ -177,15 +173,11 @@ pub fn regionck_fn(fcx: &FnCtxt, id: ast::NodeId, blk: &ast::Block) {
     fcx.infcx().resolve_regions_and_report_errors();
 }

+/// Checks that the types in `component_tys` are well-formed. This will add constraints into the
+/// region graph. Does *not* run `resolve_regions_and_report_errors` and so forth.
 pub fn regionck_ensure_component_tys_wf<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
                                                   span: Span,
                                                   component_tys: &[Ty<'tcx>]) {
-    /*!
-     * Checks that the types in `component_tys` are well-formed.
-     * This will add constraints into the region graph.
-     * Does *not* run `resolve_regions_and_report_errors` and so forth.
-     */
-
     let mut rcx = Rcx::new(fcx, 0);
     for &component_ty in component_tys.iter() {
         // Check that each type outlives the empty region. Since the
@@ -239,12 +231,8 @@ pub struct Rcx<'a, 'tcx: 'a> {
     maybe_links: MaybeLinkMap<'tcx>
 }

+/// Returns the validity region of `def` -- that is, how long is `def` valid?
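+///
+/// For example (an illustrative sketch):
+///
+/// ```
+/// fn f() {         // the enclosing block...
+///     let x = 5i;  // ...is how long a local like `x` (a `DefLocal`) is valid
+/// }
+/// ```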
 fn region_of_def(fcx: &FnCtxt, def: def::Def) -> ty::Region {
-    /*!
-     * Returns the validity region of `def` -- that is, how long
-     * is `def` valid?
-     */
-
     let tcx = fcx.tcx();
     match def {
         def::DefLocal(node_id) => {
@@ -283,35 +271,30 @@ pub fn set_repeating_scope(&mut self, scope: ast::NodeId) -> ast::NodeId {
         old_scope
     }

+    /// Try to resolve the type for the given node, returning t_err if an error results. Note that
+    /// we never care about the details of the error, the same error will be detected and reported
+    /// in the writeback phase.
+    ///
+    /// Note one important point: we do not attempt to resolve *region variables* here. This is
+    /// because regionck is essentially adding constraints to those region variables and so may yet
+    /// influence how they are resolved.
+    ///
+    /// Consider this silly example:
+    ///
+    /// ```
+    /// fn borrow(x: &int) -> &int {x}
+    /// fn foo(x: @int) -> int {  // block: B
+    ///     let b = borrow(x);    // region: <R0>
+    ///     *b
+    /// }
+    /// ```
+    ///
+    /// Here, the region of `b` will be `<R0>`. `<R0>` is constrained to be some subregion of the
+    /// block B and some superregion of the call. If we forced it now, we'd choose the smaller
+    /// region (the call). But that would make the *b illegal. Since we don't resolve, the type
+    /// of b will be `&<R0>.int` and then `*b` will require that `<R0>` be bigger than the let and
+    /// the `*b` expression, so we will effectively resolve `<R0>` to be the block B.
     pub fn resolve_type(&self, unresolved_ty: Ty<'tcx>) -> Ty<'tcx> {
-        /*!
-         * Try to resolve the type for the given node, returning
-         * t_err if an error results. Note that we never care
-         * about the details of the error, the same error will be
-         * detected and reported in the writeback phase.
-         *
-         * Note one important point: we do not attempt to resolve
-         * *region variables* here. This is because regionck is
-         * essentially adding constraints to those region variables
-         * and so may yet influence how they are resolved.
-         *
-         * Consider this silly example:
-         *
-         *     fn borrow(x: &int) -> &int {x}
-         *     fn foo(x: @int) -> int {  // block: B
-         *         let b = borrow(x);    // region: <R0>
-         *         *b
-         *     }
-         *
-         * Here, the region of `b` will be `<R0>`. `<R0>` is
-         * constrainted to be some subregion of the block B and some
-         * superregion of the call. If we forced it now, we'd choose
-         * the smaller region (the call). But that would make the *b
-         * illegal. Since we don't resolve, the type of b will be
-         * `&<R0>.int` and then `*b` will require that `<R0>` be
-         * bigger than the let and the `*b` expression, so we will
-         * effectively resolve `<R0>` to be the block B.
-         */
         match resolve_type(self.fcx.infcx(), None, unresolved_ty,
                            resolve_and_force_all_but_regions) {
             Ok(t) => t,
@@ -384,25 +367,19 @@ fn visit_region_obligations(&mut self, node_id: ast::NodeId)
         }
     }

+    /// This method populates the region map's `free_region_map`. It walks over the transformed
+    /// argument and return types for each function just before we check the body of that function,
+    /// looking for types where you have a borrowed pointer to other borrowed data (e.g., `&'a &'b
+    /// [uint]`). We do not allow references to outlive the things they point at, so we can assume
+    /// that `'a <= 'b`. This holds for both the argument and return types, basically because, on
+    /// the caller side, the caller is responsible for checking that the type of every expression
+    /// (including the actual values for the arguments, as well as the return type of the fn call)
+    /// is well-formed.
+    ///
+    /// Tests: `src/test/compile-fail/regions-free-region-ordering-*.rs`
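+    ///
+    /// For example (an illustrative sketch, not a test from the suite):
+    ///
+    /// ```
+    /// fn f<'a, 'b>(x: &'a &'b [uint]) -> uint {
+    ///     x.len() // well-formedness of `&'a &'b [uint]` lets us assume `'a <= 'b`
+    /// }
+    /// ```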
+ /// + /// Tests: `src/test/compile-fail/regions-free-region-ordering-*.rs` fn relate_free_regions(&mut self, fn_sig_tys: &[Ty<'tcx>], body_id: ast::NodeId) { - /*! - * This method populates the region map's `free_region_map`. - * It walks over the transformed argument and return types for - * each function just before we check the body of that - * function, looking for types where you have a borrowed - * pointer to other borrowed data (e.g., `&'a &'b [uint]`. We - * do not allow references to outlive the things they point - * at, so we can assume that `'a <= 'b`. This holds for both - * the argument and return types, basically because, on the caller - * side, the caller is responsible for checking that the type of - * every expression (including the actual values for the arguments, - * as well as the return type of the fn call) is well-formed. - * - * Tests: `src/test/compile-fail/regions-free-region-ordering-*.rs` - */ - debug!("relate_free_regions >>"); let tcx = self.tcx(); @@ -921,19 +898,15 @@ fn check_expr_fn_block(rcx: &mut Rcx, _ => {} } + /// Make sure that the type of all free variables referenced inside a closure/proc outlive the + /// closure/proc's lifetime bound. This is just a special case of the usual rules about closed + /// over values outliving the object's lifetime bound. fn ensure_free_variable_types_outlive_closure_bound( rcx: &mut Rcx, bounds: ty::ExistentialBounds, expr: &ast::Expr, freevars: &[ty::Freevar]) { - /*! - * Make sure that the type of all free variables referenced - * inside a closure/proc outlive the closure/proc's lifetime - * bound. This is just a special case of the usual rules about - * closed over values outliving the object's lifetime bound. - */ - let tcx = rcx.fcx.ccx.tcx; debug!("ensure_free_variable_types_outlive_closure_bound({}, {})", @@ -984,18 +957,14 @@ fn ensure_free_variable_types_outlive_closure_bound( } } + /// Make sure that all free variables referenced inside the closure outlive the closure's + /// lifetime bound. Also, create an entry in the upvar_borrows map with a region. fn constrain_free_variables_in_by_ref_closure( rcx: &mut Rcx, region_bound: ty::Region, expr: &ast::Expr, freevars: &[ty::Freevar]) { - /*! - * Make sure that all free variables referenced inside the - * closure outlive the closure's lifetime bound. Also, create - * an entry in the upvar_borrows map with a region. - */ - let tcx = rcx.fcx.ccx.tcx; let infcx = rcx.fcx.infcx(); debug!("constrain_free_variables({}, {})", @@ -1183,15 +1152,12 @@ fn constrain_call<'a, I: Iterator<&'a ast::Expr>>(rcx: &mut Rcx, } } +/// Invoked on any auto-dereference that occurs. Checks that if this is a region pointer being +/// dereferenced, the lifetime of the pointer includes the deref expr. fn constrain_autoderefs<'a, 'tcx>(rcx: &mut Rcx<'a, 'tcx>, deref_expr: &ast::Expr, derefs: uint, mut derefd_ty: Ty<'tcx>) { - /*! - * Invoked on any auto-dereference that occurs. Checks that if - * this is a region pointer being dereferenced, the lifetime of - * the pointer includes the deref expr. - */ let r_deref_expr = ty::ReScope(CodeExtent::from_node_id(deref_expr.id)); for i in range(0u, derefs) { debug!("constrain_autoderefs(deref_expr=?, derefd_ty={}, derefs={}/{}", @@ -1259,16 +1225,12 @@ pub fn mk_subregion_due_to_dereference(rcx: &mut Rcx, } +/// Invoked on any index expression that occurs. Checks that if this is a slice being indexed, the +/// lifetime of the pointer includes the deref expr. 
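+///
+/// A rough illustration (hypothetical code):
+///
+/// ```text
+/// fn first<'a>(v: &'a [uint]) -> uint {
+///     v[0]  // the slice's lifetime 'a must include this index expression
+/// }
+/// ```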
 fn constrain_index<'a, 'tcx>(rcx: &mut Rcx<'a, 'tcx>,
                              index_expr: &ast::Expr,
                              indexed_ty: Ty<'tcx>)
 {
-    /*!
-     * Invoked on any index expression that occurs. Checks that if
-     * this is a slice being indexed, the lifetime of the pointer
-     * includes the deref expr.
-     */
-
     debug!("constrain_index(index_expr=?, indexed_ty={}",
            rcx.fcx.infcx().ty_to_string(indexed_ty));
@@ -1286,18 +1248,14 @@ fn constrain_index<'a, 'tcx>(rcx: &mut Rcx<'a, 'tcx>,
     }
 }

+/// Guarantees that any lifetimes which appear in the type of the node `id` (after applying
+/// adjustments) are valid for at least `minimum_lifetime`.
 fn type_of_node_must_outlive<'a, 'tcx>(
     rcx: &mut Rcx<'a, 'tcx>,
     origin: infer::SubregionOrigin<'tcx>,
     id: ast::NodeId,
     minimum_lifetime: ty::Region)
 {
-    /*!
-     * Guarantees that any lifetimes which appear in the type of
-     * the node `id` (after applying adjustments) are valid for at
-     * least `minimum_lifetime`
-     */
-
     let tcx = rcx.fcx.tcx();

     // Try to resolve the type. If we encounter an error, then typeck
@@ -1314,14 +1272,10 @@ fn type_of_node_must_outlive<'a, 'tcx>(
     type_must_outlive(rcx, origin, ty, minimum_lifetime);
 }

+/// Computes the guarantor for an expression `&base` and then ensures that the lifetime of the
+/// resulting pointer is linked to the lifetime of its guarantor (if any).
 fn link_addr_of(rcx: &mut Rcx, expr: &ast::Expr,
                 mutability: ast::Mutability, base: &ast::Expr) {
-    /*!
-     * Computes the guarantor for an expression `&base` and then
-     * ensures that the lifetime of the resulting pointer is linked
-     * to the lifetime of its guarantor (if any).
-     */
-
     debug!("link_addr_of(base=?)");

     let cmt = {
@@ -1331,13 +1285,10 @@ fn link_addr_of(rcx: &mut Rcx, expr: &ast::Expr,
     link_region_from_node_type(rcx, expr.span, expr.id, mutability, cmt);
 }

+/// Computes the guarantors for any ref bindings in a `let` and
+/// then ensures that the lifetime of the resulting pointer is
+/// linked to the lifetime of the initialization expression.
 fn link_local(rcx: &Rcx, local: &ast::Local) {
-    /*!
-     * Computes the guarantors for any ref bindings in a `let` and
-     * then ensures that the lifetime of the resulting pointer is
-     * linked to the lifetime of the initialization expression.
-     */
-
     debug!("regionck::for_local()");
     let init_expr = match local.init {
         None => { return; }
@@ -1348,12 +1299,10 @@ fn link_local(rcx: &Rcx, local: &ast::Local) {
     link_pattern(rcx, mc, discr_cmt, &*local.pat);
 }

+/// Computes the guarantors for any ref bindings in a match and
+/// then ensures that the lifetime of the resulting pointer is
+/// linked to the lifetime of its guarantor (if any).
 fn link_match(rcx: &Rcx, discr: &ast::Expr, arms: &[ast::Arm]) {
-    /*!
-     * Computes the guarantors for any ref bindings in a match and
-     * then ensures that the lifetime of the resulting pointer is
-     * linked to the lifetime of its guarantor (if any).
-     */
    debug!("regionck::for_match()");

    let mc = mc::MemCategorizationContext::new(rcx);
@@ -1366,15 +1315,12 @@ fn link_match(rcx: &Rcx, discr: &ast::Expr, arms: &[ast::Arm]) {
    }
 }

+/// Link lifetimes of any ref bindings in `root_pat` to the pointers found in the discriminant, if
+/// needed.
 fn link_pattern<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>,
                           mc: mc::MemCategorizationContext<Rcx<'a, 'tcx>>,
                           discr_cmt: mc::cmt<'tcx>,
                           root_pat: &ast::Pat) {
-    /*!
-     * Link lifetimes of any ref bindings in `root_pat` to
-     * the pointers found in the discriminant, if needed.
-     */
-
     let _ = mc.cat_pattern(discr_cmt, root_pat, |mc, sub_cmt, sub_pat| {
         match sub_pat.node {
             // `ref x` pattern
@@ -1400,14 +1346,12 @@ fn link_pattern<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>,
     });
 }

+/// Link lifetime of borrowed pointer resulting from autoref to lifetimes in the value being
+/// autoref'd.
 fn link_autoref(rcx: &Rcx,
                 expr: &ast::Expr,
                 autoderefs: uint,
                 autoref: &ty::AutoRef)
 {
-    /*!
-     * Link lifetime of borrowed pointer resulting from autoref
-     * to lifetimes in the value being autoref'd.
-     */
    debug!("link_autoref(autoref={})", autoref);

    let mc = mc::MemCategorizationContext::new(rcx);
@@ -1424,15 +1368,11 @@ fn link_autoref(rcx: &Rcx,
    }
 }

+/// Computes the guarantor for cases where the `expr` is being passed by implicit reference and
+/// must outlive `callee_scope`.
 fn link_by_ref(rcx: &Rcx,
                expr: &ast::Expr,
                callee_scope: CodeExtent) {
-    /*!
-     * Computes the guarantor for cases where the `expr` is
-     * being passed by implicit reference and must outlive
-     * `callee_scope`.
-     */
-
     let tcx = rcx.tcx();
     debug!("link_by_ref(expr={}, callee_scope={})",
            expr.repr(tcx), callee_scope);
@@ -1442,17 +1382,13 @@ fn link_by_ref(rcx: &Rcx,
     link_region(rcx, expr.span, borrow_region, ty::ImmBorrow, expr_cmt);
 }

+/// Like `link_region()`, except that the region is extracted from the type of `id`, which must be
+/// some reference (`&T`, `&str`, etc).
 fn link_region_from_node_type<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>,
                                         span: Span,
                                         id: ast::NodeId,
                                         mutbl: ast::Mutability,
                                         cmt_borrowed: mc::cmt<'tcx>) {
-    /*!
-     * Like `link_region()`, except that the region is
-     * extracted from the type of `id`, which must be some
-     * reference (`&T`, `&str`, etc).
-     */
-
     let rptr_ty = rcx.resolve_node_type(id);
     if !ty::type_is_error(rptr_ty) {
         let tcx = rcx.fcx.ccx.tcx;
@@ -1463,19 +1399,14 @@ fn link_region_from_node_type<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>,
     }
 }

+/// Informs the inference engine that `borrow_cmt` is being borrowed with kind `borrow_kind` and
+/// lifetime `borrow_region`. In order to ensure borrowck is satisfied, this may create constraints
+/// between regions, as explained in `link_reborrowed_region()`.
 fn link_region<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>,
                          span: Span,
                          borrow_region: ty::Region,
                          borrow_kind: ty::BorrowKind,
                          borrow_cmt: mc::cmt<'tcx>) {
-    /*!
-     * Informs the inference engine that `borrow_cmt` is being
-     * borrowed with kind `borrow_kind` and lifetime `borrow_region`.
-     * In order to ensure borrowck is satisfied, this may create
-     * constraints between regions, as explained in
-     * `link_reborrowed_region()`.
-     */
-
     let mut borrow_cmt = borrow_cmt;
     let mut borrow_kind = borrow_kind;

@@ -1525,6 +1456,46 @@ fn link_region<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>,
     }
 }

+/// This is the most complicated case: the path being borrowed is
+/// itself the referent of a borrowed pointer. Let me give an
+/// example fragment of code to make clear(er) the situation:
+///
+///    let r: &'a mut T = ...;  // the original reference "r" has lifetime 'a
+///    ...
+///    &'z *r                   // the reborrow has lifetime 'z
+///
+/// Now, in this case, our primary job is to add the inference
+/// constraint that `'z <= 'a`. Given this setup, let's clarify the
+/// parameters in (rough) terms of the example:
+///
+///     A borrow of: `& 'z bk * r` where `r` has type `& 'a bk T`
+///     borrow_region   ^~                 ref_region    ^~
+///     borrow_kind        ^~                 ref_kind      ^~
+///     ref_cmt                 ^
+///
+/// Here `bk` stands for some borrow-kind (e.g., `mut`, `uniq`, etc).
+///
+/// Unfortunately, there are some complications beyond the simple
+/// scenario I just painted:
+///
+/// 1. 
The reference `r` might in fact be a "by-ref" upvar. In that +/// case, we have two jobs. First, we are inferring whether this reference +/// should be an `&T`, `&mut T`, or `&uniq T` reference, and we must +/// adjust that based on this borrow (e.g., if this is an `&mut` borrow, +/// then `r` must be an `&mut` reference). Second, whenever we link +/// two regions (here, `'z <= 'a`), we supply a *cause*, and in this +/// case we adjust the cause to indicate that the reference being +/// "reborrowed" is itself an upvar. This provides a nicer error message +/// should something go wrong. +/// +/// 2. There may in fact be more levels of reborrowing. In the +/// example, I said the borrow was like `&'z *r`, but it might +/// in fact be a borrow like `&'z **q` where `q` has type `&'a +/// &'b mut T`. In that case, we want to ensure that `'z <= 'a` +/// and `'z <= 'b`. This is explained more below. +/// +/// The return value of this function indicates whether we need to +/// recurse and process `ref_cmt` (see case 2 above). fn link_reborrowed_region<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>, span: Span, borrow_region: ty::Region, @@ -1535,49 +1506,6 @@ fn link_reborrowed_region<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>, note: mc::Note) -> Option<(mc::cmt<'tcx>, ty::BorrowKind)> { - /*! - * This is the most complicated case: the path being borrowed is - * itself the referent of a borrowed pointer. Let me give an - * example fragment of code to make clear(er) the situation: - * - * let r: &'a mut T = ...; // the original reference "r" has lifetime 'a - * ... - * &'z *r // the reborrow has lifetime 'z - * - * Now, in this case, our primary job is to add the inference - * constraint that `'z <= 'a`. Given this setup, let's clarify the - * parameters in (roughly) terms of the example: - * - * A borrow of: `& 'z bk * r` where `r` has type `& 'a bk T` - * borrow_region ^~ ref_region ^~ - * borrow_kind ^~ ref_kind ^~ - * ref_cmt ^ - * - * Here `bk` stands for some borrow-kind (e.g., `mut`, `uniq`, etc). - * - * Unfortunately, there are some complications beyond the simple - * scenario I just painted: - * - * 1. The reference `r` might in fact be a "by-ref" upvar. In that - * case, we have two jobs. First, we are inferring whether this reference - * should be an `&T`, `&mut T`, or `&uniq T` reference, and we must - * adjust that based on this borrow (e.g., if this is an `&mut` borrow, - * then `r` must be an `&mut` reference). Second, whenever we link - * two regions (here, `'z <= 'a`), we supply a *cause*, and in this - * case we adjust the cause to indicate that the reference being - * "reborrowed" is itself an upvar. This provides a nicer error message - * should something go wrong. - * - * 2. There may in fact be more levels of reborrowing. In the - * example, I said the borrow was like `&'z *r`, but it might - * in fact be a borrow like `&'z **q` where `q` has type `&'a - * &'b mut T`. In that case, we want to ensure that `'z <= 'a` - * and `'z <= 'b`. This is explained more below. - * - * The return value of this function indicates whether we need to - * recurse and process `ref_cmt` (see case 2 above). - */ - // Possible upvar ID we may need later to create an entry in the // maybe link map. @@ -1715,27 +1643,19 @@ fn link_reborrowed_region<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>, } } +/// Adjusts the inferred borrow_kind as needed to account for upvars that are assigned to in an +/// assignment expression. fn adjust_borrow_kind_for_assignment_lhs(rcx: &Rcx, lhs: &ast::Expr) { - /*! 
-     * Adjusts the inferred borrow_kind as needed to account
-     * for upvars that are assigned to in an assignment
-     * expression.
-     */
-
     let mc = mc::MemCategorizationContext::new(rcx);
     let cmt = ignore_err!(mc.cat_expr(lhs));
     adjust_upvar_borrow_kind_for_mut(rcx, cmt);
 }

+/// Indicates that `cmt` is being directly mutated (e.g., assigned to). If cmt contains any by-ref
+/// upvars, this implies that those upvars must be borrowed using an `&mut` borrow.
 fn adjust_upvar_borrow_kind_for_mut<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>,
                                               cmt: mc::cmt<'tcx>) {
-    /*!
-     * Indicates that `cmt` is being directly mutated (e.g., assigned
-     * to). If cmt contains any by-ref upvars, this implies that
-     * those upvars must be borrowed using an `&mut` borow.
-     */
-
     let mut cmt = cmt;
     loop {
         debug!("adjust_upvar_borrow_kind_for_mut(cmt={})",
@@ -1834,16 +1754,12 @@ fn adjust_upvar_borrow_kind_for_unique<'a, 'tcx>(rcx: &Rcx<'a, 'tcx>, cmt: mc::c
     }
 }

+/// Indicates that the borrow_kind of `outer_upvar_id` must permit a reborrowing with the
+/// borrow_kind of `inner_upvar_id`. This occurs in nested closures, see comment above at the call
+/// to this function.
 fn link_upvar_borrow_kind_for_nested_closures(rcx: &mut Rcx,
                                               inner_upvar_id: ty::UpvarId,
                                               outer_upvar_id: ty::UpvarId) {
-    /*!
-     * Indicates that the borrow_kind of `outer_upvar_id` must
-     * permit a reborrowing with the borrow_kind of `inner_upvar_id`.
-     * This occurs in nested closures, see comment above at the call to
-     * this function.
-     */
-
     debug!("link_upvar_borrow_kind: inner_upvar_id={} outer_upvar_id={}",
            inner_upvar_id, outer_upvar_id);
@@ -1867,18 +1783,14 @@ fn adjust_upvar_borrow_kind_for_loan(rcx: &Rcx,
     adjust_upvar_borrow_kind(rcx, upvar_id, upvar_borrow, kind)
 }

+/// We infer the borrow_kind with which to borrow upvars in a stack closure. The borrow_kind
+/// basically follows a lattice of `imm < unique-imm < mut`, moving from left to right as needed
+/// (but never right to left). Here the argument `kind` is the borrow_kind that is required by
+/// some particular use.
 fn adjust_upvar_borrow_kind(rcx: &Rcx,
                             upvar_id: ty::UpvarId,
                             upvar_borrow: &mut ty::UpvarBorrow,
                             kind: ty::BorrowKind) {
-    /*!
-     * We infer the borrow_kind with which to borrow upvars in a stack
-     * closure. The borrow_kind basically follows a lattice of
-     * `imm < unique-imm < mut`, moving from left to right as needed (but never
-     * right to left). Here the argument `mutbl` is the borrow_kind that
-     * is required by some particular use.
-     */
-
     debug!("adjust_upvar_borrow_kind: id={} kind=({} -> {})",
            upvar_id, upvar_borrow.kind, kind);
@@ -1911,15 +1823,12 @@ fn adjust_upvar_borrow_kind(rcx: &Rcx,
     }
 }

+/// Ensures that all borrowed data reachable via `ty` outlives `region`.
 fn type_must_outlive<'a, 'tcx>(rcx: &mut Rcx<'a, 'tcx>,
                                origin: infer::SubregionOrigin<'tcx>,
                                ty: Ty<'tcx>,
                                region: ty::Region) {
-    /*!
-     * Ensures that all borrowed data reachable via `ty` outlives `region`.
-     */
-
     debug!("type_must_outlive(ty={}, region={})",
            ty.repr(rcx.tcx()),
            region.repr(rcx.tcx()));
diff --git a/src/librustc/middle/typeck/check/regionmanip.rs b/src/librustc/middle/typeck/check/regionmanip.rs
index 9fd24c4ee784e411a0bfc2df0a9bff9bc391388f..55214618aa90b5856bb8968487e07e45ae44617a 100644
--- a/src/librustc/middle/typeck/check/regionmanip.rs
+++ b/src/librustc/middle/typeck/check/regionmanip.rs
@@ -33,18 +33,14 @@ struct Wf<'a, 'tcx: 'a> {
     out: Vec>,
 }

+/// This routine computes the well-formedness constraints that must hold for the type `ty` to
+/// appear in a context with lifetime `outer_region`.
 pub fn region_wf_constraints<'tcx>(
     tcx: &ty::ctxt<'tcx>,
     ty: Ty<'tcx>,
     outer_region: ty::Region)
     -> Vec>
 {
-    /*!
-     * This routine computes the well-formedness constraints that must
-     * hold for the type `ty` to appear in a context with lifetime
-     * `outer_region`
-     */
-
     let mut stack = Vec::new();
     stack.push((outer_region, None));
     let mut wf = Wf { tcx: tcx,
@@ -168,12 +164,9 @@ fn accumulate_from_rptr(&mut self,
         self.stack.pop().unwrap();
     }

+    /// Pushes a constraint that `r_b` must outlive the top region on the stack.
    fn push_region_constraint_from_top(&mut self,
                                       r_b: ty::Region) {
-        /*!
-         * Pushes a constraint that `r_b` must outlive the
-         * top region on the stack.
-         */

        // Indicates that we have found borrowed content with a lifetime
        // of at least `r_b`. This adds a constraint that `r_b` must
@@ -192,30 +185,26 @@ fn push_region_constraint_from_top(&mut self,
        self.push_sub_region_constraint(opt_ty, r_a, r_b);
    }

+    /// Pushes a constraint that `r_a <= r_b`, due to `opt_ty`.
    fn push_sub_region_constraint(&mut self,
                                  opt_ty: Option<Ty<'tcx>>,
                                  r_a: ty::Region,
                                  r_b: ty::Region) {
-        /*! Pushes a constraint that `r_a <= r_b`, due to `opt_ty` */

        self.out.push(RegionSubRegionConstraint(opt_ty, r_a, r_b));
    }

+    /// Pushes a constraint that `param_ty` must outlive the top region on the stack.
    fn push_param_constraint_from_top(&mut self,
                                      param_ty: ty::ParamTy) {
-        /*!
-         * Pushes a constraint that `param_ty` must outlive the
-         * top region on the stack.
-         */
-
        let &(region, opt_ty) = self.stack.last().unwrap();
        self.push_param_constraint(region, opt_ty, param_ty);
    }

+    /// Pushes a constraint that `region <= param_ty`, due to `opt_ty`.
    fn push_param_constraint(&mut self,
                             region: ty::Region,
                             opt_ty: Option<Ty<'tcx>>,
                             param_ty: ty::ParamTy) {
-        /*! Pushes a constraint that `region <= param_ty`, due to `opt_ty` */

        self.out.push(RegionSubParamConstraint(opt_ty, region, param_ty));
    }
diff --git a/src/librustc/middle/typeck/check/vtable.rs b/src/librustc/middle/typeck/check/vtable.rs
index 1619a4224f9f02218fd92562dc2e02c723e0ac7a..51978a01f712410bb0f8857f3205a376e863c81c 100644
--- a/src/librustc/middle/typeck/check/vtable.rs
+++ b/src/librustc/middle/typeck/check/vtable.rs
@@ -168,17 +168,14 @@ pub fn check_object_safety<'tcx>(tcx: &ty::ctxt<'tcx>,
        }
    }

-    // Returns a vec of error messages. If hte vec is empty - no errors!
+    /// Returns a vec of error messages. If the vec is empty - no errors!
+    ///
+    /// There are some limitations to calling functions through an object, because (a) the self
+    /// type is not known (that's the whole point of a trait instance, after all, to obscure the
+    /// self type) and (b) the call must go through a vtable and hence cannot be monomorphized.
    fn check_object_safety_of_method<'tcx>(tcx: &ty::ctxt<'tcx>,
                                           method: &ty::Method<'tcx>)
                                           -> Vec<String> {
-        /*! 
-         * There are some limitations to calling functions through an
-         * object, because (a) the self type is not known
-         * (that's the whole point of a trait instance, after all, to
-         * obscure the self type) and (b) the call must go through a
-         * vtable and hence cannot be monomorphized.
-         */
        let mut msgs = Vec::new();

        let method_name = method.name.repr(tcx);
@@ -455,8 +452,8 @@ pub fn maybe_report_ambiguity<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
    }
 }

+/// Select as many obligations as we can at present.
 pub fn select_fcx_obligations_where_possible(fcx: &FnCtxt) {
-    /*! Select as many obligations as we can at present. */

     match
         fcx.inh.fulfillment_cx
@@ -468,14 +465,10 @@ pub fn select_fcx_obligations_where_possible(fcx: &FnCtxt) {
     }
 }

+/// Try to select any fcx obligation that we haven't tried yet, in an effort to improve inference.
+/// You could just call `select_fcx_obligations_where_possible` except that it leads to repeated
+/// work.
 pub fn select_new_fcx_obligations(fcx: &FnCtxt) {
-    /*!
-     * Try to select any fcx obligation that we haven't tried yet,
-     * in an effort to improve inference. You could just call
-     * `select_fcx_obligations_where_possible` except that it leads
-     * to repeated work.
-     */
-
     match
         fcx.inh.fulfillment_cx
         .borrow_mut()
diff --git a/src/librustc/middle/typeck/check/wf.rs b/src/librustc/middle/typeck/check/wf.rs
index d9c6c3cb6262ad7105a38071e2466821faf70437..502e37aa9f37040fcea2e16f4403a127015519d3 100644
--- a/src/librustc/middle/typeck/check/wf.rs
+++ b/src/librustc/middle/typeck/check/wf.rs
@@ -38,24 +38,18 @@ pub fn new(ccx: &'ccx CrateCtxt<'ccx, 'tcx>) -> CheckTypeWellFormedVisitor<'ccx,
        CheckTypeWellFormedVisitor { ccx: ccx, cache: HashSet::new() }
    }

+    /// Checks that the field types (in a struct def'n) or argument types (in an enum def'n) are
+    /// well-formed, meaning that they do not require any constraints not declared in the struct
+    /// definition itself. For example, this definition would be illegal:
+    ///
+    ///     struct Ref<'a, T> { x: &'a T }
+    ///
+    /// because the type did not declare that `T:'a`.
+    ///
+    /// We do this check as a pre-pass before checking fn bodies because if these constraints are
+    /// not included it frequently leads to confusing errors in fn bodies. So it's better to check
+    /// the types first.
    fn check_item_well_formed(&mut self, item: &ast::Item) {
-        /*!
-         * Checks that the field types (in a struct def'n) or
-         * argument types (in an enum def'n) are well-formed,
-         * meaning that they do not require any constraints not
-         * declared in the struct definition itself.
-         * For example, this definition would be illegal:
-         *
-         *     struct Ref<'a, T> { x: &'a T }
-         *
-         * because the type did not declare that `T:'a`.
-         *
-         * We do this check as a pre-pass before checking fn bodies
-         * because if these constraints are not included it frequently
-         * leads to confusing errors in fn bodies. So it's better to check
-         * the types first.
-         */
-
        let ccx = self.ccx;
        debug!("check_item_well_formed(it.id={}, it.ident={})",
               item.id,
@@ -107,16 +101,12 @@ fn with_fcx(&mut self,
        regionck::regionck_item(&fcx, item);
    }

+    /// In a type definition, we check to ensure that the types of the fields are well-formed.
    fn check_type_defn(&mut self,
                       item: &ast::Item,
                       lookup_fields: for<'fcx> |&FnCtxt<'fcx, 'tcx>| -> Vec>)
    {
-        /*!
-         * In a type definition, we check that to ensure that the types of the fields are
-         * well-formed.
-         */
-
        self.with_fcx(item, |this, fcx| {
            let variants = lookup_fields(fcx);
            let mut bounds_checker = BoundsChecker::new(fcx,
@@ -282,22 +272,16 @@ pub fn new(fcx: &'cx FnCtxt<'cx,'tcx>,
                       cache: cache, binding_count: 0 }
    }

+    /// Given a trait ref like `A : Trait<B>`, where `Trait` is defined as (say):
+    ///
+    ///     trait Trait<B:OtherTrait> : Copy { ... }
+    ///
+    /// This routine will check that `B : OtherTrait` and `A : Trait<B>`. It will also recursively
+    /// check that the types `A` and `B` are well-formed.
+    ///
+    /// Note that it does not (currently, at least) check that `A : Copy` (that check is delegated
+    /// to the point where impl `A : Trait<B>` is implemented).
    pub fn check_trait_ref(&mut self, trait_ref: &ty::TraitRef<'tcx>) {
-        /*!
-         * Given a trait ref like `A : Trait<B>`, where `Trait` is
-         * defined as (say):
-         *
-         *     trait Trait<B:OtherTrait> : Copy { ... }
-         *
-         * This routine will check that `B : OtherTrait` and `A :
-         * Trait<B>`. It will also recursively check that the types
-         * `A` and `B` are well-formed.
-         *
-         * Note that it does not (currently, at least)
-         * check that `A : Copy` (that check is delegated to the point
-         * where impl `A : Trait<B>` is implemented).
-         */
-
        let trait_def = ty::lookup_trait_def(self.fcx.tcx(), trait_ref.def_id);

        let bounds = trait_def.generics.to_bounds(self.tcx(), &trait_ref.substs);
diff --git a/src/librustc/middle/typeck/coherence/mod.rs b/src/librustc/middle/typeck/coherence/mod.rs
index 1f32110a0933883744d33461d60a96af2354428a..758608b79c2cb226fefbce0c650d261bd93e784d 100644
--- a/src/librustc/middle/typeck/coherence/mod.rs
+++ b/src/librustc/middle/typeck/coherence/mod.rs
@@ -477,17 +477,13 @@ fn populate_destructor_table(&self) {
    }
 }

+/// Substitutes the values for the receiver's type parameters that are found in method, leaving the
+/// method's type parameters intact.
 pub fn make_substs_for_receiver_types<'tcx>(tcx: &ty::ctxt<'tcx>,
                                             trait_ref: &ty::TraitRef<'tcx>,
                                             method: &ty::Method<'tcx>)
                                             -> subst::Substs<'tcx>
 {
-    /*!
-     * Substitutes the values for the receiver's type parameters
-     * that are found in method, leaving the method's type parameters
-     * intact.
-     */
-
     let meth_tps: Vec =
         method.generics.types.get_slice(subst::FnSpace)
               .iter()
diff --git a/src/librustc/middle/typeck/coherence/orphan.rs b/src/librustc/middle/typeck/coherence/orphan.rs
index 57ce7f79e030a11b3d18790bf93ee1098999863f..dc3afaae35f615800056ddbfef49787cadf128c6 100644
--- a/src/librustc/middle/typeck/coherence/orphan.rs
+++ b/src/librustc/middle/typeck/coherence/orphan.rs
@@ -8,10 +8,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
- * Orphan checker: every impl either implements a trait defined in this
- * crate or pertains to a type defined in this crate.
- */
+//! Orphan checker: every impl either implements a trait defined in this
+//! crate or pertains to a type defined in this crate.

 use middle::traits;
 use middle::ty;
diff --git a/src/librustc/middle/typeck/coherence/overlap.rs b/src/librustc/middle/typeck/coherence/overlap.rs
index 933c2c81ac269380b0ad7a04070ad31e00a7fdd2..9f10a58f45852eb81c76c28f789a151639c8b9cb 100644
--- a/src/librustc/middle/typeck/coherence/overlap.rs
+++ b/src/librustc/middle/typeck/coherence/overlap.rs
@@ -8,10 +8,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
- * Overlap: No two impls for the same trait are implemented for the
- * same type.
- */
+//! Overlap: No two impls for the same trait are implemented for the
+//! same type.
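+//!
+//! For example (a rough illustration with a made-up trait), these two impls
+//! would overlap and be rejected:
+//!
+//! ```text
+//! impl Foo for int { /* ... */ }
+//! impl Foo for int { /* ... */ }  // error: conflicting implementations
+//! ```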
use middle::traits; use middle::ty; diff --git a/src/librustc/middle/typeck/collect.rs b/src/librustc/middle/typeck/collect.rs index 13a0bf0bdcb522000a9a737cdb52a37446cc4826..3a62978ed007a37ba900505219159153eb26ddb6 100644 --- a/src/librustc/middle/typeck/collect.rs +++ b/src/librustc/middle/typeck/collect.rs @@ -1944,6 +1944,9 @@ fn get_or_create_type_parameter_def<'tcx,AC>(this: &AC, def } +/// Translate the AST's notion of ty param bounds (which are an enum consisting of a newtyped Ty or +/// a region) to ty's notion of ty param bounds, which can either be user-defined traits, or the +/// built-in trait (formerly known as kind): Send. fn compute_bounds<'tcx,AC>(this: &AC, name_of_bounded_thing: ast::Name, param_ty: ty::ParamTy, @@ -1953,13 +1956,6 @@ fn compute_bounds<'tcx,AC>(this: &AC, where_clause: &ast::WhereClause) -> ty::ParamBounds<'tcx> where AC: AstConv<'tcx> { - /*! - * Translate the AST's notion of ty param bounds (which are an - * enum consisting of a newtyped Ty or a region) to ty's - * notion of ty param bounds, which can either be user-defined - * traits, or the built-in trait (formerly known as kind): Send. - */ - let mut param_bounds = conv_param_bounds(this, span, param_ty, @@ -2040,16 +2036,13 @@ fn conv_param_bounds<'tcx,AC>(this: &AC, } } +/// Merges the bounds declared on a type parameter with those found from where clauses into a +/// single list. fn merge_param_bounds<'a>(tcx: &ty::ctxt, param_ty: ty::ParamTy, ast_bounds: &'a [ast::TyParamBound], where_clause: &'a ast::WhereClause) -> Vec<&'a ast::TyParamBound> { - /*! - * Merges the bounds declared on a type parameter with those - * found from where clauses into a single list. - */ - let mut result = Vec::new(); for ast_bound in ast_bounds.iter() { diff --git a/src/librustc/middle/typeck/infer/coercion.rs b/src/librustc/middle/typeck/infer/coercion.rs index 49ac7178eb8db48d2fed3f25d59c2be8fc88e896..51f8668692ea71dab909949f7c5196a6c28d7851 100644 --- a/src/librustc/middle/typeck/infer/coercion.rs +++ b/src/librustc/middle/typeck/infer/coercion.rs @@ -8,61 +8,57 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -# Type Coercion - -Under certain circumstances we will coerce from one type to another, -for example by auto-borrowing. This occurs in situations where the -compiler has a firm 'expected type' that was supplied from the user, -and where the actual type is similar to that expected type in purpose -but not in representation (so actual subtyping is inappropriate). - -## Reborrowing - -Note that if we are expecting a reference, we will *reborrow* -even if the argument provided was already a reference. This is -useful for freezing mut/const things (that is, when the expected is &T -but you have &const T or &mut T) and also for avoiding the linearity -of mut things (when the expected is &mut T and you have &mut T). See -the various `src/test/run-pass/coerce-reborrow-*.rs` tests for -examples of where this is useful. - -## Subtle note - -When deciding what type coercions to consider, we do not attempt to -resolve any type variables we may encounter. This is because `b` -represents the expected type "as the user wrote it", meaning that if -the user defined a generic function like - - fn foo(a: A, b: A) { ... } - -and then we wrote `foo(&1, @2)`, we will not auto-borrow -either argument. 
In older code we went to some lengths to
-resolve the `b` variable, which could mean that we'd
-auto-borrow later arguments but not earlier ones, which
-seems very confusing.
-
-## Subtler note
-
-However, right now, if the user manually specifies the
-values for the type variables, as so:
-
-   foo::<&int>(@1, @2)
-
-then we *will* auto-borrow, because we can't distinguish this from a
-function that declared `&int`. This is inconsistent but it's easiest
-at the moment. The right thing to do, I think, is to consider the
-*unsubstituted* type when deciding whether to auto-borrow, but the
-*substituted* type when considering the bounds and so forth. But most
-of our methods don't give access to the unsubstituted type, and
-rightly so because they'd be error-prone. So maybe the thing to do is
-to actually determine the kind of coercions that should occur
-separately and pass them in. Or maybe it's ok as is. Anyway, it's
-sort of a minor point so I've opted to leave it for later---after all
-we may want to adjust precisely when coercions occur.
-
-*/
+//! # Type Coercion
+//!
+//! Under certain circumstances we will coerce from one type to another,
+//! for example by auto-borrowing. This occurs in situations where the
+//! compiler has a firm 'expected type' that was supplied from the user,
+//! and where the actual type is similar to that expected type in purpose
+//! but not in representation (so actual subtyping is inappropriate).
+//!
+//! ## Reborrowing
+//!
+//! Note that if we are expecting a reference, we will *reborrow*
+//! even if the argument provided was already a reference. This is
+//! useful for freezing mut/const things (that is, when the expected is &T
+//! but you have &const T or &mut T) and also for avoiding the linearity
+//! of mut things (when the expected is &mut T and you have &mut T). See
+//! the various `src/test/run-pass/coerce-reborrow-*.rs` tests for
+//! examples of where this is useful.
+//!
+//! ## Subtle note
+//!
+//! When deciding what type coercions to consider, we do not attempt to
+//! resolve any type variables we may encounter. This is because `b`
+//! represents the expected type "as the user wrote it", meaning that if
+//! the user defined a generic function like
+//!
+//!    fn foo<A>(a: A, b: A) { ... }
+//!
+//! and then we wrote `foo(&1, @2)`, we will not auto-borrow
+//! either argument. In older code we went to some lengths to
+//! resolve the `b` variable, which could mean that we'd
+//! auto-borrow later arguments but not earlier ones, which
+//! seems very confusing.
+//!
+//! ## Subtler note
+//!
+//! However, right now, if the user manually specifies the
+//! values for the type variables, as so:
+//!
+//!    foo::<&int>(@1, @2)
+//!
+//! then we *will* auto-borrow, because we can't distinguish this from a
+//! function that declared `&int`. This is inconsistent but it's easiest
+//! at the moment. The right thing to do, I think, is to consider the
+//! *unsubstituted* type when deciding whether to auto-borrow, but the
+//! *substituted* type when considering the bounds and so forth. But most
+//! of our methods don't give access to the unsubstituted type, and
+//! rightly so because they'd be error-prone. So maybe the thing to do is
+//! to actually determine the kind of coercions that should occur
+//! separately and pass them in. Or maybe it's ok as is. Anyway, it's
+//! sort of a minor point so I've opted to leave it for later---after all
+//! we may want to adjust precisely when coercions occur.
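+//!
+//! As a rough illustration of reborrowing (hypothetical code):
+//!
+//! ```text
+//! fn freeze(_x: &int) { }
+//!
+//! fn callee(v: &mut int) {
+//!     freeze(v);  // coerced as `freeze(&*v)`: the `&mut` is reborrowed as `&`
+//! }
+//! ```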
 use middle::subst;
 use middle::ty::{AutoPtr, AutoDerefRef, AdjustDerefRef, AutoUnsize, AutoUnsafe};
@@ -512,14 +508,10 @@ pub fn coerce_borrowed_fn(&self,
        }
    }

+    /// Attempts to coerce from a bare Rust function (`extern "Rust" fn`) into a closure or a
+    /// `proc`.
    fn coerce_from_bare_fn(&self, a: Ty<'tcx>, fn_ty_a: &ty::BareFnTy<'tcx>, b: Ty<'tcx>)
                           -> CoerceResult<'tcx> {
-        /*!
-         *
-         * Attempts to coerce from a bare Rust function (`extern
-         * "Rust" fn`) into a closure or a `proc`.
-         */
-
        self.unpack_actual_value(b, |sty_b| {

            debug!("coerce_from_bare_fn(a={}, b={})",
diff --git a/src/librustc/middle/typeck/infer/combine.rs b/src/librustc/middle/typeck/infer/combine.rs
index 763f204dc98bc5b942524195a0d32784d67b5483..ba6ae00b6671f197cbb85ec9b97dcd927fc44f29 100644
--- a/src/librustc/middle/typeck/infer/combine.rs
+++ b/src/librustc/middle/typeck/infer/combine.rs
@@ -642,21 +642,16 @@ pub fn instantiate(&self,
        Ok(())
    }

+    /// Attempts to generalize `ty` for the type variable `for_vid`. This checks for a cycle --
+    /// that is, whether the type `ty` references `for_vid`. If `make_region_vars` is true, it will
+    /// also replace all regions with fresh variables. Returns `ty_err` in the case of a cycle,
+    /// `Ok` otherwise.
    fn generalize(&self,
                  ty: Ty<'tcx>,
                  for_vid: ty::TyVid,
                  make_region_vars: bool)
                  -> cres<'tcx, Ty<'tcx>> {
-        /*!
-         * Attempts to generalize `ty` for the type variable
-         * `for_vid`. This checks for cycle -- that is, whether the
-         * type `ty` references `for_vid`. If `make_region_vars` is
-         * true, it will also replace all regions with fresh
-         * variables. Returns `ty_err` in the case of a cycle, `Ok`
-         * otherwise.
-         */
-
        let mut generalize = Generalizer { infcx: self.infcx,
                                           span: self.trace.origin.span(),
                                           for_vid: for_vid,
diff --git a/src/librustc/middle/typeck/infer/doc.rs b/src/librustc/middle/typeck/infer/doc.rs
index 886550a3b246149c874ebd6dc3acf8ae45597d84..0e3cc5f68c868c86b00e12cb906a0da5bfc5dc71 100644
--- a/src/librustc/middle/typeck/infer/doc.rs
+++ b/src/librustc/middle/typeck/infer/doc.rs
@@ -8,244 +8,240 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-
-# Type inference engine
-
-This is loosely based on standard HM-type inference, but with an
-extension to try and accommodate subtyping. There is nothing
-principled about this extension; it's sound---I hope!---but it's a
-heuristic, ultimately, and does not guarantee that it finds a valid
-typing even if one exists (in fact, there are known scenarios where it
-fails, some of which may eventually become problematic).
-
-## Key idea
-
-The main change is that each type variable T is associated with a
-lower-bound L and an upper-bound U. L and U begin as bottom and top,
-respectively, but gradually narrow in response to new constraints
-being introduced. When a variable is finally resolved to a concrete
-type, it can (theoretically) select any type that is a supertype of L
-and a subtype of U.
-
-There are several critical invariants which we maintain:
-
-- the upper-bound of a variable only becomes lower and the lower-bound
-  only becomes higher over time;
-- the lower-bound L is always a subtype of the upper bound U;
-- the lower-bound L and upper-bound U never refer to other type variables,
-  but only to types (though those types may contain type variables).
-
-> An aside: if the terms upper- and lower-bound confuse you, think of
-> "supertype" and "subtype". 
The upper-bound is a "supertype" -> (super=upper in Latin, or something like that anyway) and the lower-bound -> is a "subtype" (sub=lower in Latin). I find it helps to visualize -> a simple class hierarchy, like Java minus interfaces and -> primitive types. The class Object is at the root (top) and other -> types lie in between. The bottom type is then the Null type. -> So the tree looks like: -> -> ```text -> Object -> / \ -> String Other -> \ / -> (null) -> ``` -> -> So the upper bound type is the "supertype" and the lower bound is the -> "subtype" (also, super and sub mean upper and lower in Latin, or something -> like that anyway). - -## Satisfying constraints - -At a primitive level, there is only one form of constraint that the -inference understands: a subtype relation. So the outside world can -say "make type A a subtype of type B". If there are variables -involved, the inferencer will adjust their upper- and lower-bounds as -needed to ensure that this relation is satisfied. (We also allow "make -type A equal to type B", but this is translated into "A <: B" and "B -<: A") - -As stated above, we always maintain the invariant that type bounds -never refer to other variables. This keeps the inference relatively -simple, avoiding the scenario of having a kind of graph where we have -to pump constraints along and reach a fixed point, but it does impose -some heuristics in the case where the user is relating two type -variables A <: B. - -Combining two variables such that variable A will forever be a subtype -of variable B is the trickiest part of the algorithm because there is -often no right choice---that is, the right choice will depend on -future constraints which we do not yet know. The problem comes about -because both A and B have bounds that can be adjusted in the future. -Let's look at some of the cases that can come up. - -Imagine, to start, the best case, where both A and B have an upper and -lower bound (that is, the bounds are not top nor bot respectively). In -that case, if we're lucky, A.ub <: B.lb, and so we know that whatever -A and B should become, they will forever have the desired subtyping -relation. We can just leave things as they are. - -### Option 1: Unify - -However, suppose that A.ub is *not* a subtype of B.lb. In -that case, we must make a decision. One option is to unify A -and B so that they are one variable whose bounds are: - - UB = GLB(A.ub, B.ub) - LB = LUB(A.lb, B.lb) - -(Note that we will have to verify that LB <: UB; if it does not, the -types are not intersecting and there is an error) In that case, A <: B -holds trivially because A==B. However, we have now lost some -flexibility, because perhaps the user intended for A and B to end up -as different types and not the same type. - -Pictorally, what this does is to take two distinct variables with -(hopefully not completely) distinct type ranges and produce one with -the intersection. - -```text - B.ub B.ub - /\ / - A.ub / \ A.ub / - / \ / \ \ / - / X \ UB - / / \ \ / \ - / / / \ / / - \ \ / / \ / - \ X / LB - \ / \ / / \ - \ / \ / / \ - A.lb B.lb A.lb B.lb -``` - - -### Option 2: Relate UB/LB - -Another option is to keep A and B as distinct variables but set their -bounds in such a way that, whatever happens, we know that A <: B will hold. -This can be achieved by ensuring that A.ub <: B.lb. 
In practice there -are two ways to do that, depicted pictorally here: - -```text - Before Option #1 Option #2 - - B.ub B.ub B.ub - /\ / \ / \ - A.ub / \ A.ub /(B')\ A.ub /(B')\ - / \ / \ \ / / \ / / - / X \ __UB____/ UB / - / / \ \ / | | / - / / / \ / | | / - \ \ / / /(A')| | / - \ X / / LB ______LB/ - \ / \ / / / \ / (A')/ \ - \ / \ / \ / \ \ / \ - A.lb B.lb A.lb B.lb A.lb B.lb -``` - -In these diagrams, UB and LB are defined as before. As you can see, -the new ranges `A'` and `B'` are quite different from the range that -would be produced by unifying the variables. - -### What we do now - -Our current technique is to *try* (transactionally) to relate the -existing bounds of A and B, if there are any (i.e., if `UB(A) != top -&& LB(B) != bot`). If that succeeds, we're done. If it fails, then -we merge A and B into same variable. - -This is not clearly the correct course. For example, if `UB(A) != -top` but `LB(B) == bot`, we could conceivably set `LB(B)` to `UB(A)` -and leave the variables unmerged. This is sometimes the better -course, it depends on the program. - -The main case which fails today that I would like to support is: - -```text -fn foo(x: T, y: T) { ... } - -fn bar() { - let x: @mut int = @mut 3; - let y: @int = @3; - foo(x, y); -} -``` - -In principle, the inferencer ought to find that the parameter `T` to -`foo(x, y)` is `@const int`. Today, however, it does not; this is -because the type variable `T` is merged with the type variable for -`X`, and thus inherits its UB/LB of `@mut int`. This leaves no -flexibility for `T` to later adjust to accommodate `@int`. - -### What to do when not all bounds are present - -In the prior discussion we assumed that A.ub was not top and B.lb was -not bot. Unfortunately this is rarely the case. Often type variables -have "lopsided" bounds. For example, if a variable in the program has -been initialized but has not been used, then its corresponding type -variable will have a lower bound but no upper bound. When that -variable is then used, we would like to know its upper bound---but we -don't have one! In this case we'll do different things depending on -how the variable is being used. - -## Transactional support - -Whenever we adjust merge variables or adjust their bounds, we always -keep a record of the old value. This allows the changes to be undone. - -## Regions - -I've only talked about type variables here, but region variables -follow the same principle. They have upper- and lower-bounds. A -region A is a subregion of a region B if A being valid implies that B -is valid. This basically corresponds to the block nesting structure: -the regions for outer block scopes are superregions of those for inner -block scopes. - -## Integral and floating-point type variables - -There is a third variety of type variable that we use only for -inferring the types of unsuffixed integer literals. Integral type -variables differ from general-purpose type variables in that there's -no subtyping relationship among the various integral types, so instead -of associating each variable with an upper and lower bound, we just -use simple unification. Each integer variable is associated with at -most one integer type. Floating point types are handled similarly to -integral types. - -## GLB/LUB - -Computing the greatest-lower-bound and least-upper-bound of two -types/regions is generally straightforward except when type variables -are involved. In that case, we follow a similar "try to use the bounds -when possible but otherwise merge the variables" strategy. 
In other -words, `GLB(A, B)` where `A` and `B` are variables will often result -in `A` and `B` being merged and the result being `A`. - -## Type coercion - -We have a notion of assignability which differs somewhat from -subtyping; in particular it may cause region borrowing to occur. See -the big comment later in this file on Type Coercion for specifics. - -### In conclusion - -I showed you three ways to relate `A` and `B`. There are also more, -of course, though I'm not sure if there are any more sensible options. -The main point is that there are various options, each of which -produce a distinct range of types for `A` and `B`. Depending on what -the correct values for A and B are, one of these options will be the -right choice: but of course we don't know the right values for A and B -yet, that's what we're trying to find! In our code, we opt to unify -(Option #1). - -# Implementation details - -We make use of a trait-like implementation strategy to consolidate -duplicated code between subtypes, GLB, and LUB computations. See the -section on "Type Combining" below for details. - -*/ +//! # Type inference engine +//! +//! This is loosely based on standard HM-type inference, but with an +//! extension to try and accommodate subtyping. There is nothing +//! principled about this extension; it's sound---I hope!---but it's a +//! heuristic, ultimately, and does not guarantee that it finds a valid +//! typing even if one exists (in fact, there are known scenarios where it +//! fails, some of which may eventually become problematic). +//! +//! ## Key idea +//! +//! The main change is that each type variable T is associated with a +//! lower-bound L and an upper-bound U. L and U begin as bottom and top, +//! respectively, but gradually narrow in response to new constraints +//! being introduced. When a variable is finally resolved to a concrete +//! type, it can (theoretically) select any type that is a supertype of L +//! and a subtype of U. +//! +//! There are several critical invariants which we maintain: +//! +//! - the upper-bound of a variable only becomes lower and the lower-bound +//! only becomes higher over time; +//! - the lower-bound L is always a subtype of the upper bound U; +//! - the lower-bound L and upper-bound U never refer to other type variables, +//! but only to types (though those types may contain type variables). +//! +//! > An aside: if the terms upper- and lower-bound confuse you, think of +//! > "supertype" and "subtype". The upper-bound is a "supertype" +//! > (super=upper in Latin, or something like that anyway) and the lower-bound +//! > is a "subtype" (sub=lower in Latin). I find it helps to visualize +//! > a simple class hierarchy, like Java minus interfaces and +//! > primitive types. The class Object is at the root (top) and other +//! > types lie in between. The bottom type is then the Null type. +//! > So the tree looks like: +//! > +//! > ```text +//! > Object +//! > / \ +//! > String Other +//! > \ / +//! > (null) +//! > ``` +//! > +//! > So the upper bound type is the "supertype" and the lower bound is the +//! > "subtype" (also, super and sub mean upper and lower in Latin, or something +//! > like that anyway). +//! +//! ## Satisfying constraints +//! +//! At a primitive level, there is only one form of constraint that the +//! inference understands: a subtype relation. So the outside world can +//! say "make type A a subtype of type B". If there are variables +//! involved, the inferencer will adjust their upper- and lower-bounds as +//! 
needed to ensure that this relation is satisfied. (We also allow "make
+//! type A equal to type B", but this is translated into "A <: B" and "B
+//! <: A")
+//!
+//! As stated above, we always maintain the invariant that type bounds
+//! never refer to other variables. This keeps the inference relatively
+//! simple, avoiding the scenario of having a kind of graph where we have
+//! to pump constraints along and reach a fixed point, but it does impose
+//! some heuristics in the case where the user is relating two type
+//! variables A <: B.
+//!
+//! Combining two variables such that variable A will forever be a subtype
+//! of variable B is the trickiest part of the algorithm because there is
+//! often no right choice---that is, the right choice will depend on
+//! future constraints which we do not yet know. The problem comes about
+//! because both A and B have bounds that can be adjusted in the future.
+//! Let's look at some of the cases that can come up.
+//!
+//! Imagine, to start, the best case, where both A and B have an upper and
+//! lower bound (that is, the bounds are not top nor bot respectively). In
+//! that case, if we're lucky, A.ub <: B.lb, and so we know that whatever
+//! A and B should become, they will forever have the desired subtyping
+//! relation. We can just leave things as they are.
+//!
+//! ### Option 1: Unify
+//!
+//! However, suppose that A.ub is *not* a subtype of B.lb. In
+//! that case, we must make a decision. One option is to unify A
+//! and B so that they are one variable whose bounds are:
+//!
+//!     UB = GLB(A.ub, B.ub)
+//!     LB = LUB(A.lb, B.lb)
+//!
+//! (Note that we will have to verify that LB <: UB; if it does not, the
+//! types are not intersecting and there is an error) In that case, A <: B
+//! holds trivially because A==B. However, we have now lost some
+//! flexibility, because perhaps the user intended for A and B to end up
+//! as different types and not the same type.
+//!
+//! Pictorially, what this does is to take two distinct variables with
+//! (hopefully not completely) distinct type ranges and produce one with
+//! the intersection.
+//!
+//! ```text
+//! B.ub B.ub
+//! /\ /
+//! A.ub / \ A.ub /
+//! / \ / \ \ /
+//! / X \ UB
+//! / / \ \ / \
+//! / / / \ / /
+//! \ \ / / \ /
+//! \ X / LB
+//! \ / \ / / \
+//! \ / \ / / \
+//! A.lb B.lb A.lb B.lb
+//! ```
+//!
+//!
+//! ### Option 2: Relate UB/LB
+//!
+//! Another option is to keep A and B as distinct variables but set their
+//! bounds in such a way that, whatever happens, we know that A <: B will hold.
+//! This can be achieved by ensuring that A.ub <: B.lb. In practice there
+//! are two ways to do that, depicted pictorially here:
+//!
+//! ```text
+//! Before Option #1 Option #2
+//!
+//! B.ub B.ub B.ub
+//! /\ / \ / \
+//! A.ub / \ A.ub /(B')\ A.ub /(B')\
+//! / \ / \ \ / / \ / /
+//! / X \ __UB____/ UB /
+//! / / \ \ / | | /
+//! / / / \ / | | /
+//! \ \ / / /(A')| | /
+//! \ X / / LB ______LB/
+//! \ / \ / / / \ / (A')/ \
+//! \ / \ / \ / \ \ / \
+//! A.lb B.lb A.lb B.lb A.lb B.lb
+//! ```
+//!
+//! In these diagrams, UB and LB are defined as before. As you can see,
+//! the new ranges `A'` and `B'` are quite different from the range that
+//! would be produced by unifying the variables.
+//!
+//! ### What we do now
+//!
+//! Our current technique is to *try* (transactionally) to relate the
+//! existing bounds of A and B, if there are any (i.e., if `UB(A) != top
+//! && LB(B) != bot`). If that succeeds, we're done. If it fails, then
+//! we merge A and B into the same variable.
+//!
+//! This is not clearly the correct course. For example, if `UB(A) !=
+//! top` but `LB(B) == bot`, we could conceivably set `LB(B)` to `UB(A)`
+//! and leave the variables unmerged. This is sometimes the better
+//! course, it depends on the program.
+//!
+//! The main case which fails today that I would like to support is:
+//!
+//! ```text
+//! fn foo<T>(x: T, y: T) { ... }
+//!
+//! fn bar() {
+//!     let x: @mut int = @mut 3;
+//!     let y: @int = @3;
+//!     foo(x, y);
+//! }
+//! ```
+//!
+//! In principle, the inferencer ought to find that the parameter `T` to
+//! `foo(x, y)` is `@const int`. Today, however, it does not; this is
+//! because the type variable `T` is merged with the type variable for
+//! `X`, and thus inherits its UB/LB of `@mut int`. This leaves no
+//! flexibility for `T` to later adjust to accommodate `@int`.
+//!
+//! ### What to do when not all bounds are present
+//!
+//! In the prior discussion we assumed that A.ub was not top and B.lb was
+//! not bot. Unfortunately this is rarely the case. Often type variables
+//! have "lopsided" bounds. For example, if a variable in the program has
+//! been initialized but has not been used, then its corresponding type
+//! variable will have a lower bound but no upper bound. When that
+//! variable is then used, we would like to know its upper bound---but we
+//! don't have one! In this case we'll do different things depending on
+//! how the variable is being used.
+//!
+//! ## Transactional support
+//!
+//! Whenever we merge variables or adjust their bounds, we always
+//! keep a record of the old value. This allows the changes to be undone.
+//!
+//! ## Regions
+//!
+//! I've only talked about type variables here, but region variables
+//! follow the same principle. They have upper- and lower-bounds. A
+//! region A is a subregion of a region B if A being valid implies that B
+//! is valid. This basically corresponds to the block nesting structure:
+//! the regions for outer block scopes are superregions of those for inner
+//! block scopes.
+//!
+//! ## Integral and floating-point type variables
+//!
+//! There is a third variety of type variable that we use only for
+//! inferring the types of unsuffixed integer literals. Integral type
+//! variables differ from general-purpose type variables in that there's
+//! no subtyping relationship among the various integral types, so instead
+//! of associating each variable with an upper and lower bound, we just
+//! use simple unification. Each integer variable is associated with at
+//! most one integer type. Floating point types are handled similarly to
+//! integral types.
+//!
+//! ## GLB/LUB
+//!
+//! Computing the greatest-lower-bound and least-upper-bound of two
+//! types/regions is generally straightforward except when type variables
+//! are involved. In that case, we follow a similar "try to use the bounds
+//! when possible but otherwise merge the variables" strategy. In other
+//! words, `GLB(A, B)` where `A` and `B` are variables will often result
+//! in `A` and `B` being merged and the result being `A`.
+//!
+//! ## Type coercion
+//!
+//! We have a notion of assignability which differs somewhat from
+//! subtyping; in particular it may cause region borrowing to occur. See
+//! the big comment later in this file on Type Coercion for specifics.
+//!
+//! ### In conclusion
+//!
+//! I showed you three ways to relate `A` and `B`. There are also more,
+//! of course, though I'm not sure if there are any more sensible options.
+//! The main point is that there are various options, each of which
+//! produces a distinct range of types for `A` and `B`. Depending on what
+//! the correct values for A and B are, one of these options will be the
+//! right choice: but of course we don't know the right values for A and B
+//! yet, that's what we're trying to find! In our code, we opt to unify
+//! (Option #1).
+//!
+//! # Implementation details
+//!
+//! We make use of a trait-like implementation strategy to consolidate
+//! duplicated code between subtypes, GLB, and LUB computations. See the
+//! section on "Type Combining" below for details.
diff --git a/src/librustc/middle/typeck/infer/error_reporting.rs b/src/librustc/middle/typeck/infer/error_reporting.rs
index bc36a2bd801b46765e0db98fb094163f421f845d..abc68852f4bdf5515982c0113c6a06b0e1bab82c 100644
--- a/src/librustc/middle/typeck/infer/error_reporting.rs
+++ b/src/librustc/middle/typeck/infer/error_reporting.rs
@@ -8,56 +8,53 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-
-Error Reporting Code for the inference engine
-
-Because of the way inference, and in particular region inference,
-works, it often happens that errors are not detected until far after
-the relevant line of code has been type-checked. Therefore, there is
-an elaborate system to track why a particular constraint in the
-inference graph arose so that we can explain to the user what gave
-rise to a particular error.
-
-The basis of the system are the "origin" types. An "origin" is the
-reason that a constraint or inference variable arose. There are
-different "origin" enums for different kinds of constraints/variables
-(e.g., `TypeOrigin`, `RegionVariableOrigin`). An origin always has
-a span, but also more information so that we can generate a meaningful
-error message.
-
-Having a catalogue of all the different reasons an error can arise is
-also useful for other reasons, like cross-referencing FAQs etc, though
-we are not really taking advantage of this yet.
-
-# Region Inference
-
-Region inference is particularly tricky because it always succeeds "in
-the moment" and simply registers a constraint. Then, at the end, we
-can compute the full graph and report errors, so we need to be able to
-store and later report what gave rise to the conflicting constraints.
-
-# Subtype Trace
-
-Determing whether `T1 <: T2` often involves a number of subtypes and
-subconstraints along the way. A "TypeTrace" is an extended version
-of an origin that traces the types and other values that were being
-compared. It is not necessarily comprehensive (in fact, at the time of
-this writing it only tracks the root values being compared) but I'd
-like to extend it to include significant "waypoints". For example, if
-you are comparing `(T1, T2) <: (T3, T4)`, and the problem is that `T2
-<: T4` fails, I'd like the trace to include enough information to say
-"in the 2nd element of the tuple". Similarly, failures when comparing
-arguments or return types in fn types should be able to cite the
-specific position, etc.
-
-# Reality vs plan
-
-Of course, there is still a LOT of code in typeck that has yet to be
-ported to this system, and which relies on string concatenation at the
-time of error detection.
-
-*/
+//! Error Reporting Code for the inference engine
+//!
+//! Because of the way inference, and in particular region inference,
+//! works, it often happens that errors are not detected until far after
+//! the relevant line of code has been type-checked. Therefore, there is
+//! an elaborate system to track why a particular constraint in the
+//! inference graph arose so that we can explain to the user what gave
+//! rise to a particular error.
+//!
+//! The basis of the system is the set of "origin" types. An "origin" is the
+//! reason that a constraint or inference variable arose. There are
+//! different "origin" enums for different kinds of constraints/variables
+//! (e.g., `TypeOrigin`, `RegionVariableOrigin`). An origin always has
+//! a span, but also more information so that we can generate a meaningful
+//! error message.
+//!
+//! Having a catalogue of all the different reasons an error can arise is
+//! also useful for other reasons, like cross-referencing FAQs etc, though
+//! we are not really taking advantage of this yet.
+//!
+//! # Region Inference
+//!
+//! Region inference is particularly tricky because it always succeeds "in
+//! the moment" and simply registers a constraint. Then, at the end, we
+//! can compute the full graph and report errors, so we need to be able to
+//! store and later report what gave rise to the conflicting constraints.
+//!
+//! # Subtype Trace
+//!
+//! Determining whether `T1 <: T2` often involves a number of subtypes and
+//! subconstraints along the way. A "TypeTrace" is an extended version
+//! of an origin that traces the types and other values that were being
+//! compared. It is not necessarily comprehensive (in fact, at the time of
+//! this writing it only tracks the root values being compared) but I'd
+//! like to extend it to include significant "waypoints". For example, if
+//! you are comparing `(T1, T2) <: (T3, T4)`, and the problem is that `T2
+//! <: T4` fails, I'd like the trace to include enough information to say
+//! "in the 2nd element of the tuple". Similarly, failures when comparing
+//! arguments or return types in fn types should be able to cite the
+//! specific position, etc.
+//!
+//! # Reality vs plan
+//!
+//! Of course, there is still a LOT of code in typeck that has yet to be
+//! ported to this system, and which relies on string concatenation at the
+//! time of error detection.
+
 use self::FreshOrKept::*;
 
 use std::collections::HashSet;
@@ -391,11 +388,9 @@ fn report_and_explain_type_error(&self,
         ty::note_and_explain_type_err(self.tcx, terr);
     }
 
+    /// Returns a string of the form "expected `{}`, found `{}`", or None if this is a derived
+    /// error.
     fn values_str(&self, values: &ValuePairs<'tcx>) -> Option<String> {
-        /*!
-         * Returns a string of the form "expected `{}`, found `{}`",
-         * or None if this is a derived error.
-         */
         match *values {
             infer::Types(ref exp_found) => self.expected_found_str(exp_found),
             infer::TraitRefs(ref exp_found) => self.expected_found_str(exp_found)
diff --git a/src/librustc/middle/typeck/infer/higher_ranked/doc.rs b/src/librustc/middle/typeck/infer/higher_ranked/doc.rs
index 4c4452ac892361e4af91dd6eb450e044135d2f1d..2bad3616a05d19fd92f2bc67820f5cdc7a8dd7fd 100644
--- a/src/librustc/middle/typeck/infer/higher_ranked/doc.rs
+++ b/src/librustc/middle/typeck/infer/higher_ranked/doc.rs
@@ -8,408 +8,404 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
-
-# Skolemization and functions
-
-One of the trickiest and most subtle aspects of regions is dealing
-with higher-ranked things which include bound region variables, such
-as function types.
I strongly suggest that if you want to understand -the situation, you read this paper (which is, admittedly, very long, -but you don't have to read the whole thing): - -http://research.microsoft.com/en-us/um/people/simonpj/papers/higher-rank/ - -Although my explanation will never compete with SPJ's (for one thing, -his is approximately 100 pages), I will attempt to explain the basic -problem and also how we solve it. Note that the paper only discusses -subtyping, not the computation of LUB/GLB. - -The problem we are addressing is that there is a kind of subtyping -between functions with bound region parameters. Consider, for -example, whether the following relation holds: - - for<'a> fn(&'a int) <: for<'b> fn(&'b int)? (Yes, a => b) - -The answer is that of course it does. These two types are basically -the same, except that in one we used the name `a` and one we used -the name `b`. - -In the examples that follow, it becomes very important to know whether -a lifetime is bound in a function type (that is, is a lifetime -parameter) or appears free (is defined in some outer scope). -Therefore, from now on I will always write the bindings explicitly, -using the Rust syntax `for<'a> fn(&'a int)` to indicate that `a` is a -lifetime parameter. - -Now let's consider two more function types. Here, we assume that the -`'b` lifetime is defined somewhere outside and hence is not a lifetime -parameter bound by the function type (it "appears free"): - - for<'a> fn(&'a int) <: fn(&'b int)? (Yes, a => b) - -This subtyping relation does in fact hold. To see why, you have to -consider what subtyping means. One way to look at `T1 <: T2` is to -say that it means that it is always ok to treat an instance of `T1` as -if it had the type `T2`. So, with our functions, it is always ok to -treat a function that can take pointers with any lifetime as if it -were a function that can only take a pointer with the specific -lifetime `'b`. After all, `'b` is a lifetime, after all, and -the function can take values of any lifetime. - -You can also look at subtyping as the *is a* relationship. This amounts -to the same thing: a function that accepts pointers with any lifetime -*is a* function that accepts pointers with some specific lifetime. - -So, what if we reverse the order of the two function types, like this: - - fn(&'b int) <: for<'a> fn(&'a int)? (No) - -Does the subtyping relationship still hold? The answer of course is -no. In this case, the function accepts *only the lifetime `'b`*, -so it is not reasonable to treat it as if it were a function that -accepted any lifetime. - -What about these two examples: - - for<'a,'b> fn(&'a int, &'b int) <: for<'a> fn(&'a int, &'a int)? (Yes) - for<'a> fn(&'a int, &'a int) <: for<'a,'b> fn(&'a int, &'b int)? (No) - -Here, it is true that functions which take two pointers with any two -lifetimes can be treated as if they only accepted two pointers with -the same lifetime, but not the reverse. - -## The algorithm - -Here is the algorithm we use to perform the subtyping check: - -1. Replace all bound regions in the subtype with new variables -2. Replace all bound regions in the supertype with skolemized - equivalents. A "skolemized" region is just a new fresh region - name. -3. Check that the parameter and return types match as normal -4. Ensure that no skolemized regions 'leak' into region variables - visible from "the outside" - -Let's walk through some examples and see how this algorithm plays out. - -#### First example - -We'll start with the first example, which was: - - 1. 
for<'a> fn(&'a T) <: for<'b> fn(&'b T)? Yes: a -> b - -After steps 1 and 2 of the algorithm we will have replaced the types -like so: - - 1. fn(&'A T) <: fn(&'x T)? - -Here the upper case `&A` indicates a *region variable*, that is, a -region whose value is being inferred by the system. I also replaced -`&b` with `&x`---I'll use letters late in the alphabet (`x`, `y`, `z`) -to indicate skolemized region names. We can assume they don't appear -elsewhere. Note that neither the sub- nor the supertype bind any -region names anymore (as indicated by the absence of `<` and `>`). - -The next step is to check that the parameter types match. Because -parameters are contravariant, this means that we check whether: - - &'x T <: &'A T - -Region pointers are contravariant so this implies that - - &A <= &x - -must hold, where `<=` is the subregion relationship. Processing -*this* constrain simply adds a constraint into our graph that `&A <= -&x` and is considered successful (it can, for example, be satisfied by -choosing the value `&x` for `&A`). - -So far we have encountered no error, so the subtype check succeeds. - -#### The third example - -Now let's look first at the third example, which was: - - 3. fn(&'a T) <: for<'b> fn(&'b T)? No! - -After steps 1 and 2 of the algorithm we will have replaced the types -like so: - - 3. fn(&'a T) <: fn(&'x T)? - -This looks pretty much the same as before, except that on the LHS -`'a` was not bound, and hence was left as-is and not replaced with -a variable. The next step is again to check that the parameter types -match. This will ultimately require (as before) that `'a` <= `&x` -must hold: but this does not hold. `self` and `x` are both distinct -free regions. So the subtype check fails. - -#### Checking for skolemization leaks - -You may be wondering about that mysterious last step in the algorithm. -So far it has not been relevant. The purpose of that last step is to -catch something like *this*: - - for<'a> fn() -> fn(&'a T) <: fn() -> for<'b> fn(&'b T)? No. - -Here the function types are the same but for where the binding occurs. -The subtype returns a function that expects a value in precisely one -region. The supertype returns a function that expects a value in any -region. If we allow an instance of the subtype to be used where the -supertype is expected, then, someone could call the fn and think that -the return value has type `fn(&'b T)` when it really has type -`fn(&'a T)` (this is case #3, above). Bad. - -So let's step through what happens when we perform this subtype check. -We first replace the bound regions in the subtype (the supertype has -no bound regions). This gives us: - - fn() -> fn(&'A T) <: fn() -> for<'b> fn(&'b T)? - -Now we compare the return types, which are covariant, and hence we have: - - fn(&'A T) <: for<'b> fn(&'b T)? - -Here we skolemize the bound region in the supertype to yield: - - fn(&'A T) <: fn(&'x T)? - -And then proceed to compare the argument types: - - &'x T <: &'A T - 'A <= 'x - -Finally, this is where it gets interesting! This is where an error -*should* be reported. But in fact this will not happen. The reason why -is that `A` is a variable: we will infer that its value is the fresh -region `x` and think that everything is happy. In fact, this behavior -is *necessary*, it was key to the first example we walked through. - -The difference between this example and the first one is that the variable -`A` already existed at the point where the skolemization occurred. 
In -the first example, you had two functions: - - for<'a> fn(&'a T) <: for<'b> fn(&'b T) - -and hence `&A` and `&x` were created "together". In general, the -intention of the skolemized names is that they are supposed to be -fresh names that could never be equal to anything from the outside. -But when inference comes into play, we might not be respecting this -rule. - -So the way we solve this is to add a fourth step that examines the -constraints that refer to skolemized names. Basically, consider a -non-directed verison of the constraint graph. Let `Tainted(x)` be the -set of all things reachable from a skolemized variable `x`. -`Tainted(x)` should not contain any regions that existed before the -step at which the skolemization was performed. So this case here -would fail because `&x` was created alone, but is relatable to `&A`. - -## Computing the LUB and GLB - -The paper I pointed you at is written for Haskell. It does not -therefore considering subtyping and in particular does not consider -LUB or GLB computation. We have to consider this. Here is the -algorithm I implemented. - -First though, let's discuss what we are trying to compute in more -detail. The LUB is basically the "common supertype" and the GLB is -"common subtype"; one catch is that the LUB should be the -*most-specific* common supertype and the GLB should be *most general* -common subtype (as opposed to any common supertype or any common -subtype). - -Anyway, to help clarify, here is a table containing some function -pairs and their LUB/GLB (for conciseness, in this table, I'm just -including the lifetimes here, not the rest of the types, and I'm -writing `fn<>` instead of `for<> fn`): - -``` -Type 1 Type 2 LUB GLB -fn<'a>('a) fn('X) fn('X) fn<'a>('a) -fn('a) fn('X) -- fn<'a>('a) -fn<'a,'b>('a, 'b) fn<'x>('x, 'x) fn<'a>('a, 'a) fn<'a,'b>('a, 'b) -fn<'a,'b>('a, 'b, 'a) fn<'x,'y>('x, 'y, 'y) fn<'a>('a, 'a, 'a) fn<'a,'b,'c>('a,'b,'c) -``` - -### Conventions - -I use lower-case letters (e.g., `&a`) for bound regions and upper-case -letters for free regions (`&A`). Region variables written with a -dollar-sign (e.g., `$a`). I will try to remember to enumerate the -bound-regions on the fn type as well (e.g., `for<'a> fn(&a)`). - -### High-level summary - -Both the LUB and the GLB algorithms work in a similar fashion. They -begin by replacing all bound regions (on both sides) with fresh region -inference variables. Therefore, both functions are converted to types -that contain only free regions. We can then compute the LUB/GLB in a -straightforward way, as described in `combine.rs`. This results in an -interim type T. The algorithms then examine the regions that appear -in T and try to, in some cases, replace them with bound regions to -yield the final result. - -To decide whether to replace a region `R` that appears in `T` with a -bound region, the algorithms make use of two bits of information. -First is a set `V` that contains all region variables created as part -of the LUB/GLB computation. `V` will contain the region variables -created to replace the bound regions in the input types, but it also -contains 'intermediate' variables created to represent the LUB/GLB of -individual regions. Basically, when asked to compute the LUB/GLB of a -region variable with another region, the inferencer cannot oblige -immediately since the values of that variables are not known. -Therefore, it creates a new variable that is related to the two -regions. 
For example, the LUB of two variables `$x` and `$y` is a -fresh variable `$z` that is constrained such that `$x <= $z` and `$y -<= $z`. So `V` will contain these intermediate variables as well. - -The other important factor in deciding how to replace a region in T is -the function `Tainted($r)` which, for a region variable, identifies -all regions that the region variable is related to in some way -(`Tainted()` made an appearance in the subtype computation as well). - -### LUB - -The LUB algorithm proceeds in three steps: - -1. Replace all bound regions (on both sides) with fresh region - inference variables. -2. Compute the LUB "as normal", meaning compute the GLB of each - pair of argument types and the LUB of the return types and - so forth. Combine those to a new function type `F`. -3. Replace each region `R` that appears in `F` as follows: - - Let `V` be the set of variables created during the LUB - computational steps 1 and 2, as described in the previous section. - - If `R` is not in `V`, replace `R` with itself. - - If `Tainted(R)` contains a region that is not in `V`, - replace `R` with itself. - - Otherwise, select the earliest variable in `Tainted(R)` that originates - from the left-hand side and replace `R` with the bound region that - this variable was a replacement for. - -So, let's work through the simplest example: `fn(&A)` and `for<'a> fn(&a)`. -In this case, `&a` will be replaced with `$a` and the interim LUB type -`fn($b)` will be computed, where `$b=GLB(&A,$a)`. Therefore, `V = -{$a, $b}` and `Tainted($b) = { $b, $a, &A }`. When we go to replace -`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`, -we leave `$b` as is. When region inference happens, `$b` will be -resolved to `&A`, as we wanted. - -Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. In -this case, we'll end up with a (pre-replacement) LUB type of `fn(&g, -&h)` and a graph that looks like: - -``` - $a $b *--$x - \ \ / / - \ $h-* / - $g-----------* -``` - -Here `$g` and `$h` are fresh variables that are created to represent -the LUB/GLB of things requiring inference. This means that `V` and -`Tainted` will look like: - -``` -V = {$a, $b, $g, $h, $x} -Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x } -``` - -Therefore we replace both `$g` and `$h` with `$a`, and end up -with the type `fn(&a, &a)`. - -### GLB - -The procedure for computing the GLB is similar. The difference lies -in computing the replacements for the various variables. For each -region `R` that appears in the type `F`, we again compute `Tainted(R)` -and examine the results: - -1. If `R` is not in `V`, it is not replaced. -2. Else, if `Tainted(R)` contains only variables in `V`, and it - contains exactly one variable from the LHS and one variable from - the RHS, then `R` can be mapped to the bound version of the - variable from the LHS. -3. Else, if `Tainted(R)` contains no variable from the LHS and no - variable from the RHS, then `R` can be mapped to itself. -4. Else, `R` is mapped to a fresh bound variable. - -These rules are pretty complex. Let's look at some examples to see -how they play out. - -Out first example was `fn(&a)` and `fn(&X)`. In this case, `&a` will -be replaced with `$a` and we will ultimately compute a -(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`. -Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}. 
To find the -replacement for `$g` we consult the rules above: -- Rule (1) does not apply because `$g \in V` -- Rule (2) does not apply because `&X \in Tainted($g)` -- Rule (3) does not apply because `$a \in Tainted($g)` -- Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`. -So our final result is `fn(&z)`, which is correct. - -The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again -have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`. -Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`. In this case, -by rule (3), `$g` is mapped to itself, and hence the result is -`fn($g)`. This result is correct (in this case, at least), but it is -indicative of a case that *can* lead us into concluding that there is -no GLB when in fact a GLB does exist. See the section "Questionable -Results" below for more details. - -The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as -before, we'll end up with `F=fn($g, $h)` where `Tainted($g) = -Tainted($h) = {$g, $h, $a, $b, $c}`. Only rule (4) applies and hence -we'll select fresh bound variables `y` and `z` and wind up with -`fn(&y, &z)`. - -For the last example, let's consider what may seem trivial, but is -not: `fn(&a, &a)` and `fn(&b, &b)`. In this case, we'll get `F=fn($g, -$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a, -$x}`. Both of these sets contain exactly one bound variable from each -side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which -is the desired result. - -### Shortcomings and correctness - -You may be wondering whether this algorithm is correct. The answer is -"sort of". There are definitely cases where they fail to compute a -result even though a correct result exists. I believe, though, that -if they succeed, then the result is valid, and I will attempt to -convince you. The basic argument is that the "pre-replacement" step -computes a set of constraints. The replacements, then, attempt to -satisfy those constraints, using bound identifiers where needed. - -For now I will briefly go over the cases for LUB/GLB and identify -their intent: - -- LUB: - - The region variables that are substituted in place of bound regions - are intended to collect constraints on those bound regions. - - If Tainted(R) contains only values in V, then this region is unconstrained - and can therefore be generalized, otherwise it cannot. -- GLB: - - The region variables that are substituted in place of bound regions - are intended to collect constraints on those bound regions. - - If Tainted(R) contains exactly one variable from each side, and - only variables in V, that indicates that those two bound regions - must be equated. - - Otherwise, if Tainted(R) references any variables from left or right - side, then it is trying to combine a bound region with a free one or - multiple bound regions, so we need to select fresh bound regions. - -Sorry this is more of a shorthand to myself. I will try to write up something -more convincing in the future. - -#### Where are the algorithms wrong? - -- The pre-replacement computation can fail even though using a - bound-region would have succeeded. -- We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the - GLB of $a and $b. But if inference finds that $a and $b must be mapped - to regions without a GLB, then this is effectively a failure to compute - the GLB. However, the result `fn<$c>(fn($c))` is a valid GLB. - - */ +//! # Skolemization and functions +//! +//! 
One of the trickiest and most subtle aspects of regions is dealing
+//! with higher-ranked things which include bound region variables, such
+//! as function types. I strongly suggest that if you want to understand
+//! the situation, you read this paper (which is, admittedly, very long,
+//! but you don't have to read the whole thing):
+//!
+//! http://research.microsoft.com/en-us/um/people/simonpj/papers/higher-rank/
+//!
+//! Although my explanation will never compete with SPJ's (for one thing,
+//! his is approximately 100 pages), I will attempt to explain the basic
+//! problem and also how we solve it. Note that the paper only discusses
+//! subtyping, not the computation of LUB/GLB.
+//!
+//! The problem we are addressing is that there is a kind of subtyping
+//! between functions with bound region parameters. Consider, for
+//! example, whether the following relation holds:
+//!
+//!     for<'a> fn(&'a int) <: for<'b> fn(&'b int)?   (Yes, a => b)
+//!
+//! The answer is that of course it does. These two types are basically
+//! the same, except that in one we used the name `a` and one we used
+//! the name `b`.
+//!
+//! In the examples that follow, it becomes very important to know whether
+//! a lifetime is bound in a function type (that is, is a lifetime
+//! parameter) or appears free (is defined in some outer scope).
+//! Therefore, from now on I will always write the bindings explicitly,
+//! using the Rust syntax `for<'a> fn(&'a int)` to indicate that `a` is a
+//! lifetime parameter.
+//!
+//! Now let's consider two more function types. Here, we assume that the
+//! `'b` lifetime is defined somewhere outside and hence is not a lifetime
+//! parameter bound by the function type (it "appears free"):
+//!
+//!     for<'a> fn(&'a int) <: fn(&'b int)?   (Yes, a => b)
+//!
+//! This subtyping relation does in fact hold. To see why, you have to
+//! consider what subtyping means. One way to look at `T1 <: T2` is to
+//! say that it means that it is always ok to treat an instance of `T1` as
+//! if it had the type `T2`. So, with our functions, it is always ok to
+//! treat a function that can take pointers with any lifetime as if it
+//! were a function that can only take a pointer with the specific
+//! lifetime `'b`. After all, `'b` is a lifetime, and
+//! the function can take values of any lifetime.
+//!
+//! You can also look at subtyping as the *is a* relationship. This amounts
+//! to the same thing: a function that accepts pointers with any lifetime
+//! *is a* function that accepts pointers with some specific lifetime.
+//!
+//! So, what if we reverse the order of the two function types, like this:
+//!
+//!     fn(&'b int) <: for<'a> fn(&'a int)?   (No)
+//!
+//! Does the subtyping relationship still hold? The answer of course is
+//! no. In this case, the function accepts *only the lifetime `'b`*,
+//! so it is not reasonable to treat it as if it were a function that
+//! accepted any lifetime.
+//!
+//! What about these two examples:
+//!
+//!     for<'a,'b> fn(&'a int, &'b int) <: for<'a> fn(&'a int, &'a int)?   (Yes)
+//!     for<'a> fn(&'a int, &'a int) <: for<'a,'b> fn(&'a int, &'b int)?   (No)
+//!
+//! Here, it is true that functions which take two pointers with any two
+//! lifetimes can be treated as if they only accepted two pointers with
+//! the same lifetime, but not the reverse.
+//!
+//! ## The algorithm
+//!
+//! Here is the algorithm we use to perform the subtyping check:
+//!
+//! 1. Replace all bound regions in the subtype with new variables
+//! 2. Replace all bound regions in the supertype with skolemized
+//!    equivalents. A "skolemized" region is just a new fresh region
+//!    name.
+//! 3. Check that the parameter and return types match as normal
+//! 4. Ensure that no skolemized regions 'leak' into region variables
+//!    visible from "the outside"
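+//!
+//! As a quick sanity check of the "yes" cases above (hedged: this is a
+//! modern-syntax sketch written only for illustration; `general` and
+//! `demand_specific` are made-up names), a value of type
+//! `for<'a> fn(&'a i32)` can be supplied where a `fn(&'b i32)` for one
+//! specific `'b` is demanded:
+//!
+//! ```text
+//! fn general(_: &i32) {}                                // for<'a> fn(&'a i32)
+//!
+//! fn demand_specific<'b>(f: fn(&'b i32), v: &'b i32) { f(v) }
+//!
+//! fn main() {
+//!     let v = 1;
+//!     demand_specific(general, &v);  // subtyping permits this call
+//! }
+//! ```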
+//!
+//! Let's walk through some examples and see how this algorithm plays out.
+//!
+//! #### First example
+//!
+//! We'll start with the first example, which was:
+//!
+//!     1. for<'a> fn(&'a T) <: for<'b> fn(&'b T)?   Yes: a -> b
+//!
+//! After steps 1 and 2 of the algorithm we will have replaced the types
+//! like so:
+//!
+//!     1. fn(&'A T) <: fn(&'x T)?
+//!
+//! Here the upper case `&A` indicates a *region variable*, that is, a
+//! region whose value is being inferred by the system. I also replaced
+//! `&b` with `&x`---I'll use letters late in the alphabet (`x`, `y`, `z`)
+//! to indicate skolemized region names. We can assume they don't appear
+//! elsewhere. Note that neither the sub- nor the supertype bind any
+//! region names anymore (as indicated by the absence of `<` and `>`).
+//!
+//! The next step is to check that the parameter types match. Because
+//! parameters are contravariant, this means that we check whether:
+//!
+//!     &'x T <: &'A T
+//!
+//! Region pointers are contravariant so this implies that
+//!
+//!     &A <= &x
+//!
+//! must hold, where `<=` is the subregion relationship. Processing
+//! *this* constraint simply adds a constraint into our graph that `&A <=
+//! &x` and is considered successful (it can, for example, be satisfied by
+//! choosing the value `&x` for `&A`).
+//!
+//! So far we have encountered no error, so the subtype check succeeds.
+//!
+//! #### The third example
+//!
+//! Now let's look at the third example, which was:
+//!
+//!     3. fn(&'a T) <: for<'b> fn(&'b T)?   No!
+//!
+//! After steps 1 and 2 of the algorithm we will have replaced the types
+//! like so:
+//!
+//!     3. fn(&'a T) <: fn(&'x T)?
+//!
+//! This looks pretty much the same as before, except that on the LHS
+//! `'a` was not bound, and hence was left as-is and not replaced with
+//! a variable. The next step is again to check that the parameter types
+//! match. This will ultimately require (as before) that `'a` <= `&x`
+//! must hold: but this does not hold. `'a` and `'x` are both distinct
+//! free regions. So the subtype check fails.
+//!
+//! #### Checking for skolemization leaks
+//!
+//! You may be wondering about that mysterious last step in the algorithm.
+//! So far it has not been relevant. The purpose of that last step is to
+//! catch something like *this*:
+//!
+//!     for<'a> fn() -> fn(&'a T) <: fn() -> for<'b> fn(&'b T)?   No.
+//!
+//! Here the function types are the same but for where the binding occurs.
+//! The subtype returns a function that expects a value in precisely one
+//! region. The supertype returns a function that expects a value in any
+//! region. If we allow an instance of the subtype to be used where the
+//! supertype is expected, then someone could call the fn and think that
+//! the return value has type `fn(&'b T)` when it really has type
+//! `fn(&'a T)` (this is case #3, above). Bad.
+//!
+//! So let's step through what happens when we perform this subtype check.
+//! We first replace the bound regions in the subtype (the supertype has
+//! no bound regions). This gives us:
+//!
+//!     fn() -> fn(&'A T) <: fn() -> for<'b> fn(&'b T)?
+//!
+//! Now we compare the return types, which are covariant, and hence we have:
+//!
+//!     fn(&'A T) <: for<'b> fn(&'b T)?
+//!
+//! Here we skolemize the bound region in the supertype to yield:
+//!
+//!     fn(&'A T) <: fn(&'x T)?
+//!
+//! And then proceed to compare the argument types:
+//!
+//!     &'x T <: &'A T
+//!     'A <= 'x
+//!
+//! Finally, this is where it gets interesting! This is where an error
+//! *should* be reported. But in fact this will not happen. The reason why
+//! is that `A` is a variable: we will infer that its value is the fresh
+//! region `x` and think that everything is happy. In fact, this behavior
+//! is *necessary*: it was key to the first example we walked through.
+//!
+//! The difference between this example and the first one is that the variable
+//! `A` already existed at the point where the skolemization occurred. In
+//! the first example, you had two functions:
+//!
+//!     for<'a> fn(&'a T) <: for<'b> fn(&'b T)
+//!
+//! and hence `&A` and `&x` were created "together". In general, the
+//! intention of the skolemized names is that they are supposed to be
+//! fresh names that could never be equal to anything from the outside.
+//! But when inference comes into play, we might not be respecting this
+//! rule.
+//!
+//! So the way we solve this is to add a fourth step that examines the
+//! constraints that refer to skolemized names. Basically, consider a
+//! non-directed version of the constraint graph. Let `Tainted(x)` be the
+//! set of all things reachable from a skolemized variable `x`.
+//! `Tainted(x)` should not contain any regions that existed before the
+//! step at which the skolemization was performed. So this case here
+//! would fail because `&x` was created alone, but is relatable to `&A`.
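+//!
+//! A toy model of that fourth step (hedged: standalone modern Rust with
+//! regions as plain node indices; none of this is the actual rustc
+//! representation) might look like:
+//!
+//! ```text
+//! // Everything reachable from `start` when constraint edges are
+//! // treated as undirected; this plays the role of `Tainted(x)`.
+//! fn tainted(start: usize, edges: &[(usize, usize)]) -> Vec<usize> {
+//!     let (mut seen, mut stack) = (vec![start], vec![start]);
+//!     while let Some(r) = stack.pop() {
+//!         for &(a, b) in edges {
+//!             let n = if a == r { b } else if b == r { a } else { continue };
+//!             if !seen.contains(&n) { seen.push(n); stack.push(n); }
+//!         }
+//!     }
+//!     seen
+//! }
+//!
+//! // The leak check: no region that existed before skolemization may be
+//! // reachable from a skolemized region.
+//! fn leak_check(skolemized: &[usize], pre_existing: &[usize],
+//!               edges: &[(usize, usize)]) -> bool {
+//!     skolemized.iter()
+//!         .all(|&x| tainted(x, edges).iter().all(|r| !pre_existing.contains(r)))
+//! }
+//! ```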
+//!
+//! ## Computing the LUB and GLB
+//!
+//! The paper I pointed you at is written for Haskell. It therefore does
+//! not consider subtyping, and in particular does not consider
+//! LUB or GLB computation. We have to consider this. Here is the
+//! algorithm I implemented.
+//!
+//! First though, let's discuss what we are trying to compute in more
+//! detail. The LUB is basically the "common supertype" and the GLB is
+//! the "common subtype"; one catch is that the LUB should be the
+//! *most-specific* common supertype and the GLB should be the *most general*
+//! common subtype (as opposed to any common supertype or any common
+//! subtype).
+//!
+//! Anyway, to help clarify, here is a table containing some function
+//! pairs and their LUB/GLB (for conciseness, in this table, I'm just
+//! including the lifetimes here, not the rest of the types, and I'm
+//! writing `fn<>` instead of `for<> fn`):
+//!
+//! ```
+//! Type 1                 Type 2                 LUB                 GLB
+//! fn<'a>('a)             fn('X)                 fn('X)              fn<'a>('a)
+//! fn('a)                 fn('X)                 --                  fn<'a>('a)
+//! fn<'a,'b>('a, 'b)      fn<'x>('x, 'x)         fn<'a>('a, 'a)      fn<'a,'b>('a, 'b)
+//! fn<'a,'b>('a, 'b, 'a)  fn<'x,'y>('x, 'y, 'y)  fn<'a>('a, 'a, 'a)  fn<'a,'b,'c>('a,'b,'c)
+//! ```
+//!
+//! ### Conventions
+//!
+//! I use lower-case letters (e.g., `&a`) for bound regions and upper-case
+//! letters for free regions (`&A`). Region variables are written with a
+//! dollar-sign (e.g., `$a`). I will try to remember to enumerate the
+//! bound-regions on the fn type as well (e.g., `for<'a> fn(&a)`).
+//!
+//! ### High-level summary
+//!
+//! Both the LUB and the GLB algorithms work in a similar fashion. They
+//! begin by replacing all bound regions (on both sides) with fresh region
+//! inference variables. Therefore, both functions are converted to types
+//! that contain only free regions. We can then compute the LUB/GLB in a
+//! straightforward way, as described in `combine.rs`. This results in an
+//! interim type T. The algorithms then examine the regions that appear
+//! in T and try to, in some cases, replace them with bound regions to
+//! yield the final result.
+//!
+//! To decide whether to replace a region `R` that appears in `T` with a
+//! bound region, the algorithms make use of two bits of information.
+//! First is a set `V` that contains all region variables created as part
+//! of the LUB/GLB computation. `V` will contain the region variables
+//! created to replace the bound regions in the input types, but it also
+//! contains 'intermediate' variables created to represent the LUB/GLB of
+//! individual regions. Basically, when asked to compute the LUB/GLB of a
+//! region variable with another region, the inferencer cannot oblige
+//! immediately since the value of that variable is not known.
+//! Therefore, it creates a new variable that is related to the two
+//! regions. For example, the LUB of two variables `$x` and `$y` is a
+//! fresh variable `$z` that is constrained such that `$x <= $z` and `$y
+//! <= $z`. So `V` will contain these intermediate variables as well.
+//!
+//! The other important factor in deciding how to replace a region in T is
+//! the function `Tainted($r)` which, for a region variable, identifies
+//! all regions that the region variable is related to in some way
+//! (`Tainted()` made an appearance in the subtype computation as well).
+//!
+//! ### LUB
+//!
+//! The LUB algorithm proceeds in three steps:
+//!
+//! 1. Replace all bound regions (on both sides) with fresh region
+//!    inference variables.
+//! 2. Compute the LUB "as normal", meaning compute the GLB of each
+//!    pair of argument types and the LUB of the return types and
+//!    so forth. Combine those to a new function type `F`.
+//! 3. Replace each region `R` that appears in `F` as follows:
+//!    - Let `V` be the set of variables created during the LUB
+//!      computational steps 1 and 2, as described in the previous section.
+//!    - If `R` is not in `V`, replace `R` with itself.
+//!    - If `Tainted(R)` contains a region that is not in `V`,
+//!      replace `R` with itself.
+//!    - Otherwise, select the earliest variable in `Tainted(R)` that originates
+//!      from the left-hand side and replace `R` with the bound region that
+//!      this variable was a replacement for.
+//!
+//! So, let's work through the simplest example: `fn(&A)` and `for<'a> fn(&a)`.
+//! In this case, `&a` will be replaced with `$a` and the interim LUB type
+//! `fn($b)` will be computed, where `$b=GLB(&A,$a)`. Therefore, `V =
+//! {$a, $b}` and `Tainted($b) = { $b, $a, &A }`. When we go to replace
+//! `$b`, we find that `&A \in Tainted($b)` and `&A` is not a member of `V`,
+//! so we leave `$b` as is. When region inference happens, `$b` will be
+//! resolved to `&A`, as we wanted.
+//!
+//! Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. In
+//! this case, we'll end up with a (pre-replacement) LUB type of `fn($g,
+//! $h)` and a graph that looks like:
+//!
+//! ```
+//!      $a        $b     *--$x
+//!       \         \    /  /
+//!        \         $h-*  /
+//!         $g-----------*
+//! ```
+//!
+//! Here `$g` and `$h` are fresh variables that are created to represent
+//! the LUB/GLB of things requiring inference. This means that `V` and
+//! `Tainted` will look like:
+//!
+//! ```
+//! V = {$a, $b, $g, $h, $x}
+//! Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x }
+//! ```
+//!
+//! Therefore we replace both `$g` and `$h` with `$a`, and end up
+//! with the type `fn(&a, &a)`.
+//!
+//! ### GLB
+//!
+//! The procedure for computing the GLB is similar. The difference lies
+//! in computing the replacements for the various variables. For each
+//! region `R` that appears in the type `F`, we again compute `Tainted(R)`
+//! and examine the results:
+//!
+//! 1. If `R` is not in `V`, it is not replaced.
+//! 2. Else, if `Tainted(R)` contains only variables in `V`, and it
+//!    contains exactly one variable from the LHS and one variable from
+//!    the RHS, then `R` can be mapped to the bound version of the
+//!    variable from the LHS.
+//! 3. Else, if `Tainted(R)` contains no variable from the LHS and no
+//!    variable from the RHS, then `R` can be mapped to itself.
+//! 4. Else, `R` is mapped to a fresh bound variable.
+//!
+//! These rules are pretty complex (a small sketch encoding them appears
+//! at the end of this section). Let's look at some examples to see
+//! how they play out.
+//!
+//! Our first example was `fn(&a)` and `fn(&X)`. In this case, `&a` will
+//! be replaced with `$a` and we will ultimately compute a
+//! (pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`.
+//! Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}`. To find the
+//! replacement for `$g` we consult the rules above:
+//! - Rule (1) does not apply because `$g \in V`
+//! - Rule (2) does not apply because `&X \in Tainted($g)`
+//! - Rule (3) does not apply because `$a \in Tainted($g)`
+//! - Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`.
+//! So our final result is `fn(&z)`, which is correct.
+//!
+//! The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again
+//! have a (pre-replacement) GLB of `fn($g)`, where `$g = LUB(&A,&Z)`.
+//! Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`. In this case,
+//! by rule (3), `$g` is mapped to itself, and hence the result is
+//! `fn($g)`. This result is correct (in this case, at least), but it is
+//! indicative of a case that *can* lead us into concluding that there is
+//! no GLB when in fact a GLB does exist. See the section "Questionable
+//! Results" below for more details.
+//!
+//! The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as
+//! before, we'll end up with `F=fn($g, $h)` where `Tainted($g) =
+//! Tainted($h) = {$g, $h, $a, $b, $c}`. Only rule (4) applies and hence
+//! we'll select fresh bound variables `y` and `z` and wind up with
+//! `fn(&y, &z)`.
+//!
+//! For the last example, let's consider what may seem trivial, but is
+//! not: `fn(&a, &a)` and `fn(&b, &b)`. In this case, we'll get `F=fn($g,
+//! $h)` where `Tainted($g) = {$g, $a, $b}` and `Tainted($h) = {$h, $a,
+//! $b}`. Both of these sets contain exactly one bound variable from each
+//! side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which
+//! is the desired result.
+//!
+//! ### Shortcomings and correctness
+//!
+//! You may be wondering whether this algorithm is correct. The answer is
+//! "sort of". There are definitely cases where it fails to compute a
+//! result even though a correct result exists. I believe, though, that
+//! if it succeeds, then the result is valid, and I will attempt to
+//! convince you. The basic argument is that the "pre-replacement" step
+//! computes a set of constraints. The replacements, then, attempt to
+//! satisfy those constraints, using bound identifiers where needed.
+//!
+//! For now I will briefly go over the cases for LUB/GLB and identify
+//! their intent:
+//!
+//! - LUB:
+//!   - The region variables that are substituted in place of bound regions
+//!     are intended to collect constraints on those bound regions.
+//!   - If Tainted(R) contains only values in V, then this region is unconstrained
+//!     and can therefore be generalized, otherwise it cannot.
+//! - GLB:
+//!   - The region variables that are substituted in place of bound regions
+//!     are intended to collect constraints on those bound regions.
+//!   - If Tainted(R) contains exactly one variable from each side, and
+//!     only variables in V, that indicates that those two bound regions
+//!     must be equated.
+//!   - Otherwise, if Tainted(R) references any variables from left or right
+//!     side, then it is trying to combine a bound region with a free one or
+//!     multiple bound regions, so we need to select fresh bound regions.
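+//!
+//! As promised above, here is a small, hypothetical encoding of the four
+//! GLB replacement rules (hedged: standalone modern Rust, with the taint
+//! set summarized by precomputed flags and counts; this is not the
+//! actual rustc code):
+//!
+//! ```text
+//! enum Replacement { Keep, BoundFromLhs, Fresh }
+//!
+//! // `in_v`: is R in V?  `only_v`: does Tainted(R) contain only
+//! // variables in V?  `lhs`/`rhs`: how many variables in Tainted(R)
+//! // originate from each side.
+//! fn glb_replace(in_v: bool, only_v: bool, lhs: usize, rhs: usize) -> Replacement {
+//!     if !in_v {
+//!         Replacement::Keep                 // rule 1: R is not replaced
+//!     } else if only_v && lhs == 1 && rhs == 1 {
+//!         Replacement::BoundFromLhs         // rule 2: equate the two bound regions
+//!     } else if lhs == 0 && rhs == 0 {
+//!         Replacement::Keep                 // rule 3: R is mapped to itself
+//!     } else {
+//!         Replacement::Fresh                // rule 4: fresh bound variable
+//!     }
+//! }
+//! ```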
+//!
+//! Sorry, this is more of a shorthand to myself. I will try to write up something
+//! more convincing in the future.
+//!
+//! #### Where are the algorithms wrong?
+//!
+//! - The pre-replacement computation can fail even though using a
+//!   bound-region would have succeeded.
+//! - We will compute GLB(fn(fn($a)), fn(fn($b))) as fn(fn($c)) where $c is the
+//!   GLB of $a and $b. But if inference finds that $a and $b must be mapped
+//!   to regions without a GLB, then this is effectively a failure to compute
+//!   the GLB. However, the result `fn<$c>(fn($c))` is a valid GLB.
diff --git a/src/librustc/middle/typeck/infer/higher_ranked/mod.rs b/src/librustc/middle/typeck/infer/higher_ranked/mod.rs
index 812aa5c55572814c22e4df034da1b32fbc31d5b9..2f80a574bb18bde49fafca54e0c41ca905364a62 100644
--- a/src/librustc/middle/typeck/infer/higher_ranked/mod.rs
+++ b/src/librustc/middle/typeck/infer/higher_ranked/mod.rs
@@ -8,10 +8,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * Helper routines for higher-ranked things. See the `doc` module at
- * the end of the file for details.
- */
+//! Helper routines for higher-ranked things. See the `doc` module at
+//! the end of the file for details.
 
 use middle::ty::{mod, Ty, replace_late_bound_regions};
 use middle::typeck::infer::{mod, combine, cres, InferCtxt};
diff --git a/src/librustc/middle/typeck/infer/lattice.rs b/src/librustc/middle/typeck/infer/lattice.rs
index 6e6c631f007490382f221058a5c68814448cb750..daec959d11cd322efdadef224fa1c95c53a3b643 100644
--- a/src/librustc/middle/typeck/infer/lattice.rs
+++ b/src/librustc/middle/typeck/infer/lattice.rs
@@ -8,28 +8,26 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * # Lattice Variables
- *
- * This file contains generic code for operating on inference variables
- * that are characterized by an upper- and lower-bound. The logic and
- * reasoning is explained in detail in the large comment in `infer.rs`.
- *
- * The code in here is defined quite generically so that it can be
- * applied both to type variables, which represent types being inferred,
- * and fn variables, which represent function types being inferred.
- * It may eventually be applied to their types as well, who knows.
- * In some cases, the functions are also generic with respect to the
- * operation on the lattice (GLB vs LUB).
- *
- * Although all the functions are generic, we generally write the
- * comments in a way that is specific to type variables and the LUB
- * operation. It's just easier that way.
- *
- * In general all of the functions are defined parametrically
- * over a `LatticeValue`, which is a value defined with respect to
- * a lattice.
- */
+//! # Lattice Variables
+//!
+//!
This file contains generic code for operating on inference variables +//! that are characterized by an upper- and lower-bound. The logic and +//! reasoning is explained in detail in the large comment in `infer.rs`. +//! +//! The code in here is defined quite generically so that it can be +//! applied both to type variables, which represent types being inferred, +//! and fn variables, which represent function types being inferred. +//! It may eventually be applied to their types as well, who knows. +//! In some cases, the functions are also generic with respect to the +//! operation on the lattice (GLB vs LUB). +//! +//! Although all the functions are generic, we generally write the +//! comments in a way that is specific to type variables and the LUB +//! operation. It's just easier that way. +//! +//! In general all of the functions are defined parametrically +//! over a `LatticeValue`, which is a value defined with respect to +//! a lattice. use middle::ty::{TyVar}; use middle::ty::{mod, Ty}; diff --git a/src/librustc/middle/typeck/infer/mod.rs b/src/librustc/middle/typeck/infer/mod.rs index 93c11693091c2daef28c7dd05d2dd25640eeb058..c5845b143af89aca4da0d3ee6b563e498d094543 100644 --- a/src/librustc/middle/typeck/infer/mod.rs +++ b/src/librustc/middle/typeck/infer/mod.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! See doc.rs for documentation */ +//! See doc.rs for documentation #![allow(non_camel_case_types)] @@ -305,6 +305,8 @@ pub fn new_infer_ctxt<'a, 'tcx>(tcx: &'a ty::ctxt<'tcx>) } } +/// Computes the least upper-bound of `a` and `b`. If this is not possible, reports an error and +/// returns ty::err. pub fn common_supertype<'a, 'tcx>(cx: &InferCtxt<'a, 'tcx>, origin: TypeOrigin, a_is_expected: bool, @@ -312,11 +314,6 @@ pub fn common_supertype<'a, 'tcx>(cx: &InferCtxt<'a, 'tcx>, b: Ty<'tcx>) -> Ty<'tcx> { - /*! - * Computes the least upper-bound of `a` and `b`. If this is - * not possible, reports an error and returns ty::err. - */ - debug!("common_supertype({}, {})", a.repr(cx.tcx), b.repr(cx.tcx)); @@ -754,17 +751,13 @@ pub fn region_vars_for_defs(&self, .collect() } + /// Given a set of generics defined on a type or impl, returns a substitution mapping each + /// type/region parameter to a fresh inference variable. pub fn fresh_substs_for_generics(&self, span: Span, generics: &ty::Generics<'tcx>) -> subst::Substs<'tcx> { - /*! - * Given a set of generics defined on a type or impl, returns - * a substitution mapping each type/region parameter to a - * fresh inference variable. - */ - let type_params = generics.types.map( |_| self.next_ty_var()); @@ -774,18 +767,15 @@ pub fn fresh_substs_for_generics(&self, subst::Substs::new(type_params, region_params) } + /// Given a set of generics defined on a trait, returns a substitution mapping each output + /// type/region parameter to a fresh inference variable, and mapping the self type to + /// `self_ty`. pub fn fresh_substs_for_trait(&self, span: Span, generics: &ty::Generics<'tcx>, self_ty: Ty<'tcx>) -> subst::Substs<'tcx> { - /*! - * Given a set of generics defined on a trait, returns a - * substitution mapping each output type/region parameter to a - * fresh inference variable, and mapping the self type to - * `self_ty`. 
- */ assert!(generics.types.len(subst::SelfSpace) == 1); assert!(generics.types.len(subst::FnSpace) == 0); diff --git a/src/librustc/middle/typeck/infer/region_inference/doc.rs b/src/librustc/middle/typeck/infer/region_inference/doc.rs index 40b41deeb2b66096cf78ddd979c994846d543224..b4eac4c002677cdd87695a8d78ef60c0caf4ec20 100644 --- a/src/librustc/middle/typeck/infer/region_inference/doc.rs +++ b/src/librustc/middle/typeck/infer/region_inference/doc.rs @@ -8,371 +8,367 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Region inference module. - -# Terminology - -Note that we use the terms region and lifetime interchangeably, -though the term `lifetime` is preferred. - -# Introduction - -Region inference uses a somewhat more involved algorithm than type -inference. It is not the most efficient thing ever written though it -seems to work well enough in practice (famous last words). The reason -that we use a different algorithm is because, unlike with types, it is -impractical to hand-annotate with regions (in some cases, there aren't -even the requisite syntactic forms). So we have to get it right, and -it's worth spending more time on a more involved analysis. Moreover, -regions are a simpler case than types: they don't have aggregate -structure, for example. - -Unlike normal type inference, which is similar in spirit to H-M and thus -works progressively, the region type inference works by accumulating -constraints over the course of a function. Finally, at the end of -processing a function, we process and solve the constraints all at -once. - -The constraints are always of one of three possible forms: - -- ConstrainVarSubVar(R_i, R_j) states that region variable R_i - must be a subregion of R_j -- ConstrainRegSubVar(R, R_i) states that the concrete region R - (which must not be a variable) must be a subregion of the varibale R_i -- ConstrainVarSubReg(R_i, R) is the inverse - -# Building up the constraints - -Variables and constraints are created using the following methods: - -- `new_region_var()` creates a new, unconstrained region variable; -- `make_subregion(R_i, R_j)` states that R_i is a subregion of R_j -- `lub_regions(R_i, R_j) -> R_k` returns a region R_k which is - the smallest region that is greater than both R_i and R_j -- `glb_regions(R_i, R_j) -> R_k` returns a region R_k which is - the greatest region that is smaller than both R_i and R_j - -The actual region resolution algorithm is not entirely -obvious, though it is also not overly complex. - -## Snapshotting - -It is also permitted to try (and rollback) changes to the graph. This -is done by invoking `start_snapshot()`, which returns a value. Then -later you can call `rollback_to()` which undoes the work. -Alternatively, you can call `commit()` which ends all snapshots. -Snapshots can be recursive---so you can start a snapshot when another -is in progress, but only the root snapshot can "commit". - -# Resolving constraints - -The constraint resolution algorithm is not super complex but also not -entirely obvious. Here I describe the problem somewhat abstractly, -then describe how the current code works. There may be other, smarter -ways of doing this with which I am unfamiliar and can't be bothered to -research at the moment. - NDM - -## The problem - -Basically our input is a directed graph where nodes can be divided -into two categories: region variables and concrete regions. 
Each edge -`R -> S` in the graph represents a constraint that the region `R` is a -subregion of the region `S`. - -Region variable nodes can have arbitrary degree. There is one region -variable node per region variable. - -Each concrete region node is associated with some, well, concrete -region: e.g., a free lifetime, or the region for a particular scope. -Note that there may be more than one concrete region node for a -particular region value. Moreover, because of how the graph is built, -we know that all concrete region nodes have either in-degree 1 or -out-degree 1. - -Before resolution begins, we build up the constraints in a hashmap -that maps `Constraint` keys to spans. During resolution, we construct -the actual `Graph` structure that we describe here. - -## Our current algorithm - -We divide region variables into two groups: Expanding and Contracting. -Expanding region variables are those that have a concrete region -predecessor (direct or indirect). Contracting region variables are -all others. - -We first resolve the values of Expanding region variables and then -process Contracting ones. We currently use an iterative, fixed-point -procedure (but read on, I believe this could be replaced with a linear -walk). Basically we iterate over the edges in the graph, ensuring -that, if the source of the edge has a value, then this value is a -subregion of the target value. If the target does not yet have a -value, it takes the value from the source. If the target already had -a value, then the resulting value is Least Upper Bound of the old and -new values. When we are done, each Expanding node will have the -smallest region that it could possibly have and still satisfy the -constraints. - -We next process the Contracting nodes. Here we again iterate over the -edges, only this time we move values from target to source (if the -source is a Contracting node). For each contracting node, we compute -its value as the GLB of all its successors. Basically contracting -nodes ensure that there is overlap between their successors; we will -ultimately infer the largest overlap possible. - -# The Region Hierarchy - -## Without closures - -Let's first consider the region hierarchy without thinking about -closures, because they add a lot of complications. The region -hierarchy *basically* mirrors the lexical structure of the code. -There is a region for every piece of 'evaluation' that occurs, meaning -every expression, block, and pattern (patterns are considered to -"execute" by testing the value they are applied to and creating any -relevant bindings). So, for example: - - fn foo(x: int, y: int) { // -+ - // +------------+ // | - // | +-----+ // | - // | +-+ +-+ +-+ // | - // | | | | | | | // | - // v v v v v v v // | - let z = x + y; // | - ... // | - } // -+ - - fn bar() { ... } - -In this example, there is a region for the fn body block as a whole, -and then a subregion for the declaration of the local variable. -Within that, there are sublifetimes for the assignment pattern and -also the expression `x + y`. The expression itself has sublifetimes -for evaluating `x` and `y`. - -## Function calls - -Function calls are a bit tricky. I will describe how we handle them -*now* and then a bit about how we can improve them (Issue #6268). - -Consider a function call like `func(expr1, expr2)`, where `func`, -`arg1`, and `arg2` are all arbitrary expressions. 
Currently, -we construct a region hierarchy like: - - +----------------+ - | | - +--+ +---+ +---+| - v v v v v vv - func(expr1, expr2) - -Here you can see that the call as a whole has a region and the -function plus arguments are subregions of that. As a side-effect of -this, we get a lot of spurious errors around nested calls, in -particular when combined with `&mut` functions. For example, a call -like this one - - self.foo(self.bar()) - -where both `foo` and `bar` are `&mut self` functions will always yield -an error. - -Here is a more involved example (which is safe) so we can see what's -going on: - - struct Foo { f: uint, g: uint } - ... - fn add(p: &mut uint, v: uint) { - *p += v; - } - ... - fn inc(p: &mut uint) -> uint { - *p += 1; *p - } - fn weird() { - let mut x: Box = box Foo { ... }; - 'a: add(&mut (*x).f, - 'b: inc(&mut (*x).f)) // (..) - } - -The important part is the line marked `(..)` which contains a call to -`add()`. The first argument is a mutable borrow of the field `f`. The -second argument also borrows the field `f`. Now, in the current borrow -checker, the first borrow is given the lifetime of the call to -`add()`, `'a`. The second borrow is given the lifetime of `'b` of the -call to `inc()`. Because `'b` is considered to be a sublifetime of -`'a`, an error is reported since there are two co-existing mutable -borrows of the same data. - -However, if we were to examine the lifetimes a bit more carefully, we -can see that this error is unnecessary. Let's examine the lifetimes -involved with `'a` in detail. We'll break apart all the steps involved -in a call expression: - - 'a: { - 'a_arg1: let a_temp1: ... = add; - 'a_arg2: let a_temp2: &'a mut uint = &'a mut (*x).f; - 'a_arg3: let a_temp3: uint = { - let b_temp1: ... = inc; - let b_temp2: &'b = &'b mut (*x).f; - 'b_call: b_temp1(b_temp2) - }; - 'a_call: a_temp1(a_temp2, a_temp3) // (**) - } - -Here we see that the lifetime `'a` includes a number of substatements. -In particular, there is this lifetime I've called `'a_call` that -corresponds to the *actual execution of the function `add()`*, after -all arguments have been evaluated. There is a corresponding lifetime -`'b_call` for the execution of `inc()`. If we wanted to be precise -about it, the lifetime of the two borrows should be `'a_call` and -`'b_call` respectively, since the references that were created -will not be dereferenced except during the execution itself. - -However, this model by itself is not sound. The reason is that -while the two references that are created will never be used -simultaneously, it is still true that the first reference is -*created* before the second argument is evaluated, and so even though -it will not be *dereferenced* during the evaluation of the second -argument, it can still be *invalidated* by that evaluation. Consider -this similar but unsound example: - - struct Foo { f: uint, g: uint } - ... - fn add(p: &mut uint, v: uint) { - *p += v; - } - ... - fn consume(x: Box) -> uint { - x.f + x.g - } - fn weird() { - let mut x: Box = box Foo { ... }; - 'a: add(&mut (*x).f, consume(x)) // (..) - } - -In this case, the second argument to `add` actually consumes `x`, thus -invalidating the first argument. - -So, for now, we exclude the `call` lifetimes from our model. -Eventually I would like to include them, but we will have to make the -borrow checker handle this situation correctly. 
In particular, if -there is a reference created whose lifetime does not enclose -the borrow expression, we must issue sufficient restrictions to ensure -that the pointee remains valid. - -## Adding closures - -The other significant complication to the region hierarchy is -closures. I will describe here how closures should work, though some -of the work to implement this model is ongoing at the time of this -writing. - -The body of closures are type-checked along with the function that -creates them. However, unlike other expressions that appear within the -function body, it is not entirely obvious when a closure body executes -with respect to the other expressions. This is because the closure -body will execute whenever the closure is called; however, we can -never know precisely when the closure will be called, especially -without some sort of alias analysis. - -However, we can place some sort of limits on when the closure -executes. In particular, the type of every closure `fn:'r K` includes -a region bound `'r`. This bound indicates the maximum lifetime of that -closure; once we exit that region, the closure cannot be called -anymore. Therefore, we say that the lifetime of the closure body is a -sublifetime of the closure bound, but the closure body itself is unordered -with respect to other parts of the code. - -For example, consider the following fragment of code: - - 'a: { - let closure: fn:'a() = || 'b: { - 'c: ... - }; - 'd: ... - } - -Here we have four lifetimes, `'a`, `'b`, `'c`, and `'d`. The closure -`closure` is bounded by the lifetime `'a`. The lifetime `'b` is the -lifetime of the closure body, and `'c` is some statement within the -closure body. Finally, `'d` is a statement within the outer block that -created the closure. - -We can say that the closure body `'b` is a sublifetime of `'a` due to -the closure bound. By the usual lexical scoping conventions, the -statement `'c` is clearly a sublifetime of `'b`, and `'d` is a -sublifetime of `'d`. However, there is no ordering between `'c` and -`'d` per se (this kind of ordering between statements is actually only -an issue for dataflow; passes like the borrow checker must assume that -closures could execute at any time from the moment they are created -until they go out of scope). - -### Complications due to closure bound inference - -There is only one problem with the above model: in general, we do not -actually *know* the closure bounds during region inference! In fact, -closure bounds are almost always region variables! This is very tricky -because the inference system implicitly assumes that we can do things -like compute the LUB of two scoped lifetimes without needing to know -the values of any variables. - -Here is an example to illustrate the problem: - - fn identify(x: T) -> T { x } - - fn foo() { // 'foo is the function body - 'a: { - let closure = identity(|| 'b: { - 'c: ... - }); - 'd: closure(); - } - 'e: ...; - } - -In this example, the closure bound is not explicit. At compile time, -we will create a region variable (let's call it `V0`) to represent the -closure bound. - -The primary difficulty arises during the constraint propagation phase. -Imagine there is some variable with incoming edges from `'c` and `'d`. -This means that the value of the variable must be `LUB('c, -'d)`. However, without knowing what the closure bound `V0` is, we -can't compute the LUB of `'c` and `'d`! Any we don't know the closure -bound until inference is done. - -The solution is to rely on the fixed point nature of inference. 
-Basically, when we must compute `LUB('c, 'd)`, we just use the current
-value for `V0` as the closure's bound. If `V0`'s binding should
-change, then we will do another round of inference, and the result of
-`LUB('c, 'd)` will change.
-
-One minor implication of this is that the graph does not in fact track
-the full set of dependencies between edges. We cannot easily know
-whether the result of a LUB computation will change, since there may
-be indirect dependencies on other variables that are not reflected on
-the graph. Therefore, we must *always* iterate over all edges when
-doing the fixed point calculation, not just those adjacent to nodes
-whose values have changed.
-
-Were it not for this requirement, we could in fact avoid fixed-point
-iteration altogether. In that universe, we could instead first
-identify and remove strongly connected components (SCC) in the graph.
-Note that such components must consist solely of region variables; all
-of these variables can effectively be unified into a single variable.
-Once SCCs are removed, we are left with a DAG. At this point, we
-could walk the DAG in topological order once to compute the expanding
-nodes, and again in reverse topological order to compute the
-contracting nodes. However, as I said, this does not work given the
-current treatment of closure bounds, but perhaps in the future we can
-address this problem somehow and make region inference somewhat more
-efficient. Note that this is solely a matter of performance, not
-expressiveness.
-
-### Skolemization
-
-For a discussion on skolemization and higher-ranked subtyping, please
-see the module `middle::typeck::infer::higher_ranked::doc`.
-
-*/
+//! Region inference module.
+//!
+//! # Terminology
+//!
+//! Note that we use the terms region and lifetime interchangeably,
+//! though the term `lifetime` is preferred.
+//!
+//! # Introduction
+//!
+//! Region inference uses a somewhat more involved algorithm than type
+//! inference. It is not the most efficient thing ever written, though it
+//! seems to work well enough in practice (famous last words). The reason
+//! we use a different algorithm is that, unlike with types, it is
+//! impractical to hand-annotate with regions (in some cases, there aren't
+//! even the requisite syntactic forms). So we have to get it right, and
+//! it's worth spending more time on a more involved analysis. Moreover,
+//! regions are a simpler case than types: they don't have aggregate
+//! structure, for example.
+//!
+//! Unlike normal type inference, which is similar in spirit to H-M and thus
+//! works progressively, region inference works by accumulating
+//! constraints over the course of a function. Finally, at the end of
+//! processing a function, we process and solve the constraints all at
+//! once.
+//!
+//! The constraints are always of one of three possible forms:
+//!
+//! - ConstrainVarSubVar(R_i, R_j) states that region variable R_i
+//!   must be a subregion of R_j
+//! - ConstrainRegSubVar(R, R_i) states that the concrete region R
+//!   (which must not be a variable) must be a subregion of the variable R_i
+//! - ConstrainVarSubReg(R_i, R) is the inverse
+//!
+//! # Building up the constraints
+//!
+//! Variables and constraints are created using the following methods:
+//!
+//! - `new_region_var()` creates a new, unconstrained region variable;
+//! - `make_subregion(R_i, R_j)` states that R_i is a subregion of R_j
+//! - `lub_regions(R_i, R_j) -> R_k` returns a region R_k which is
+//!   the smallest region that is greater than both R_i and R_j
+//! - `glb_regions(R_i, R_j) -> R_k` returns a region R_k which is
+//!   the greatest region that is smaller than both R_i and R_j
+//!
+//! The actual region resolution algorithm is not entirely
+//! obvious, though it is also not overly complex.
+//!
+//! ## Snapshotting
+//!
+//! It is also permitted to try (and roll back) changes to the graph. This
+//! is done by invoking `start_snapshot()`, which returns a value. Then
+//! later you can call `rollback_to()` which undoes the work.
+//! Alternatively, you can call `commit()` which ends all snapshots.
+//! Snapshots can be recursive---so you can start a snapshot when another
+//! is in progress, but only the root snapshot can "commit".
+//!
+//! # Resolving constraints
+//!
+//! The constraint resolution algorithm is not super complex but also not
+//! entirely obvious. Here I describe the problem somewhat abstractly,
+//! then describe how the current code works. There may be other, smarter
+//! ways of doing this with which I am unfamiliar and can't be bothered to
+//! research at the moment. - NDM
+//!
+//! ## The problem
+//!
+//! Basically our input is a directed graph where nodes can be divided
+//! into two categories: region variables and concrete regions. Each edge
+//! `R -> S` in the graph represents a constraint that the region `R` is a
+//! subregion of the region `S`.
+//!
+//! Region variable nodes can have arbitrary degree. There is one region
+//! variable node per region variable.
+//!
+//! Each concrete region node is associated with some, well, concrete
+//! region: e.g., a free lifetime, or the region for a particular scope.
+//! Note that there may be more than one concrete region node for a
+//! particular region value. Moreover, because of how the graph is built,
+//! we know that all concrete region nodes have either in-degree 1 or
+//! out-degree 1.
+//!
+//! Before resolution begins, we build up the constraints in a hashmap
+//! that maps `Constraint` keys to spans. During resolution, we construct
+//! the actual `Graph` structure that we describe here.
+//!
+//! ## Our current algorithm
+//!
+//! We divide region variables into two groups: Expanding and Contracting.
+//! Expanding region variables are those that have a concrete region
+//! predecessor (direct or indirect). Contracting region variables are
+//! all others.
+//!
+//! We first resolve the values of Expanding region variables and then
+//! process Contracting ones. We currently use an iterative, fixed-point
+//! procedure (but read on, I believe this could be replaced with a linear
+//! walk). Basically we iterate over the edges in the graph, ensuring
+//! that, if the source of the edge has a value, then this value is a
+//! subregion of the target value. If the target does not yet have a
+//! value, it takes the value from the source. If the target already had
+//! a value, then the resulting value is the Least Upper Bound of the old
+//! and new values. When we are done, each Expanding node will have the
+//! smallest region that it could possibly have and still satisfy the
+//! constraints.
+//!
+//! We next process the Contracting nodes. Here we again iterate over the
+//! edges, only this time we move values from target to source (if the
+//! source is a Contracting node). For each contracting node, we compute
+//! its value as the GLB of all its successors. Basically contracting
+//! nodes ensure that there is overlap between their successors; we will
+//! ultimately infer the largest overlap possible.
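+//!
+//! In sketch form, the expansion phase amounts to a classic fixed-point
+//! loop. (This is an illustrative sketch only: `graph`, `values`, and the
+//! edge representation are schematic stand-ins, not this module's actual
+//! data structures; `lub_concrete_regions` is the real helper used to
+//! combine two concrete regions.)
+//!
+//!     let mut changed = true;
+//!     while changed {
+//!         changed = false;
+//!         for edge in graph.edges.iter() {
+//!             // Grow the target to cover the source: the target's new
+//!             // value is LUB(source value, old target value).
+//!             let lub = self.lub_concrete_regions(*values.get(edge.source),
+//!                                                 *values.get(edge.target));
+//!             if lub != *values.get(edge.target) {
+//!                 *values.get_mut(edge.target) = lub;
+//!                 changed = true;
+//!             }
+//!         }
+//!     }
+//!
+//! The contraction phase is the mirror image: values flow from successors
+//! back to Contracting predecessors and are combined with GLB rather than
+//! LUB.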
+//!
+//! # The Region Hierarchy
+//!
+//! ## Without closures
+//!
+//! Let's first consider the region hierarchy without thinking about
+//! closures, because they add a lot of complications. The region
+//! hierarchy *basically* mirrors the lexical structure of the code.
+//! There is a region for every piece of 'evaluation' that occurs, meaning
+//! every expression, block, and pattern (patterns are considered to
+//! "execute" by testing the value they are applied to and creating any
+//! relevant bindings). So, for example:
+//!
+//!     fn foo(x: int, y: int) { // -+
+//!     //  +------------+       //  |
+//!     //  |      +-----+       //  |
+//!     //  |  +-+ +-+ +-+       //  |
+//!     //  |  | | | | | |       //  |
+//!     //  v  v v v v v v       //  |
+//!         let z = x + y;       //  |
+//!         ...                  //  |
+//!     }                        // -+
+//!
+//!     fn bar() { ... }
+//!
+//! In this example, there is a region for the fn body block as a whole,
+//! and then a subregion for the declaration of the local variable.
+//! Within that, there are sublifetimes for the assignment pattern and
+//! also the expression `x + y`. The expression itself has sublifetimes
+//! for evaluating `x` and `y`.
+//!
+//! ## Function calls
+//!
+//! Function calls are a bit tricky. I will describe how we handle them
+//! *now* and then a bit about how we can improve them (Issue #6268).
+//!
+//! Consider a function call like `func(expr1, expr2)`, where `func`,
+//! `expr1`, and `expr2` are all arbitrary expressions. Currently,
+//! we construct a region hierarchy like:
+//!
+//!     +----------------+
+//!     |                |
+//!     +--+ +---+  +---+|
+//!     v  v v   v  v   vv
+//!     func(expr1, expr2)
+//!
+//! Here you can see that the call as a whole has a region and the
+//! function plus arguments are subregions of that. As a side-effect of
+//! this, we get a lot of spurious errors around nested calls, in
+//! particular when combined with `&mut` functions. For example, a call
+//! like this one
+//!
+//!     self.foo(self.bar())
+//!
+//! where both `foo` and `bar` are `&mut self` functions will always yield
+//! an error.
+//!
+//! Here is a more involved example (which is safe) so we can see what's
+//! going on:
+//!
+//!     struct Foo { f: uint, g: uint }
+//!     ...
+//!     fn add(p: &mut uint, v: uint) {
+//!         *p += v;
+//!     }
+//!     ...
+//!     fn inc(p: &mut uint) -> uint {
+//!         *p += 1; *p
+//!     }
+//!     fn weird() {
+//!         let mut x: Box<Foo> = box Foo { ... };
+//!         'a: add(&mut (*x).f,
+//!                 'b: inc(&mut (*x).f)) // (..)
+//!     }
+//!
+//! The important part is the line marked `(..)` which contains a call to
+//! `add()`. The first argument is a mutable borrow of the field `f`. The
+//! second argument also borrows the field `f`. Now, in the current borrow
+//! checker, the first borrow is given the lifetime of the call to
+//! `add()`, `'a`. The second borrow is given the lifetime `'b` of the
+//! call to `inc()`. Because `'b` is considered to be a sublifetime of
+//! `'a`, an error is reported since there are two co-existing mutable
+//! borrows of the same data.
+//!
+//! However, if we were to examine the lifetimes a bit more carefully, we
+//! can see that this error is unnecessary. Let's examine the lifetimes
+//! involved with `'a` in detail. We'll break apart all the steps involved
+//! in a call expression:
+//!
+//!     'a: {
+//!         'a_arg1: let a_temp1: ... = add;
+//!         'a_arg2: let a_temp2: &'a mut uint = &'a mut (*x).f;
+//!         'a_arg3: let a_temp3: uint = {
+//!             let b_temp1: ... = inc;
+//!             let b_temp2: &'b = &'b mut (*x).f;
+//!             'b_call: b_temp1(b_temp2)
+//!         };
+//!         'a_call: a_temp1(a_temp2, a_temp3) // (**)
+//!     }
+//!
+//! Here we see that the lifetime `'a` includes a number of substatements.
+//! In particular, there is this lifetime I've called `'a_call` that
+//! corresponds to the *actual execution of the function `add()`*, after
+//! all arguments have been evaluated. There is a corresponding lifetime
+//! `'b_call` for the execution of `inc()`. If we wanted to be precise
+//! about it, the lifetimes of the two borrows should be `'a_call` and
+//! `'b_call` respectively, since the references that were created
+//! will not be dereferenced except during the execution itself.
+//!
+//! However, this model by itself is not sound. The reason is that
+//! while the two references that are created will never be used
+//! simultaneously, it is still true that the first reference is
+//! *created* before the second argument is evaluated, and so even though
+//! it will not be *dereferenced* during the evaluation of the second
+//! argument, it can still be *invalidated* by that evaluation. Consider
+//! this similar but unsound example:
+//!
+//!     struct Foo { f: uint, g: uint }
+//!     ...
+//!     fn add(p: &mut uint, v: uint) {
+//!         *p += v;
+//!     }
+//!     ...
+//!     fn consume(x: Box<Foo>) -> uint {
+//!         x.f + x.g
+//!     }
+//!     fn weird() {
+//!         let mut x: Box<Foo> = box Foo { ... };
+//!         'a: add(&mut (*x).f, consume(x)) // (..)
+//!     }
+//!
+//! In this case, the second argument to `add` actually consumes `x`, thus
+//! invalidating the first argument.
+//!
+//! So, for now, we exclude the `call` lifetimes from our model.
+//! Eventually I would like to include them, but we will have to make the
+//! borrow checker handle this situation correctly. In particular, if
+//! there is a reference created whose lifetime does not enclose
+//! the borrow expression, we must issue sufficient restrictions to ensure
+//! that the pointee remains valid.
+//!
+//! ## Adding closures
+//!
+//! The other significant complication to the region hierarchy is
+//! closures. I will describe here how closures should work, though some
+//! of the work to implement this model is ongoing at the time of this
+//! writing.
+//!
+//! The bodies of closures are type-checked along with the function that
+//! creates them. However, unlike other expressions that appear within the
+//! function body, it is not entirely obvious when a closure body executes
+//! with respect to the other expressions. This is because the closure
+//! body will execute whenever the closure is called; however, we can
+//! never know precisely when the closure will be called, especially
+//! without some sort of alias analysis.
+//!
+//! However, we can place some limits on when the closure
+//! executes. In particular, the type of every closure `fn:'r K` includes
+//! a region bound `'r`. This bound indicates the maximum lifetime of that
+//! closure; once we exit that region, the closure cannot be called
+//! anymore. Therefore, we say that the lifetime of the closure body is a
+//! sublifetime of the closure bound, but the closure body itself is unordered
+//! with respect to other parts of the code.
+//!
+//! For example, consider the following fragment of code:
+//!
+//!     'a: {
+//!         let closure: fn:'a() = || 'b: {
+//!             'c: ...
+//!         };
+//!         'd: ...
+//!     }
+//!
+//! Here we have four lifetimes, `'a`, `'b`, `'c`, and `'d`. The closure
+//! `closure` is bounded by the lifetime `'a`. The lifetime `'b` is the
+//! lifetime of the closure body, and `'c` is some statement within the
+//! closure body. Finally, `'d` is a statement within the outer block that
+//! created the closure.
+//!
+//! We can say that the closure body `'b` is a sublifetime of `'a` due to
+//! the closure bound. By the usual lexical scoping conventions, the
+//! statement `'c` is clearly a sublifetime of `'b`, and `'d` is a
+//! sublifetime of `'a`. However, there is no ordering between `'c` and
+//! `'d` per se (this kind of ordering between statements is actually only
+//! an issue for dataflow; passes like the borrow checker must assume that
+//! closures could execute at any time from the moment they are created
+//! until they go out of scope).
+//!
+//! ### Complications due to closure bound inference
+//!
+//! There is only one problem with the above model: in general, we do not
+//! actually *know* the closure bounds during region inference! In fact,
+//! closure bounds are almost always region variables! This is very tricky
+//! because the inference system implicitly assumes that we can do things
+//! like compute the LUB of two scoped lifetimes without needing to know
+//! the values of any variables.
+//!
+//! Here is an example to illustrate the problem:
+//!
+//!     fn identity<T>(x: T) -> T { x }
+//!
+//!     fn foo() { // 'foo is the function body
+//!         'a: {
+//!             let closure = identity(|| 'b: {
+//!                 'c: ...
+//!             });
+//!             'd: closure();
+//!         }
+//!         'e: ...;
+//!     }
+//!
+//! In this example, the closure bound is not explicit. At compile time,
+//! we will create a region variable (let's call it `V0`) to represent the
+//! closure bound.
+//!
+//! The primary difficulty arises during the constraint propagation phase.
+//! Imagine there is some variable with incoming edges from `'c` and `'d`.
+//! This means that the value of the variable must be `LUB('c,
+//! 'd)`. However, without knowing what the closure bound `V0` is, we
+//! can't compute the LUB of `'c` and `'d`! And we don't know the closure
+//! bound until inference is done.
+//!
+//! The solution is to rely on the fixed point nature of inference.
+//! Basically, when we must compute `LUB('c, 'd)`, we just use the current
+//! value for `V0` as the closure's bound. If `V0`'s binding should
+//! change, then we will do another round of inference, and the result of
+//! `LUB('c, 'd)` will change.
+//!
+//! One minor implication of this is that the graph does not in fact track
+//! the full set of dependencies between edges. We cannot easily know
+//! whether the result of a LUB computation will change, since there may
+//! be indirect dependencies on other variables that are not reflected on
+//! the graph. Therefore, we must *always* iterate over all edges when
+//! doing the fixed point calculation, not just those adjacent to nodes
+//! whose values have changed.
+//!
+//! Were it not for this requirement, we could in fact avoid fixed-point
+//! iteration altogether. In that universe, we could instead first
+//! identify and remove strongly connected components (SCC) in the graph.
+//! Note that such components must consist solely of region variables; all
+//! of these variables can effectively be unified into a single variable.
+//! Once SCCs are removed, we are left with a DAG. At this point, we
+//! could walk the DAG in topological order once to compute the expanding
+//! nodes, and again in reverse topological order to compute the
+//! contracting nodes. However, as I said, this does not work given the
+//! current treatment of closure bounds, but perhaps in the future we can
+//! address this problem somehow and make region inference somewhat more
+//! efficient. Note that this is solely a matter of performance, not
+//! expressiveness.
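+//!
+//! For the record, that hypothetical non-iterative pipeline would look
+//! roughly like the following (all helper names here are invented for
+//! illustration; none of this is implemented):
+//!
+//!     let sccs = strongly_connected_components(&graph);
+//!     let dag = collapse_sccs(graph, sccs); // unify each SCC to one variable
+//!     for v in topological_order(&dag).iter() {
+//!         expand(*v);   // take the LUB of the (expanded) predecessors
+//!     }
+//!     for v in topological_order(&dag).iter().rev() {
+//!         contract(*v); // take the GLB of the (contracted) successors
+//!     }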
+//!
+//! ### Skolemization
+//!
+//! For a discussion on skolemization and higher-ranked subtyping, please
+//! see the module `middle::typeck::infer::higher_ranked::doc`.
diff --git a/src/librustc/middle/typeck/infer/region_inference/mod.rs b/src/librustc/middle/typeck/infer/region_inference/mod.rs
index 6a447d467cfcec46b903fdd58ac7c5dce30e1853..01533cba7ab6dcad2dd1c35523d4e07c51ab3760 100644
--- a/src/librustc/middle/typeck/infer/region_inference/mod.rs
+++ b/src/librustc/middle/typeck/infer/region_inference/mod.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*! See doc.rs */
+//! See doc.rs
 
 pub use self::Constraint::*;
 pub use self::Verify::*;
@@ -597,15 +597,10 @@ pub fn vars_created_since_mark(&self, mark: RegionMark)
             .collect()
     }
 
+    /// Computes all regions that have been related to `r0` in any way since the mark `mark` was
+    /// made---`r0` itself will be the first entry. This is used when checking whether skolemized
+    /// regions are being improperly related to other regions.
     pub fn tainted(&self, mark: RegionMark, r0: Region) -> Vec<Region> {
-        /*!
-         * Computes all regions that have been related to `r0` in any
-         * way since the mark `mark` was made---`r0` itself will be
-         * the first entry. This is used when checking whether
-         * skolemized regions are being improperly related to other
-         * regions.
-         */
-
         debug!("tainted(mark={}, r0={})", mark, r0.repr(self.tcx));
         let _indenter = indenter();
 
@@ -783,16 +778,12 @@ fn lub_concrete_regions(&self, a: Region, b: Region) -> Region {
         }
     }
 
+    /// Computes a region that encloses both free region arguments. Guarantees that if the same two
+    /// regions are given as argument, in any order, a consistent result is returned.
     fn lub_free_regions(&self,
                         a: &FreeRegion,
                         b: &FreeRegion) -> ty::Region
     {
-        /*!
-         * Computes a region that encloses both free region arguments.
-         * Guarantee that if the same two regions are given as argument,
-         * in any order, a consistent result is returned.
-         */
-
         return match a.cmp(b) {
             Less => helper(self, a, b),
             Greater => helper(self, b, a),
@@ -884,16 +875,13 @@ fn glb_concrete_regions(&self,
         }
     }
 
+    /// Computes a region that is enclosed by both free region arguments, if any. Guarantees that
+    /// if the same two regions are given as argument, in any order, a consistent result is
+    /// returned.
     fn glb_free_regions(&self,
                         a: &FreeRegion,
                         b: &FreeRegion) -> cres<'tcx, ty::Region>
    {
-        /*!
-         * Computes a region that is enclosed by both free region arguments,
-         * if any. Guarantees that if the same two regions are given as argument,
-         * in any order, a consistent result is returned.
-         */
-
         return match a.cmp(b) {
             Less => helper(self, a, b),
             Greater => helper(self, b, a),
diff --git a/src/librustc/middle/typeck/infer/skolemize.rs b/src/librustc/middle/typeck/infer/skolemize.rs
index 5907a2bb9b61d945d2eee360d7ea99653f65d937..62bf1d0126a59d156442f1260ecd98b47d21dae2 100644
--- a/src/librustc/middle/typeck/infer/skolemize.rs
+++ b/src/librustc/middle/typeck/infer/skolemize.rs
@@ -8,37 +8,27 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * Skolemization is the process of replacing unknown variables with
- * fresh types. The idea is that the type, after skolemization,
- * contains no inference variables but instead contains either a value
- * for each variable or fresh "arbitrary" types wherever a variable
- * would have been.
- *
- * Skolemization is used primarily to get a good type for inserting
- * into a cache. The result summarizes what the type inferencer knows
- * "so far". The primary place it is used right now is in the trait
- * matching algorithm, which needs to be able to cache whether an
- * `impl` self type matches some other type X -- *without* affecting
- * `X`. That means if that if the type `X` is in fact an unbound type
- * variable, we want the match to be regarded as ambiguous, because
- * depending on what type that type variable is ultimately assigned,
- * the match may or may not succeed.
- *
- * Note that you should be careful not to allow the output of
- * skolemization to leak to the user in error messages or in any other
- * form. Skolemization is only really useful as an internal detail.
- *
- * __An important detail concerning regions.__ The skolemizer also
- * replaces *all* regions with 'static. The reason behind this is
- * that, in general, we do not take region relationships into account
- * when making type-overloaded decisions. This is important because of
- * the design of the region inferencer, which is not based on
- * unification but rather on accumulating and then solving a set of
- * constraints. In contrast, the type inferencer assigns a value to
- * each type variable only once, and it does so as soon as it can, so
- * it is reasonable to ask what the type inferencer knows "so far".
- */
+//! Skolemization is the process of replacing unknown variables with fresh types. The idea is that
+//! the type, after skolemization, contains no inference variables but instead contains either a
+//! value for each variable or fresh "arbitrary" types wherever a variable would have been.
+//!
+//! Skolemization is used primarily to get a good type for inserting into a cache. The result
+//! summarizes what the type inferencer knows "so far". The primary place it is used right now is
+//! in the trait matching algorithm, which needs to be able to cache whether an `impl` self type
+//! matches some other type X -- *without* affecting `X`. That means that if the type `X` is in
+//! fact an unbound type variable, we want the match to be regarded as ambiguous, because depending
+//! on what type that type variable is ultimately assigned, the match may or may not succeed.
+//!
+//! Note that you should be careful not to allow the output of skolemization to leak to the user in
+//! error messages or in any other form. Skolemization is only really useful as an internal detail.
+//!
+//! __An important detail concerning regions.__ The skolemizer also replaces *all* regions with
+//! 'static. The reason behind this is that, in general, we do not take region relationships into
+//! account when making type-overloaded decisions. This is important because of the design of the
+//! region inferencer, which is not based on unification but rather on accumulating and then
+//! solving a set of constraints. In contrast, the type inferencer assigns a value to each type
+//! variable only once, and it does so as soon as it can, so it is reasonable to ask what the type
+//! inferencer knows "so far".
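+//!
+//! As a schematic illustration (using an ad-hoc notation in which `$N`
+//! stands for an unresolved inference variable), skolemization acts like
+//! this:
+//!
+//!     Vec<$0>           --skolemize-->   Vec<Skolemized(0)>
+//!     |$1| -> &'a $1    --skolemize-->   |Skolemized(0)| -> &'static Skolemized(0)
+//!
+//! Each distinct type variable maps to a distinct "arbitrary" type, every
+//! region (such as `'a` above) becomes `'static`, and the result contains
+//! no inference variables at all.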
use middle::ty::{mod, Ty}; use middle::ty_fold; diff --git a/src/librustc/middle/typeck/infer/type_variable.rs b/src/librustc/middle/typeck/infer/type_variable.rs index f7f7389602f8292892a8c92ee9f4c88455cb4203..3058f09a83a851d5006d846f6f1e3ddbf9aaed6f 100644 --- a/src/librustc/middle/typeck/infer/type_variable.rs +++ b/src/librustc/middle/typeck/infer/type_variable.rs @@ -72,12 +72,10 @@ pub fn var_diverges<'a>(&'a self, vid: ty::TyVid) -> bool { self.values.get(vid.index).diverging } + /// Records that `a <: b`, `a :> b`, or `a == b`, depending on `dir`. + /// + /// Precondition: neither `a` nor `b` are known. pub fn relate_vars(&mut self, a: ty::TyVid, dir: RelationDir, b: ty::TyVid) { - /*! - * Records that `a <: b`, `a :> b`, or `a == b`, depending on `dir`. - * - * Precondition: neither `a` nor `b` are known. - */ if a != b { self.relations(a).push((dir, b)); @@ -86,19 +84,15 @@ pub fn relate_vars(&mut self, a: ty::TyVid, dir: RelationDir, b: ty::TyVid) { } } + /// Instantiates `vid` with the type `ty` and then pushes an entry onto `stack` for each of the + /// relations of `vid` to other variables. The relations will have the form `(ty, dir, vid1)` + /// where `vid1` is some other variable id. pub fn instantiate_and_push( &mut self, vid: ty::TyVid, ty: Ty<'tcx>, stack: &mut Vec<(Ty<'tcx>, RelationDir, ty::TyVid)>) { - /*! - * Instantiates `vid` with the type `ty` and then pushes an - * entry onto `stack` for each of the relations of `vid` to - * other variables. The relations will have the form `(ty, - * dir, vid1)` where `vid1` is some other variable id. - */ - let old_value = { let value_ptr = &mut self.values.get_mut(vid.index).value; mem::replace(value_ptr, Known(ty)) diff --git a/src/librustc/middle/typeck/infer/unify.rs b/src/librustc/middle/typeck/infer/unify.rs index fcf042b3f8b80e4cb56f1e565c809bbd7304b5a8..38f55cc3f467b226242152c0eb67a50c94af6722 100644 --- a/src/librustc/middle/typeck/infer/unify.rs +++ b/src/librustc/middle/typeck/infer/unify.rs @@ -157,13 +157,9 @@ pub fn new_key(&mut self, value: V) -> K { k } + /// Find the root node for `vid`. This uses the standard union-find algorithm with path + /// compression: http://en.wikipedia.org/wiki/Disjoint-set_data_structure pub fn get(&mut self, tcx: &ty::ctxt, vid: K) -> Node { - /*! - * Find the root node for `vid`. This uses the standard - * union-find algorithm with path compression: - * http://en.wikipedia.org/wiki/Disjoint-set_data_structure - */ - let index = vid.index(); let value = (*self.values.get(index)).clone(); match value { @@ -188,16 +184,13 @@ fn is_root(&self, key: &K) -> bool { } } + /// Sets the value for `vid` to `new_value`. `vid` MUST be a root node! Also, we must be in the + /// middle of a snapshot. pub fn set(&mut self, tcx: &ty::ctxt<'tcx>, key: K, new_value: VarValue) { - /*! - * Sets the value for `vid` to `new_value`. `vid` MUST be a - * root node! Also, we must be in the middle of a snapshot. - */ - assert!(self.is_root(&key)); debug!("Updating variable {} to {}", @@ -207,19 +200,15 @@ pub fn set(&mut self, self.values.set(key.index(), new_value); } + /// Either redirects node_a to node_b or vice versa, depending on the relative rank. Returns + /// the new root and rank. You should then update the value of the new root to something + /// suitable. pub fn unify(&mut self, tcx: &ty::ctxt<'tcx>, node_a: &Node, node_b: &Node) -> (K, uint) { - /*! - * Either redirects node_a to node_b or vice versa, depending - * on the relative rank. Returns the new root and rank. 
You - * should then update the value of the new root to something - * suitable. - */ - debug!("unify(node_a(id={}, rank={}), node_b(id={}, rank={}))", node_a.key.repr(tcx), node_a.rank, @@ -295,19 +284,15 @@ fn simple_var_t(&self, impl<'a,'tcx,V:SimplyUnifiable<'tcx>,K:UnifyKey<'tcx, Option>> InferCtxtMethodsForSimplyUnifiableTypes<'tcx, V, K> for InferCtxt<'a, 'tcx> { + /// Unifies two simple keys. Because simple keys do not have any subtyping relationships, if + /// both keys have already been associated with a value, then those two values must be the + /// same. fn simple_vars(&self, a_is_expected: bool, a_id: K, b_id: K) -> ures<'tcx> { - /*! - * Unifies two simple keys. Because simple keys do - * not have any subtyping relationships, if both keys - * have already been associated with a value, then those two - * values must be the same. - */ - let tcx = self.tcx; let table = UnifyKey::unification_table(self); let node_a = table.borrow_mut().get(tcx, a_id); @@ -341,19 +326,14 @@ fn simple_vars(&self, return Ok(()) } + /// Sets the value of the key `a_id` to `b`. Because simple keys do not have any subtyping + /// relationships, if `a_id` already has a value, it must be the same as `b`. fn simple_var_t(&self, a_is_expected: bool, a_id: K, b: V) -> ures<'tcx> { - /*! - * Sets the value of the key `a_id` to `b`. Because - * simple keys do not have any subtyping relationships, - * if `a_id` already has a value, it must be the same as - * `b`. - */ - let tcx = self.tcx; let table = UnifyKey::unification_table(self); let node_a = table.borrow_mut().get(tcx, a_id); diff --git a/src/librustc/middle/typeck/variance.rs b/src/librustc/middle/typeck/variance.rs index 51b610dccce3846d0db52793c104f6b0a9511662..fa001f0434ffd2319190b184be82c0d36029a67d 100644 --- a/src/librustc/middle/typeck/variance.rs +++ b/src/librustc/middle/typeck/variance.rs @@ -8,189 +8,186 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -This file infers the variance of type and lifetime parameters. The -algorithm is taken from Section 4 of the paper "Taming the Wildcards: -Combining Definition- and Use-Site Variance" published in PLDI'11 and -written by Altidor et al., and hereafter referred to as The Paper. - -This inference is explicitly designed *not* to consider the uses of -types within code. To determine the variance of type parameters -defined on type `X`, we only consider the definition of the type `X` -and the definitions of any types it references. - -We only infer variance for type parameters found on *types*: structs, -enums, and traits. We do not infer variance for type parameters found -on fns or impls. This is because those things are not type definitions -and variance doesn't really make sense in that context. - -It is worth covering what variance means in each case. For structs and -enums, I think it is fairly straightforward. The variance of the type -or lifetime parameters defines whether `T` is a subtype of `T` -(resp. `T<'a>` and `T<'b>`) based on the relationship of `A` and `B` -(resp. `'a` and `'b`). (FIXME #3598 -- we do not currently make use of -the variances we compute for type parameters.) - -### Variance on traits - -The meaning of variance for trait parameters is more subtle and worth -expanding upon. There are in fact two uses of the variance values we -compute. - -#### Trait variance and object types - -The first is for object types. 
Just as with structs and enums, we can -decide the subtyping relationship between two object types `&Trait` -and `&Trait` based on the relationship of `A` and `B`. Note that -for object types we ignore the `Self` type parameter -- it is unknown, -and the nature of dynamic dispatch ensures that we will always call a -function that is expected the appropriate `Self` type. However, we -must be careful with the other type parameters, or else we could end -up calling a function that is expecting one type but provided another. - -To see what I mean, consider a trait like so: - - trait ConvertTo { - fn convertTo(&self) -> A; - } - -Intuitively, If we had one object `O=&ConvertTo` and another -`S=&ConvertTo`, then `S <: O` because `String <: Object` -(presuming Java-like "string" and "object" types, my go to examples -for subtyping). The actual algorithm would be to compare the -(explicit) type parameters pairwise respecting their variance: here, -the type parameter A is covariant (it appears only in a return -position), and hence we require that `String <: Object`. - -You'll note though that we did not consider the binding for the -(implicit) `Self` type parameter: in fact, it is unknown, so that's -good. The reason we can ignore that parameter is precisely because we -don't need to know its value until a call occurs, and at that time (as -you said) the dynamic nature of virtual dispatch means the code we run -will be correct for whatever value `Self` happens to be bound to for -the particular object whose method we called. `Self` is thus different -from `A`, because the caller requires that `A` be known in order to -know the return type of the method `convertTo()`. (As an aside, we -have rules preventing methods where `Self` appears outside of the -receiver position from being called via an object.) - -#### Trait variance and vtable resolution - -But traits aren't only used with objects. They're also used when -deciding whether a given impl satisfies a given trait bound. To set the -scene here, imagine I had a function: - - fn convertAll>(v: &[T]) { - ... - } - -Now imagine that I have an implementation of `ConvertTo` for `Object`: - - impl ConvertTo for Object { ... } - -And I want to call `convertAll` on an array of strings. Suppose -further that for whatever reason I specifically supply the value of -`String` for the type parameter `T`: - - let mut vector = ~["string", ...]; - convertAll::(v); - -Is this legal? To put another way, can we apply the `impl` for -`Object` to the type `String`? The answer is yes, but to see why -we have to expand out what will happen: - -- `convertAll` will create a pointer to one of the entries in the - vector, which will have type `&String` -- It will then call the impl of `convertTo()` that is intended - for use with objects. This has the type: - - fn(self: &Object) -> int - - It is ok to provide a value for `self` of type `&String` because - `&String <: &Object`. - -OK, so intuitively we want this to be legal, so let's bring this back -to variance and see whether we are computing the correct result. We -must first figure out how to phrase the question "is an impl for -`Object,int` usable where an impl for `String,int` is expected?" - -Maybe it's helpful to think of a dictionary-passing implementation of -type classes. In that case, `convertAll()` takes an implicit parameter -representing the impl. 
-In short, we *have* an impl of type:
-
-    V_O = ConvertTo<int> for Object
-
-and the function prototype expects an impl of type:
-
-    V_S = ConvertTo<int> for String
-
-As with any argument, this is legal if the type of the value given
-(`V_O`) is a subtype of the type expected (`V_S`). So is `V_O <: V_S`?
-The answer will depend on the variance of the various parameters. In
-this case, because the `Self` parameter is contravariant and `A` is
-covariant, it means that:
+//! This file infers the variance of type and lifetime parameters. The
+//! algorithm is taken from Section 4 of the paper "Taming the Wildcards:
+//! Combining Definition- and Use-Site Variance" published in PLDI'11 and
+//! written by Altidor et al., and hereafter referred to as The Paper.
+//!
+//! This inference is explicitly designed *not* to consider the uses of
+//! types within code. To determine the variance of type parameters
+//! defined on type `X`, we only consider the definition of the type `X`
+//! and the definitions of any types it references.
+//!
+//! We only infer variance for type parameters found on *types*: structs,
+//! enums, and traits. We do not infer variance for type parameters found
+//! on fns or impls. This is because those things are not type definitions
+//! and variance doesn't really make sense in that context.
+//!
+//! It is worth covering what variance means in each case. For structs and
+//! enums, I think it is fairly straightforward. The variance of the type
+//! or lifetime parameters defines whether `T<A>` is a subtype of `T<B>`
+//! (resp. `T<'a>` and `T<'b>`) based on the relationship of `A` and `B`
+//! (resp. `'a` and `'b`). (FIXME #3598 -- we do not currently make use of
+//! the variances we compute for type parameters.)
+//!
+//! ### Variance on traits
+//!
+//! The meaning of variance for trait parameters is more subtle and worth
+//! expanding upon. There are in fact two uses of the variance values we
+//! compute.
+//!
+//! #### Trait variance and object types
+//!
+//! The first is for object types. Just as with structs and enums, we can
+//! decide the subtyping relationship between two object types `&Trait<A>`
+//! and `&Trait<B>` based on the relationship of `A` and `B`. Note that
+//! for object types we ignore the `Self` type parameter -- it is unknown,
+//! and the nature of dynamic dispatch ensures that we will always call a
+//! function that is expecting the appropriate `Self` type. However, we
+//! must be careful with the other type parameters, or else we could end
+//! up calling a function that is expecting one type but provided another.
+//!
+//! To see what I mean, consider a trait like so:
+//!
+//!     trait ConvertTo<A> {
+//!         fn convertTo(&self) -> A;
+//!     }
+//!
+//! Intuitively, if we had one object `O=&ConvertTo<Object>` and another
+//! `S=&ConvertTo<String>`, then `S <: O` because `String <: Object`
+//! (presuming Java-like "string" and "object" types, my go-to examples
+//! for subtyping). The actual algorithm would be to compare the
+//! (explicit) type parameters pairwise respecting their variance: here,
+//! the type parameter A is covariant (it appears only in a return
+//! position), and hence we require that `String <: Object`.
+//!
+//! You'll note though that we did not consider the binding for the
+//! (implicit) `Self` type parameter: in fact, it is unknown, so that's
+//! good. The reason we can ignore that parameter is precisely because we
+//! don't need to know its value until a call occurs, and at that time
+//! the dynamic nature of virtual dispatch means the code we run
+//! will be correct for whatever value `Self` happens to be bound to for
+//! the particular object whose method we called. `Self` is thus different
+//! from `A`, because the caller requires that `A` be known in order to
+//! know the return type of the method `convertTo()`. (As an aside, we
+//! have rules preventing methods where `Self` appears outside of the
+//! receiver position from being called via an object.)
+//!
+//! #### Trait variance and vtable resolution
+//!
+//! But traits aren't only used with objects. They're also used when
+//! deciding whether a given impl satisfies a given trait bound. To set the
+//! scene here, imagine I had a function:
+//!
+//!     fn convertAll<T:ConvertTo<int>>(v: &[T]) {
+//!         ...
+//!     }
+//!
+//! Now imagine that I have an implementation of `ConvertTo` for `Object`:
+//!
+//!     impl ConvertTo<int> for Object { ... }
+//!
+//! And I want to call `convertAll` on an array of strings. Suppose
+//! further that for whatever reason I specifically supply the value of
+//! `String` for the type parameter `T`:
+//!
+//!     let mut vector = ~["string", ...];
+//!     convertAll::<String>(v);
+//!
+//! Is this legal? To put it another way, can we apply the `impl` for
+//! `Object` to the type `String`? The answer is yes, but to see why
+//! we have to expand out what will happen:
+//!
+//! - `convertAll` will create a pointer to one of the entries in the
+//!   vector, which will have type `&String`
+//! - It will then call the impl of `convertTo()` that is intended
+//!   for use with objects. This has the type:
+//!
+//!       fn(self: &Object) -> int
+//!
+//!   It is ok to provide a value for `self` of type `&String` because
+//!   `&String <: &Object`.
+//!
+//! OK, so intuitively we want this to be legal, so let's bring this back
+//! to variance and see whether we are computing the correct result. We
+//! must first figure out how to phrase the question "is an impl for
+//! `Object,int` usable where an impl for `String,int` is expected?"
+//!
+//! Maybe it's helpful to think of a dictionary-passing implementation of
+//! type classes. In that case, `convertAll()` takes an implicit parameter
+//! representing the impl. In short, we *have* an impl of type:
+//!
+//!     V_O = ConvertTo<int> for Object
+//!
+//! and the function prototype expects an impl of type:
+//!
+//!     V_S = ConvertTo<int> for String
+//!
+//! As with any argument, this is legal if the type of the value given
+//! (`V_O`) is a subtype of the type expected (`V_S`). So is `V_O <: V_S`?
+//! The answer will depend on the variance of the various parameters. In
+//! this case, because the `Self` parameter is contravariant and `A` is
+//! covariant, it means that:
+//!
+//!     V_O <: V_S iff
+//!         int <: int
+//!         String <: Object
+//!
+//! These conditions are satisfied and so we are happy.
+//!
+//! ### The algorithm
+//!
+//! The basic idea is quite straightforward. We iterate over the types
+//! defined and, for each use of a type parameter X, accumulate a
+//! constraint indicating that the variance of X must be valid for the
+//! variance of that use site. We then iteratively refine the variance of
+//! X until all constraints are met. There is *always* a solution, because at
+//! the limit we can declare all type parameters to be invariant and all
+//! constraints will be satisfied.
+//!
+//! As a simple example, consider:
+//!
+//!     enum Option<A> { Some(A), None }
+//!     enum OptionalFn<B> { Some(|B|), None }
+//!     enum OptionalMap<C> { Some(|C| -> C), None }
+//!
+//! Here, we will generate the constraints:
+//!
+//!     1. V(A) <= +
+//!     2. V(B) <= -
+//!     3. V(C) <= +
+//!     4. V(C) <= -
+//!
+//! These indicate that (1) the variance of A must be at most covariant;
+//! (2) the variance of B must be at most contravariant; and (3, 4) the
+//! variance of C must be at most covariant *and* contravariant. All of these
+//! results are based on a variance lattice defined as follows:
+//!
+//!       *      Top (bivariant)
+//!    -     +
+//!       o      Bottom (invariant)
+//!
+//! Based on this lattice, the solution V(A)=+, V(B)=-, V(C)=o is the
+//! optimal solution. Note that there is always a naive solution which
+//! just declares all variables to be invariant.
+//!
+//! You may be wondering why fixed-point iteration is required. The reason
+//! is that the variance of a use site may itself be a function of the
+//! variance of other type parameters. In full generality, our constraints
+//! take the form:
+//!
+//!     V(X) <= Term
+//!     Term := + | - | * | o | V(X) | Term x Term
+//!
+//! Here the notation V(X) indicates the variance of a type/region
+//! parameter `X` with respect to its defining class. `Term x Term`
+//! represents the "variance transform" as defined in the paper:
+//!
+//!     If the variance of a type variable `X` in type expression `E` is `V2`
+//!     and the definition-site variance of the [corresponding] type parameter
+//!     of a class `C` is `V1`, then the variance of `X` in the type expression
+//!     `C<E>` is `V3 = V1.xform(V2)`.
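+//!
+//! Writing + for covariant, - for contravariant, o for invariant, and * for
+//! bivariant, the transform behaves like a small multiplication table (an
+//! informal summary; see The Paper for the precise definition):
+//!
+//!     + x V = V    // a covariant context preserves the variance V
+//!     - x + = -    // a contravariant context flips covariance...
+//!     - x - = +    // ...and two flips cancel out
+//!     o x V = o    // an invariant context forces invariance
+//!     * x V = *    // a bivariant context ignores its contents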
-
-    V_O <: V_S iff
-        int <: int
-        String <: Object
-
-These conditions are satisfied and so we are happy.
-
-### The algorithm
-
-The basic idea is quite straightforward. We iterate over the types
-defined and, for each use of a type parameter X, accumulate a
-constraint indicating that the variance of X must be valid for the
-variance of that use site. We then iteratively refine the variance of
-X until all constraints are met. There is *always* a sol'n, because at
-the limit we can declare all type parameters to be invariant and all
-constraints will be satisfied.
-
-As a simple example, consider:
-
-    enum Option<A> { Some(A), None }
-    enum OptionalFn<B> { Some(|B|), None }
-    enum OptionalMap<C> { Some(|C| -> C), None }
-
-Here, we will generate the constraints:
-
-    1. V(A) <= +
-    2. V(B) <= -
-    3. V(C) <= +
-    4. V(C) <= -
-
-These indicate that (1) the variance of A must be at most covariant;
-(2) the variance of B must be at most contravariant; and (3, 4) the
-variance of C must be at most covariant *and* contravariant. All of these
-results are based on a variance lattice defined as follows:
-
-      *      Top (bivariant)
-   -     +
-      o      Bottom (invariant)
-
-Based on this lattice, the solution V(A)=+, V(B)=-, V(C)=o is the
-optimal solution. Note that there is always a naive solution which
-just declares all variables to be invariant.
-
-You may be wondering why fixed-point iteration is required. The reason
-is that the variance of a use site may itself be a function of the
-variance of other type parameters. In full generality, our constraints
-take the form:
-
-    V(X) <= Term
-    Term := + | - | * | o | V(X) | Term x Term
-
-Here the notation V(X) indicates the variance of a type/region
-parameter `X` with respect to its defining class. `Term x Term`
-represents the "variance transform" as defined in the paper:
-
-    If the variance of a type variable `X` in type expression `E` is `V2`
-    and the definition-site variance of the [corresponding] type parameter
-    of a class `C` is `V1`, then the variance of `X` in the type expression
-    `C<E>` is `V3 = V1.xform(V2)`.
- -*/ use self::VarianceTerm::*; use self::ParamKind::*; @@ -632,6 +629,8 @@ fn is_to_be_inferred(&self, param_id: ast::NodeId) -> bool { return result; } + /// Returns a variance term representing the declared variance of the type/region parameter + /// with the given id. fn declared_variance(&self, param_def_id: ast::DefId, item_def_id: ast::DefId, @@ -639,11 +638,6 @@ fn declared_variance(&self, space: ParamSpace, index: uint) -> VarianceTermPtr<'a> { - /*! - * Returns a variance term representing the declared variance of - * the type/region parameter with the given id. - */ - assert_eq!(param_def_id.krate, item_def_id.krate); if self.invariant_lang_items[kind as uint] == Some(item_def_id) { diff --git a/src/librustc/plugin/mod.rs b/src/librustc/plugin/mod.rs index a03ee471be68155e8e87b6957317249762000712..8dd60880cdd56022773879ed502dcce3bc4ed8f7 100644 --- a/src/librustc/plugin/mod.rs +++ b/src/librustc/plugin/mod.rs @@ -8,54 +8,52 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * Infrastructure for compiler plugins. - * - * Plugins are Rust libraries which extend the behavior of `rustc` - * in various ways. - * - * Plugin authors will use the `Registry` type re-exported by - * this module, along with its methods. The rest of the module - * is for use by `rustc` itself. - * - * To define a plugin, build a dylib crate with a - * `#[plugin_registrar]` function: - * - * ```rust,ignore - * #![crate_name = "myplugin"] - * #![crate_type = "dylib"] - * #![feature(plugin_registrar)] - * - * extern crate rustc; - * - * use rustc::plugin::Registry; - * - * #[plugin_registrar] - * pub fn plugin_registrar(reg: &mut Registry) { - * reg.register_macro("mymacro", expand_mymacro); - * } - * - * fn expand_mymacro(...) { // details elided - * ``` - * - * WARNING: We currently don't check that the registrar function - * has the appropriate type! - * - * To use a plugin while compiling another crate: - * - * ```rust - * #![feature(phase)] - * - * #[phase(plugin)] - * extern crate myplugin; - * ``` - * - * If you also need the plugin crate available at runtime, use - * `phase(plugin, link)`. - * - * See [the compiler plugin guide](../../guide-plugin.html) - * for more examples. - */ +//! Infrastructure for compiler plugins. +//! +//! Plugins are Rust libraries which extend the behavior of `rustc` +//! in various ways. +//! +//! Plugin authors will use the `Registry` type re-exported by +//! this module, along with its methods. The rest of the module +//! is for use by `rustc` itself. +//! +//! To define a plugin, build a dylib crate with a +//! `#[plugin_registrar]` function: +//! +//! ```rust,ignore +//! #![crate_name = "myplugin"] +//! #![crate_type = "dylib"] +//! #![feature(plugin_registrar)] +//! +//! extern crate rustc; +//! +//! use rustc::plugin::Registry; +//! +//! #[plugin_registrar] +//! pub fn plugin_registrar(reg: &mut Registry) { +//! reg.register_macro("mymacro", expand_mymacro); +//! } +//! +//! fn expand_mymacro(...) { // details elided +//! ``` +//! +//! WARNING: We currently don't check that the registrar function +//! has the appropriate type! +//! +//! To use a plugin while compiling another crate: +//! +//! ```rust +//! #![feature(phase)] +//! +//! #[phase(plugin)] +//! extern crate myplugin; +//! ``` +//! +//! If you also need the plugin crate available at runtime, use +//! `phase(plugin, link)`. +//! +//! See [the compiler plugin guide](../../guide-plugin.html) +//! for more examples. 
pub use self::registry::Registry;
diff --git a/src/librustc/util/common.rs b/src/librustc/util/common.rs
index 7973004d5151997163e4b710b9abc4e57bac2633..ea252d9fd205c99d0abac30fdc05648ee4f9c24f 100644
--- a/src/librustc/util/common.rs
+++ b/src/librustc/util/common.rs
@@ -122,24 +122,20 @@ pub fn block_query(b: &ast::Block, p: |&ast::Expr| -> bool) -> bool {
     return v.flag;
 }
 
-// K: Eq + Hash<S>, V, S, H: Hasher<S>
+/// K: Eq + Hash<S>, V, S, H: Hasher<S>
+///
+/// Determines whether there exists a path from `source` to `destination`. The graph is defined by
+/// the `edges_map`, which maps from a node `S` to a list of its adjacent nodes `T`.
+///
+/// Efficiency note: This is implemented in an inefficient way because it is typically invoked on
+/// very small graphs. If the graphs become larger, a more efficient graph representation and
+/// algorithm would probably be advised.
 pub fn can_reach<S,H:Hasher<S>,T:Eq+Clone+Hash<S>>(
     edges_map: &HashMap<T,Vec<T>,H>,
     source: T,
     destination: T)
     -> bool
 {
-    /*!
-     * Determines whether there exists a path from `source` to
-     * `destination`. The graph is defined by the `edges_map`, which
-     * maps from a node `S` to a list of its adjacent nodes `T`.
-     *
-     * Efficiency note: This is implemented in an inefficient way
-     * because it is typically invoked on very small graphs. If the graphs
-     * become larger, a more efficient graph representation and algorithm
-     * would probably be advised.
-     */
-
     if source == destination { return true; }
diff --git a/src/librustc/util/ppaux.rs b/src/librustc/util/ppaux.rs
index 761a1f66501ca597ad7694ee0c46a055d8a8df87..b739a97f734bea228db1d3f76fa2f6ca96a43da3 100644
--- a/src/librustc/util/ppaux.rs
+++ b/src/librustc/util/ppaux.rs
@@ -65,12 +65,9 @@ pub fn note_and_explain_region(cx: &ctxt,
     }
 }
 
+/// When a free region is associated with `item`, how should we describe the item in the error
+/// message?
 fn item_scope_tag(item: &ast::Item) -> &'static str {
-    /*!
-     * When a free region is associated with `item`, how should we describe
-     * the item in the error message.
-     */
-
     match item.node {
         ast::ItemImpl(..) => "impl",
         ast::ItemStruct(..) => "struct",
diff --git a/src/librustc/util/snapshot_vec.rs b/src/librustc/util/snapshot_vec.rs
index 91e67bbacc30f30bb0ebed04ca87ddea2ce71172..64e67a1f4bf753d7ddc5c45f4dd9ae9c8371a9ef 100644
--- a/src/librustc/util/snapshot_vec.rs
+++ b/src/librustc/util/snapshot_vec.rs
@@ -8,21 +8,16 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
- * A utility class for implementing "snapshottable" things; a
- * snapshottable data structure permits you to take a snapshot (via
- * `start_snapshot`) and then, after making some changes, elect either
- * to rollback to the start of the snapshot or commit those changes.
- *
- * This vector is intended to be used as part of an abstraction, not
- * serve as a complete abstraction on its own. As such, while it will
- * roll back most changes on its own, it also supports a `get_mut`
- * operation that gives you an abitrary mutable pointer into the
- * vector. To ensure that any changes you make this with this pointer
- * are rolled back, you must invoke `record` to record any changes you
- * make and also supplying a delegate capable of reversing those
- * changes.
- */
+//! A utility class for implementing "snapshottable" things; a snapshottable data structure permits
+//! you to take a snapshot (via `start_snapshot`) and then, after making some changes, elect either
+//! to roll back to the start of the snapshot or commit those changes.
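+//!
+//! A sketch of the intended usage pattern (`index`, `new_value`, and the
+//! validity check are placeholders; `commit` and `rollback_to` are assumed
+//! to mirror the snapshot API described in the region inference module):
+//!
+//!     let snapshot = vec.start_snapshot();
+//!     vec.set(index, new_value);       // tentative change, logged for undo
+//!     if looks_good {
+//!         vec.commit(snapshot);        // keep the change
+//!     } else {
+//!         vec.rollback_to(snapshot);   // restore the previous value
+//!     }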
+//!
+//! This vector is intended to be used as part of an abstraction, not serve as a complete
+//! abstraction on its own. As such, while it will roll back most changes on its own, it also
+//! supports a `get_mut` operation that gives you an arbitrary mutable pointer into the vector. To
+//! ensure that any changes you make with this pointer are rolled back, you must invoke `record`
+//! to record any changes you make and also supply a delegate capable of reversing those changes.
 
 use self::UndoLog::*;
 
 use std::kinds::marker;
@@ -98,23 +93,16 @@ pub fn get<'a>(&'a self, index: uint) -> &'a T {
         &self.values[index]
     }
 
+    /// Returns a mutable pointer into the vec; whatever changes you make here cannot be undone
+    /// automatically, so you should be sure to call `record()` with some sort of suitable undo
+    /// action.
     pub fn get_mut<'a>(&'a mut self, index: uint) -> &'a mut T {
-        /*!
-         * Returns a mutable pointer into the vec; whatever changes
-         * you make here cannot be undone automatically, so you should
-         * be sure call `record()` with some sort of suitable undo
-         * action.
-         */
-
         &mut self.values[index]
    }
 
+    /// Updates the element at the given index. The old value will be saved (and perhaps restored)
+    /// if a snapshot is active.
     pub fn set(&mut self, index: uint, new_elem: T) {
-        /*!
-         * Updates the element at the given index. The old value will
-         * saved (and perhaps restored) if a snapshot is active.
-         */
-
         let old_elem = mem::replace(&mut self.values[index], new_elem);
         if self.in_snapshot() {
             self.undo_log.push(SetElem(index, old_elem));
diff --git a/src/librustc_trans/lib.rs b/src/librustc_trans/lib.rs
index f89580b768ea5c62c3615a7099c9a3187471b8c9..4186f479fcce653e985524237765d325b42dac32 100644
--- a/src/librustc_trans/lib.rs
+++ b/src/librustc_trans/lib.rs
@@ -8,15 +8,11 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
-
-The Rust compiler.
-
-# Note
-
-This API is completely unstable and subject to change.
-
-*/
+//! The Rust compiler.
+//!
+//! # Note
+//!
+//! This API is completely unstable and subject to change.
 
 #![crate_name = "rustc_trans"]
 #![experimental]
diff --git a/src/librustc_trans/test.rs b/src/librustc_trans/test.rs
index 1e8c1fd14787d240f1a35f1991b43641710f6e4c..41fbe855769331291f6fb5b20845a997e633a70b 100644
--- a/src/librustc_trans/test.rs
+++ b/src/librustc_trans/test.rs
@@ -8,11 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-/*!
-
-# Standalone Tests for the Inference Module
-
-*/
+//! # Standalone Tests for the Inference Module
 
 use driver::diagnostic;
 use driver::diagnostic::Emitter;
@@ -537,12 +533,10 @@ fn glb_bound_static() {
     })
 }
 
+/// Test substituting a bound region into a function, which introduces another level of binding.
+/// This requires adjusting the Debruijn index.
 #[test]
 fn subst_ty_renumber_bound() {
-    /*!
-     * Test substituting a bound region into a function, which introduces another
-     * level of binding. This requires adjusting the Debruijn index.
-     */
 
     test_env(EMPTY_SOURCE_STR, errors(&[]), |env| {
         // Situation:
@@ -575,13 +569,10 @@ fn subst_ty_renumber_bound() {
     })
 }
 
+/// Test substituting a bound region into a function, which introduces another level of binding.
+/// This requires adjusting the Debruijn index.
 #[test]
 fn subst_ty_renumber_some_bounds() {
-    /*!
- * Test substituting a bound region into a function, which introduces another - * level of binding. This requires adjusting the Debruijn index. - */ - test_env(EMPTY_SOURCE_STR, errors(&[]), |env| { // Situation: // Theta = [A -> &'a foo] @@ -615,12 +606,9 @@ fn subst_ty_renumber_some_bounds() { }) } +/// Test that we correctly compute whether a type has escaping regions or not. #[test] fn escaping() { - /*! - * Test that we correctly compute whether a type has escaping - * regions or not. - */ test_env(EMPTY_SOURCE_STR, errors(&[]), |env| { // Situation: @@ -658,12 +646,10 @@ fn escaping() { }) } +/// Test applying a substitution where the value being substituted for an early-bound region is a +/// late-bound region. #[test] fn subst_region_renumber_region() { - /*! - * Test applying a substitution where the value being substituted - * for an early-bound region is a late-bound region. - */ test_env(EMPTY_SOURCE_STR, errors(&[]), |env| { let re_bound1 = env.re_late_bound_with_debruijn(1, ty::DebruijnIndex::new(1)); diff --git a/src/librustc_trans/trans/_match.rs b/src/librustc_trans/trans/_match.rs index 381220d587cbca9008ac916e8b8d2aad7b99d717..d83eeadc7b96f6a47b8343940866c808274ab080 100644 --- a/src/librustc_trans/trans/_match.rs +++ b/src/librustc_trans/trans/_match.rs @@ -8,183 +8,179 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * - * # Compilation of match statements - * - * I will endeavor to explain the code as best I can. I have only a loose - * understanding of some parts of it. - * - * ## Matching - * - * The basic state of the code is maintained in an array `m` of `Match` - * objects. Each `Match` describes some list of patterns, all of which must - * match against the current list of values. If those patterns match, then - * the arm listed in the match is the correct arm. A given arm may have - * multiple corresponding match entries, one for each alternative that - * remains. As we proceed these sets of matches are adjusted by the various - * `enter_XXX()` functions, each of which adjusts the set of options given - * some information about the value which has been matched. - * - * So, initially, there is one value and N matches, each of which have one - * constituent pattern. N here is usually the number of arms but may be - * greater, if some arms have multiple alternatives. For example, here: - * - * enum Foo { A, B(int), C(uint, uint) } - * match foo { - * A => ..., - * B(x) => ..., - * C(1u, 2) => ..., - * C(_) => ... - * } - * - * The value would be `foo`. There would be four matches, each of which - * contains one pattern (and, in one case, a guard). We could collect the - * various options and then compile the code for the case where `foo` is an - * `A`, a `B`, and a `C`. When we generate the code for `C`, we would (1) - * drop the two matches that do not match a `C` and (2) expand the other two - * into two patterns each. In the first case, the two patterns would be `1u` - * and `2`, and the in the second case the _ pattern would be expanded into - * `_` and `_`. The two values are of course the arguments to `C`. - * - * Here is a quick guide to the various functions: - * - * - `compile_submatch()`: The main workhouse. It takes a list of values and - * a list of matches and finds the various possibilities that could occur. - * - * - `enter_XXX()`: modifies the list of matches based on some information - * about the value that has been matched. 
For example, - * `enter_rec_or_struct()` adjusts the values given that a record or struct - * has been matched. This is an infallible pattern, so *all* of the matches - * must be either wildcards or record/struct patterns. `enter_opt()` - * handles the fallible cases, and it is correspondingly more complex. - * - * ## Bindings - * - * We store information about the bound variables for each arm as part of the - * per-arm `ArmData` struct. There is a mapping from identifiers to - * `BindingInfo` structs. These structs contain the mode/id/type of the - * binding, but they also contain an LLVM value which points at an alloca - * called `llmatch`. For by value bindings that are Copy, we also create - * an extra alloca that we copy the matched value to so that any changes - * we do to our copy is not reflected in the original and vice-versa. - * We don't do this if it's a move since the original value can't be used - * and thus allowing us to cheat in not creating an extra alloca. - * - * The `llmatch` binding always stores a pointer into the value being matched - * which points at the data for the binding. If the value being matched has - * type `T`, then, `llmatch` will point at an alloca of type `T*` (and hence - * `llmatch` has type `T**`). So, if you have a pattern like: - * - * let a: A = ...; - * let b: B = ...; - * match (a, b) { (ref c, d) => { ... } } - * - * For `c` and `d`, we would generate allocas of type `C*` and `D*` - * respectively. These are called the `llmatch`. As we match, when we come - * up against an identifier, we store the current pointer into the - * corresponding alloca. - * - * Once a pattern is completely matched, and assuming that there is no guard - * pattern, we will branch to a block that leads to the body itself. For any - * by-value bindings, this block will first load the ptr from `llmatch` (the - * one of type `D*`) and then load a second time to get the actual value (the - * one of type `D`). For by ref bindings, the value of the local variable is - * simply the first alloca. - * - * So, for the example above, we would generate a setup kind of like this: - * - * +-------+ - * | Entry | - * +-------+ - * | - * +--------------------------------------------+ - * | llmatch_c = (addr of first half of tuple) | - * | llmatch_d = (addr of second half of tuple) | - * +--------------------------------------------+ - * | - * +--------------------------------------+ - * | *llbinding_d = **llmatch_d | - * +--------------------------------------+ - * - * If there is a guard, the situation is slightly different, because we must - * execute the guard code. Moreover, we need to do so once for each of the - * alternatives that lead to the arm, because if the guard fails, they may - * have different points from which to continue the search. Therefore, in that - * case, we generate code that looks more like: - * - * +-------+ - * | Entry | - * +-------+ - * | - * +-------------------------------------------+ - * | llmatch_c = (addr of first half of tuple) | - * | llmatch_d = (addr of first half of tuple) | - * +-------------------------------------------+ - * | - * +-------------------------------------------------+ - * | *llbinding_d = **llmatch_d | - * | check condition | - * | if false { goto next case } | - * | if true { goto body } | - * +-------------------------------------------------+ - * - * The handling for the cleanups is a bit... sensitive. Basically, the body - * is the one that invokes `add_clean()` for each binding. 
During the guard
- * evaluation, we add temporary cleanups and revoke them after the guard is
- * evaluated (it could fail, after all). Note that guards and moves are
- * just plain incompatible.
- *
- * Some relevant helper functions that manage bindings:
- * - `create_bindings_map()`
- * - `insert_lllocals()`
- *
- *
- * ## Notes on vector pattern matching.
- *
- * Vector pattern matching is surprisingly tricky. The problem is that
- * the structure of the vector isn't fully known, and slice matches
- * can be done on subparts of it.
- *
- * The way that vector pattern matches are dealt with, then, is as
- * follows. First, we make the actual condition associated with a
- * vector pattern simply a vector length comparison. So the pattern
- * [1, .. x] gets the condition "vec len >= 1", and the pattern
- * [.. x] gets the condition "vec len >= 0". The problem here is that
- * having the condition "vec len >= 1" hold clearly does not mean that
- * only a pattern that has exactly that condition will match. This
- * means that it may well be the case that a condition holds, but none
- * of the patterns matching that condition match; to deal with this,
- * when doing vector length matches, we have match failures proceed to
- * the next condition to check.
- *
- * There are a couple more subtleties to deal with. While the "actual"
- * condition associated with vector length tests is simply a test on
- * the vector length, the actual vec_len Opt entry contains more
- * information used to restrict which matches are associated with it.
- * So that all matches in a submatch are matching against the same
- * values from inside the vector, they are split up by how many
- * elements they match at the front and at the back of the vector. In
- * order to make sure that arms are properly checked in order, even
- * with the overmatching conditions, each vec_len Opt entry is
- * associated with a range of matches.
- * Consider the following:
- *
- *    match &[1, 2, 3] {
- *        [1, 1, .. _] => 0,
- *        [1, 2, 2, .. _] => 1,
- *        [1, 2, 3, .. _] => 2,
- *        [1, 2, .. _] => 3,
- *        _ => 4
- *    }
- * The proper arm to match is arm 2, but arms 0 and 3 both have the
- * condition "len >= 2". If arm 3 was lumped in with arm 0, then the
- * wrong branch would be taken. Instead, vec_len Opts are associated
- * with a contiguous range of matches that have the same "shape".
- * This is sort of ugly and requires a bunch of special handling of
- * vec_len options.
- *
- */
+//! # Compilation of match statements
+//!
+//! I will endeavor to explain the code as best I can. I have only a loose
+//! understanding of some parts of it.
+//!
+//! ## Matching
+//!
+//! The basic state of the code is maintained in an array `m` of `Match`
+//! objects. Each `Match` describes some list of patterns, all of which must
+//! match against the current list of values. If those patterns match, then
+//! the arm listed in the match is the correct arm. A given arm may have
+//! multiple corresponding match entries, one for each alternative that
+//! remains. As we proceed these sets of matches are adjusted by the various
+//! `enter_XXX()` functions, each of which adjusts the set of options given
+//! some information about the value which has been matched.
+//!
+//! So, initially, there is one value and N matches, each of which has one
+//! constituent pattern. N here is usually the number of arms but may be
+//! greater, if some arms have multiple alternatives. For example, here:
+//!
+//!     enum Foo { A, B(int), C(uint, uint) }
+//!     match foo {
+//!         A => ...,
+//!         B(x) => ...,
+//!         C(1u, 2) => ...,
+//!         C(_) => ...
+//!     }
+//!
+//! The value would be `foo`. There would be four matches, each of which
+//! contains one pattern (and, in one case, a guard). We could collect the
+//! various options and then compile the code for the case where `foo` is an
+//! `A`, a `B`, and a `C`. When we generate the code for `C`, we would (1)
+//! drop the two matches that do not match a `C` and (2) expand the other two
+//! into two patterns each. In the first case, the two patterns would be `1u`
+//! and `2`, and in the second case the `_` pattern would be expanded into
+//! `_` and `_`. The two values are of course the arguments to `C`.
+//!
+//! Here is a quick guide to the various functions:
+//!
+//! - `compile_submatch()`: The main workhorse. It takes a list of values and
+//!   a list of matches and finds the various possibilities that could occur.
+//!
+//! - `enter_XXX()`: modifies the list of matches based on some information
+//!   about the value that has been matched. For example,
+//!   `enter_rec_or_struct()` adjusts the values given that a record or struct
+//!   has been matched. This is an infallible pattern, so *all* of the matches
+//!   must be either wildcards or record/struct patterns. `enter_opt()`
+//!   handles the fallible cases, and it is correspondingly more complex.
+//!
+//! ## Bindings
+//!
+//! We store information about the bound variables for each arm as part of the
+//! per-arm `ArmData` struct. There is a mapping from identifiers to
+//! `BindingInfo` structs. These structs contain the mode/id/type of the
+//! binding, but they also contain an LLVM value which points at an alloca
+//! called `llmatch`. For by-value bindings that are Copy, we also create
+//! an extra alloca that we copy the matched value to, so that any changes
+//! we make to our copy are not reflected in the original and vice-versa.
+//! We don't do this if it's a move, since the original value can't be used
+//! anyway, which allows us to cheat by not creating an extra alloca.
+//!
+//! The `llmatch` binding always stores a pointer into the value being matched
+//! which points at the data for the binding. If the value being matched has
+//! type `T`, then `llmatch` will point at an alloca of type `T*` (and hence
+//! `llmatch` has type `T**`). So, if you have a pattern like:
+//!
+//!    let a: A = ...;
+//!    let b: B = ...;
+//!    match (a, b) { (ref c, d) => { ... } }
+//!
+//! For `c` and `d`, we would generate allocas of type `C*` and `D*`
+//! respectively. These are called the `llmatch`. As we match, when we come
+//! up against an identifier, we store the current pointer into the
+//! corresponding alloca.
+//!
+//! Once a pattern is completely matched, and assuming that there is no guard
+//! pattern, we will branch to a block that leads to the body itself. For any
+//! by-value bindings, this block will first load the ptr from `llmatch` (the
+//! one of type `D*`) and then load a second time to get the actual value (the
+//! one of type `D`). For by-ref bindings, the value of the local variable is
+//! simply the first alloca.
+//!
+//! So, for the example above, we would generate a setup kind of like this:
+//!
+//!        +-------+
+//!        | Entry |
+//!        +-------+
+//!            |
+//!        +--------------------------------------------+
+//!        | llmatch_c = (addr of first half of tuple)  |
+//!        | llmatch_d = (addr of second half of tuple) |
+//!        +--------------------------------------------+
+//!            |
+//!        +--------------------------------------+
+//!        | *llbinding_d = **llmatch_d          |
+//!        +--------------------------------------+
+//!
+//! If there is a guard, the situation is slightly different, because we must
+//! execute the guard code. Moreover, we need to do so once for each of the
+//! alternatives that lead to the arm, because if the guard fails, they may
+//! have different points from which to continue the search. Therefore, in that
+//! case, we generate code that looks more like:
+//!
+//!        +-------+
+//!        | Entry |
+//!        +-------+
+//!            |
+//!        +--------------------------------------------+
+//!        | llmatch_c = (addr of first half of tuple)  |
+//!        | llmatch_d = (addr of second half of tuple) |
+//!        +--------------------------------------------+
+//!            |
+//!        +-------------------------------------------------+
+//!        | *llbinding_d = **llmatch_d                      |
+//!        | check condition                                 |
+//!        | if false { goto next case }                     |
+//!        | if true { goto body }                           |
+//!        +-------------------------------------------------+
+//!
+//! The handling for the cleanups is a bit... sensitive. Basically, the body
+//! is the one that invokes `add_clean()` for each binding. During the guard
+//! evaluation, we add temporary cleanups and revoke them after the guard is
+//! evaluated (it could fail, after all). Note that guards and moves are
+//! just plain incompatible.
+//!
+//! Some relevant helper functions that manage bindings:
+//! - `create_bindings_map()`
+//! - `insert_lllocals()`
+//!
+//!
+//! ## Notes on vector pattern matching.
+//!
+//! Vector pattern matching is surprisingly tricky. The problem is that
+//! the structure of the vector isn't fully known, and slice matches
+//! can be done on subparts of it.
+//!
+//! The way that vector pattern matches are dealt with, then, is as
+//! follows. First, we make the actual condition associated with a
+//! vector pattern simply a vector length comparison. So the pattern
+//! [1, .. x] gets the condition "vec len >= 1", and the pattern
+//! [.. x] gets the condition "vec len >= 0". The problem here is that
+//! having the condition "vec len >= 1" hold clearly does not mean that
+//! only a pattern that has exactly that condition will match. This
+//! means that it may well be the case that a condition holds, but none
+//! of the patterns matching that condition match; to deal with this,
+//! when doing vector length matches, we have match failures proceed to
+//! the next condition to check.
+//!
+//! There are a couple more subtleties to deal with. While the "actual"
+//! condition associated with vector length tests is simply a test on
+//! the vector length, the actual vec_len Opt entry contains more
+//! information used to restrict which matches are associated with it.
+//! So that all matches in a submatch are matching against the same
+//! values from inside the vector, they are split up by how many
+//! elements they match at the front and at the back of the vector. In
+//! order to make sure that arms are properly checked in order, even
+//! with the overmatching conditions, each vec_len Opt entry is
+//! associated with a range of matches.
+//! Consider the following:
+//!
+//!    match &[1, 2, 3] {
+//!        [1, 1, .. _] => 0,
+//!        [1, 2, 2, .. _] => 1,
+//!        [1, 2, 3, .. _] => 2,
+//!        [1, 2, .. _] => 3,
+//!        _ => 4
+//!    }
+//!
+//! The proper arm to match is arm 2, but arms 0 and 3 both have the
+//! condition "len >= 2". If arm 3 were lumped in with arm 0, then the
+//! wrong branch would be taken. Instead, vec_len Opts are associated
+//! with a contiguous range of matches that have the same "shape".
+//! This is sort of ugly and requires a bunch of special handling of
+//! vec_len options.
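To make the `enter_XXX()` step above concrete, here is a deliberately simplified sketch of the specialization idea; `ToyPat` and `specialize` are invented for illustration and omit guards, bindings, and the vec_len machinery entirely:

```rust
// Toy sketch of match "specialization" -- illustrative only, far simpler
// than the real `Match`/`enter_opt()` machinery.
#[deriving(Clone)]
enum ToyPat {
    Wild,                    // `_`
    Ctor(uint, Vec<ToyPat>), // constructor tag plus argument subpatterns
}

// Once a constructor has been chosen for the value being tested, drop the
// rows that cannot match it and expand the rest into their argument
// subpatterns; a wildcard expands into `arity` fresh wildcards.
fn specialize(rows: Vec<ToyPat>, ctor: uint, arity: uint) -> Vec<Vec<ToyPat>> {
    rows.into_iter().filter_map(|row| match row {
        ToyPat::Wild => Some(Vec::from_elem(arity, ToyPat::Wild)),
        ToyPat::Ctor(tag, args) if tag == ctor => Some(args),
        ToyPat::Ctor(..) => None, // cannot match the chosen constructor
    }).collect()
}
```

In the `Foo` example above, choosing `C` would drop the `A` and `B` rows and expand the two remaining rows into their argument subpatterns, exactly as described.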
 pub use self::BranchKind::*;
 pub use self::OptResult::*;
@@ -620,12 +616,9 @@ fn extract_variant_args<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
     ExtractedBlock { vals: args, bcx: bcx }
 }

+/// Helper for converting from the ValueRef that we pass around in the match code, which is always
+/// an lvalue, into a Datum. Eventually we should just pass around a Datum and be done with it.
 fn match_datum<'tcx>(val: ValueRef, left_ty: Ty<'tcx>) -> Datum<'tcx, Lvalue> {
-    /*!
-     * Helper for converting from the ValueRef that we pass around in
-     * the match code, which is always an lvalue, into a Datum. Eventually
-     * we should just pass around a Datum and be done with it.
-     */
     Datum::new(val, left_ty, Lvalue)
 }

@@ -831,15 +824,11 @@ fn compare_str<'blk, 'tcx>(cx: Block<'blk, 'tcx>,
     }
 }

+/// For each binding in `data.bindings_map`, adds an appropriate entry into the `fcx.lllocals` map
 fn insert_lllocals<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
                                bindings_map: &BindingsMap<'tcx>,
                                cs: Option<cleanup::ScopeId>)
                                -> Block<'blk, 'tcx> {
-    /*!
-     * For each binding in `data.bindings_map`, adds an appropriate entry into
-     * the `fcx.lllocals` map
-     */
-
     for (&ident, &binding_info) in bindings_map.iter() {
         let llval = match binding_info.trmode {
             // By value mut binding for a copy type: load from the ptr
@@ -1416,13 +1405,11 @@ fn trans_match_inner<'blk, 'tcx>(scope_cx: Block<'blk, 'tcx>,
     return bcx;
 }

+/// Generates code for a local variable declaration like `let <pat>;` or
+/// `let <pat> = <rvalue>`.
 pub fn store_local<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
                                local: &ast::Local)
                                -> Block<'blk, 'tcx> {
-    /*!
-     * Generates code for a local variable declaration like
-     * `let <pat>;` or `let <pat> = <rvalue>`.
-     */
     let _icx = push_ctxt("match::store_local");
     let mut bcx = bcx;
     let tcx = bcx.tcx();
@@ -1482,24 +1469,21 @@ fn create_dummy_locals<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
     }
 }

+/// Generates code for argument patterns like `fn foo(<pat>: T)`.
+/// Creates entries in the `lllocals` map for each of the bindings
+/// in `pat`.
+///
+/// # Arguments
+///
+/// - `pat` is the argument pattern
+/// - `llval` is a pointer to the argument value (in other words,
+///   if the argument type is `T`, then `llval` is a `T*`). In some
+///   cases, this code may zero out the memory `llval` points at.
 pub fn store_arg<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
                              pat: &ast::Pat,
                              arg: Datum<'tcx, Rvalue>,
                              arg_scope: cleanup::ScopeId)
                              -> Block<'blk, 'tcx> {
-    /*!
-     * Generates code for argument patterns like `fn foo(<pat>: T)`.
-     * Creates entries in the `lllocals` map for each of the bindings
-     * in `pat`.
-     *
-     * # Arguments
-     *
-     * - `pat` is the argument pattern
-     * - `llval` is a pointer to the argument value (in other words,
-     *   if the argument type is `T`, then `llval` is a `T*`). In some
-     *   cases, this code may zero out the memory `llval` points at.
-     */
-
     let _icx = push_ctxt("match::store_arg");

     match simple_identifier(&*pat) {
@@ -1583,26 +1567,23 @@ fn mk_binding_alloca<'blk, 'tcx, A>(bcx: Block<'blk, 'tcx>,
     bcx
 }
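For orientation before the next function: an irrefutable pattern is one that can never fail to match, which is why `let` and argument patterns need none of the branching machinery above. A minimal sketch in ordinary user-level Rust (not compiler code):

```rust
// Irrefutable patterns always match, so no arms or fallback branches are
// ever generated for them.
struct Point { x: int, y: int }

// An argument pattern, as in the `fn foo(<pat>: T)` case handled above.
fn sum((a, b): (int, int)) -> int { a + b }

fn main() {
    let p = Point { x: 1, y: 2 };
    let Point { x, y } = p; // destructures in place; cannot fail
    assert_eq!(sum((x, y)), 3);
    // By contrast, `let Some(v) = ...` would be refutable, and the
    // compiler rejects it outside of a `match`.
}
```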
+/// A simple version of the pattern matching code that only handles
+/// irrefutable patterns. This is used in let/argument patterns,
+/// not in match statements. Unifying this code with the code above
+/// sounds nice, but in practice it produces very inefficient code,
+/// since the match code is so much more general. In most cases,
+/// LLVM is able to optimize the code, but it causes longer compile
+/// times and makes the generated code nigh impossible to read.
+///
+/// # Arguments
+/// - bcx: starting basic block context
+/// - pat: the irrefutable pattern being matched.
+/// - val: the value being matched -- must be an lvalue (by ref, with cleanup)
 fn bind_irrefutable_pat<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
                                     pat: &ast::Pat,
                                     val: ValueRef,
                                     cleanup_scope: cleanup::ScopeId)
                                     -> Block<'blk, 'tcx> {
-    /*!
-     * A simple version of the pattern matching code that only handles
-     * irrefutable patterns. This is used in let/argument patterns,
-     * not in match statements. Unifying this code with the code above
-     * sounds nice, but in practice it produces very inefficient code,
-     * since the match code is so much more general. In most cases,
-     * LLVM is able to optimize the code, but it causes longer compile
-     * times and makes the generated code nigh impossible to read.
-     *
-     * # Arguments
-     * - bcx: starting basic block context
-     * - pat: the irrefutable pattern being matched.
-     * - val: the value being matched -- must be an lvalue (by ref, with cleanup)
-     */
-
     debug!("bind_irrefutable_pat(bcx={}, pat={})",
            bcx.to_str(),
            pat.repr(bcx.tcx()));
diff --git a/src/librustc_trans/trans/adt.rs b/src/librustc_trans/trans/adt.rs
index e7d1b9726a1b199a61804a46a23280fcb59a8279..568805bee40479f66595be7fe82a84d3acc04725 100644
--- a/src/librustc_trans/trans/adt.rs
+++ b/src/librustc_trans/trans/adt.rs
@@ -8,40 +8,38 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
- * # Representation of Algebraic Data Types
- *
- * This module determines how to represent enums, structs, and tuples
- * based on their monomorphized types; it is responsible both for
- * choosing a representation and translating basic operations on
- * values of those types. (Note: exporting the representations for
- * debuggers is handled in debuginfo.rs, not here.)
- *
- * Note that the interface treats everything as a general case of an
- * enum, so structs/tuples/etc. have one pseudo-variant with
- * discriminant 0; i.e., as if they were a univariant enum.
- *
- * Having everything in one place will enable improvements to data
- * structure representation; possibilities include:
- *
- * - User-specified alignment (e.g., cacheline-aligning parts of
- *   concurrently accessed data structures); LLVM can't represent this
- *   directly, so we'd have to insert padding fields in any structure
- *   that might contain one and adjust GEP indices accordingly. See
- *   issue #4578.
- *
- * - Store nested enums' discriminants in the same word. Rather, if
- *   some variants start with enums, and those enums representations
- *   have unused alignment padding between discriminant and body, the
- *   outer enum's discriminant can be stored there and those variants
- *   can start at offset 0. Kind of fancy, and might need work to
- *   make copies of the inner enum type cooperate, but it could help
- *   with `Option` or `Result` wrapped around another enum.
- *
- * - Tagged pointers would be neat, but given that any type can be
- *   used unboxed and any field can have pointers (including mutable)
- *   taken to it, implementing them for Rust seems difficult.
- */
+//! # Representation of Algebraic Data Types
+//!
+//! This module determines how to represent enums, structs, and tuples
+//! based on their monomorphized types; it is responsible both for
+//! choosing a representation and translating basic operations on
+//! values of those types. (Note: exporting the representations for
+//! debuggers is handled in debuginfo.rs, not here.)
+//!
+//! Note that the interface treats everything as a general case of an
+//! enum, so structs/tuples/etc. have one pseudo-variant with
+//! discriminant 0; i.e., as if they were a univariant enum.
+//!
+//! Having everything in one place will enable improvements to data
+//! structure representation; possibilities include:
+//!
+//! - User-specified alignment (e.g., cacheline-aligning parts of
+//!   concurrently accessed data structures); LLVM can't represent this
+//!   directly, so we'd have to insert padding fields in any structure
+//!   that might contain one and adjust GEP indices accordingly. See
+//!   issue #4578.
+//!
+//! - Store nested enums' discriminants in the same word. Rather, if
+//!   some variants start with enums, and those enums' representations
+//!   have unused alignment padding between discriminant and body, the
+//!   outer enum's discriminant can be stored there and those variants
+//!   can start at offset 0. Kind of fancy, and might need work to
+//!   make copies of the inner enum type cooperate, but it could help
+//!   with `Option` or `Result` wrapped around another enum.
+//!
+//! - Tagged pointers would be neat, but given that any type can be
+//!   used unboxed and any field can have pointers (including mutable)
+//!   taken to it, implementing them for Rust seems difficult.

 #![allow(unsigned_negation)]
diff --git a/src/librustc_trans/trans/asm.rs b/src/librustc_trans/trans/asm.rs
index 9b499b6d1a147ca9088dd661e249e88fd13da3fb..024df2a63adb5c7ef92d8ee500353ef9ef96f06a 100644
--- a/src/librustc_trans/trans/asm.rs
+++ b/src/librustc_trans/trans/asm.rs
@@ -8,9 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-# Translation of inline assembly.
-*/
+//! # Translation of inline assembly.

 use llvm;
 use trans::build::*;
diff --git a/src/librustc_trans/trans/base.rs b/src/librustc_trans/trans/base.rs
index 6fe5298393e7d7d92ca839df2943819312cc2a72..52e54a4a2613a41289835ffc16e3bf545ea0741b 100644
--- a/src/librustc_trans/trans/base.rs
+++ b/src/librustc_trans/trans/base.rs
@@ -1050,14 +1050,11 @@ pub fn load_if_immediate<'blk, 'tcx>(cx: Block<'blk, 'tcx>,
     return v;
 }

+/// Helper for loading values from memory. Does the necessary conversion if the in-memory type
+/// differs from the type used for SSA values. Also handles various special cases where the type
+/// gives us better information about what we are loading.
 pub fn load_ty<'blk, 'tcx>(cx: Block<'blk, 'tcx>,
                            ptr: ValueRef, t: Ty<'tcx>) -> ValueRef {
-    /*!
-     * Helper for loading values from memory. Does the necessary conversion if
-     * the in-memory type differs from the type used for SSA values. Also
-     * handles various special cases where the type gives us better information
-     * about what we are loading.
-     */
     if type_is_zero_size(cx.ccx(), t) {
         C_undef(type_of::type_of(cx.ccx(), t))
     } else if ty::type_is_bool(t) {
@@ -1071,11 +1068,9 @@ pub fn load_ty<'blk, 'tcx>(cx: Block<'blk, 'tcx>,
     }
 }
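These two helpers exist largely for the `bool` case visible in the surrounding context, where the in-memory representation (one byte) differs from the SSA representation (one bit): the `ZExt` in the store path below widens on the way into memory, and loads narrow again. A plain-Rust caricature of the convention (illustrative only):

```rust
// Caricature of the bool convention: memory holds a full byte (i8),
// SSA values are 1-bit (i1), so stores zero-extend and loads truncate.
fn store_bool(mem: &mut u8, v: bool) {
    *mem = v as u8; // like ZExt: i1 -> i8
}

fn load_bool(mem: &u8) -> bool {
    *mem != 0 // like Trunc: i8 -> i1
}
```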
+/// Helper for storing values in memory. Does the necessary conversion if the in-memory type
+/// differs from the type used for SSA values.
 pub fn store_ty(cx: Block, v: ValueRef, dst: ValueRef, t: Ty) {
-    /*!
-     * Helper for storing values in memory. Does the necessary conversion if
-     * the in-memory type differs from the type used for SSA values.
-     */
     if ty::type_is_bool(t) {
         Store(cx, ZExt(cx, v, Type::i8(cx.ccx())), dst);
     } else {
diff --git a/src/librustc_trans/trans/callee.rs b/src/librustc_trans/trans/callee.rs
index 6d0f598044235a520da926c946f9976d4ccba229..5d713526a3d6aa0bdc1be0ce95413723bd7204b7 100644
--- a/src/librustc_trans/trans/callee.rs
+++ b/src/librustc_trans/trans/callee.rs
@@ -8,13 +8,11 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
- * Handles translation of callees as well as other call-related
- * things. Callees are a superset of normal rust values and sometimes
- * have different representations. In particular, top-level fn items
- * and methods are represented as just a fn ptr and not a full
- * closure.
- */
+//! Handles translation of callees as well as other call-related
+//! things. Callees are a superset of normal rust values and sometimes
+//! have different representations. In particular, top-level fn items
+//! and methods are represented as just a fn ptr and not a full
+//! closure.

 pub use self::AutorefArg::*;
 pub use self::CalleeData::*;
@@ -220,13 +218,9 @@ fn trans_def<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
     }
 }

+/// Translates a reference (with id `ref_id`) to the fn/method with id `def_id` into a function
+/// pointer. This may require monomorphization or inlining.
 pub fn trans_fn_ref(bcx: Block, def_id: ast::DefId, node: ExprOrMethodCall) -> ValueRef {
-    /*!
-     * Translates a reference (with id `ref_id`) to the fn/method
-     * with id `def_id` into a function pointer. This may require
-     * monomorphization or inlining.
-     */
-
     let _icx = push_ctxt("trans_fn_ref");

     let substs = node_id_substs(bcx, node);
@@ -398,6 +392,17 @@ pub fn trans_unboxing_shim<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
     llfn
 }

+/// Translates a reference to a fn/method item, monomorphizing and
+/// inlining as it goes.
+///
+/// # Parameters
+///
+/// - `bcx`: the current block where the reference to the fn occurs
+/// - `def_id`: def id of the fn or method item being referenced
+/// - `node`: node id of the reference to the fn/method, if applicable.
+///   This parameter may be zero; but, if so, the resulting value may not
+///   have the right type, so it must be cast before being used.
+/// - `substs`: values for each of the fn/method's parameters
 pub fn trans_fn_ref_with_substs<'blk, 'tcx>(
     bcx: Block<'blk, 'tcx>,         //
     def_id: ast::DefId,             // def id of fn
@@ -405,20 +410,6 @@ pub fn trans_fn_ref_with_substs<'blk, 'tcx>(
     substs: subst::Substs<'tcx>)    // vtables for the call
     -> ValueRef
 {
-    /*!
-     * Translates a reference to a fn/method item, monomorphizing and
-     * inlining as it goes.
-     *
-     * # Parameters
-     *
-     * - `bcx`: the current block where the reference to the fn occurs
-     * - `def_id`: def id of the fn or method item being referenced
-     * - `node`: node id of the reference to the fn/method, if applicable.
-     *   This parameter may be zero; but, if so, the resulting value may not
-     *   have the right type, so it must be cast before being used.
-     * - `substs`: values for each of the fn/method's parameters
-     */
-
     let _icx = push_ctxt("trans_fn_ref_with_substs");

     let ccx = bcx.ccx();
     let tcx = bcx.tcx();
@@ -668,6 +659,16 @@ pub fn trans_lang_call<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
                dest)
 }

+/// This behemoth of a function translates function calls. Unfortunately, in order to generate more
+/// efficient LLVM output at -O0, it has quite a complex signature (refactoring this into two
+/// functions seems like a good idea).
+/// +/// In particular, for lang items, it is invoked with a dest of None, and in that case the return +/// value contains the result of the fn. The lang item must not return a structural type or else +/// all heck breaks loose. +/// +/// For non-lang items, `dest` is always Some, and hence the result is written into memory +/// somewhere. Nonetheless we return the actual return value of the function. pub fn trans_call_inner<'a, 'blk, 'tcx>(bcx: Block<'blk, 'tcx>, call_info: Option, callee_ty: Ty<'tcx>, @@ -677,22 +678,6 @@ pub fn trans_call_inner<'a, 'blk, 'tcx>(bcx: Block<'blk, 'tcx>, args: CallArgs<'a, 'tcx>, dest: Option) -> Result<'blk, 'tcx> { - /*! - * This behemoth of a function translates function calls. - * Unfortunately, in order to generate more efficient LLVM - * output at -O0, it has quite a complex signature (refactoring - * this into two functions seems like a good idea). - * - * In particular, for lang items, it is invoked with a dest of - * None, and in that case the return value contains the result of - * the fn. The lang item must not return a structural type or else - * all heck breaks loose. - * - * For non-lang items, `dest` is always Some, and hence the result - * is written into memory somewhere. Nonetheless we return the - * actual return value of the function. - */ - // Introduce a temporary cleanup scope that will contain cleanups // for the arguments while they are being evaluated. The purpose // this cleanup is to ensure that, should a panic occur while diff --git a/src/librustc_trans/trans/cleanup.rs b/src/librustc_trans/trans/cleanup.rs index b0235be7497eae95dd301aee4e83c1409833ed44..d7da83ddb0d04168957d6cb59fbe2700ab0680e5 100644 --- a/src/librustc_trans/trans/cleanup.rs +++ b/src/librustc_trans/trans/cleanup.rs @@ -8,10 +8,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * Code pertaining to cleanup of temporaries as well as execution of - * drop glue. See discussion in `doc.rs` for a high-level summary. - */ +//! Code pertaining to cleanup of temporaries as well as execution of +//! drop glue. See discussion in `doc.rs` for a high-level summary. pub use self::ScopeId::*; pub use self::CleanupScopeKind::*; @@ -114,12 +112,8 @@ pub enum ScopeId { } impl<'blk, 'tcx> CleanupMethods<'blk, 'tcx> for FunctionContext<'blk, 'tcx> { + /// Invoked when we start to trans the code contained within a new cleanup scope. fn push_ast_cleanup_scope(&self, debug_loc: NodeInfo) { - /*! - * Invoked when we start to trans the code contained - * within a new cleanup scope. - */ - debug!("push_ast_cleanup_scope({})", self.ccx.tcx().map.node_to_string(debug_loc.id)); @@ -189,16 +183,12 @@ fn push_custom_cleanup_scope_with_debug_loc(&self, CustomScopeIndex { index: index } } + /// Removes the cleanup scope for id `cleanup_scope`, which must be at the top of the cleanup + /// stack, and generates the code to do its cleanups for normal exit. fn pop_and_trans_ast_cleanup_scope(&self, bcx: Block<'blk, 'tcx>, cleanup_scope: ast::NodeId) -> Block<'blk, 'tcx> { - /*! - * Removes the cleanup scope for id `cleanup_scope`, which - * must be at the top of the cleanup stack, and generates the - * code to do its cleanups for normal exit. 
- */ - debug!("pop_and_trans_ast_cleanup_scope({})", self.ccx.tcx().map.node_to_string(cleanup_scope)); @@ -208,15 +198,11 @@ fn pop_and_trans_ast_cleanup_scope(&self, self.trans_scope_cleanups(bcx, &scope) } + /// Removes the loop cleanup scope for id `cleanup_scope`, which must be at the top of the + /// cleanup stack. Does not generate any cleanup code, since loop scopes should exit by + /// branching to a block generated by `normal_exit_block`. fn pop_loop_cleanup_scope(&self, cleanup_scope: ast::NodeId) { - /*! - * Removes the loop cleanup scope for id `cleanup_scope`, which - * must be at the top of the cleanup stack. Does not generate - * any cleanup code, since loop scopes should exit by - * branching to a block generated by `normal_exit_block`. - */ - debug!("pop_loop_cleanup_scope({})", self.ccx.tcx().map.node_to_string(cleanup_scope)); @@ -225,29 +211,21 @@ fn pop_loop_cleanup_scope(&self, let _ = self.pop_scope(); } + /// Removes the top cleanup scope from the stack without executing its cleanups. The top + /// cleanup scope must be the temporary scope `custom_scope`. fn pop_custom_cleanup_scope(&self, custom_scope: CustomScopeIndex) { - /*! - * Removes the top cleanup scope from the stack without - * executing its cleanups. The top cleanup scope must - * be the temporary scope `custom_scope`. - */ - debug!("pop_custom_cleanup_scope({})", custom_scope.index); assert!(self.is_valid_to_pop_custom_scope(custom_scope)); let _ = self.pop_scope(); } + /// Removes the top cleanup scope from the stack, which must be a temporary scope, and + /// generates the code to do its cleanups for normal exit. fn pop_and_trans_custom_cleanup_scope(&self, bcx: Block<'blk, 'tcx>, custom_scope: CustomScopeIndex) -> Block<'blk, 'tcx> { - /*! - * Removes the top cleanup scope from the stack, which must be - * a temporary scope, and generates the code to do its - * cleanups for normal exit. - */ - debug!("pop_and_trans_custom_cleanup_scope({})", custom_scope); assert!(self.is_valid_to_pop_custom_scope(custom_scope)); @@ -255,11 +233,8 @@ fn pop_and_trans_custom_cleanup_scope(&self, self.trans_scope_cleanups(bcx, &scope) } + /// Returns the id of the top-most loop scope fn top_loop_scope(&self) -> ast::NodeId { - /*! - * Returns the id of the top-most loop scope - */ - for scope in self.scopes.borrow().iter().rev() { match scope.kind { LoopScopeKind(id, _) => { @@ -271,24 +246,17 @@ fn top_loop_scope(&self) -> ast::NodeId { self.ccx.sess().bug("no loop scope found"); } + /// Returns a block to branch to which will perform all pending cleanups and then + /// break/continue (depending on `exit`) out of the loop with id `cleanup_scope` fn normal_exit_block(&'blk self, cleanup_scope: ast::NodeId, exit: uint) -> BasicBlockRef { - /*! - * Returns a block to branch to which will perform all pending - * cleanups and then break/continue (depending on `exit`) out - * of the loop with id `cleanup_scope` - */ - self.trans_cleanups_to_exit_scope(LoopExit(cleanup_scope, exit)) } + /// Returns a block to branch to which will perform all pending cleanups and then return from + /// this function fn return_exit_block(&'blk self) -> BasicBlockRef { - /*! 
- * Returns a block to branch to which will perform all pending - * cleanups and then return from this function - */ - self.trans_cleanups_to_exit_scope(ReturnExit) } @@ -306,15 +274,11 @@ fn schedule_lifetime_end(&self, self.schedule_clean(cleanup_scope, drop as CleanupObj); } + /// Schedules a (deep) drop of `val`, which is a pointer to an instance of `ty` fn schedule_drop_mem(&self, cleanup_scope: ScopeId, val: ValueRef, ty: Ty<'tcx>) { - /*! - * Schedules a (deep) drop of `val`, which is a pointer to an - * instance of `ty` - */ - if !ty::type_needs_drop(self.ccx.tcx(), ty) { return; } let drop = box DropValue { is_immediate: false, @@ -332,15 +296,11 @@ fn schedule_drop_mem(&self, self.schedule_clean(cleanup_scope, drop as CleanupObj); } + /// Schedules a (deep) drop and zero-ing of `val`, which is a pointer to an instance of `ty` fn schedule_drop_and_zero_mem(&self, cleanup_scope: ScopeId, val: ValueRef, ty: Ty<'tcx>) { - /*! - * Schedules a (deep) drop and zero-ing of `val`, which is a pointer - * to an instance of `ty` - */ - if !ty::type_needs_drop(self.ccx.tcx(), ty) { return; } let drop = box DropValue { is_immediate: false, @@ -359,13 +319,11 @@ fn schedule_drop_and_zero_mem(&self, self.schedule_clean(cleanup_scope, drop as CleanupObj); } + /// Schedules a (deep) drop of `val`, which is an instance of `ty` fn schedule_drop_immediate(&self, cleanup_scope: ScopeId, val: ValueRef, ty: Ty<'tcx>) { - /*! - * Schedules a (deep) drop of `val`, which is an instance of `ty` - */ if !ty::type_needs_drop(self.ccx.tcx(), ty) { return; } let drop = box DropValue { @@ -384,16 +342,12 @@ fn schedule_drop_immediate(&self, self.schedule_clean(cleanup_scope, drop as CleanupObj); } + /// Schedules a call to `free(val)`. Note that this is a shallow operation. fn schedule_free_value(&self, cleanup_scope: ScopeId, val: ValueRef, heap: Heap, content_ty: Ty<'tcx>) { - /*! - * Schedules a call to `free(val)`. Note that this is a shallow - * operation. - */ - let drop = box FreeValue { ptr: val, heap: heap, content_ty: content_ty }; debug!("schedule_free_value({}, val={}, heap={})", @@ -404,17 +358,13 @@ fn schedule_free_value(&self, self.schedule_clean(cleanup_scope, drop as CleanupObj); } + /// Schedules a call to `free(val)`. Note that this is a shallow operation. fn schedule_free_slice(&self, cleanup_scope: ScopeId, val: ValueRef, size: ValueRef, align: ValueRef, heap: Heap) { - /*! - * Schedules a call to `free(val)`. Note that this is a shallow - * operation. - */ - let drop = box FreeSlice { ptr: val, size: size, align: align, heap: heap }; debug!("schedule_free_slice({}, val={}, heap={})", @@ -434,15 +384,12 @@ fn schedule_clean(&self, } } + /// Schedules a cleanup to occur upon exit from `cleanup_scope`. If `cleanup_scope` is not + /// provided, then the cleanup is scheduled in the topmost scope, which must be a temporary + /// scope. fn schedule_clean_in_ast_scope(&self, cleanup_scope: ast::NodeId, cleanup: CleanupObj<'tcx>) { - /*! - * Schedules a cleanup to occur upon exit from `cleanup_scope`. - * If `cleanup_scope` is not provided, then the cleanup is scheduled - * in the topmost scope, which must be a temporary scope. - */ - debug!("schedule_clean_in_ast_scope(cleanup_scope={})", cleanup_scope); @@ -462,14 +409,10 @@ fn schedule_clean_in_ast_scope(&self, self.ccx.tcx().map.node_to_string(cleanup_scope)).as_slice()); } + /// Schedules a cleanup to occur in the top-most scope, which must be a temporary scope. 
fn schedule_clean_in_custom_scope(&self, custom_scope: CustomScopeIndex, cleanup: CleanupObj<'tcx>) { - /*! - * Schedules a cleanup to occur in the top-most scope, - * which must be a temporary scope. - */ - debug!("schedule_clean_in_custom_scope(custom_scope={})", custom_scope.index); @@ -481,22 +424,14 @@ fn schedule_clean_in_custom_scope(&self, scope.clear_cached_exits(); } + /// Returns true if there are pending cleanups that should execute on panic. fn needs_invoke(&self) -> bool { - /*! - * Returns true if there are pending cleanups that should - * execute on panic. - */ - self.scopes.borrow().iter().rev().any(|s| s.needs_invoke()) } + /// Returns a basic block to branch to in the event of a panic. This block will run the panic + /// cleanups and eventually invoke the LLVM `Resume` instruction. fn get_landing_pad(&'blk self) -> BasicBlockRef { - /*! - * Returns a basic block to branch to in the event of a panic. - * This block will run the panic cleanups and eventually - * invoke the LLVM `Resume` instruction. - */ - let _icx = base::push_ctxt("get_landing_pad"); debug!("get_landing_pad"); @@ -529,10 +464,8 @@ fn get_landing_pad(&'blk self) -> BasicBlockRef { } impl<'blk, 'tcx> CleanupHelperMethods<'blk, 'tcx> for FunctionContext<'blk, 'tcx> { + /// Returns the id of the current top-most AST scope, if any. fn top_ast_scope(&self) -> Option { - /*! - * Returns the id of the current top-most AST scope, if any. - */ for scope in self.scopes.borrow().iter().rev() { match scope.kind { CustomScopeKind | LoopScopeKind(..) => {} @@ -559,10 +492,10 @@ fn is_valid_custom_scope(&self, custom_scope: CustomScopeIndex) -> bool { (*scopes)[custom_scope.index].kind.is_temp() } + /// Generates the cleanups for `scope` into `bcx` fn trans_scope_cleanups(&self, // cannot borrow self, will recurse bcx: Block<'blk, 'tcx>, scope: &CleanupScope<'blk, 'tcx>) -> Block<'blk, 'tcx> { - /*! Generates the cleanups for `scope` into `bcx` */ let mut bcx = bcx; if !bcx.unreachable.get() { @@ -593,37 +526,31 @@ fn top_scope(&self, f: |&CleanupScope<'blk, 'tcx>| -> R) -> R { f(self.scopes.borrow().last().unwrap()) } + /// Used when the caller wishes to jump to an early exit, such as a return, break, continue, or + /// unwind. This function will generate all cleanups between the top of the stack and the exit + /// `label` and return a basic block that the caller can branch to. + /// + /// For example, if the current stack of cleanups were as follows: + /// + /// AST 22 + /// Custom 1 + /// AST 23 + /// Loop 23 + /// Custom 2 + /// AST 24 + /// + /// and the `label` specifies a break from `Loop 23`, then this function would generate a + /// series of basic blocks as follows: + /// + /// Cleanup(AST 24) -> Cleanup(Custom 2) -> break_blk + /// + /// where `break_blk` is the block specified in `Loop 23` as the target for breaks. The return + /// value would be the first basic block in that sequence (`Cleanup(AST 24)`). The caller could + /// then branch to `Cleanup(AST 24)` and it will perform all cleanups and finally branch to the + /// `break_blk`. fn trans_cleanups_to_exit_scope(&'blk self, label: EarlyExitLabel) -> BasicBlockRef { - /*! - * Used when the caller wishes to jump to an early exit, such - * as a return, break, continue, or unwind. This function will - * generate all cleanups between the top of the stack and the - * exit `label` and return a basic block that the caller can - * branch to. 
- * - * For example, if the current stack of cleanups were as follows: - * - * AST 22 - * Custom 1 - * AST 23 - * Loop 23 - * Custom 2 - * AST 24 - * - * and the `label` specifies a break from `Loop 23`, then this - * function would generate a series of basic blocks as follows: - * - * Cleanup(AST 24) -> Cleanup(Custom 2) -> break_blk - * - * where `break_blk` is the block specified in `Loop 23` as - * the target for breaks. The return value would be the first - * basic block in that sequence (`Cleanup(AST 24)`). The - * caller could then branch to `Cleanup(AST 24)` and it will - * perform all cleanups and finally branch to the `break_blk`. - */ - debug!("trans_cleanups_to_exit_scope label={} scopes={}", label, self.scopes_len()); @@ -756,20 +683,15 @@ fn trans_cleanups_to_exit_scope(&'blk self, prev_llbb } + /// Creates a landing pad for the top scope, if one does not exist. The landing pad will + /// perform all cleanups necessary for an unwind and then `resume` to continue error + /// propagation: + /// + /// landing_pad -> ... cleanups ... -> [resume] + /// + /// (The cleanups and resume instruction are created by `trans_cleanups_to_exit_scope()`, not + /// in this function itself.) fn get_or_create_landing_pad(&'blk self) -> BasicBlockRef { - /*! - * Creates a landing pad for the top scope, if one does not - * exist. The landing pad will perform all cleanups necessary - * for an unwind and then `resume` to continue error - * propagation: - * - * landing_pad -> ... cleanups ... -> [resume] - * - * (The cleanups and resume instruction are created by - * `trans_cleanups_to_exit_scope()`, not in this function - * itself.) - */ - let pad_bcx; debug!("get_or_create_landing_pad"); @@ -883,19 +805,15 @@ fn add_cached_early_exit(&mut self, cleanup_block: blk }); } + /// True if this scope has cleanups that need unwinding fn needs_invoke(&self) -> bool { - /*! True if this scope has cleanups that need unwinding */ self.cached_landing_pad.is_some() || self.cleanups.iter().any(|c| c.must_unwind()) } + /// Returns a suitable name to use for the basic block that handles this cleanup scope fn block_name(&self, prefix: &str) -> String { - /*! - * Returns a suitable name to use for the basic block that - * handles this cleanup scope - */ - match self.kind { CustomScopeKind => format!("{}_custom_", prefix), AstScopeKind(id) => format!("{}_ast_{}_", prefix, id), @@ -930,14 +848,10 @@ fn is_loop_with_id(&self, id: ast::NodeId) -> bool { } } + /// If this is a loop scope with id `id`, return the early exit block `exit`, else `None` fn early_exit_block(&self, id: ast::NodeId, exit: uint) -> Option { - /*! - * If this is a loop scope with id `id`, return the early - * exit block `exit`, else `None` - */ - match *self { LoopScopeKind(i, ref exits) if id == i => Some(exits[exit].llbb), _ => None, diff --git a/src/librustc_trans/trans/closure.rs b/src/librustc_trans/trans/closure.rs index ca955975dfb3051899138d5245a9d8524445b8d8..2f82b8286c2d531da021e90752b4eba7b025b56f 100644 --- a/src/librustc_trans/trans/closure.rs +++ b/src/librustc_trans/trans/closure.rs @@ -386,6 +386,15 @@ pub fn load<'blk>(self, bcx: Block<'blk, 'tcx>, arg_scope: ScopeId) } } +/// Translates the body of a closure expression. +/// +/// - `store` +/// - `decl` +/// - `body` +/// - `id`: The id of the closure expression. +/// - `cap_clause`: information about captured variables, if any. 
+/// - `dest`: where to write the closure value, which must be a +/// (fn ptr, env) pair pub fn trans_expr_fn<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, store: ty::TraitStore, decl: &ast::FnDecl, @@ -393,19 +402,6 @@ pub fn trans_expr_fn<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, id: ast::NodeId, dest: expr::Dest) -> Block<'blk, 'tcx> { - /*! - * - * Translates the body of a closure expression. - * - * - `store` - * - `decl` - * - `body` - * - `id`: The id of the closure expression. - * - `cap_clause`: information about captured variables, if any. - * - `dest`: where to write the closure value, which must be a - (fn ptr, env) pair - */ - let _icx = push_ctxt("closure::trans_expr_fn"); let dest_addr = match dest { diff --git a/src/librustc_trans/trans/common.rs b/src/librustc_trans/trans/common.rs index 235805a7c8308bf62e9e95cd805c6d9ea9ecd2f2..febb33f6c54aff7db2476556317d18cf3705d801 100644 --- a/src/librustc_trans/trans/common.rs +++ b/src/librustc_trans/trans/common.rs @@ -95,26 +95,19 @@ pub fn type_is_immediate<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, ty: Ty<'tcx>) - } } +/// Identify types which have size zero at runtime. pub fn type_is_zero_size<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, ty: Ty<'tcx>) -> bool { - /*! - * Identify types which have size zero at runtime. - */ - use trans::machine::llsize_of_alloc; use trans::type_of::sizing_type_of; let llty = sizing_type_of(ccx, ty); llsize_of_alloc(ccx, llty) == 0 } +/// Identifies types which we declare to be equivalent to `void` in C for the purpose of function +/// return types. These are `()`, bot, and uninhabited enums. Note that all such types are also +/// zero-size, but not all zero-size types use a `void` return type (in order to aid with C ABI +/// compatibility). pub fn return_type_is_void(ccx: &CrateContext, ty: Ty) -> bool { - /*! - * Identifies types which we declare to be equivalent to `void` - * in C for the purpose of function return types. These are - * `()`, bot, and uninhabited enums. Note that all such types - * are also zero-size, but not all zero-size types use a `void` - * return type (in order to aid with C ABI compatibility). - */ - ty::type_is_nil(ty) || ty::type_is_empty(ccx.tcx(), ty) } @@ -768,19 +761,14 @@ pub fn expr_ty_adjusted<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, ex: &ast::Expr) -> T monomorphize_type(bcx, ty::expr_ty_adjusted(bcx.tcx(), ex)) } +/// Attempts to resolve an obligation. The result is a shallow vtable resolution -- meaning that we +/// do not (necessarily) resolve all nested obligations on the impl. Note that type check should +/// guarantee to us that all nested obligations *could be* resolved if we wanted to. pub fn fulfill_obligation<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, span: Span, trait_ref: Rc>) -> traits::Vtable<'tcx, ()> { - /*! - * Attempts to resolve an obligation. The result is a shallow - * vtable resolution -- meaning that we do not (necessarily) resolve - * all nested obligations on the impl. Note that type check should - * guarantee to us that all nested obligations *could be* resolved - * if we wanted to. - */ - let tcx = ccx.tcx(); // Remove any references to regions; this helps improve caching. diff --git a/src/librustc_trans/trans/datum.rs b/src/librustc_trans/trans/datum.rs index 354a6072207156f9ca5d622532ed1ffa8a198784..22f030be3d6530f73dff4d9a227f098b990586c1 100644 --- a/src/librustc_trans/trans/datum.rs +++ b/src/librustc_trans/trans/datum.rs @@ -8,10 +8,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! 
- * See the section on datums in `doc.rs` for an overview of what - * Datums are and how they are intended to be used. - */ +//! See the section on datums in `doc.rs` for an overview of what Datums are and how they are +//! intended to be used. pub use self::Expr::*; pub use self::RvalueMode::*; @@ -107,6 +105,10 @@ pub fn immediate_rvalue_bcx<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, } +/// Allocates temporary space on the stack using alloca() and returns a by-ref Datum pointing to +/// it. The memory will be dropped upon exit from `scope`. The callback `populate` should +/// initialize the memory. If `zero` is true, the space will be zeroed when it is allocated; this +/// is not necessary unless `bcx` does not dominate the end of `scope`. pub fn lvalue_scratch_datum<'blk, 'tcx, A>(bcx: Block<'blk, 'tcx>, ty: Ty<'tcx>, name: &str, @@ -116,15 +118,6 @@ pub fn lvalue_scratch_datum<'blk, 'tcx, A>(bcx: Block<'blk, 'tcx>, populate: |A, Block<'blk, 'tcx>, ValueRef| -> Block<'blk, 'tcx>) -> DatumBlock<'blk, 'tcx, Lvalue> { - /*! - * Allocates temporary space on the stack using alloca() and - * returns a by-ref Datum pointing to it. The memory will be - * dropped upon exit from `scope`. The callback `populate` should - * initialize the memory. If `zero` is true, the space will be - * zeroed when it is allocated; this is not necessary unless `bcx` - * does not dominate the end of `scope`. - */ - let scratch = if zero { alloca_zeroed(bcx, ty, name) } else { @@ -140,33 +133,24 @@ pub fn lvalue_scratch_datum<'blk, 'tcx, A>(bcx: Block<'blk, 'tcx>, DatumBlock::new(bcx, Datum::new(scratch, ty, Lvalue)) } +/// Allocates temporary space on the stack using alloca() and returns a by-ref Datum pointing to +/// it. If `zero` is true, the space will be zeroed when it is allocated; this is normally not +/// necessary, but in the case of automatic rooting in match statements it is possible to have +/// temporaries that may not get initialized if a certain arm is not taken, so we must zero them. +/// You must arrange any cleanups etc yourself! pub fn rvalue_scratch_datum<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, ty: Ty<'tcx>, name: &str) -> Datum<'tcx, Rvalue> { - /*! - * Allocates temporary space on the stack using alloca() and - * returns a by-ref Datum pointing to it. If `zero` is true, the - * space will be zeroed when it is allocated; this is normally not - * necessary, but in the case of automatic rooting in match - * statements it is possible to have temporaries that may not get - * initialized if a certain arm is not taken, so we must zero - * them. You must arrange any cleanups etc yourself! - */ - let llty = type_of::type_of(bcx.ccx(), ty); let scratch = alloca(bcx, llty, name); Datum::new(scratch, ty, Rvalue::new(ByRef)) } +/// Indicates the "appropriate" mode for this value, which is either by ref or by value, depending +/// on whether type is immediate or not. pub fn appropriate_rvalue_mode<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, ty: Ty<'tcx>) -> RvalueMode { - /*! - * Indicates the "appropriate" mode for this value, - * which is either by ref or by value, depending - * on whether type is immediate or not. - */ - if type_is_immediate(ccx, ty) { ByValue } else { @@ -234,17 +218,13 @@ fn to_expr_kind(self) -> Expr { } impl KindOps for Lvalue { + /// If an lvalue is moved, we must zero out the memory in which it resides so as to cancel + /// cleanup. If an @T lvalue is copied, we must increment the reference count. 
 fn post_store<'blk, 'tcx>(&self,
                           bcx: Block<'blk, 'tcx>,
                           val: ValueRef,
                           ty: Ty<'tcx>)
                           -> Block<'blk, 'tcx> {
-        /*!
-         * If an lvalue is moved, we must zero out the memory in which
-         * it resides so as to cancel cleanup. If an @T lvalue is
-         * copied, we must increment the reference count.
-         */
-
         if ty::type_needs_drop(bcx.tcx(), ty) {
             // cancel cleanup of affine values by zeroing out
             let () = zero_mem(bcx, val, ty);
@@ -288,31 +268,24 @@ fn to_expr_kind(self) -> Expr {
 }

 impl<'tcx> Datum<'tcx, Rvalue> {
+    /// Schedules a cleanup for this datum in the given scope. That means that this datum is no
+    /// longer an rvalue datum; hence, this function consumes the datum and returns the contained
+    /// ValueRef.
     pub fn add_clean<'a>(self,
                          fcx: &FunctionContext<'a, 'tcx>,
                          scope: cleanup::ScopeId)
                          -> ValueRef {
-        /*!
-         * Schedules a cleanup for this datum in the given scope.
-         * That means that this datum is no longer an rvalue datum;
-         * hence, this function consumes the datum and returns the
-         * contained ValueRef.
-         */
-
         add_rvalue_clean(self.kind.mode, fcx, scope, self.val, self.ty);
         self.val
     }

+    /// Returns an lvalue datum (that is, a by ref datum with cleanup scheduled). If `self` is not
+    /// already an lvalue, cleanup will be scheduled in the temporary scope for `expr_id`.
     pub fn to_lvalue_datum_in_scope<'blk>(self,
                                           bcx: Block<'blk, 'tcx>,
                                           name: &str,
                                           scope: cleanup::ScopeId)
                                           -> DatumBlock<'blk, 'tcx, Lvalue> {
-        /*!
-         * Returns an lvalue datum (that is, a by ref datum with
-         * cleanup scheduled). If `self` is not already an lvalue,
-         * cleanup will be scheduled in the temporary scope for `expr_id`.
-         */
         let fcx = bcx.fcx;

         match self.kind.mode {
@@ -381,22 +354,16 @@ fn match_kind(self,
         }
     }

+    /// Asserts that this datum *is* an lvalue and returns it.
     #[allow(dead_code)] // potentially useful
     pub fn assert_lvalue(self, bcx: Block) -> Datum<'tcx, Lvalue> {
-        /*!
-         * Asserts that this datum *is* an lvalue and returns it.
-         */
-
         self.match_kind(
             |d| d,
             |_| bcx.sess().bug("assert_lvalue given rvalue"))
     }

+    /// Asserts that this datum *is* an rvalue and returns it.
     pub fn assert_rvalue(self, bcx: Block) -> Datum<'tcx, Rvalue> {
-        /*!
-         * Asserts that this datum *is* an lvalue and returns it.
-         */
-
         self.match_kind(
             |_| bcx.sess().bug("assert_rvalue given lvalue"),
             |r| r)
@@ -418,14 +385,11 @@ pub fn store_to_dest<'blk>(self,
         }
     }

+    /// Arranges cleanup for `self` if it is an rvalue. Use when you are done working with a value
+    /// that may need drop.
     pub fn add_clean_if_rvalue<'blk>(self,
                                      bcx: Block<'blk, 'tcx>,
                                      expr_id: ast::NodeId) {
-        /*!
-         * Arranges cleanup for `self` if it is an rvalue. Use when
-         * you are done working with a value that may need drop.
-         */
-
         self.match_kind(
             |_| { /* Nothing to do, cleanup already arranged */ },
             |r| {
@@ -434,16 +398,12 @@ pub fn add_clean_if_rvalue<'blk>(self,
             })
     }

+    /// Ensures that `self` will get cleaned up, if it is not an lvalue already.
     pub fn clean<'blk>(self,
                        bcx: Block<'blk, 'tcx>,
                        name: &'static str,
                        expr_id: ast::NodeId)
                        -> Block<'blk, 'tcx> {
-        /*!
-         * Ensures that `self` will get cleaned up, if it is not an lvalue
-         * already.
-         */
-
         self.to_lvalue_datum(bcx, name, expr_id).bcx
     }

@@ -464,15 +424,11 @@ pub fn to_lvalue_datum<'blk>(self,
         })
     }
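The rvalue/lvalue split maintained by these methods leans on the zeroing convention described at `post_store` above: all-zero memory means "no cleanup left to run". A plain-Rust caricature of that drop-flag idea (illustrative only; the real code manipulates LLVM values, not structs):

```rust
// Caricature of zero-on-move: zeroing the source after a move turns its
// already-scheduled cleanup into a no-op, cancelling the drop.
struct Slot {
    bytes: [u8, ..4], // all-zero bytes mean "nothing to drop here"
}

fn move_out(slot: &mut Slot) -> [u8, ..4] {
    let value = slot.bytes;
    slot.bytes = [0u8, ..4]; // cancel the pending cleanup for this slot
    value
}
```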
+    /// Ensures that we have an rvalue datum (that is, a datum with no cleanup scheduled).
     pub fn to_rvalue_datum<'blk>(self,
                                  bcx: Block<'blk, 'tcx>,
                                  name: &'static str)
                                  -> DatumBlock<'blk, 'tcx, Rvalue> {
-        /*!
-         * Ensures that we have an rvalue datum (that is, a datum with
-         * no cleanup scheduled).
-         */
-
         self.match_kind(
             |l| {
                 let mut bcx = bcx;
@@ -501,12 +457,9 @@ pub fn to_rvalue_datum<'blk>(self,
  * from an array.
  */
 impl<'tcx> Datum<'tcx, Lvalue> {
+    /// Converts a datum into a by-ref value. The datum type must be one which is always passed by
+    /// reference.
     pub fn to_llref(self) -> ValueRef {
-        /*!
-         * Converts a datum into a by-ref value. The datum type must
-         * be one which is always passed by reference.
-         */
-
         self.val
     }

@@ -555,40 +508,30 @@ pub fn to_expr_datum(self) -> Datum<'tcx, Expr> {
         Datum { val: val, ty: ty, kind: kind.to_expr_kind() }
     }

+    /// Moves or copies this value into a new home, as appropriate depending on the type of the
+    /// datum. This method consumes the datum, since it would be incorrect to go on using the datum
+    /// if the value represented is affine (and hence the value is moved).
     pub fn store_to<'blk>(self,
                           bcx: Block<'blk, 'tcx>,
                           dst: ValueRef)
                           -> Block<'blk, 'tcx> {
-        /*!
-         * Moves or copies this value into a new home, as appropriate
-         * depending on the type of the datum. This method consumes
-         * the datum, since it would be incorrect to go on using the
-         * datum if the value represented is affine (and hence the value
-         * is moved).
-         */
-
         self.shallow_copy_raw(bcx, dst);

         self.kind.post_store(bcx, self.val, self.ty)
     }

+    /// Helper function that performs a shallow copy of this value into `dst`, which should be a
+    /// pointer to a memory location suitable for `self.ty`. `dst` should contain uninitialized
+    /// memory (either newly allocated, zeroed, or dropped).
+    ///
+    /// This function is private to datums because it leaves memory in an unstable state, where the
+    /// source value has been copied but not zeroed. Public methods are `store_to` (if you no
+    /// longer need the source value) or `shallow_copy` (if you wish the source value to remain
+    /// valid).
     fn shallow_copy_raw<'blk>(&self,
                               bcx: Block<'blk, 'tcx>,
                               dst: ValueRef)
                               -> Block<'blk, 'tcx> {
-        /*!
-         * Helper function that performs a shallow copy of this value
-         * into `dst`, which should be a pointer to a memory location
-         * suitable for `self.ty`. `dst` should contain uninitialized
-         * memory (either newly allocated, zeroed, or dropped).
-         *
-         * This function is private to datums because it leaves memory
-         * in an unstable state, where the source value has been
-         * copied but not zeroed. Public methods are `store_to`
-         * (if you no longer need the source value) or `shallow_copy`
-         * (if you wish the source value to remain valid).
-         */
-
         let _icx = push_ctxt("copy_to_no_check");

         if type_is_zero_size(bcx.ccx(), self.ty) {
@@ -604,17 +547,13 @@ fn shallow_copy_raw<'blk>(&self,
         return bcx;
     }

+    /// Copies the value into a new location. This function always preserves the existing datum as
+    /// a valid value. Therefore, it does not consume `self` and, also, cannot be applied to affine
+    /// values (since they must never be duplicated).
     pub fn shallow_copy<'blk>(&self,
                               bcx: Block<'blk, 'tcx>,
                               dst: ValueRef)
                               -> Block<'blk, 'tcx> {
-        /*!
-         * Copies the value into a new location. This function always
-         * preserves the existing datum as a valid value. Therefore,
-         * it does not consume `self` and, also, cannot be applied to
-         * affine values (since they must never be duplicated).
-         */
-
         assert!(!ty::type_moves_by_default(bcx.tcx(), self.ty));
         self.shallow_copy_raw(bcx, dst)
     }

@@ -627,23 +566,17 @@ pub fn to_string<'a>(&self, ccx: &CrateContext<'a, 'tcx>) -> String {
                 self.kind)
     }

+    ///
See the `appropriate_rvalue_mode()` function pub fn appropriate_rvalue_mode<'a>(&self, ccx: &CrateContext<'a, 'tcx>) -> RvalueMode { - /*! See the `appropriate_rvalue_mode()` function */ - appropriate_rvalue_mode(ccx, self.ty) } + /// Converts `self` into a by-value `ValueRef`. Consumes this datum (i.e., absolves you of + /// responsibility to cleanup the value). For this to work, the value must be something + /// scalar-ish (like an int or a pointer) which (1) does not require drop glue and (2) is + /// naturally passed around by value, and not by reference. pub fn to_llscalarish<'blk>(self, bcx: Block<'blk, 'tcx>) -> ValueRef { - /*! - * Converts `self` into a by-value `ValueRef`. Consumes this - * datum (i.e., absolves you of responsibility to cleanup the - * value). For this to work, the value must be something - * scalar-ish (like an int or a pointer) which (1) does not - * require drop glue and (2) is naturally passed around by - * value, and not by reference. - */ - assert!(!ty::type_needs_drop(bcx.tcx(), self.ty)); assert!(self.appropriate_rvalue_mode(bcx.ccx()) == ByValue); if self.kind.is_by_ref() { diff --git a/src/librustc_trans/trans/debuginfo.rs b/src/librustc_trans/trans/debuginfo.rs index a3472e194cf588ee0c524b944f8a3c5f3e56d035..c35de3209c61f087566f87ab65495783bdf0c5cd 100644 --- a/src/librustc_trans/trans/debuginfo.rs +++ b/src/librustc_trans/trans/debuginfo.rs @@ -8,181 +8,180 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! -# Debug Info Module - -This module serves the purpose of generating debug symbols. We use LLVM's -[source level debugging](http://llvm.org/docs/SourceLevelDebugging.html) -features for generating the debug information. The general principle is this: - -Given the right metadata in the LLVM IR, the LLVM code generator is able to -create DWARF debug symbols for the given code. The -[metadata](http://llvm.org/docs/LangRef.html#metadata-type) is structured much -like DWARF *debugging information entries* (DIE), representing type information -such as datatype layout, function signatures, block layout, variable location -and scope information, etc. It is the purpose of this module to generate correct -metadata and insert it into the LLVM IR. - -As the exact format of metadata trees may change between different LLVM -versions, we now use LLVM -[DIBuilder](http://llvm.org/docs/doxygen/html/classllvm_1_1DIBuilder.html) to -create metadata where possible. This will hopefully ease the adaption of this -module to future LLVM versions. - -The public API of the module is a set of functions that will insert the correct -metadata into the LLVM IR when called with the right parameters. The module is -thus driven from an outside client with functions like -`debuginfo::create_local_var_metadata(bcx: block, local: &ast::local)`. - -Internally the module will try to reuse already created metadata by utilizing a -cache. The way to get a shared metadata node when needed is thus to just call -the corresponding function in this module: - - let file_metadata = file_metadata(crate_context, path); - -The function will take care of probing the cache for an existing node for that -exact file path. - -All private state used by the module is stored within either the -CrateDebugContext struct (owned by the CrateContext) or the FunctionDebugContext -(owned by the FunctionContext). - -This file consists of three conceptual sections: -1. The public interface of the module -2. Module-internal metadata creation functions -3. 
Minor utility functions - - -## Recursive Types - -Some kinds of types, such as structs and enums can be recursive. That means that -the type definition of some type X refers to some other type which in turn -(transitively) refers to X. This introduces cycles into the type referral graph. -A naive algorithm doing an on-demand, depth-first traversal of this graph when -describing types, can get trapped in an endless loop when it reaches such a -cycle. - -For example, the following simple type for a singly-linked list... - -``` -struct List { - value: int, - tail: Option>, -} -``` - -will generate the following callstack with a naive DFS algorithm: - -``` -describe(t = List) - describe(t = int) - describe(t = Option>) - describe(t = Box) - describe(t = List) // at the beginning again... - ... -``` - -To break cycles like these, we use "forward declarations". That is, when the -algorithm encounters a possibly recursive type (any struct or enum), it -immediately creates a type description node and inserts it into the cache -*before* describing the members of the type. This type description is just a -stub (as type members are not described and added to it yet) but it allows the -algorithm to already refer to the type. After the stub is inserted into the -cache, the algorithm continues as before. If it now encounters a recursive -reference, it will hit the cache and does not try to describe the type anew. - -This behaviour is encapsulated in the 'RecursiveTypeDescription' enum, which -represents a kind of continuation, storing all state needed to continue -traversal at the type members after the type has been registered with the cache. -(This implementation approach might be a tad over-engineered and may change in -the future) - - -## Source Locations and Line Information - -In addition to data type descriptions the debugging information must also allow -to map machine code locations back to source code locations in order to be useful. -This functionality is also handled in this module. The following functions allow -to control source mappings: - -+ set_source_location() -+ clear_source_location() -+ start_emitting_source_locations() - -`set_source_location()` allows to set the current source location. All IR -instructions created after a call to this function will be linked to the given -source location, until another location is specified with -`set_source_location()` or the source location is cleared with -`clear_source_location()`. In the later case, subsequent IR instruction will not -be linked to any source location. As you can see, this is a stateful API -(mimicking the one in LLVM), so be careful with source locations set by previous -calls. It's probably best to not rely on any specific state being present at a -given point in code. - -One topic that deserves some extra attention is *function prologues*. At the -beginning of a function's machine code there are typically a few instructions -for loading argument values into allocas and checking if there's enough stack -space for the function to execute. This *prologue* is not visible in the source -code and LLVM puts a special PROLOGUE END marker into the line table at the -first non-prologue instruction of the function. In order to find out where the -prologue ends, LLVM looks for the first instruction in the function body that is -linked to a source location. So, when generating prologue instructions we have -to make sure that we don't emit source location information until the 'real' -function body begins. 
For this reason, source location emission is disabled by -default for any new function being translated and is only activated after a call -to the third function from the list above, `start_emitting_source_locations()`. -This function should be called right before regularly starting to translate the -top-level block of the given function. - -There is one exception to the above rule: `llvm.dbg.declare` instruction must be -linked to the source location of the variable being declared. For function -parameters these `llvm.dbg.declare` instructions typically occur in the middle -of the prologue, however, they are ignored by LLVM's prologue detection. The -`create_argument_metadata()` and related functions take care of linking the -`llvm.dbg.declare` instructions to the correct source locations even while -source location emission is still disabled, so there is no need to do anything -special with source location handling here. - -## Unique Type Identification - -In order for link-time optimization to work properly, LLVM needs a unique type -identifier that tells it across compilation units which types are the same as -others. This type identifier is created by TypeMap::get_unique_type_id_of_type() -using the following algorithm: - -(1) Primitive types have their name as ID -(2) Structs, enums and traits have a multipart identifier - - (1) The first part is the SVH (strict version hash) of the crate they were - originally defined in - - (2) The second part is the ast::NodeId of the definition in their original - crate - - (3) The final part is a concatenation of the type IDs of their concrete type - arguments if they are generic types. - -(3) Tuple-, pointer and function types are structurally identified, which means - that they are equivalent if their component types are equivalent (i.e. (int, - int) is the same regardless in which crate it is used). - -This algorithm also provides a stable ID for types that are defined in one crate -but instantiated from metadata within another crate. We just have to take care -to always map crate and node IDs back to the original crate context. - -As a side-effect these unique type IDs also help to solve a problem arising from -lifetime parameters. Since lifetime parameters are completely omitted in -debuginfo, more than one `Ty` instance may map to the same debuginfo type -metadata, that is, some struct `Struct<'a>` may have N instantiations with -different concrete substitutions for `'a`, and thus there will be N `Ty` -instances for the type `Struct<'a>` even though it is not generic otherwise. -Unfortunately this means that we cannot use `ty::type_id()` as cheap identifier -for type metadata---we have done this in the past, but it led to unnecessary -metadata duplication in the best case and LLVM assertions in the worst. However, -the unique type ID as described above *can* be used as identifier. Since it is -comparatively expensive to construct, though, `ty::type_id()` is still used -additionally as an optimization for cases where the exact same type has been -seen before (which is most of the time). */ +//! # Debug Info Module +//! +//! This module serves the purpose of generating debug symbols. We use LLVM's +//! [source level debugging](http://llvm.org/docs/SourceLevelDebugging.html) +//! features for generating the debug information. The general principle is this: +//! +//! Given the right metadata in the LLVM IR, the LLVM code generator is able to +//! create DWARF debug symbols for the given code. The +//! 
[metadata](http://llvm.org/docs/LangRef.html#metadata-type) is structured much +//! like DWARF *debugging information entries* (DIE), representing type information +//! such as datatype layout, function signatures, block layout, variable location +//! and scope information, etc. It is the purpose of this module to generate correct +//! metadata and insert it into the LLVM IR. +//! +//! As the exact format of metadata trees may change between different LLVM +//! versions, we now use LLVM +//! [DIBuilder](http://llvm.org/docs/doxygen/html/classllvm_1_1DIBuilder.html) to +//! create metadata where possible. This will hopefully ease the adaptation of this +//! module to future LLVM versions. +//! +//! The public API of the module is a set of functions that will insert the correct +//! metadata into the LLVM IR when called with the right parameters. The module is +//! thus driven from an outside client with functions like +//! `debuginfo::create_local_var_metadata(bcx: block, local: &ast::local)`. +//! +//! Internally the module will try to reuse already created metadata by utilizing a +//! cache. The way to get a shared metadata node when needed is thus to just call +//! the corresponding function in this module: +//! +//! let file_metadata = file_metadata(crate_context, path); +//! +//! The function will take care of probing the cache for an existing node for that +//! exact file path. +//! +//! All private state used by the module is stored within either the +//! CrateDebugContext struct (owned by the CrateContext) or the FunctionDebugContext +//! (owned by the FunctionContext). +//! +//! This file consists of three conceptual sections: +//! 1. The public interface of the module +//! 2. Module-internal metadata creation functions +//! 3. Minor utility functions +//! +//! +//! ## Recursive Types +//! +//! Some kinds of types, such as structs and enums, can be recursive. That means that +//! the type definition of some type X refers to some other type which in turn +//! (transitively) refers to X. This introduces cycles into the type referral graph. +//! A naive algorithm doing an on-demand, depth-first traversal of this graph when +//! describing types can get trapped in an endless loop when it reaches such a +//! cycle. +//! +//! For example, the following simple type for a singly-linked list... +//! +//! ``` +//! struct List { +//! value: int, +//! tail: Option<Box<List>>, +//! } +//! ``` +//! +//! will generate the following callstack with a naive DFS algorithm: +//! +//! ``` +//! describe(t = List) +//! describe(t = int) +//! describe(t = Option<Box<List>>) +//! describe(t = Box<List>) +//! describe(t = List) // at the beginning again... +//! ... +//! ``` +//! +//! To break cycles like these, we use "forward declarations". That is, when the +//! algorithm encounters a possibly recursive type (any struct or enum), it +//! immediately creates a type description node and inserts it into the cache +//! *before* describing the members of the type. This type description is just a +//! stub (as type members are not described and added to it yet) but it allows the +//! algorithm to already refer to the type. After the stub is inserted into the +//! cache, the algorithm continues as before. If it now encounters a recursive +//! reference, it will hit the cache and does not try to describe the type anew. +//!
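+//! A minimal sketch of the pattern (the names here are illustrative, not the
+//! actual API of this module; assume `cache` maps a type to its metadata node):
+//!
+//! ```ignore
+//! fn describe(cache: &mut Cache, t: Type) -> Metadata {
+//!     match cache.find(&t) {
+//!         Some(md) => return md,     // also catches the stubs inserted below
+//!         None => {}
+//!     }
+//!     cache.insert(t, make_stub(t)); // register the stub *before* recursing
+//!     for member in members_of(t).iter() {
+//!         describe(cache, *member);  // a recursive reference hits the stub
+//!     }
+//!     fill_in_members(cache, t)      // complete the stub afterwards
+//! }
+//! ```
+//!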
+//! This behaviour is encapsulated in the 'RecursiveTypeDescription' enum, which +//! represents a kind of continuation, storing all state needed to continue +//! traversal at the type members after the type has been registered with the cache. +//! (This implementation approach might be a tad over-engineered and may change in +//! the future.) +//! +//! +//! ## Source Locations and Line Information +//! +//! In addition to data type descriptions, the debugging information must also make +//! it possible to map machine code locations back to source code locations in order +//! to be useful. This functionality is also handled in this module. The following +//! functions are used to control source mappings: +//! +//! + set_source_location() +//! + clear_source_location() +//! + start_emitting_source_locations() +//! +//! `set_source_location()` sets the current source location. All IR +//! instructions created after a call to this function will be linked to the given +//! source location, until another location is specified with +//! `set_source_location()` or the source location is cleared with +//! `clear_source_location()`. In the latter case, subsequent IR instructions will not +//! be linked to any source location. As you can see, this is a stateful API +//! (mimicking the one in LLVM), so be careful with source locations set by previous +//! calls. It's probably best not to rely on any specific state being present at a +//! given point in code. +//! +//! One topic that deserves some extra attention is *function prologues*. At the +//! beginning of a function's machine code there are typically a few instructions +//! for loading argument values into allocas and checking if there's enough stack +//! space for the function to execute. This *prologue* is not visible in the source +//! code and LLVM puts a special PROLOGUE END marker into the line table at the +//! first non-prologue instruction of the function. In order to find out where the +//! prologue ends, LLVM looks for the first instruction in the function body that is +//! linked to a source location. So, when generating prologue instructions we have +//! to make sure that we don't emit source location information until the 'real' +//! function body begins. For this reason, source location emission is disabled by +//! default for any new function being translated and is only activated after a call +//! to the third function from the list above, `start_emitting_source_locations()`. +//! This function should be called right before regularly starting to translate the +//! top-level block of the given function. +//! +//! There is one exception to the above rule: the `llvm.dbg.declare` instruction must be +//! linked to the source location of the variable being declared. For function +//! parameters these `llvm.dbg.declare` instructions typically occur in the middle +//! of the prologue; however, they are ignored by LLVM's prologue detection. The +//! `create_argument_metadata()` and related functions take care of linking the +//! `llvm.dbg.declare` instructions to the correct source locations even while +//! source location emission is still disabled, so there is no need to do anything +//! special with source location handling here. +//! +//! ## Unique Type Identification +//! +//! In order for link-time optimization to work properly, LLVM needs a unique type +//! identifier that tells it across compilation units which types are the same as +//! others. This type identifier is created by TypeMap::get_unique_type_id_of_type() +//! using the following algorithm: +//! +//! (1) Primitive types have their name as ID +//! (2) Structs, enums and traits have a multipart identifier +//! +//!
(1) The first part is the SVH (strict version hash) of the crate they were +//! originally defined in +//! +//! (2) The second part is the ast::NodeId of the definition in their original +//! crate +//! +//! (3) The final part is a concatenation of the type IDs of their concrete type +//! arguments if they are generic types. +//! +//! (3) Tuple-, pointer and function types are structurally identified, which means +//! that they are equivalent if their component types are equivalent (i.e. (int, +//! int) is the same regardless in which crate it is used). +//! +//! This algorithm also provides a stable ID for types that are defined in one crate +//! but instantiated from metadata within another crate. We just have to take care +//! to always map crate and node IDs back to the original crate context. +//! +//! As a side-effect these unique type IDs also help to solve a problem arising from +//! lifetime parameters. Since lifetime parameters are completely omitted in +//! debuginfo, more than one `Ty` instance may map to the same debuginfo type +//! metadata, that is, some struct `Struct<'a>` may have N instantiations with +//! different concrete substitutions for `'a`, and thus there will be N `Ty` +//! instances for the type `Struct<'a>` even though it is not generic otherwise. +//! Unfortunately this means that we cannot use `ty::type_id()` as cheap identifier +//! for type metadata---we have done this in the past, but it led to unnecessary +//! metadata duplication in the best case and LLVM assertions in the worst. However, +//! the unique type ID as described above *can* be used as identifier. Since it is +//! comparatively expensive to construct, though, `ty::type_id()` is still used +//! additionally as an optimization for cases where the exact same type has been +//! seen before (which is most of the time). use self::FunctionDebugContextRepr::*; use self::VariableAccess::*; use self::VariableKind::*; diff --git a/src/librustc_trans/trans/doc.rs b/src/librustc_trans/trans/doc.rs index a5281e582f13651e9771e2f9b7a0d7d4f847184e..c3ab8986372ad14b8d35106f599baa95cfd664d3 100644 --- a/src/librustc_trans/trans/doc.rs +++ b/src/librustc_trans/trans/doc.rs @@ -8,230 +8,226 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -# Documentation for the trans module - -This module contains high-level summaries of how the various modules -in trans work. It is a work in progress. For detailed comments, -naturally, you can refer to the individual modules themselves. - -## The Expr module - -The expr module handles translation of expressions. The most general -translation routine is `trans()`, which will translate an expression -into a datum. `trans_into()` is also available, which will translate -an expression and write the result directly into memory, sometimes -avoiding the need for a temporary stack slot. Finally, -`trans_to_lvalue()` is available if you'd like to ensure that the -result has cleanup scheduled. - -Internally, each of these functions dispatches to various other -expression functions depending on the kind of expression. We divide -up expressions into: - -- **Datum expressions:** Those that most naturally yield values. - Examples would be `22`, `box x`, or `a + b` (when not overloaded). -- **DPS expressions:** Those that most naturally write into a location - in memory. Examples would be `foo()` or `Point { x: 3, y: 4 }`. -- **Statement expressions:** That that do not generate a meaningful - result. Examples would be `while { ... 
}` or `return 44`. - -## The Datum module - -A `Datum` encapsulates the result of evaluating a Rust expression. It -contains a `ValueRef` indicating the result, a `Ty` describing -the Rust type, but also a *kind*. The kind indicates whether the datum -has cleanup scheduled (lvalue) or not (rvalue) and -- in the case of -rvalues -- whether or not the value is "by ref" or "by value". - -The datum API is designed to try and help you avoid memory errors like -forgetting to arrange cleanup or duplicating a value. The type of the -datum incorporates the kind, and thus reflects whether it has cleanup -scheduled: - -- `Datum` -- by ref, cleanup scheduled -- `Datum` -- by value or by ref, no cleanup scheduled -- `Datum` -- either `Datum` or `Datum` - -Rvalue and expr datums are noncopyable, and most of the methods on -datums consume the datum itself (with some notable exceptions). This -reflects the fact that datums may represent affine values which ought -to be consumed exactly once, and if you were to try to (for example) -store an affine value multiple times, you would be duplicating it, -which would certainly be a bug. - -Some of the datum methods, however, are designed to work only on -copyable values such as ints or pointers. Those methods may borrow the -datum (`&self`) rather than consume it, but they always include -assertions on the type of the value represented to check that this -makes sense. An example is `shallow_copy()`, which duplicates -a datum value. - -Translating an expression always yields a `Datum` result, but -the methods `to_[lr]value_datum()` can be used to coerce a -`Datum` into a `Datum` or `Datum` as -needed. Coercing to an lvalue is fairly common, and generally occurs -whenever it is necessary to inspect a value and pull out its -subcomponents (for example, a match, or indexing expression). Coercing -to an rvalue is more unusual; it occurs when moving values from place -to place, such as in an assignment expression or parameter passing. - -### Lvalues in detail - -An lvalue datum is one for which cleanup has been scheduled. Lvalue -datums are always located in memory, and thus the `ValueRef` for an -LLVM value is always a pointer to the actual Rust value. This means -that if the Datum has a Rust type of `int`, then the LLVM type of the -`ValueRef` will be `int*` (pointer to int). - -Because lvalues already have cleanups scheduled, the memory must be -zeroed to prevent the cleanup from taking place (presuming that the -Rust type needs drop in the first place, otherwise it doesn't -matter). The Datum code automatically performs this zeroing when the -value is stored to a new location, for example. - -Lvalues usually result from evaluating lvalue expressions. For -example, evaluating a local variable `x` yields an lvalue, as does a -reference to a field like `x.f` or an index `x[i]`. - -Lvalue datums can also arise by *converting* an rvalue into an lvalue. -This is done with the `to_lvalue_datum` method defined on -`Datum`. Basically this method just schedules cleanup if the -datum is an rvalue, possibly storing the value into a stack slot first -if needed. Converting rvalues into lvalues occurs in constructs like -`&foo()` or `match foo() { ref x => ... }`, where the user is -implicitly requesting a temporary. - -Somewhat surprisingly, not all lvalue expressions yield lvalue datums -when trans'd. Ultimately the reason for this is to micro-optimize -the resulting LLVM. For example, consider the following code: - - fn foo() -> Box { ... 
} - let x = *foo(); - -The expression `*foo()` is an lvalue, but if you invoke `expr::trans`, -it will return an rvalue datum. See `deref_once` in expr.rs for -more details. - -### Rvalues in detail - -Rvalues datums are values with no cleanup scheduled. One must be -careful with rvalue datums to ensure that cleanup is properly -arranged, usually by converting to an lvalue datum or by invoking the -`add_clean` method. - -### Scratch datums - -Sometimes you need some temporary scratch space. The functions -`[lr]value_scratch_datum()` can be used to get temporary stack -space. As their name suggests, they yield lvalues and rvalues -respectively. That is, the slot from `lvalue_scratch_datum` will have -cleanup arranged, and the slot from `rvalue_scratch_datum` does not. - -## The Cleanup module - -The cleanup module tracks what values need to be cleaned up as scopes -are exited, either via panic or just normal control flow. The basic -idea is that the function context maintains a stack of cleanup scopes -that are pushed/popped as we traverse the AST tree. There is typically -at least one cleanup scope per AST node; some AST nodes may introduce -additional temporary scopes. - -Cleanup items can be scheduled into any of the scopes on the stack. -Typically, when a scope is popped, we will also generate the code for -each of its cleanups at that time. This corresponds to a normal exit -from a block (for example, an expression completing evaluation -successfully without panic). However, it is also possible to pop a -block *without* executing its cleanups; this is typically used to -guard intermediate values that must be cleaned up on panic, but not -if everything goes right. See the section on custom scopes below for -more details. - -Cleanup scopes come in three kinds: -- **AST scopes:** each AST node in a function body has a corresponding - AST scope. We push the AST scope when we start generate code for an AST - node and pop it once the AST node has been fully generated. -- **Loop scopes:** loops have an additional cleanup scope. Cleanups are - never scheduled into loop scopes; instead, they are used to record the - basic blocks that we should branch to when a `continue` or `break` statement - is encountered. -- **Custom scopes:** custom scopes are typically used to ensure cleanup - of intermediate values. - -### When to schedule cleanup - -Although the cleanup system is intended to *feel* fairly declarative, -it's still important to time calls to `schedule_clean()` correctly. -Basically, you should not schedule cleanup for memory until it has -been initialized, because if an unwind should occur before the memory -is fully initialized, then the cleanup will run and try to free or -drop uninitialized memory. If the initialization itself produces -byproducts that need to be freed, then you should use temporary custom -scopes to ensure that those byproducts will get freed on unwind. For -example, an expression like `box foo()` will first allocate a box in the -heap and then call `foo()` -- if `foo()` should panic, this box needs -to be *shallowly* freed. - -### Long-distance jumps - -In addition to popping a scope, which corresponds to normal control -flow exiting the scope, we may also *jump out* of a scope into some -earlier scope on the stack. This can occur in response to a `return`, -`break`, or `continue` statement, but also in response to panic. In -any of these cases, we will generate a series of cleanup blocks for -each of the scopes that is exited. So, if the stack contains scopes A -... 
Z, and we break out of a loop whose corresponding cleanup scope is -X, we would generate cleanup blocks for the cleanups in X, Y, and Z. -After cleanup is done we would branch to the exit point for scope X. -But if panic should occur, we would generate cleanups for all the -scopes from A to Z and then resume the unwind process afterwards. - -To avoid generating tons of code, we cache the cleanup blocks that we -create for breaks, returns, unwinds, and other jumps. Whenever a new -cleanup is scheduled, though, we must clear these cached blocks. A -possible improvement would be to keep the cached blocks but simply -generate a new block which performs the additional cleanup and then -branches to the existing cached blocks. - -### AST and loop cleanup scopes - -AST cleanup scopes are pushed when we begin and end processing an AST -node. They are used to house cleanups related to rvalue temporary that -get referenced (e.g., due to an expression like `&Foo()`). Whenever an -AST scope is popped, we always trans all the cleanups, adding the cleanup -code after the postdominator of the AST node. - -AST nodes that represent breakable loops also push a loop scope; the -loop scope never has any actual cleanups, it's just used to point to -the basic blocks where control should flow after a "continue" or -"break" statement. Popping a loop scope never generates code. - -### Custom cleanup scopes - -Custom cleanup scopes are used for a variety of purposes. The most -common though is to handle temporary byproducts, where cleanup only -needs to occur on panic. The general strategy is to push a custom -cleanup scope, schedule *shallow* cleanups into the custom scope, and -then pop the custom scope (without transing the cleanups) when -execution succeeds normally. This way the cleanups are only trans'd on -unwind, and only up until the point where execution succeeded, at -which time the complete value should be stored in an lvalue or some -other place where normal cleanup applies. - -To spell it out, here is an example. Imagine an expression `box expr`. -We would basically: - -1. Push a custom cleanup scope C. -2. Allocate the box. -3. Schedule a shallow free in the scope C. -4. Trans `expr` into the box. -5. Pop the scope C. -6. Return the box as an rvalue. - -This way, if a panic occurs while transing `expr`, the custom -cleanup scope C is pushed and hence the box will be freed. The trans -code for `expr` itself is responsible for freeing any other byproducts -that may be in play. - -*/ +//! # Documentation for the trans module +//! +//! This module contains high-level summaries of how the various modules +//! in trans work. It is a work in progress. For detailed comments, +//! naturally, you can refer to the individual modules themselves. +//! +//! ## The Expr module +//! +//! The expr module handles translation of expressions. The most general +//! translation routine is `trans()`, which will translate an expression +//! into a datum. `trans_into()` is also available, which will translate +//! an expression and write the result directly into memory, sometimes +//! avoiding the need for a temporary stack slot. Finally, +//! `trans_to_lvalue()` is available if you'd like to ensure that the +//! result has cleanup scheduled. +//! +//! Internally, each of these functions dispatches to various other +//! expression functions depending on the kind of expression. We divide +//! up expressions into: +//! +//! - **Datum expressions:** Those that most naturally yield values. +//! 
Examples would be `22`, `box x`, or `a + b` (when not overloaded). +//! - **DPS expressions:** Those that most naturally write into a location +//! in memory. Examples would be `foo()` or `Point { x: 3, y: 4 }`. +//! - **Statement expressions:** Those that do not generate a meaningful +//! result. Examples would be `while { ... }` or `return 44`. +//! +//! ## The Datum module +//! +//! A `Datum` encapsulates the result of evaluating a Rust expression. It +//! contains a `ValueRef` indicating the result, a `Ty` describing +//! the Rust type, but also a *kind*. The kind indicates whether the datum +//! has cleanup scheduled (lvalue) or not (rvalue) and -- in the case of +//! rvalues -- whether or not the value is "by ref" or "by value". +//! +//! The datum API is designed to help you avoid memory errors like +//! forgetting to arrange cleanup or duplicating a value. The type of the +//! datum incorporates the kind, and thus reflects whether it has cleanup +//! scheduled: +//! +//! - `Datum<Lvalue>` -- by ref, cleanup scheduled +//! - `Datum<Rvalue>` -- by value or by ref, no cleanup scheduled +//! - `Datum<Expr>` -- either `Datum<Lvalue>` or `Datum<Rvalue>` +//! +//! Rvalue and expr datums are noncopyable, and most of the methods on +//! datums consume the datum itself (with some notable exceptions). This +//! reflects the fact that datums may represent affine values which ought +//! to be consumed exactly once, and if you were to try to (for example) +//! store an affine value multiple times, you would be duplicating it, +//! which would certainly be a bug. +//! +//! Some of the datum methods, however, are designed to work only on +//! copyable values such as ints or pointers. Those methods may borrow the +//! datum (`&self`) rather than consume it, but they always include +//! assertions on the type of the value represented to check that this +//! makes sense. An example is `shallow_copy()`, which duplicates +//! a datum value. +//! +//! Translating an expression always yields a `Datum<Expr>` result, but +//! the methods `to_[lr]value_datum()` can be used to coerce a +//! `Datum<Expr>` into a `Datum<Lvalue>` or `Datum<Rvalue>` as +//! needed. Coercing to an lvalue is fairly common, and generally occurs +//! whenever it is necessary to inspect a value and pull out its +//! subcomponents (for example, a match, or indexing expression). Coercing +//! to an rvalue is more unusual; it occurs when moving values from place +//! to place, such as in an assignment expression or parameter passing. +//!
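+//! This coercion is essentially what `trans_to_lvalue()` in expr.rs does; a
+//! condensed sketch, with `bcx` and `expr` supplied by the caller:
+//!
+//! ```ignore
+//! // Translate the expression, then force the result to be an lvalue,
+//! // scheduling cleanup in the temporary scope of `expr.id` if needed.
+//! let datum = unpack_datum!(bcx, trans(bcx, expr));
+//! datum.to_lvalue_datum(bcx, "example", expr.id)
+//! ```
+//!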
+//! ### Lvalues in detail +//! +//! An lvalue datum is one for which cleanup has been scheduled. Lvalue +//! datums are always located in memory, and thus the `ValueRef` for an +//! LLVM value is always a pointer to the actual Rust value. This means +//! that if the Datum has a Rust type of `int`, then the LLVM type of the +//! `ValueRef` will be `int*` (pointer to int). +//! +//! Because lvalues already have cleanups scheduled, the memory must be +//! zeroed to prevent the cleanup from taking place (presuming that the +//! Rust type needs drop in the first place, otherwise it doesn't +//! matter). The Datum code automatically performs this zeroing when the +//! value is stored to a new location, for example. +//! +//! Lvalues usually result from evaluating lvalue expressions. For +//! example, evaluating a local variable `x` yields an lvalue, as does a +//! reference to a field like `x.f` or an index `x[i]`. +//! +//! Lvalue datums can also arise by *converting* an rvalue into an lvalue. +//! This is done with the `to_lvalue_datum` method defined on +//! `Datum<Expr>`. Basically this method just schedules cleanup if the +//! datum is an rvalue, possibly storing the value into a stack slot first +//! if needed. Converting rvalues into lvalues occurs in constructs like +//! `&foo()` or `match foo() { ref x => ... }`, where the user is +//! implicitly requesting a temporary. +//! +//! Somewhat surprisingly, not all lvalue expressions yield lvalue datums +//! when trans'd. Ultimately the reason for this is to micro-optimize +//! the resulting LLVM. For example, consider the following code: +//! +//! fn foo() -> Box<int> { ... } +//! let x = *foo(); +//! +//! The expression `*foo()` is an lvalue, but if you invoke `expr::trans`, +//! it will return an rvalue datum. See `deref_once` in expr.rs for +//! more details. +//! +//! ### Rvalues in detail +//! +//! Rvalue datums are values with no cleanup scheduled. One must be +//! careful with rvalue datums to ensure that cleanup is properly +//! arranged, usually by converting to an lvalue datum or by invoking the +//! `add_clean` method. +//! +//! ### Scratch datums +//! +//! Sometimes you need some temporary scratch space. The functions +//! `[lr]value_scratch_datum()` can be used to get temporary stack +//! space. As their name suggests, they yield lvalues and rvalues +//! respectively. That is, the slot from `lvalue_scratch_datum` will have +//! cleanup arranged, and the slot from `rvalue_scratch_datum` does not. +//! +//! ## The Cleanup module +//! +//! The cleanup module tracks what values need to be cleaned up as scopes +//! are exited, either via panic or just normal control flow. The basic +//! idea is that the function context maintains a stack of cleanup scopes +//! that are pushed/popped as we traverse the AST tree. There is typically +//! at least one cleanup scope per AST node; some AST nodes may introduce +//! additional temporary scopes. +//! +//! Cleanup items can be scheduled into any of the scopes on the stack. +//! Typically, when a scope is popped, we will also generate the code for +//! each of its cleanups at that time. This corresponds to a normal exit +//! from a block (for example, an expression completing evaluation +//! successfully without panic). However, it is also possible to pop a +//! block *without* executing its cleanups; this is typically used to +//! guard intermediate values that must be cleaned up on panic, but not +//! if everything goes right. See the section on custom scopes below for +//! more details. +//! +//! Cleanup scopes come in three kinds: +//! - **AST scopes:** each AST node in a function body has a corresponding +//! AST scope. We push the AST scope when we start generating code for an AST +//! node and pop it once the AST node has been fully generated. +//! - **Loop scopes:** loops have an additional cleanup scope. Cleanups are +//! never scheduled into loop scopes; instead, they are used to record the +//! basic blocks that we should branch to when a `continue` or `break` statement +//! is encountered. +//! - **Custom scopes:** custom scopes are typically used to ensure cleanup +//! of intermediate values. +//!
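+//! In code, the bracketing of a single AST node looks roughly like the sketch
+//! below (`pop_and_trans_ast_cleanup_scope` is the pop half used by
+//! `trans_into` in expr.rs; the matching push name is assumed here):
+//!
+//! ```ignore
+//! fcx.push_ast_cleanup_scope(expr.id);
+//! // ... translate the node, scheduling cleanups into the scope ...
+//! bcx = fcx.pop_and_trans_ast_cleanup_scope(bcx, expr.id);
+//! ```
+//!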
+//! ### When to schedule cleanup +//! +//! Although the cleanup system is intended to *feel* fairly declarative, +//! it's still important to time calls to `schedule_clean()` correctly. +//! Basically, you should not schedule cleanup for memory until it has +//! been initialized, because if an unwind should occur before the memory +//! is fully initialized, then the cleanup will run and try to free or +//! drop uninitialized memory. If the initialization itself produces +//! byproducts that need to be freed, then you should use temporary custom +//! scopes to ensure that those byproducts will get freed on unwind. For +//! example, an expression like `box foo()` will first allocate a box in the +//! heap and then call `foo()` -- if `foo()` should panic, this box needs +//! to be *shallowly* freed. +//! +//! ### Long-distance jumps +//! +//! In addition to popping a scope, which corresponds to normal control +//! flow exiting the scope, we may also *jump out* of a scope into some +//! earlier scope on the stack. This can occur in response to a `return`, +//! `break`, or `continue` statement, but also in response to panic. In +//! any of these cases, we will generate a series of cleanup blocks for +//! each of the scopes that is exited. So, if the stack contains scopes A +//! ... Z, and we break out of a loop whose corresponding cleanup scope is +//! X, we would generate cleanup blocks for the cleanups in X, Y, and Z. +//! After cleanup is done we would branch to the exit point for scope X. +//! But if panic should occur, we would generate cleanups for all the +//! scopes from A to Z and then resume the unwind process afterwards. +//! +//! To avoid generating tons of code, we cache the cleanup blocks that we +//! create for breaks, returns, unwinds, and other jumps. Whenever a new +//! cleanup is scheduled, though, we must clear these cached blocks. A +//! possible improvement would be to keep the cached blocks but simply +//! generate a new block which performs the additional cleanup and then +//! branches to the existing cached blocks. +//! +//! ### AST and loop cleanup scopes +//! +//! AST cleanup scopes are pushed as we begin processing an AST node and +//! popped when we end. They are used to house cleanups related to rvalue +//! temporaries that get referenced (e.g., due to an expression like `&Foo()`). +//! Whenever an AST scope is popped, we always trans all the cleanups, adding +//! the cleanup code after the postdominator of the AST node. +//! +//! AST nodes that represent breakable loops also push a loop scope; the +//! loop scope never has any actual cleanups, it's just used to point to +//! the basic blocks where control should flow after a "continue" or +//! "break" statement. Popping a loop scope never generates code. +//! +//! ### Custom cleanup scopes +//! +//! Custom cleanup scopes are used for a variety of purposes. The most +//! common, though, is to handle temporary byproducts, where cleanup only +//! needs to occur on panic. The general strategy is to push a custom +//! cleanup scope, schedule *shallow* cleanups into the custom scope, and +//! then pop the custom scope (without transing the cleanups) when +//! execution succeeds normally. This way the cleanups are only trans'd on +//! unwind, and only up until the point where execution succeeded, at +//! which time the complete value should be stored in an lvalue or some +//! other place where normal cleanup applies. +//! +//! To spell it out, here is an example. Imagine an expression `box expr`. +//! We would basically: +//! +//! 1. Push a custom cleanup scope C. +//! 2. Allocate the box. +//! 3. Schedule a shallow free in the scope C. +//! 4. Trans `expr` into the box. +//! 5. Pop the scope C. +//! 6. Return the box as an rvalue. +//! +//! This way, if a panic occurs while transing `expr`, the custom +//! cleanup scope C is still on the stack, and hence the box will be freed. +//! The trans code for `expr` itself is responsible for freeing any other +//! byproducts that may be in play.
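+//!
+//! In code, those steps correspond roughly to the sketch below. The helper
+//! names are assumptions based on the cleanup and expr modules, except
+//! `trans_into`, `SaveIn`, and `immediate_rvalue_bcx`, which appear elsewhere
+//! in this codebase:
+//!
+//! ```ignore
+//! let scope_c = fcx.push_custom_cleanup_scope();  // 1. push scope C
+//! let llbox = alloc_box(bcx, content_ty);         // 2. allocate the box
+//! fcx.schedule_free_value(scope_c, llbox, ...);   // 3. shallow free on unwind
+//! bcx = trans_into(bcx, expr, SaveIn(llbox));     // 4. trans `expr` into it
+//! fcx.pop_custom_cleanup_scope(scope_c);          // 5. pop C, no cleanups run
+//! immediate_rvalue_bcx(bcx, llbox, box_ty)        // 6. return box as an rvalue
+//! ```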
diff --git a/src/librustc_trans/trans/expr.rs b/src/librustc_trans/trans/expr.rs index 482b318e37202605d0b657560791be663bd803fa..60809c8644d2df3531cf30774d10c8bd580559d5 100644 --- a/src/librustc_trans/trans/expr.rs +++ b/src/librustc_trans/trans/expr.rs @@ -8,28 +8,26 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * # Translation of Expressions - * - * Public entry points: - * - * - `trans_into(bcx, expr, dest) -> bcx`: evaluates an expression, - * storing the result into `dest`. This is the preferred form, if you - * can manage it. - * - * - `trans(bcx, expr) -> DatumBlock`: evaluates an expression, yielding - * `Datum<Expr>` with the result. You can then store the datum, inspect - * the value, etc. This may introduce temporaries if the datum is a - * structural type. - * - * - `trans_to_lvalue(bcx, expr, "...") -> DatumBlock`: evaluates an - * expression and ensures that the result has a cleanup associated with it, - * creating a temporary stack slot if necessary. - * - * - `trans_local_var -> Datum<Lvalue>`: looks up a local variable or upvar. - * - * See doc.rs for more comments. - */ +//! # Translation of Expressions +//! +//! Public entry points: +//! +//! - `trans_into(bcx, expr, dest) -> bcx`: evaluates an expression, +//! storing the result into `dest`. This is the preferred form, if you +//! can manage it. +//! +//! - `trans(bcx, expr) -> DatumBlock`: evaluates an expression, yielding +//! `Datum<Expr>` with the result. You can then store the datum, inspect +//! the value, etc. This may introduce temporaries if the datum is a +//! structural type. +//! +//! - `trans_to_lvalue(bcx, expr, "...") -> DatumBlock`: evaluates an +//! expression and ensures that the result has a cleanup associated with it, +//! creating a temporary stack slot if necessary. +//! +//! - `trans_local_var -> Datum<Lvalue>`: looks up a local variable or upvar. +//! +//! See doc.rs for more comments.
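+//!
+//! For example (a sketch; `bcx`, `expr`, and `lldest` come from the caller,
+//! and `SaveIn` is the `Dest` variant naming a destination):
+//!
+//! ```ignore
+//! // Preferred: write the result directly into `lldest`.
+//! bcx = trans_into(bcx, expr, SaveIn(lldest));
+//!
+//! // More general: produce a datum, then decide what to do with it.
+//! let DatumBlock { bcx, datum } = trans(bcx, expr);
+//! ```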
#![allow(non_camel_case_types)] @@ -82,15 +80,12 @@ pub fn to_string(&self, ccx: &CrateContext) -> String { } } +/// This function is equivalent to `trans(bcx, expr).store_to_dest(dest)` but it may generate +/// better optimized LLVM code. pub fn trans_into<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, expr: &ast::Expr, dest: Dest) -> Block<'blk, 'tcx> { - /*! - * This function is equivalent to `trans(bcx, expr).store_to_dest(dest)` - * but it may generate better optimized LLVM code. - */ - let mut bcx = bcx; if bcx.tcx().adjustments.borrow().contains_key(&expr.id) { @@ -124,16 +119,12 @@ pub fn trans_into<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, bcx.fcx.pop_and_trans_ast_cleanup_scope(bcx, expr.id) } +/// Translates an expression, returning a datum (and new block) encapsulating the result. When +/// possible, it is preferred to use `trans_into`, as that may avoid creating a temporary on the +/// stack. pub fn trans<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, expr: &ast::Expr) -> DatumBlock<'blk, 'tcx, Expr> { - /*! - * Translates an expression, returning a datum (and new block) - * encapsulating the result. When possible, it is preferred to - * use `trans_into`, as that may avoid creating a temporary on - * the stack. - */ - debug!("trans(expr={})", bcx.expr_to_string(expr)); let mut bcx = bcx; @@ -157,15 +148,12 @@ pub fn get_dataptr(bcx: Block, fat_ptr: ValueRef) -> ValueRef { GEPi(bcx, fat_ptr, &[0u, abi::FAT_PTR_ADDR]) } +/// Helper for `trans` that applies adjustments from `expr` to `datum`, which should be the +/// unadjusted translation of `expr`. fn apply_adjustments<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, expr: &ast::Expr, datum: Datum<'tcx, Expr>) -> DatumBlock<'blk, 'tcx, Expr> { - /*! - * Helper for trans that apply adjustments from `expr` to `datum`, - * which should be the unadjusted translation of `expr`. - */ - let mut bcx = bcx; let mut datum = datum; let adjustment = match bcx.tcx().adjustments.borrow().get(&expr.id).cloned() { @@ -480,34 +468,27 @@ fn add_env<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, } } +/// Translates an expression in "lvalue" mode -- meaning that it returns a reference to the memory +/// that the expr represents. +/// +/// If this expression is an rvalue, this implies introducing a temporary. In other words, +/// something like `x().f` is translated into roughly the equivalent of +/// +/// { tmp = x(); tmp.f } pub fn trans_to_lvalue<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, expr: &ast::Expr, name: &str) -> DatumBlock<'blk, 'tcx, Lvalue> { - /*! - * Translates an expression in "lvalue" mode -- meaning that it - * returns a reference to the memory that the expr represents. - * - * If this expression is an rvalue, this implies introducing a - * temporary. In other words, something like `x().f` is - * translated into roughly the equivalent of - * - * { tmp = x(); tmp.f } - */ - let mut bcx = bcx; let datum = unpack_datum!(bcx, trans(bcx, expr)); return datum.to_lvalue_datum(bcx, name, expr.id); } +/// A version of `trans` that ignores adjustments. You almost certainly do not want to call this +/// directly. fn trans_unadjusted<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, expr: &ast::Expr) -> DatumBlock<'blk, 'tcx, Expr> { - /*! - * A version of `trans` that ignores adjustments. You almost - * certainly do not want to call this directly. - */ - let mut bcx = bcx; debug!("trans_unadjusted(expr={})", bcx.expr_to_string(expr)); @@ -1218,14 +1199,10 @@ fn trans_def_fn_unadjusted<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, DatumBlock::new(bcx, Datum::new(llfn, fn_ty, RvalueExpr(Rvalue::new(ByValue)))) } +/// Translates a reference to a local variable or argument. This always results in an lvalue datum. pub fn trans_local_var<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, def: def::Def) -> Datum<'tcx, Lvalue> { - /*! - * Translates a reference to a local variable or argument. - * This always results in an lvalue datum. - */ - let _icx = push_ctxt("trans_local_var"); match def { @@ -1262,18 +1239,14 @@ pub fn trans_local_var<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, } } +/// Helper for enumerating the field types of structs, enums, or records. The optional node ID here +/// is the node ID of the path identifying the enum variant in use. If none, this cannot possibly +/// be an enum variant (so, if it is and `node_id_opt` is none, this function panics). pub fn with_field_tys<'tcx, R>(tcx: &ty::ctxt<'tcx>, ty: Ty<'tcx>, node_id_opt: Option<ast::NodeId>, op: |ty::Disr, (&[ty::field<'tcx>])| -> R) -> R { - /*! - * Helper for enumerating the field types of structs, enums, or records. - * The optional node ID here is the node ID of the path identifying the enum - * variant in use. If none, this cannot possibly an enum variant (so, if it - * is and `node_id_opt` is none, this function panics). - */ - match ty.sty { ty::ty_struct(did, ref substs) => { op(0, struct_fields(tcx, did, substs).as_slice()) @@ -2189,24 +2162,18 @@ fn deref_once<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, return r; + /// We microoptimize derefs of owned pointers a bit here. Basically, the idea is to make the + /// deref of an rvalue result in an rvalue.
This helps to avoid intermediate stack slots in the + /// resulting LLVM. The idea here is that, if the `Box` pointer is an rvalue, then we can + /// schedule a *shallow* free of the `Box` pointer, and then return a ByRef rvalue into the + /// pointer. Because the free is shallow, it is legit to return an rvalue, because we know that + /// the contents are not yet scheduled to be freed. The language rules ensure that the contents + /// will be used (or moved) before the free occurs. fn deref_owned_pointer<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, expr: &ast::Expr, datum: Datum<'tcx, Expr>, content_ty: Ty<'tcx>) -> DatumBlock<'blk, 'tcx, Expr> { - /*! - * We microoptimize derefs of owned pointers a bit here. - * Basically, the idea is to make the deref of an rvalue - * result in an rvalue. This helps to avoid intermediate stack - * slots in the resulting LLVM. The idea here is that, if the - * `Box` pointer is an rvalue, then we can schedule a *shallow* - * free of the `Box` pointer, and then return a ByRef rvalue - * into the pointer. Because the free is shallow, it is legit - * to return an rvalue, because we know that the contents are - * not yet scheduled to be freed. The language rules ensure that the - * contents will be used (or moved) before the free occurs. - */ - match datum.kind { RvalueExpr(Rvalue { mode: ByRef }) => { let scope = cleanup::temporary_scope(bcx.tcx(), expr.id); diff --git a/src/librustc_trans/trans/foreign.rs b/src/librustc_trans/trans/foreign.rs index 1f6aeacc860586cad25b8d3980aabebe08a67d89..6f97f6453fd91f5f9176c5c40326ae74153013b6 100644 --- a/src/librustc_trans/trans/foreign.rs +++ b/src/librustc_trans/trans/foreign.rs @@ -161,14 +161,10 @@ pub fn register_static(ccx: &CrateContext, } } +/// Registers a foreign function found in a library. Just adds a LLVM global. pub fn register_foreign_item_fn<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, abi: Abi, fty: Ty<'tcx>, name: &str) -> ValueRef { - /*! - * Registers a foreign function found in a library. - * Just adds a LLVM global. - */ - debug!("register_foreign_item_fn(abi={}, \ ty={}, \ name={})", @@ -201,6 +197,20 @@ pub fn register_foreign_item_fn<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, llfn } +/// Prepares a call to a native function. This requires adapting +/// from the Rust argument passing rules to the native rules. +/// +/// # Parameters +/// +/// - `callee_ty`: Rust type for the function we are calling +/// - `llfn`: the function pointer we are calling +/// - `llretptr`: where to store the return value of the function +/// - `llargs_rust`: a list of the argument values, prepared +/// as they would be if calling a Rust function +/// - `passed_arg_tys`: Rust type for the arguments. Normally we +/// can derive these from callee_ty but in the case of variadic +/// functions passed_arg_tys will include the Rust type of all +/// the arguments including the ones not specified in the fn's signature. pub fn trans_native_call<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, callee_ty: Ty<'tcx>, llfn: ValueRef, @@ -208,23 +218,6 @@ pub fn trans_native_call<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, llargs_rust: &[ValueRef], passed_arg_tys: Vec>) -> Block<'blk, 'tcx> { - /*! - * Prepares a call to a native function. This requires adapting - * from the Rust argument passing rules to the native rules. 
- * - * # Parameters - * - * - `callee_ty`: Rust type for the function we are calling - * - `llfn`: the function pointer we are calling - * - `llretptr`: where to store the return value of the function - * - `llargs_rust`: a list of the argument values, prepared - * as they would be if calling a Rust function - * - `passed_arg_tys`: Rust type for the arguments. Normally we - * can derive these from callee_ty but in the case of variadic - * functions passed_arg_tys will include the Rust type of all - * the arguments including the ones not specified in the fn's signature. - */ - let ccx = bcx.ccx(); let tcx = bcx.tcx(); @@ -832,17 +825,13 @@ pub fn link_name(i: &ast::ForeignItem) -> InternedString { } } +/// The ForeignSignature is the LLVM types of the arguments/return type of a function. Note that +/// these LLVM types are not quite the same as the LLVM types would be for a native Rust function +/// because foreign functions just plain ignore modes. They also don't pass aggregate values by +/// pointer like we do. fn foreign_signature<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_sig: &ty::FnSig<'tcx>, arg_tys: &[Ty<'tcx>]) -> LlvmSignature { - /*! - * The ForeignSignature is the LLVM types of the arguments/return type - * of a function. Note that these LLVM types are not quite the same - * as the LLVM types would be for a native Rust function because foreign - * functions just plain ignore modes. They also don't pass aggregate - * values by pointer like we do. - */ - let llarg_tys = arg_tys.iter().map(|&arg| arg_type_of(ccx, arg)).collect(); let (llret_ty, ret_def) = match fn_sig.output { ty::FnConverging(ret_ty) => diff --git a/src/librustc_trans/trans/meth.rs b/src/librustc_trans/trans/meth.rs index 0ff7f3ee71cc6ee4f9fefbe859ddcf78b7138775..06d916c1ea6f2d9f12df83398abfef5149d9a6a9 100644 --- a/src/librustc_trans/trans/meth.rs +++ b/src/librustc_trans/trans/meth.rs @@ -377,28 +377,21 @@ fn trans_monomorphized_callee<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, } } + /// Creates a concatenated set of substitutions which includes those from the impl and those from + /// the method. There are some subtle complications here. Statically, we have a list of type + /// parameters like `[T0, T1, T2, M1, M2, M3]` where `Tn` are type parameters that appear on the + /// receiver. For example, if the receiver is a method parameter `A` with a bound like + /// `trait<B,C,D>` then `Tn` would be `[B,C,D]`. + /// + /// The weird part is that the type `A` might now be bound to any other type, such as `foo<X>`. + /// In that case, the vector we want is: `[X, M1, M2, M3]`. Therefore, what we do now is to slice + /// off the method type parameters and append them to the type parameters from the type that the + /// receiver is mapped to. fn combine_impl_and_methods_tps<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, node: ExprOrMethodCall, rcvr_substs: subst::Substs<'tcx>) -> subst::Substs<'tcx> { - /*! - * Creates a concatenated set of substitutions which includes - * those from the impl and those from the method. This are - * some subtle complications here. Statically, we have a list - * of type parameters like `[T0, T1, T2, M1, M2, M3]` where - * `Tn` are type parameters that appear on the receiver. For - * example, if the receiver is a method parameter `A` with a - * bound like `trait<B,C,D>` then `Tn` would be `[B,C,D]`. - * - * The weird part is that the type `A` might now be bound to - * any other type, such as `foo<X>`. In that case, the vector - * we want is: `[X, M1, M2, M3]`. Therefore, what we do now is - to slice off the method type parameters and append them to - the type parameters from the type that the receiver is - mapped to. - */ - let ccx = bcx.ccx(); let node_substs = node_id_substs(bcx, node); @@ -422,21 +415,16 @@ fn combine_impl_and_methods_tps<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, } } +/// Creates a method callee where the method is coming from a trait object (e.g., Box<Trait> type). +/// In this case, we must pull the fn pointer out of the vtable that is packaged up with the +/// object. Objects are represented as a pair, so we first evaluate the self expression and then +/// extract the self data and vtable out of the pair. fn trans_trait_callee<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, method_ty: Ty<'tcx>, n_method: uint, self_expr: &ast::Expr, arg_cleanup_scope: cleanup::ScopeId) -> Callee<'blk, 'tcx> { - /*! - * Create a method callee where the method is coming from a trait - * object (e.g., Box<Trait> type). In this case, we must pull the fn - * pointer out of the vtable that is packaged up with the object. - * Objects are represented as a pair, so we first evaluate the self - * expression and then extract the self data and vtable out of the - * pair. - */ - let _icx = push_ctxt("meth::trans_trait_callee"); let mut bcx = bcx; @@ -466,16 +454,13 @@ fn trans_trait_callee<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, trans_trait_callee_from_llval(bcx, method_ty, n_method, llval) } +/// Same as `trans_trait_callee()` above, except that it is given a by-ref pointer to the object +/// pair. pub fn trans_trait_callee_from_llval<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, callee_ty: Ty<'tcx>, n_method: uint, llpair: ValueRef) -> Callee<'blk, 'tcx> { - /*! - * Same as `trans_trait_callee()` above, except that it is given - * a by-ref pointer to the object pair. - */ - let _icx = push_ctxt("meth::trans_trait_callee"); let ccx = bcx.ccx(); @@ -731,19 +716,15 @@ fn emit_vtable_methods<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, }).collect() } +/// Generates the code to convert from a pointer (`Box<T>`, `&T`, etc) into an object +/// (`Box<Trait>`, `&Trait`, etc). This means creating a pair where the first word is the vtable +/// and the second word is the pointer.
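+///
+/// Conceptually, the pair looks something like the following sketch (purely
+/// illustrative; no such struct is defined in this module):
+///
+/// ```ignore
+/// struct TraitObject {
+///     vtable: *mut (), // first word: pointer to the vtable
+///     data: *mut (),   // second word: pointer to the wrapped value
+/// }
+/// ```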
pub fn trans_trait_cast<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, datum: Datum<'tcx, Expr>, id: ast::NodeId, trait_ref: Rc<ty::TraitRef<'tcx>>, dest: expr::Dest) -> Block<'blk, 'tcx> { - /*! - * Generates the code to convert from a pointer (`Box<T>`, `&T`, etc) - * into an object (`Box<Trait>`, `&Trait`, etc). This means creating a - * pair where the first word is the vtable and the second word is - * the pointer. - */ - let mut bcx = bcx; let _icx = push_ctxt("meth::trans_trait_cast"); diff --git a/src/librustc_trans/trans/tvec.rs b/src/librustc_trans/trans/tvec.rs index 8e986defb6a361aaa1a630fbdafa0aa4bc28fdcf..9aeb4cdb8a30a7a8fd19b3a6ba5516a8fbf0f5a8 100644 --- a/src/librustc_trans/trans/tvec.rs +++ b/src/librustc_trans/trans/tvec.rs @@ -134,17 +134,13 @@ pub fn trans_fixed_vstore<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, }; } +/// &[...] allocates memory on the stack and writes the values into it, returning the vector (the +/// caller must make the reference). "..." is similar except that the memory can be statically +/// allocated and we return a reference (strings are always by-ref). pub fn trans_slice_vec<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, slice_expr: &ast::Expr, content_expr: &ast::Expr) -> DatumBlock<'blk, 'tcx, Expr> { - /*! - * &[...] allocates memory on the stack and writes the values into it, - * returning the vector (the caller must make the reference). "..."
is - * similar except that the memory can be statically allocated and we return - * a reference (strings are always by-ref). - */ - let fcx = bcx.fcx; let ccx = fcx.ccx; let mut bcx = bcx; @@ -208,17 +204,13 @@ pub fn trans_slice_vec<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, immediate_rvalue_bcx(bcx, llfixed, vec_ty).to_expr_datumblock() } +/// Literal strings translate to slices into static memory. This is different from +/// trans_slice_vstore() above because it doesn't need to copy the content anywhere. pub fn trans_lit_str<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, lit_expr: &ast::Expr, str_lit: InternedString, dest: Dest) -> Block<'blk, 'tcx> { - /*! - * Literal strings translate to slices into static memory. This is - * different from trans_slice_vstore() above because it doesn't need to copy - * the content anywhere. - */ - debug!("trans_lit_str(lit_expr={}, dest={})", bcx.expr_to_string(lit_expr), dest.to_string(bcx.ccx())); @@ -382,15 +374,12 @@ pub fn elements_required(bcx: Block, content_expr: &ast::Expr) -> uint { } } +/// Converts a fixed-length vector into the slice pair. The vector should be stored in `llval` +/// which should be by ref. pub fn get_fixed_base_and_len(bcx: Block, llval: ValueRef, vec_length: uint) -> (ValueRef, ValueRef) { - /*! - * Converts a fixed-length vector into the slice pair. - * The vector should be stored in `llval` which should be by ref. - */ - let ccx = bcx.ccx(); let base = expr::get_dataptr(bcx, llval); @@ -406,18 +395,13 @@ fn get_slice_base_and_len(bcx: Block, (base, len) } +/// Converts a vector into the slice pair. The vector should be stored in `llval` which should be +/// by-reference. If you have a datum, you would probably prefer to call +/// `Datum::get_base_and_len()` which will handle any conversions for you. pub fn get_base_and_len(bcx: Block, llval: ValueRef, vec_ty: Ty) -> (ValueRef, ValueRef) { - /*! - * Converts a vector into the slice pair. The vector should be - * stored in `llval` which should be by-reference. If you have a - * datum, you would probably prefer to call - * `Datum::get_base_and_len()` which will handle any conversions - * for you. - */ - let ccx = bcx.ccx(); match vec_ty.sty { diff --git a/src/librustrt/c_str.rs b/src/librustrt/c_str.rs index d62b1485db33adc4c015b88616833fe421d4671a..261bd1b9f8cb88367ad0b1d30180322dbccf044e 100644 --- a/src/librustrt/c_str.rs +++ b/src/librustrt/c_str.rs @@ -8,68 +8,64 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -C-string manipulation and management - -This modules provides the basic methods for creating and manipulating -null-terminated strings for use with FFI calls (back to C). Most C APIs require -that the string being passed to them is null-terminated, and by default rust's -string types are *not* null terminated. - -The other problem with translating Rust strings to C strings is that Rust -strings can validly contain a null-byte in the middle of the string (0 is a -valid Unicode codepoint). This means that not all Rust strings can actually be -translated to C strings. - -# Creation of a C string - -A C string is managed through the `CString` type defined in this module. It -"owns" the internal buffer of characters and will automatically deallocate the -buffer when the string is dropped. The `ToCStr` trait is implemented for `&str` -and `&[u8]`, but the conversions can fail due to some of the limitations -explained above. 
-
-This also means that currently whenever a C string is created, an allocation
-must be performed to place the data elsewhere (the lifetime of the C string is
-not tied to the lifetime of the original string/data buffer). If C strings are
-heavily used in applications, then caching may be advisable to prevent
-unnecessary amounts of allocations.
-
-Be carefull to remember that the memory is managed by C allocator API and not
-by Rust allocator API.
-That means that the CString pointers should be freed with C allocator API
-if you intend to do that on your own, as the behaviour if you free them with
-Rust's allocator API is not well defined
-
-An example of creating and using a C string would be:
-
-```rust
-extern crate libc;
-
-extern {
-    fn puts(s: *const libc::c_char);
-}
-
-fn main() {
-    let my_string = "Hello, world!";
-
-    // Allocate the C string with an explicit local that owns the string. The
-    // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
-    let my_c_string = my_string.to_c_str();
-    unsafe {
-        puts(my_c_string.as_ptr());
-    }
-
-    // Don't save/return the pointer to the C string, the `c_buffer` will be
-    // deallocated when this block returns!
-    my_string.with_c_str(|c_buffer| {
-        unsafe { puts(c_buffer); }
-    });
-}
-```
-
-*/
+//! C-string manipulation and management
+//!
+//! This module provides the basic methods for creating and manipulating
+//! null-terminated strings for use with FFI calls (back to C). Most C APIs require
+//! that the string being passed to them is null-terminated, and by default Rust's
+//! string types are *not* null terminated.
+//!
+//! The other problem with translating Rust strings to C strings is that Rust
+//! strings can validly contain a null-byte in the middle of the string (0 is a
+//! valid Unicode codepoint). This means that not all Rust strings can actually be
+//! translated to C strings.
+//!
+//! # Creation of a C string
+//!
+//! A C string is managed through the `CString` type defined in this module. It
+//! "owns" the internal buffer of characters and will automatically deallocate the
+//! buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
+//! and `&[u8]`, but the conversions can fail due to some of the limitations
+//! explained above.
+//!
+//! This also means that currently whenever a C string is created, an allocation
+//! must be performed to place the data elsewhere (the lifetime of the C string is
+//! not tied to the lifetime of the original string/data buffer). If C strings are
+//! heavily used in applications, then caching may be advisable to prevent
+//! an unnecessary number of allocations.
+//!
+//! Be careful to remember that the memory is managed by the C allocator API and
+//! not by the Rust allocator API. That means that the `CString` pointers should
+//! be freed with the C allocator API if you intend to free them yourself, as the
+//! behaviour is not well defined if you free them with Rust's allocator API.
+//!
+//! An example of creating and using a C string would be:
+//!
+//! ```rust
+//! extern crate libc;
+//!
+//! extern {
+//!     fn puts(s: *const libc::c_char);
+//! }
+//!
+//! fn main() {
+//!     let my_string = "Hello, world!";
+//!
+//!     // Allocate the C string with an explicit local that owns the string. The
+//!     // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
+//!     let my_c_string = my_string.to_c_str();
+//!     unsafe {
+//!         puts(my_c_string.as_ptr());
+//!     }
+//!
+//!     // Don't save/return the pointer to the C string, the `c_buffer` will be
+//!     // deallocated when this block returns!
+//!     my_string.with_c_str(|c_buffer| {
+//!         unsafe { puts(c_buffer); }
+//!     });
+//! }
+//! ```

 use collections::string::String;
 use collections::hash;
diff --git a/src/libserialize/json.rs b/src/libserialize/json.rs
index 4a2ca58fc9269f6c8c5c12bb202be811c8bd07d0..3c03dc35f3b291e59a21ec763c3c346654abcdec 100644
--- a/src/libserialize/json.rs
+++ b/src/libserialize/json.rs
@@ -14,185 +14,182 @@
 #![forbid(non_camel_case_types)]
 #![allow(missing_docs)]

-/*!
-JSON parsing and serialization
-
-# What is JSON?
-
-JSON (JavaScript Object Notation) is a way to write data in Javascript.
-Like XML, it allows to encode structured data in a text format that can be easily read by humans.
-Its simple syntax and native compatibility with JavaScript have made it a widely used format.
-
-Data types that can be encoded are JavaScript types (see the `Json` enum for more details):
-
-* `Boolean`: equivalent to rust's `bool`
-* `Number`: equivalent to rust's `f64`
-* `String`: equivalent to rust's `String`
-* `Array`: equivalent to rust's `Vec`, but also allowing objects of different types in the same
-array
-* `Object`: equivalent to rust's `Treemap`
-* `Null`
-
-An object is a series of string keys mapping to values, in `"key": value` format.
-Arrays are enclosed in square brackets ([ ... ]) and objects in curly brackets ({ ... }).
-A simple JSON document encoding a person, his/her age, address and phone numbers could look like:
-
-```ignore
-{
-    "FirstName": "John",
-    "LastName": "Doe",
-    "Age": 43,
-    "Address": {
-        "Street": "Downing Street 10",
-        "City": "London",
-        "Country": "Great Britain"
-    },
-    "PhoneNumbers": [
-        "+44 1234567",
-        "+44 2345678"
-    ]
-}
-```
-
-# Rust Type-based Encoding and Decoding
-
-Rust provides a mechanism for low boilerplate encoding & decoding of values to and from JSON via
-the serialization API.
-To be able to encode a piece of data, it must implement the `serialize::Encodable` trait.
-To be able to decode a piece of data, it must implement the `serialize::Decodable` trait.
-The Rust compiler provides an annotation to automatically generate the code for these traits:
-`#[deriving(Decodable, Encodable)]`
-
-The JSON API provides an enum `json::Json` and a trait `ToJson` to encode objects.
-The `ToJson` trait provides a `to_json` method to convert an object into a `json::Json` value.
-A `json::Json` value can be encoded as a string or buffer using the functions described above.
-You can also use the `json::Encoder` object, which implements the `Encoder` trait.
-
-When using `ToJson` the `Encodable` trait implementation is not mandatory.
-
-# Examples of use
-
-## Using Autoserialization
-
-Create a struct called `TestStruct` and serialize and deserialize it to and from JSON using the
-serialization API, using the derived serialization code.
-
-```rust
-extern crate serialize;
-use serialize::json;
-
-// Automatically generate `Decodable` and `Encodable` trait implementations
-#[deriving(Decodable, Encodable)]
-pub struct TestStruct {
-    data_int: u8,
-    data_str: String,
-    data_vector: Vec,
-}
-
-fn main() {
-    let object = TestStruct {
-        data_int: 1,
-        data_str: "toto".to_string(),
-        data_vector: vec![2,3,4,5],
-    };
-
-    // Serialize using `json::encode`
-    let encoded = json::encode(&object);
-
-    // Deserialize using `json::decode`
-    let decoded: TestStruct = json::decode(encoded.as_slice()).unwrap();
-}
-```
-
-## Using the `ToJson` trait
-
-The examples above use the `ToJson` trait to generate the JSON string, which is required
-for custom mappings.
-
-### Simple example of `ToJson` usage
-
-```rust
-extern crate serialize;
-use serialize::json::ToJson;
-use serialize::json;
-
-// A custom data structure
-struct ComplexNum {
-    a: f64,
-    b: f64,
-}
-
-// JSON value representation
-impl ToJson for ComplexNum {
-    fn to_json(&self) -> json::Json {
-        json::String(format!("{}+{}i", self.a, self.b))
-    }
-}
-
-// Only generate `Encodable` trait implementation
-#[deriving(Encodable)]
-pub struct ComplexNumRecord {
-    uid: u8,
-    dsc: String,
-    val: json::Json,
-}
-
-fn main() {
-    let num = ComplexNum { a: 0.0001, b: 12.539 };
-    let data: String = json::encode(&ComplexNumRecord{
-        uid: 1,
-        dsc: "test".to_string(),
-        val: num.to_json(),
-    });
-    println!("data: {}", data);
-    // data: {"uid":1,"dsc":"test","val":"0.0001+12.539j"};
-}
-```
-
-### Verbose example of `ToJson` usage
-
-```rust
-extern crate serialize;
-use std::collections::TreeMap;
-use serialize::json::ToJson;
-use serialize::json;
-
-// Only generate `Decodable` trait implementation
-#[deriving(Decodable)]
-pub struct TestStruct {
-    data_int: u8,
-    data_str: String,
-    data_vector: Vec,
-}
-
-// Specify encoding method manually
-impl ToJson for TestStruct {
-    fn to_json(&self) -> json::Json {
-        let mut d = TreeMap::new();
-        // All standard types implement `to_json()`, so use it
-        d.insert("data_int".to_string(), self.data_int.to_json());
-        d.insert("data_str".to_string(), self.data_str.to_json());
-        d.insert("data_vector".to_string(), self.data_vector.to_json());
-        json::Object(d)
-    }
-}
-
-fn main() {
-    // Serialize using `ToJson`
-    let input_data = TestStruct {
-        data_int: 1,
-        data_str: "toto".to_string(),
-        data_vector: vec![2,3,4,5],
-    };
-    let json_obj: json::Json = input_data.to_json();
-    let json_str: String = json_obj.to_string();
-
-    // Deserialize like before
-    let decoded: TestStruct = json::decode(json_str.as_slice()).unwrap();
-}
-```
-
-*/
+//! JSON parsing and serialization
+//!
+//! # What is JSON?
+//!
+//! JSON (JavaScript Object Notation) is a way to write data in Javascript.
+//! Like XML, it allows one to encode structured data in a text format that can be easily read by humans.
+//! Its simple syntax and native compatibility with JavaScript have made it a widely used format.
+//!
+//! Data types that can be encoded are JavaScript types (see the `Json` enum for more details):
+//!
+//! * `Boolean`: equivalent to Rust's `bool`
+//! * `Number`: equivalent to Rust's `f64`
+//! * `String`: equivalent to Rust's `String`
+//! * `Array`: equivalent to Rust's `Vec<T>`, but also allowing objects of different types in the
+//! same array
+//! * `Object`: equivalent to Rust's `TreeMap`
+//! * `Null`
+//!
+//! An object is a series of string keys mapping to values, in `"key": value` format.
+//! Arrays are enclosed in square brackets ([ ... ]) and objects in curly brackets ({ ... }).
+//! A simple JSON document encoding a person, his/her age, address and phone numbers could look like:
+//!
+//! ```ignore
+//! {
+//!     "FirstName": "John",
+//!     "LastName": "Doe",
+//!     "Age": 43,
+//!     "Address": {
+//!         "Street": "Downing Street 10",
+//!         "City": "London",
+//!         "Country": "Great Britain"
+//!     },
+//!     "PhoneNumbers": [
+//!         "+44 1234567",
+//!         "+44 2345678"
+//!     ]
+//! }
+//! ```
+//!
+//! # Rust Type-based Encoding and Decoding
+//!
+//! Rust provides a mechanism for low boilerplate encoding & decoding of values to and from JSON via
+//! the serialization API.
+//! To be able to encode a piece of data, it must implement the `serialize::Encodable` trait.
+//! To be able to decode a piece of data, it must implement the `serialize::Decodable` trait.
+//! The Rust compiler provides an annotation to automatically generate the code for these traits:
+//! `#[deriving(Decodable, Encodable)]`
+//!
+//! The JSON API provides an enum `json::Json` and a trait `ToJson` to encode objects.
+//! The `ToJson` trait provides a `to_json` method to convert an object into a `json::Json` value.
+//! A `json::Json` value can be encoded as a string or buffer using the functions described above.
+//! You can also use the `json::Encoder` object, which implements the `Encoder` trait.
+//!
+//! When using `ToJson`, the `Encodable` trait implementation is not mandatory.
+//!
+//! # Examples of use
+//!
+//! ## Using Autoserialization
+//!
+//! Create a struct called `TestStruct` and serialize and deserialize it to and from JSON using the
+//! serialization API, using the derived serialization code.
+//!
+//! ```rust
+//! extern crate serialize;
+//! use serialize::json;
+//!
+//! // Automatically generate `Decodable` and `Encodable` trait implementations
+//! #[deriving(Decodable, Encodable)]
+//! pub struct TestStruct {
+//!     data_int: u8,
+//!     data_str: String,
+//!     data_vector: Vec<u8>,
+//! }
+//!
+//! fn main() {
+//!     let object = TestStruct {
+//!         data_int: 1,
+//!         data_str: "toto".to_string(),
+//!         data_vector: vec![2,3,4,5],
+//!     };
+//!
+//!     // Serialize using `json::encode`
+//!     let encoded = json::encode(&object);
+//!
+//!     // Deserialize using `json::decode`
+//!     let decoded: TestStruct = json::decode(encoded.as_slice()).unwrap();
+//! }
+//! ```
+//!
+//! ## Using the `ToJson` trait
+//!
+//! The following examples use the `ToJson` trait to generate the JSON string, which is required
+//! for custom mappings.
+//!
+//! ### Simple example of `ToJson` usage
+//!
+//! ```rust
+//! extern crate serialize;
+//! use serialize::json::ToJson;
+//! use serialize::json;
+//!
+//! // A custom data structure
+//! struct ComplexNum {
+//!     a: f64,
+//!     b: f64,
+//! }
+//!
+//! // JSON value representation
+//! impl ToJson for ComplexNum {
+//!     fn to_json(&self) -> json::Json {
+//!         json::String(format!("{}+{}i", self.a, self.b))
+//!     }
+//! }
+//!
+//! // Only generate `Encodable` trait implementation
+//! #[deriving(Encodable)]
+//! pub struct ComplexNumRecord {
+//!     uid: u8,
+//!     dsc: String,
+//!     val: json::Json,
+//! }
+//!
+//! fn main() {
+//!     let num = ComplexNum { a: 0.0001, b: 12.539 };
+//!     let data: String = json::encode(&ComplexNumRecord{
+//!         uid: 1,
+//!         dsc: "test".to_string(),
+//!         val: num.to_json(),
+//!     });
+//!     println!("data: {}", data);
+//!     // data: {"uid":1,"dsc":"test","val":"0.0001+12.539i"};
+//! }
+//! ```
+//!
+//! ### Verbose example of `ToJson` usage
+//!
+//! ```rust
+//! extern crate serialize;
+//! use std::collections::TreeMap;
+//! use serialize::json::ToJson;
+//! use serialize::json;
+//!
+//! // Only generate `Decodable` trait implementation
+//! #[deriving(Decodable)]
+//! pub struct TestStruct {
+//!     data_int: u8,
+//!     data_str: String,
+//!     data_vector: Vec<u8>,
+//! }
+//!
+//! // Specify encoding method manually
+//! impl ToJson for TestStruct {
+//!     fn to_json(&self) -> json::Json {
+//!         let mut d = TreeMap::new();
+//!         // All standard types implement `to_json()`, so use it
+//!         d.insert("data_int".to_string(), self.data_int.to_json());
+//!         d.insert("data_str".to_string(), self.data_str.to_json());
+//!         d.insert("data_vector".to_string(), self.data_vector.to_json());
+//!         json::Object(d)
+//!     }
+//! }
+//!
+//! fn main() {
+//!     // Serialize using `ToJson`
+//!     let input_data = TestStruct {
+//!         data_int: 1,
+//!         data_str: "toto".to_string(),
+//!         data_vector: vec![2,3,4,5],
+//!     };
+//!     let json_obj: json::Json = input_data.to_json();
+//!     let json_str: String = json_obj.to_string();
+//!
+//!     // Deserialize like before
+//!     let decoded: TestStruct = json::decode(json_str.as_slice()).unwrap();
+//! }
+//! ```

 pub use self::JsonEvent::*;
 pub use self::StackElement::*;
diff --git a/src/libstd/dynamic_lib.rs b/src/libstd/dynamic_lib.rs
index 4b868f6a95b7c73cb6fc566ed2e90f816ae41299..3cd0c0eeaf290b949180286e43a689b5f7dbd14e 100644
--- a/src/libstd/dynamic_lib.rs
+++ b/src/libstd/dynamic_lib.rs
@@ -8,13 +8,9 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
-
-Dynamic library facilities.
-
-A simple wrapper over the platform's dynamic library facilities
-
-*/
+//! Dynamic library facilities.
+//!
+//! A simple wrapper over the platform's dynamic library facilities.

 #![experimental]
 #![allow(missing_docs)]
diff --git a/src/libstd/fmt.rs b/src/libstd/fmt.rs
index c817e6a806bb79dfc74ca283503219ee7a1fbe11..62ca3483c21b37e69c9c73cd8b6c357e53debf75 100644
--- a/src/libstd/fmt.rs
+++ b/src/libstd/fmt.rs
@@ -10,392 +10,388 @@
 //
 // ignore-lexer-test FIXME #15679

-/*!
-
-Utilities for formatting and printing strings
-
-This module contains the runtime support for the `format!` syntax extension.
-This macro is implemented in the compiler to emit calls to this module in order
-to format arguments at runtime into strings and streams.
-
-The functions contained in this module should not normally be used in everyday
-use cases of `format!`. The assumptions made by these functions are unsafe for
-all inputs, and the compiler performs a large amount of validation on the
-arguments to `format!` in order to ensure safety at runtime. While it is
-possible to call these functions directly, it is not recommended to do so in the
-general case.
-
-## Usage
-
-The `format!` macro is intended to be familiar to those coming from C's
-printf/fprintf functions or Python's `str.format` function. In its current
-revision, the `format!` macro returns a `String` type which is the result of
-the formatting. In the future it will also be able to pass in a stream to
-format arguments directly while performing minimal allocations.
-
-Some examples of the `format!` extension are:
-
-```rust
-# fn main() {
-format!("Hello");                 // => "Hello"
-format!("Hello, {}!", "world");   // => "Hello, world!"
-format!("The number is {}", 1i);  // => "The number is 1"
-format!("{}", (3i, 4i));          // => "(3, 4)"
-format!("{value}", value=4i);     // => "4"
-format!("{} {}", 1i, 2u);         // => "1 2"
-# }
-```
-
-From these, you can see that the first argument is a format string. 
It is -required by the compiler for this to be a string literal; it cannot be a -variable passed in (in order to perform validity checking). The compiler will -then parse the format string and determine if the list of arguments provided is -suitable to pass to this format string. - -### Positional parameters - -Each formatting argument is allowed to specify which value argument it's -referencing, and if omitted it is assumed to be "the next argument". For -example, the format string `{} {} {}` would take three parameters, and they -would be formatted in the same order as they're given. The format string -`{2} {1} {0}`, however, would format arguments in reverse order. - -Things can get a little tricky once you start intermingling the two types of -positional specifiers. The "next argument" specifier can be thought of as an -iterator over the argument. Each time a "next argument" specifier is seen, the -iterator advances. This leads to behavior like this: - -```rust -format!("{1} {} {0} {}", 1i, 2i); // => "2 1 1 2" -``` - -The internal iterator over the argument has not been advanced by the time the -first `{}` is seen, so it prints the first argument. Then upon reaching the -second `{}`, the iterator has advanced forward to the second argument. -Essentially, parameters which explicitly name their argument do not affect -parameters which do not name an argument in terms of positional specifiers. - -A format string is required to use all of its arguments, otherwise it is a -compile-time error. You may refer to the same argument more than once in the -format string, although it must always be referred to with the same type. - -### Named parameters - -Rust itself does not have a Python-like equivalent of named parameters to a -function, but the `format!` macro is a syntax extension which allows it to -leverage named parameters. Named parameters are listed at the end of the -argument list and have the syntax: - -```text -identifier '=' expression -``` - -For example, the following `format!` expressions all use named argument: - -```rust -# fn main() { -format!("{argument}", argument = "test"); // => "test" -format!("{name} {}", 1i, name = 2i); // => "2 1" -format!("{a} {c} {b}", a="a", b=(), c=3i); // => "a 3 ()" -# } -``` - -It is illegal to put positional parameters (those without names) after arguments -which have names. Like with positional parameters, it is illegal to provide -named parameters that are unused by the format string. - -### Argument types - -Each argument's type is dictated by the format string. It is a requirement that every argument is -only ever referred to by one type. For example, this is an invalid format string: - -```text -{0:x} {0:o} -``` - -This is invalid because the first argument is both referred to as a hexidecimal as well as an -octal. - -There are various parameters which do require a particular type, however. Namely if the syntax -`{:.*}` is used, then the number of characters to print precedes the actual object being formatted, -and the number of characters must have the type `uint`. Although a `uint` can be printed with -`{}`, it is illegal to reference an argument as such. For example this is another invalid -format string: - -```text -{:.*} {0} -``` - -### Formatting traits - -When requesting that an argument be formatted with a particular type, you are -actually requesting that an argument ascribes to a particular trait. This allows -multiple actual types to be formatted via `{:x}` (like `i8` as well as `int`). 
-The current mapping of types to traits is: - -* *nothing* ⇒ `Show` -* `o` ⇒ `Octal` -* `x` ⇒ `LowerHex` -* `X` ⇒ `UpperHex` -* `p` ⇒ `Pointer` -* `b` ⇒ `Binary` -* `e` ⇒ `LowerExp` -* `E` ⇒ `UpperExp` - -What this means is that any type of argument which implements the -`std::fmt::Binary` trait can then be formatted with `{:b}`. Implementations are -provided for these traits for a number of primitive types by the standard -library as well. If no format is specified (as in `{}` or `{:6}`), then the -format trait used is the `Show` trait. This is one of the more commonly -implemented traits when formatting a custom type. - -When implementing a format trait for your own type, you will have to implement a -method of the signature: - -```rust -# use std::fmt; -# struct Foo; // our custom type -# impl fmt::Show for Foo { -fn fmt(&self, f: &mut std::fmt::Formatter) -> fmt::Result { -# write!(f, "testing, testing") -# } } -``` - -Your type will be passed as `self` by-reference, and then the function should -emit output into the `f.buf` stream. It is up to each format trait -implementation to correctly adhere to the requested formatting parameters. The -values of these parameters will be listed in the fields of the `Formatter` -struct. In order to help with this, the `Formatter` struct also provides some -helper methods. - -Additionally, the return value of this function is `fmt::Result` which is a -typedef to `Result<(), IoError>` (also known as `IoResult<()>`). Formatting -implementations should ensure that they return errors from `write!` correctly -(propagating errors upward). - -An example of implementing the formatting traits would look -like: - -```rust -use std::fmt; -use std::f64; -use std::num::Float; - -struct Vector2D { - x: int, - y: int, -} - -impl fmt::Show for Vector2D { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // The `f` value implements the `Writer` trait, which is what the - // write! macro is expecting. Note that this formatting ignores the - // various flags provided to format strings. - write!(f, "({}, {})", self.x, self.y) - } -} - -// Different traits allow different forms of output of a type. The meaning of -// this format is to print the magnitude of a vector. -impl fmt::Binary for Vector2D { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let magnitude = (self.x * self.x + self.y * self.y) as f64; - let magnitude = magnitude.sqrt(); - - // Respect the formatting flags by using the helper method - // `pad_integral` on the Formatter object. See the method documentation - // for details, and the function `pad` can be used to pad strings. - let decimals = f.precision().unwrap_or(3); - let string = f64::to_str_exact(magnitude, decimals); - f.pad_integral(true, "", string.as_bytes()) - } -} - -fn main() { - let myvector = Vector2D { x: 3, y: 4 }; - - println!("{}", myvector); // => "(3, 4)" - println!("{:10.3b}", myvector); // => " 5.000" -} -``` - -### Related macros - -There are a number of related macros in the `format!` family. The ones that are -currently implemented are: - -```ignore -format! // described above -write! // first argument is a &mut io::Writer, the destination -writeln! // same as write but appends a newline -print! // the format string is printed to the standard output -println! // same as print but appends a newline -format_args! // described below. -``` - -#### `write!` - -This and `writeln` are two macros which are used to emit the format string to a -specified stream. 
This is used to prevent intermediate allocations of format -strings and instead directly write the output. Under the hood, this function is -actually invoking the `write` function defined in this module. Example usage is: - -```rust -# #![allow(unused_must_use)] -use std::io; - -let mut w = Vec::new(); -write!(&mut w as &mut io::Writer, "Hello {}!", "world"); -``` - -#### `print!` - -This and `println` emit their output to stdout. Similarly to the `write!` macro, -the goal of these macros is to avoid intermediate allocations when printing -output. Example usage is: - -```rust -print!("Hello {}!", "world"); -println!("I have a newline {}", "character at the end"); -``` - -#### `format_args!` -This is a curious macro which is used to safely pass around -an opaque object describing the format string. This object -does not require any heap allocations to create, and it only -references information on the stack. Under the hood, all of -the related macros are implemented in terms of this. First -off, some example usage is: - -``` -use std::fmt; -use std::io; - -# #[allow(unused_must_use)] -# fn main() { -format_args!(fmt::format, "this returns {}", "String"); - -let some_writer: &mut io::Writer = &mut io::stdout(); -format_args!(|args| { write!(some_writer, "{}", args) }, "print with a {}", "closure"); - -fn my_fmt_fn(args: &fmt::Arguments) { - write!(&mut io::stdout(), "{}", args); -} -format_args!(my_fmt_fn, "or a {} too", "function"); -# } -``` - -The first argument of the `format_args!` macro is a function (or closure) which -takes one argument of type `&fmt::Arguments`. This structure can then be -passed to the `write` and `format` functions inside this module in order to -process the format string. The goal of this macro is to even further prevent -intermediate allocations when dealing formatting strings. - -For example, a logging library could use the standard formatting syntax, but it -would internally pass around this structure until it has been determined where -output should go to. - -It is unsafe to programmatically create an instance of `fmt::Arguments` because -the operations performed when executing a format string require the compile-time -checks provided by the compiler. The `format_args!` macro is the only method of -safely creating these structures, but they can be unsafely created with the -constructor provided. - -## Syntax - -The syntax for the formatting language used is drawn from other languages, so it -should not be too alien. Arguments are formatted with python-like syntax, -meaning that arguments are surrounded by `{}` instead of the C-like `%`. The -actual grammar for the formatting syntax is: - -```text -format_string := [ format ] * -format := '{' [ argument ] [ ':' format_spec ] '}' -argument := integer | identifier - -format_spec := [[fill]align][sign]['#'][0][width]['.' precision][type] -fill := character -align := '<' | '^' | '>' -sign := '+' | '-' -width := count -precision := count | '*' -type := identifier | '' -count := parameter | integer -parameter := integer '$' -``` - -## Formatting Parameters - -Each argument being formatted can be transformed by a number of formatting -parameters (corresponding to `format_spec` in the syntax above). These -parameters affect the string representation of what's being formatted. This -syntax draws heavily from Python's, so it may seem a bit familiar. - -### Fill/Alignment - -The fill character is provided normally in conjunction with the `width` -parameter. 
This indicates that if the value being formatted is smaller than -`width` some extra characters will be printed around it. The extra characters -are specified by `fill`, and the alignment can be one of two options: - -* `<` - the argument is left-aligned in `width` columns -* `^` - the argument is center-aligned in `width` columns -* `>` - the argument is right-aligned in `width` columns - -### Sign/#/0 - -These can all be interpreted as flags for a particular formatter. - -* '+' - This is intended for numeric types and indicates that the sign should - always be printed. Positive signs are never printed by default, and the - negative sign is only printed by default for the `Signed` trait. This - flag indicates that the correct sign (+ or -) should always be printed. -* '-' - Currently not used -* '#' - This flag is indicates that the "alternate" form of printing should be - used. By default, this only applies to the integer formatting traits and - performs like: - * `x` - precedes the argument with a "0x" - * `X` - precedes the argument with a "0x" - * `t` - precedes the argument with a "0b" - * `o` - precedes the argument with a "0o" -* '0' - This is used to indicate for integer formats that the padding should - both be done with a `0` character as well as be sign-aware. A format - like `{:08d}` would yield `00000001` for the integer `1`, while the same - format would yield `-0000001` for the integer `-1`. Notice that the - negative version has one fewer zero than the positive version. - -### Width - -This is a parameter for the "minimum width" that the format should take up. If -the value's string does not fill up this many characters, then the padding -specified by fill/alignment will be used to take up the required space. - -The default fill/alignment for non-numerics is a space and left-aligned. The -defaults for numeric formatters is also a space but with right-alignment. If the -'0' flag is specified for numerics, then the implicit fill character is '0'. - -The value for the width can also be provided as a `uint` in the list of -parameters by using the `2$` syntax indicating that the second argument is a -`uint` specifying the width. - -### Precision - -For non-numeric types, this can be considered a "maximum width". If the -resulting string is longer than this width, then it is truncated down to this -many characters and only those are emitted. - -For integral types, this has no meaning currently. - -For floating-point types, this indicates how many digits after the decimal point -should be printed. - -## Escaping - -The literal characters `{` and `}` may be included in a string by preceding them -with the same character. For example, the `{` character is escaped with `{{` and -the `}` character is escaped with `}}`. - -*/ +//! Utilities for formatting and printing strings +//! +//! This module contains the runtime support for the `format!` syntax extension. +//! This macro is implemented in the compiler to emit calls to this module in order +//! to format arguments at runtime into strings and streams. +//! +//! The functions contained in this module should not normally be used in everyday +//! use cases of `format!`. The assumptions made by these functions are unsafe for +//! all inputs, and the compiler performs a large amount of validation on the +//! arguments to `format!` in order to ensure safety at runtime. While it is +//! possible to call these functions directly, it is not recommended to do so in the +//! general case. +//! +//! ## Usage +//! +//! 
The `format!` macro is intended to be familiar to those coming from C's
+//! printf/fprintf functions or Python's `str.format` function. In its current
+//! revision, the `format!` macro returns a `String` type which is the result of
+//! the formatting. In the future it will also be able to pass in a stream to
+//! format arguments directly while performing minimal allocations.
+//!
+//! Some examples of the `format!` extension are:
+//!
+//! ```rust
+//! # fn main() {
+//! format!("Hello");                 // => "Hello"
+//! format!("Hello, {}!", "world");   // => "Hello, world!"
+//! format!("The number is {}", 1i);  // => "The number is 1"
+//! format!("{}", (3i, 4i));          // => "(3, 4)"
+//! format!("{value}", value=4i);     // => "4"
+//! format!("{} {}", 1i, 2u);         // => "1 2"
+//! # }
+//! ```
+//!
+//! From these, you can see that the first argument is a format string. It is
+//! required by the compiler for this to be a string literal; it cannot be a
+//! variable passed in (in order to perform validity checking). The compiler will
+//! then parse the format string and determine if the list of arguments provided is
+//! suitable to pass to this format string.
+//!
+//! ### Positional parameters
+//!
+//! Each formatting argument is allowed to specify which value argument it's
+//! referencing, and if omitted it is assumed to be "the next argument". For
+//! example, the format string `{} {} {}` would take three parameters, and they
+//! would be formatted in the same order as they're given. The format string
+//! `{2} {1} {0}`, however, would format arguments in reverse order.
+//!
+//! Things can get a little tricky once you start intermingling the two types of
+//! positional specifiers. The "next argument" specifier can be thought of as an
+//! iterator over the argument. Each time a "next argument" specifier is seen, the
+//! iterator advances. This leads to behavior like this:
+//!
+//! ```rust
+//! format!("{1} {} {0} {}", 1i, 2i); // => "2 1 1 2"
+//! ```
+//!
+//! The internal iterator over the argument has not been advanced by the time the
+//! first `{}` is seen, so it prints the first argument. Then upon reaching the
+//! second `{}`, the iterator has advanced forward to the second argument.
+//! Essentially, parameters which explicitly name their argument do not affect
+//! parameters which do not name an argument in terms of positional specifiers.
+//!
+//! A format string is required to use all of its arguments, otherwise it is a
+//! compile-time error. You may refer to the same argument more than once in the
+//! format string, although it must always be referred to with the same type.
+//!
+//! ### Named parameters
+//!
+//! Rust itself does not have a Python-like equivalent of named parameters to a
+//! function, but the `format!` macro is a syntax extension which allows it to
+//! leverage named parameters. Named parameters are listed at the end of the
+//! argument list and have the syntax:
+//!
+//! ```text
+//! identifier '=' expression
+//! ```
+//!
+//! For example, the following `format!` expressions all use named arguments:
+//!
+//! ```rust
+//! # fn main() {
+//! format!("{argument}", argument = "test");  // => "test"
+//! format!("{name} {}", 1i, name = 2i);       // => "2 1"
+//! format!("{a} {c} {b}", a="a", b=(), c=3i); // => "a 3 ()"
+//! # }
+//! ```
+//!
+//! It is illegal to put positional parameters (those without names) after arguments
+//! which have names. Like with positional parameters, it is illegal to provide
+//! named parameters that are unused by the format string.
+//!
+//! ### Argument types
+//!
+//! Each argument's type is dictated by the format string. It is a requirement that every argument is
+//! only ever referred to by one type. For example, this is an invalid format string:
+//!
+//! ```text
+//! {0:x} {0:o}
+//! ```
+//!
+//! This is invalid because the first argument is both referred to as a hexadecimal as well as an
+//! octal.
+//!
+//! There are various parameters which do require a particular type, however. Namely if the syntax
+//! `{:.*}` is used, then the number of characters to print precedes the actual object being formatted,
+//! and the number of characters must have the type `uint`. Although a `uint` can be printed with
+//! `{}`, it is illegal to reference an argument as such. For example this is another invalid
+//! format string:
+//!
+//! ```text
+//! {:.*} {0}
+//! ```
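+//!
+//! By contrast, a small illustrative sketch of valid `{:.*}` usage, where the
+//! `uint` precision argument immediately precedes the value being formatted:
+//!
+//! ```rust
+//! format!("{:.*}", 2u, 1.5f64); // => "1.50"
+//! ```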
+//!
+//! ### Formatting traits
+//!
+//! When requesting that an argument be formatted with a particular type, you are
+//! actually requesting that an argument conform to a particular trait. This allows
+//! multiple actual types to be formatted via `{:x}` (like `i8` as well as `int`).
+//! The current mapping of types to traits is:
+//!
+//! * *nothing* ⇒ `Show`
+//! * `o` ⇒ `Octal`
+//! * `x` ⇒ `LowerHex`
+//! * `X` ⇒ `UpperHex`
+//! * `p` ⇒ `Pointer`
+//! * `b` ⇒ `Binary`
+//! * `e` ⇒ `LowerExp`
+//! * `E` ⇒ `UpperExp`
+//!
+//! What this means is that any type of argument which implements the
+//! `std::fmt::Binary` trait can then be formatted with `{:b}`. Implementations are
+//! provided for these traits for a number of primitive types by the standard
+//! library as well. If no format is specified (as in `{}` or `{:6}`), then the
+//! format trait used is the `Show` trait. This is one of the more commonly
+//! implemented traits when formatting a custom type.
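+//!
+//! For instance, a few of these trait selectors applied to unsigned integers
+//! (an illustrative sketch):
+//!
+//! ```rust
+//! format!("{:x}", 255u); // => "ff"
+//! format!("{:X}", 255u); // => "FF"
+//! format!("{:o}", 8u);   // => "10"
+//! format!("{:b}", 5u);   // => "101"
+//! ```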
+//!
+//! When implementing a format trait for your own type, you will have to implement a
+//! method of the signature:
+//!
+//! ```rust
+//! # use std::fmt;
+//! # struct Foo; // our custom type
+//! # impl fmt::Show for Foo {
+//! fn fmt(&self, f: &mut std::fmt::Formatter) -> fmt::Result {
+//! # write!(f, "testing, testing")
+//! # } }
+//! ```
+//!
+//! Your type will be passed as `self` by-reference, and then the function should
+//! emit output into the `f.buf` stream. It is up to each format trait
+//! implementation to correctly adhere to the requested formatting parameters. The
+//! values of these parameters will be listed in the fields of the `Formatter`
+//! struct. In order to help with this, the `Formatter` struct also provides some
+//! helper methods.
+//!
+//! Additionally, the return value of this function is `fmt::Result` which is a
+//! typedef to `Result<(), IoError>` (also known as `IoResult<()>`). Formatting
+//! implementations should ensure that they return errors from `write!` correctly
+//! (propagating errors upward).
+//!
+//! An example of implementing the formatting traits would look
+//! like:
+//!
+//! ```rust
+//! use std::fmt;
+//! use std::f64;
+//! use std::num::Float;
+//!
+//! struct Vector2D {
+//!     x: int,
+//!     y: int,
+//! }
+//!
+//! impl fmt::Show for Vector2D {
+//!     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+//!         // The `f` value implements the `Writer` trait, which is what the
+//!         // write! macro is expecting. Note that this formatting ignores the
+//!         // various flags provided to format strings.
+//!         write!(f, "({}, {})", self.x, self.y)
+//!     }
+//! }
+//!
+//! // Different traits allow different forms of output of a type. The meaning of
+//! // this format is to print the magnitude of a vector.
+//! impl fmt::Binary for Vector2D {
+//!     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+//!         let magnitude = (self.x * self.x + self.y * self.y) as f64;
+//!         let magnitude = magnitude.sqrt();
+//!
+//!         // Respect the formatting flags by using the helper method
+//!         // `pad_integral` on the Formatter object. See the method documentation
+//!         // for details, and the function `pad` can be used to pad strings.
+//!         let decimals = f.precision().unwrap_or(3);
+//!         let string = f64::to_str_exact(magnitude, decimals);
+//!         f.pad_integral(true, "", string.as_bytes())
+//!     }
+//! }
+//!
+//! fn main() {
+//!     let myvector = Vector2D { x: 3, y: 4 };
+//!
+//!     println!("{}", myvector);       // => "(3, 4)"
+//!     println!("{:10.3b}", myvector); // => "     5.000"
+//! }
+//! ```
+//!
+//! ### Related macros
+//!
+//! There are a number of related macros in the `format!` family. The ones that are
+//! currently implemented are:
+//!
+//! ```ignore
+//! format!      // described above
+//! write!       // first argument is a &mut io::Writer, the destination
+//! writeln!     // same as write but appends a newline
+//! print!       // the format string is printed to the standard output
+//! println!     // same as print but appends a newline
+//! format_args! // described below.
+//! ```
+//!
+//! #### `write!`
+//!
+//! This and `writeln` are two macros which are used to emit the format string to a
+//! specified stream. This is used to prevent intermediate allocations of format
+//! strings and instead directly write the output. Under the hood, this function is
+//! actually invoking the `write` function defined in this module. Example usage is:
+//!
+//! ```rust
+//! # #![allow(unused_must_use)]
+//! use std::io;
+//!
+//! let mut w = Vec::new();
+//! write!(&mut w as &mut io::Writer, "Hello {}!", "world");
+//! ```
+//!
+//! #### `print!`
+//!
+//! This and `println` emit their output to stdout. Similarly to the `write!` macro,
+//! the goal of these macros is to avoid intermediate allocations when printing
+//! output. Example usage is:
+//!
+//! ```rust
+//! print!("Hello {}!", "world");
+//! println!("I have a newline {}", "character at the end");
+//! ```
+//!
+//! #### `format_args!`
+//!
+//! This is a curious macro which is used to safely pass around
+//! an opaque object describing the format string. This object
+//! does not require any heap allocations to create, and it only
+//! references information on the stack. Under the hood, all of
+//! the related macros are implemented in terms of this. First
+//! off, some example usage is:
+//!
+//! ```
+//! use std::fmt;
+//! use std::io;
+//!
+//! # #[allow(unused_must_use)]
+//! # fn main() {
+//! format_args!(fmt::format, "this returns {}", "String");
+//!
+//! let some_writer: &mut io::Writer = &mut io::stdout();
+//! format_args!(|args| { write!(some_writer, "{}", args) }, "print with a {}", "closure");
+//!
+//! fn my_fmt_fn(args: &fmt::Arguments) {
+//!     write!(&mut io::stdout(), "{}", args);
+//! }
+//! format_args!(my_fmt_fn, "or a {} too", "function");
+//! # }
+//! ```
+//!
+//! The first argument of the `format_args!` macro is a function (or closure) which
+//! takes one argument of type `&fmt::Arguments`. This structure can then be
+//! passed to the `write` and `format` functions inside this module in order to
+//! process the format string. The goal of this macro is to even further prevent
+//! intermediate allocations when dealing with formatting strings.
+//!
+//! For example, a logging library could use the standard formatting syntax, but it
+//! would internally pass around this structure until it has been determined where
+//! output should go to.
+//!
+//! It is unsafe to programmatically create an instance of `fmt::Arguments` because
+//! the operations performed when executing a format string require the compile-time
+//! checks provided by the compiler. The `format_args!` macro is the only method of
+//! safely creating these structures, but they can be unsafely created with the
+//! constructor provided.
+//!
+//! ## Syntax
+//!
+//! The syntax for the formatting language used is drawn from other languages, so it
+//! should not be too alien. Arguments are formatted with Python-like syntax,
+//! meaning that arguments are surrounded by `{}` instead of the C-like `%`. The
+//! actual grammar for the formatting syntax is:
+//!
+//! ```text
+//! format_string := [ format ] *
+//! format := '{' [ argument ] [ ':' format_spec ] '}'
+//! argument := integer | identifier
+//!
+//! format_spec := [[fill]align][sign]['#'][0][width]['.' precision][type]
+//! fill := character
+//! align := '<' | '^' | '>'
+//! sign := '+' | '-'
+//! width := count
+//! precision := count | '*'
+//! type := identifier | ''
+//! count := parameter | integer
+//! parameter := integer '$'
+//! ```
+//!
+//! ## Formatting Parameters
+//!
+//! Each argument being formatted can be transformed by a number of formatting
+//! parameters (corresponding to `format_spec` in the syntax above). These
+//! parameters affect the string representation of what's being formatted. This
+//! syntax draws heavily from Python's, so it may seem a bit familiar.
+//!
+//! ### Fill/Alignment
+//!
+//! The fill character is provided normally in conjunction with the `width`
+//! parameter. This indicates that if the value being formatted is smaller than
+//! `width` some extra characters will be printed around it. The extra characters
+//! are specified by `fill`, and the alignment can be one of the following options:
+//!
+//! * `<` - the argument is left-aligned in `width` columns
+//! * `^` - the argument is center-aligned in `width` columns
+//! * `>` - the argument is right-aligned in `width` columns
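+//!
+//! For example, using the default space fill character (a brief illustrative
+//! sketch):
+//!
+//! ```rust
+//! format!("{:<5}", "a"); // => "a    "
+//! format!("{:^5}", "a"); // => "  a  "
+//! format!("{:>5}", "a"); // => "    a"
+//! ```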
+//!
+//! ### Sign/#/0
+//!
+//! These can all be interpreted as flags for a particular formatter.
+//!
+//! * '+' - This is intended for numeric types and indicates that the sign should
+//!         always be printed. Positive signs are never printed by default, and the
+//!         negative sign is only printed by default for the `Signed` trait. This
+//!         flag indicates that the correct sign (+ or -) should always be printed.
+//! * '-' - Currently not used
+//! * '#' - This flag indicates that the "alternate" form of printing should be
+//!         used. By default, this only applies to the integer formatting traits and
+//!         performs like:
+//!     * `x` - precedes the argument with a "0x"
+//!     * `X` - precedes the argument with a "0x"
+//!     * `b` - precedes the argument with a "0b"
+//!     * `o` - precedes the argument with a "0o"
+//! * '0' - This is used to indicate for integer formats that the padding should
+//!         both be done with a `0` character as well as be sign-aware. A format
+//!         like `{:08d}` would yield `00000001` for the integer `1`, while the same
+//!         format would yield `-0000001` for the integer `-1`. Notice that the
+//!         negative version has one fewer zero than the positive version.
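+//!
+//! A few illustrative combinations of these flags on integers (outputs as
+//! described above):
+//!
+//! ```rust
+//! format!("{:+}", 5i);    // => "+5"
+//! format!("{:#x}", 255u); // => "0xff"
+//! format!("{:08}", 1i);   // => "00000001"
+//! ```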
+//!
+//! ### Width
+//!
+//! This is a parameter for the "minimum width" that the format should take up. If
+//! the value's string does not fill up this many characters, then the padding
+//! specified by fill/alignment will be used to take up the required space.
+//!
+//! The default fill/alignment for non-numerics is a space and left-aligned. The
+//! default for numeric formatters is also a space, but with right-alignment. If
+//! the '0' flag is specified for numerics, then the implicit fill character is '0'.
+//!
+//! The value for the width can also be provided as a `uint` in the list of
+//! parameters by using the `2$` syntax indicating that the second argument is a
+//! `uint` specifying the width.
+//!
+//! ### Precision
+//!
+//! For non-numeric types, this can be considered a "maximum width". If the
+//! resulting string is longer than this width, then it is truncated down to this
+//! many characters and only those are emitted.
+//!
+//! For integral types, this has no meaning currently.
+//!
+//! For floating-point types, this indicates how many digits after the decimal point
+//! should be printed.
+//!
+//! ## Escaping
+//!
+//! The literal characters `{` and `}` may be included in a string by preceding them
+//! with the same character. For example, the `{` character is escaped with `{{` and
+//! the `}` character is escaped with `}}`.
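+//!
+//! A couple of illustrative escaped format strings:
+//!
+//! ```rust
+//! format!("{{}}");       // => "{}"
+//! format!("{{{}}}", 1i); // => "{1}"
+//! ```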

 #![experimental]

diff --git a/src/libstd/hash.rs b/src/libstd/hash.rs
index e4017ea5a47fa8338299f99a5d628d8cf92cd944..ac68e1ef121fbed040c124b42664a459b12eb472 100644
--- a/src/libstd/hash.rs
+++ b/src/libstd/hash.rs
@@ -8,58 +8,56 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-/*!
- * Generic hashing support.
- *
- * This module provides a generic way to compute the hash of a value. The
- * simplest way to make a type hashable is to use `#[deriving(Hash)]`:
- *
- * # Example
- *
- * ```rust
- * use std::hash;
- * use std::hash::Hash;
- *
- * #[deriving(Hash)]
- * struct Person {
- *     id: uint,
- *     name: String,
- *     phone: u64,
- * }
- *
- * let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 };
- * let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 };
- *
- * assert!(hash::hash(&person1) != hash::hash(&person2));
- * ```
- *
- * If you need more control over how a value is hashed, you need to implement
- * the trait `Hash`:
- *
- * ```rust
- * use std::hash;
- * use std::hash::Hash;
- * use std::hash::sip::SipState;
- *
- * struct Person {
- *     id: uint,
- *     name: String,
- *     phone: u64,
- * }
- *
- * impl Hash for Person {
- *     fn hash(&self, state: &mut SipState) {
- *         self.id.hash(state);
- *         self.phone.hash(state);
- *     }
- * }
- *
- * let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 };
- * let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 };
- *
- * assert!(hash::hash(&person1) == hash::hash(&person2));
- * ```
- */
+//! Generic hashing support.
+//!
+//! This module provides a generic way to compute the hash of a value. The
+//! simplest way to make a type hashable is to use `#[deriving(Hash)]`:
+//!
+//! # Example
+//!
+//! ```rust
+//! use std::hash;
+//! use std::hash::Hash;
+//!
+//! #[deriving(Hash)]
+//! struct Person {
+//!     id: uint,
+//!     name: String,
+//!     phone: u64,
+//! }
+//!
+//! let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 };
+//! let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 };
+//!
+//! assert!(hash::hash(&person1) != hash::hash(&person2));
+//! ```
+//!
+//! If you need more control over how a value is hashed, you need to implement
+//! the trait `Hash`:
+//!
+//! ```rust
+//! use std::hash;
+//! use std::hash::Hash;
+//! use std::hash::sip::SipState;
+//!
+//! struct Person {
+//!     id: uint,
+//!     name: String,
+//!     phone: u64,
+//! }
+//!
+//! impl Hash for Person {
+//!     fn hash(&self, state: &mut SipState) {
+//!         self.id.hash(state);
+//!         self.phone.hash(state);
+//!     }
+//! }
+//!
+//! let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 };
+//! let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 };
+//!
+//! assert!(hash::hash(&person1) == hash::hash(&person2));
+//! ```

 #![experimental]
diff --git a/src/libstd/io/fs.rs b/src/libstd/io/fs.rs
index 6d29f3d25382c9f4602ce693ade253c1e4afc57d..da69cee69e6500160b36e5ba060c38e2205cce64 100644
--- a/src/libstd/io/fs.rs
+++ b/src/libstd/io/fs.rs
@@ -10,47 +10,45 @@
 //
 // ignore-lexer-test FIXME #15679

-/*! Synchronous File I/O
-
-This module provides a set of functions and traits for working
-with regular files & directories on a filesystem.
-
-At the top-level of the module are a set of freestanding functions, associated
-with various filesystem operations. They all operate on `Path` objects.
-
-All operations in this module, including those as part of `File` et al
-block the task during execution. In the event of failure, all functions/methods
-will return an `IoResult` type with an `Err` value.
-
-Also included in this module is an implementation block on the `Path` object
-defined in `std::path::Path`. The impl adds useful methods about inspecting the
-metadata of a file. This includes getting the `stat` information, reading off
-particular bits of it, etc.
-
-# Example
-
-```rust
-# #![allow(unused_must_use)]
-use std::io::fs::PathExtensions;
-use std::io::{File, fs};
-
-let path = Path::new("foo.txt");
-
-// create the file, whether it exists or not
-let mut file = File::create(&path);
-file.write(b"foobar");
-# drop(file);
-
-// open the file in read-only mode
-let mut file = File::open(&path);
-file.read_to_end();
-
-println!("{}", path.stat().unwrap().size);
-# drop(file);
-fs::unlink(&path);
-```
-
-*/
+//! Synchronous File I/O
+//!
+//! This module provides a set of functions and traits for working
+//! with regular files & directories on a filesystem.
+//!
+//! At the top-level of the module are a set of freestanding functions, associated
+//! with various filesystem operations. They all operate on `Path` objects.
+//!
+//! All operations in this module, including those as part of `File` et al,
+//! block the task during execution. In the event of failure, all functions/methods
+//! will return an `IoResult` type with an `Err` value.
+//!
+//! Also included in this module is an implementation block on the `Path` object
+//! defined in `std::path::Path`. The impl adds useful methods for inspecting the
+//! metadata of a file. This includes getting the `stat` information, reading off
+//! particular bits of it, etc.
+//!
+//! # Example
+//!
+//! ```rust
+//! # #![allow(unused_must_use)]
+//! use std::io::fs::PathExtensions;
+//! use std::io::{File, fs};
+//!
+//! let path = Path::new("foo.txt");
+//!
+//! // create the file, whether it exists or not
+//! let mut file = File::create(&path);
+//! file.write(b"foobar");
+//! # drop(file);
+//!
+//! // open the file in read-only mode
+//! let mut file = File::open(&path);
+//! file.read_to_end();
+//!
+//! println!("{}", path.stat().unwrap().size);
+//! # drop(file);
+//! fs::unlink(&path);
+//! 
``` use clone::Clone; use io::standard_error; diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index a25674030aeacd3d7f8a179e983c43cdcb4fa0d9..fc6ee58346dec63dd5433be70cdb0da62bcd3d24 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -16,207 +16,205 @@ // error handling -/*! I/O, including files, networking, timers, and processes - -`std::io` provides Rust's basic I/O types, -for reading and writing to files, TCP, UDP, -and other types of sockets and pipes, -manipulating the file system, spawning processes. - -# Examples - -Some examples of obvious things you might want to do - -* Read lines from stdin - - ```rust - use std::io; - - for line in io::stdin().lines() { - print!("{}", line.unwrap()); - } - ``` - -* Read a complete file - - ```rust - use std::io::File; - - let contents = File::open(&Path::new("message.txt")).read_to_end(); - ``` - -* Write a line to a file - - ```rust - # #![allow(unused_must_use)] - use std::io::File; - - let mut file = File::create(&Path::new("message.txt")); - file.write(b"hello, file!\n"); - # drop(file); - # ::std::io::fs::unlink(&Path::new("message.txt")); - ``` - -* Iterate over the lines of a file - - ```rust,no_run - use std::io::BufferedReader; - use std::io::File; - - let path = Path::new("message.txt"); - let mut file = BufferedReader::new(File::open(&path)); - for line in file.lines() { - print!("{}", line.unwrap()); - } - ``` - -* Pull the lines of a file into a vector of strings - - ```rust,no_run - use std::io::BufferedReader; - use std::io::File; - - let path = Path::new("message.txt"); - let mut file = BufferedReader::new(File::open(&path)); - let lines: Vec = file.lines().map(|x| x.unwrap()).collect(); - ``` - -* Make a simple TCP client connection and request - - ```rust - # #![allow(unused_must_use)] - use std::io::TcpStream; - - # // connection doesn't fail if a server is running on 8080 - # // locally, we still want to be type checking this code, so lets - # // just stop it running (#11576) - # if false { - let mut socket = TcpStream::connect("127.0.0.1:8080").unwrap(); - socket.write(b"GET / HTTP/1.0\n\n"); - let response = socket.read_to_end(); - # } - ``` - -* Make a simple TCP server - - ```rust - # fn main() { } - # fn foo() { - # #![allow(dead_code)] - use std::io::{TcpListener, TcpStream}; - use std::io::{Acceptor, Listener}; - - let listener = TcpListener::bind("127.0.0.1:80"); - - // bind the listener to the specified address - let mut acceptor = listener.listen(); - - fn handle_client(mut stream: TcpStream) { - // ... - # &mut stream; // silence unused mutability/variable warning - } - // accept connections and process them, spawning a new tasks for each one - for stream in acceptor.incoming() { - match stream { - Err(e) => { /* connection failed */ } - Ok(stream) => spawn(proc() { - // connection succeeded - handle_client(stream) - }) - } - } - - // close the socket server - drop(acceptor); - # } - ``` - - -# Error Handling - -I/O is an area where nearly every operation can result in unexpected -errors. Errors should be painfully visible when they happen, and handling them -should be easy to work with. It should be convenient to handle specific I/O -errors, and it should also be convenient to not deal with I/O errors. - -Rust's I/O employs a combination of techniques to reduce boilerplate -while still providing feedback about errors. The basic strategy: - -* All I/O operations return `IoResult` which is equivalent to - `Result`. The `Result` type is defined in the `std::result` - module. 
-* If the `Result` type goes unused, then the compiler will by default emit a - warning about the unused result. This is because `Result` has the - `#[must_use]` attribute. -* Common traits are implemented for `IoResult`, e.g. - `impl<R: Reader> Reader for IoResult<R>`, so that error values do not have - to be 'unwrapped' before use. - -These features combine in the API to allow for expressions like -`File::create(&Path::new("diary.txt")).write(b"Met a girl.\n")` -without having to worry about whether "diary.txt" exists or whether -the write succeeds. As written, if either `new` or `write_line` -encounters an error then the result of the entire expression will -be an error. - -If you wanted to handle the error though you might write: - -```rust -# #![allow(unused_must_use)] -use std::io::File; - -match File::create(&Path::new("diary.txt")).write(b"Met a girl.\n") { - Ok(()) => (), // succeeded - Err(e) => println!("failed to write to my diary: {}", e), -} - -# ::std::io::fs::unlink(&Path::new("diary.txt")); -``` - -So what actually happens if `create` encounters an error? -It's important to know that what `new` returns is not a `File` -but an `IoResult`. If the file does not open, then `new` will simply -return `Err(..)`. Because there is an implementation of `Writer` (the trait -required ultimately required for types to implement `write_line`) there is no -need to inspect or unwrap the `IoResult` and we simply call `write_line` -on it. If `new` returned an `Err(..)` then the followup call to `write_line` -will also return an error. - -## `try!` - -Explicit pattern matching on `IoResult`s can get quite verbose, especially -when performing many I/O operations. Some examples (like those above) are -alleviated with extra methods implemented on `IoResult`, but others have more -complex interdependencies among each I/O operation. - -The `try!` macro from `std::macros` is provided as a method of early-return -inside `Result`-returning functions. It expands to an early-return on `Err` -and otherwise unwraps the contained `Ok` value. - -If you wanted to read several `u32`s from a file and return their product: - -```rust -use std::io::{File, IoResult}; - -fn file_product(p: &Path) -> IoResult<u32> { - let mut f = File::open(p); - let x1 = try!(f.read_le_u32()); - let x2 = try!(f.read_le_u32()); - - Ok(x1 * x2) -} - -match file_product(&Path::new("numbers.bin")) { - Ok(x) => println!("{}", x), - Err(e) => println!("Failed to read numbers!") -} -``` - -With `try!` in `file_product`, each `read_le_u32` need not be directly -concerned with error handling; instead its caller is responsible for -responding to errors that may occur while attempting to read the numbers. - -*/ +//! I/O, including files, networking, timers, and processes +//! +//! `std::io` provides Rust's basic I/O types, +//! for reading and writing to files, TCP, UDP, +//! and other types of sockets and pipes, +//! manipulating the file system, spawning processes. +//! +//! # Examples +//! +//! Some examples of obvious things you might want to do +//! +//! * Read lines from stdin +//! +//! ```rust +//! use std::io; +//! +//! for line in io::stdin().lines() { +//! print!("{}", line.unwrap()); +//! } +//! ``` +//! +//! * Read a complete file +//! +//! ```rust +//! use std::io::File; +//! +//! let contents = File::open(&Path::new("message.txt")).read_to_end(); +//! ``` +//! +//! * Write a line to a file +//! +//! ```rust +//! # #![allow(unused_must_use)] +//! use std::io::File; +//! +//! let mut file = File::create(&Path::new("message.txt")); +//! 
file.write(b"hello, file!\n"); +//! # drop(file); +//! # ::std::io::fs::unlink(&Path::new("message.txt")); +//! ``` +//! +//! * Iterate over the lines of a file +//! +//! ```rust,no_run +//! use std::io::BufferedReader; +//! use std::io::File; +//! +//! let path = Path::new("message.txt"); +//! let mut file = BufferedReader::new(File::open(&path)); +//! for line in file.lines() { +//! print!("{}", line.unwrap()); +//! } +//! ``` +//! +//! * Pull the lines of a file into a vector of strings +//! +//! ```rust,no_run +//! use std::io::BufferedReader; +//! use std::io::File; +//! +//! let path = Path::new("message.txt"); +//! let mut file = BufferedReader::new(File::open(&path)); +//! let lines: Vec<String> = file.lines().map(|x| x.unwrap()).collect(); +//! ``` +//! +//! * Make a simple TCP client connection and request +//! +//! ```rust +//! # #![allow(unused_must_use)] +//! use std::io::TcpStream; +//! +//! # // connection doesn't fail if a server is running on 8080 +//! # // locally, we still want to be type checking this code, so let's +//! # // just stop it running (#11576) +//! # if false { +//! let mut socket = TcpStream::connect("127.0.0.1:8080").unwrap(); +//! socket.write(b"GET / HTTP/1.0\n\n"); +//! let response = socket.read_to_end(); +//! # } +//! ``` +//! +//! * Make a simple TCP server +//! +//! ```rust +//! # fn main() { } +//! # fn foo() { +//! # #![allow(dead_code)] +//! use std::io::{TcpListener, TcpStream}; +//! use std::io::{Acceptor, Listener}; +//! +//! let listener = TcpListener::bind("127.0.0.1:80"); +//! +//! // bind the listener to the specified address +//! let mut acceptor = listener.listen(); +//! +//! fn handle_client(mut stream: TcpStream) { +//! // ... +//! # &mut stream; // silence unused mutability/variable warning +//! } +//! // accept connections and process them, spawning a new task for each one +//! for stream in acceptor.incoming() { +//! match stream { +//! Err(e) => { /* connection failed */ } +//! Ok(stream) => spawn(proc() { +//! // connection succeeded +//! handle_client(stream) +//! }) +//! } +//! } +//! +//! // close the socket server +//! drop(acceptor); +//! # } +//! ``` +//! +//! +//! # Error Handling +//! +//! I/O is an area where nearly every operation can result in unexpected +//! errors. Errors should be painfully visible when they happen, and handling them +//! should be easy. It should be convenient to handle specific I/O +//! errors, and it should also be convenient to not deal with I/O errors. +//! +//! Rust's I/O employs a combination of techniques to reduce boilerplate +//! while still providing feedback about errors. The basic strategy: +//! +//! * All I/O operations return `IoResult<T>` which is equivalent to +//! `Result<T, IoError>`. The `Result` type is defined in the `std::result` +//! module. +//! * If the `Result` type goes unused, then the compiler will by default emit a +//! warning about the unused result. This is because `Result` has the +//! `#[must_use]` attribute. +//! * Common traits are implemented for `IoResult`, e.g. +//! `impl<R: Reader> Reader for IoResult<R>`, so that error values do not have +//! to be 'unwrapped' before use. +//! +//! These features combine in the API to allow for expressions like +//! `File::create(&Path::new("diary.txt")).write(b"Met a girl.\n")` +//! without having to worry about whether "diary.txt" exists or whether +//! the write succeeds. As written, if either `create` or `write` +//! encounters an error then the result of the entire expression will +//! be an error. +//! +//! 
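For instance, a minimal sketch (reusing the hypothetical "message.txt" from the examples above) of chaining a read straight through the `IoResult`: +//! +//! ```rust +//! # #![allow(unused_must_use)] +//! use std::io::File; +//! +//! // Reader is implemented for IoResult<File>, so no unwrap is needed here +//! let contents = File::open(&Path::new("message.txt")).read_to_end(); +//! ``` +//! +//! 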
If you wanted to handle the error though you might write: +//! +//! ```rust +//! # #![allow(unused_must_use)] +//! use std::io::File; +//! +//! match File::create(&Path::new("diary.txt")).write(b"Met a girl.\n") { +//! Ok(()) => (), // succeeded +//! Err(e) => println!("failed to write to my diary: {}", e), +//! } +//! +//! # ::std::io::fs::unlink(&Path::new("diary.txt")); +//! ``` +//! +//! So what actually happens if `create` encounters an error? +//! It's important to know that what `create` returns is not a `File` +//! but an `IoResult<File>`. If the file does not open, then `create` will simply +//! return `Err(..)`. Because there is an implementation of `Writer` (the trait +//! ultimately required for the `write` call) for `IoResult<File>`, there is no +//! need to inspect or unwrap the `IoResult<File>` and we simply call `write` +//! on it. If `create` returned an `Err(..)` then the follow-up call to `write` +//! will also return an error. +//! +//! ## `try!` +//! +//! Explicit pattern matching on `IoResult`s can get quite verbose, especially +//! when performing many I/O operations. Some examples (like those above) are +//! alleviated with extra methods implemented on `IoResult`, but others have more +//! complex interdependencies among each I/O operation. +//! +//! The `try!` macro from `std::macros` is provided as a method of early-return +//! inside `Result`-returning functions. It expands to an early-return on `Err` +//! and otherwise unwraps the contained `Ok` value. +//! +//! If you wanted to read several `u32`s from a file and return their product: +//! +//! ```rust +//! use std::io::{File, IoResult}; +//! +//! fn file_product(p: &Path) -> IoResult<u32> { +//! let mut f = File::open(p); +//! let x1 = try!(f.read_le_u32()); +//! let x2 = try!(f.read_le_u32()); +//! +//! Ok(x1 * x2) +//! } +//! +//! match file_product(&Path::new("numbers.bin")) { +//! Ok(x) => println!("{}", x), +//! Err(e) => println!("Failed to read numbers: {}", e) +//! } +//! ``` +//! +//! With `try!` in `file_product`, each `read_le_u32` need not be directly +//! concerned with error handling; instead its caller is responsible for +//! responding to errors that may occur while attempting to read the numbers. #![experimental] #![deny(unused_must_use)] diff --git a/src/libstd/io/net/addrinfo.rs b/src/libstd/io/net/addrinfo.rs index d6a48fd39e6239bcc4998cf9dd83f42a6fce216e..7de786921309153ed13eb6eb214c32980c17583a 100644 --- a/src/libstd/io/net/addrinfo.rs +++ b/src/libstd/io/net/addrinfo.rs @@ -8,14 +8,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Synchronous DNS Resolution - -Contains the functionality to perform DNS resolution in a style related to -getaddrinfo() - -*/ +//! Synchronous DNS Resolution +//! +//! Contains the functionality to perform DNS resolution in a style related to +//! `getaddrinfo()` #![allow(missing_docs)] diff --git a/src/libstd/io/net/pipe.rs b/src/libstd/io/net/pipe.rs index 2984fa5963147c8b865ddf83dfbd4c19bf89d1ac..ec997b71986cc4782471827e2dee86a72a5d2a66 100644 --- a/src/libstd/io/net/pipe.rs +++ b/src/libstd/io/net/pipe.rs @@ -8,19 +8,15 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Named pipes - -This module contains the ability to communicate over named pipes with -synchronous I/O. On windows, this corresponds to talking over a Named Pipe, -while on Unix it corresponds to UNIX domain sockets. 
- -These pipes are similar to TCP in the sense that you can have both a stream to a -server and a server itself. The server provided accepts other `UnixStream` -instances as clients. - -*/ +//! Named pipes +//! +//! This module contains the ability to communicate over named pipes with +//! synchronous I/O. On windows, this corresponds to talking over a Named Pipe, +//! while on Unix it corresponds to UNIX domain sockets. +//! +//! These pipes are similar to TCP in the sense that you can have both a stream to a +//! server and a server itself. The server provided accepts other `UnixStream` +//! instances as clients. #![allow(missing_docs)] diff --git a/src/libstd/io/stdio.rs b/src/libstd/io/stdio.rs index e6dd20f63fbbcedea53287bca0ba69f489cc5365..665000eae883773ccd302a23b724502455593a06 100644 --- a/src/libstd/io/stdio.rs +++ b/src/libstd/io/stdio.rs @@ -8,24 +8,22 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! Non-blocking access to stdin, stdout, and stderr. - -This module provides bindings to the local event loop's TTY interface, using it -to offer synchronous but non-blocking versions of stdio. These handles can be -inspected for information about terminal dimensions or for related information -about the stream or terminal to which it is attached. - -# Example - -```rust -# #![allow(unused_must_use)] -use std::io; - -let mut out = io::stdout(); -out.write(b"Hello, world!"); -``` - -*/ +//! Non-blocking access to stdin, stdout, and stderr. +//! +//! This module provides bindings to the local event loop's TTY interface, using it +//! to offer synchronous but non-blocking versions of stdio. These handles can be +//! inspected for information about terminal dimensions or for related information +//! about the stream or terminal to which it is attached. +//! +//! # Example +//! +//! ```rust +//! # #![allow(unused_must_use)] +//! use std::io; +//! +//! let mut out = io::stdout(); +//! out.write(b"Hello, world!"); +//! ``` use self::StdSource::*; diff --git a/src/libstd/io/test.rs b/src/libstd/io/test.rs index a153ead2a3843b77f8b94bc9b0ed4038a845d753..af56735021e8646bda59f2ea981bc925817fe2fc 100644 --- a/src/libstd/io/test.rs +++ b/src/libstd/io/test.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! Various utility functions useful for writing I/O tests */ +//! Various utility functions useful for writing I/O tests #![macro_escape] @@ -95,17 +95,14 @@ pub fn raise_fd_limit() { unsafe { darwin_fd_limit::raise_fd_limit() } } +/// darwin_fd_limit exists to work around an issue where launchctl on Mac OS X defaults the rlimit +/// maxfiles to 256/unlimited. The default soft limit of 256 ends up being far too low for our +/// multithreaded scheduler testing, depending on the number of cores available. +/// +/// This fixes issue #7772. #[cfg(target_os="macos")] #[allow(non_camel_case_types)] mod darwin_fd_limit { - /*! - * darwin_fd_limit exists to work around an issue where launchctl on Mac OS X defaults the - * rlimit maxfiles to 256/unlimited. The default soft limit of 256 ends up being far too low - * for our multithreaded scheduler testing, depending on the number of cores available. - * - * This fixes issue #7772. 
- */ - use libc; type rlim_t = libc::uint64_t; #[repr(C)] diff --git a/src/libstd/io/timer.rs b/src/libstd/io/timer.rs index ec588f134784af9c69e746951668fedef27a9970..ad02b534d04c647de4572bc41c4748519882a906 100644 --- a/src/libstd/io/timer.rs +++ b/src/libstd/io/timer.rs @@ -8,14 +8,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Synchronous Timers - -This module exposes the functionality to create timers, block the current task, -and create receivers which will receive notifications after a period of time. - -*/ +//! Synchronous Timers +//! +//! This module exposes the functionality to create timers, block the current task, +//! and create receivers which will receive notifications after a period of time. // FIXME: These functions take Durations but only pass ms to the backend impls. diff --git a/src/libstd/io/util.rs b/src/libstd/io/util.rs index 8e0cd6608164a2cee1688e8767d138f6b7d70772..393283ff64c5bf079f1acc27b2ad779a3baaa7bf 100644 --- a/src/libstd/io/util.rs +++ b/src/libstd/io/util.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! Utility implementations of Reader and Writer */ +//! Utility implementations of Reader and Writer use prelude::*; use cmp; diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 6b5ec983a8071b873c0880d40d2a6594e8a3beae..b9a103d3d9b2e01897889017fdc12f90cf03a195 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -8,23 +8,19 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * Higher-level interfaces to libc::* functions and operating system services. - * - * In general these take and return rust types, use rust idioms (enums, - * closures, vectors) rather than C idioms, and do more extensive safety - * checks. - * - * This module is not meant to only contain 1:1 mappings to libc entries; any - * os-interface code that is reasonably useful and broadly applicable can go - * here. Including utility routines that merely build on other os code. - * - * We assume the general case is that users do not care, and do not want to - * be made to care, which operating system they are on. While they may want - * to special case various special cases -- and so we will not _hide_ the - * facts of which OS the user is on -- they should be given the opportunity - * to write OS-ignorant code by default. - */ +//! Higher-level interfaces to libc::* functions and operating system services. +//! +//! In general these take and return rust types, use rust idioms (enums, closures, vectors) rather +//! than C idioms, and do more extensive safety checks. +//! +//! This module is not meant to only contain 1:1 mappings to libc entries; any os-interface code +//! that is reasonably useful and broadly applicable can go here. Including utility routines that +//! merely build on other os code. +//! +//! We assume the general case is that users do not care, and do not want to be made to care, which +//! operating system they are on. While they may want to special case various special cases -- and +//! so we will not _hide_ the facts of which OS the user is on -- they should be given the +//! opportunity to write OS-ignorant code by default. 
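+//! +//! # Example +//! +//! A small usage sketch (an addition, not from the original module docs): `getenv` is one of these wrappers, returning an `Option<String>` instead of a nullable C string. +//! +//! ```rust +//! use std::os; +//! +//! // None means the variable is unset; no null pointer to check +//! match os::getenv("HOME") { +//! Some(val) => println!("HOME is {}", val), +//! None => println!("HOME is not set"), +//! } +//! ```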
#![experimental] diff --git a/src/libstd/path/mod.rs b/src/libstd/path/mod.rs index 047fa51b92f600fc142cff7a0b427724e6a5cb9e..b17106e811f622f5201baf2d6624020c2bc01cd8 100644 --- a/src/libstd/path/mod.rs +++ b/src/libstd/path/mod.rs @@ -8,62 +8,56 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Cross-platform path support - -This module implements support for two flavors of paths. `PosixPath` represents -a path on any unix-like system, whereas `WindowsPath` represents a path on -Windows. This module also exposes a typedef `Path` which is equal to the -appropriate platform-specific path variant. - -Both `PosixPath` and `WindowsPath` implement a trait `GenericPath`, which -contains the set of methods that behave the same for both paths. They each also -implement some methods that could not be expressed in `GenericPath`, yet behave -identically for both path flavors, such as `.components()`. - -The three main design goals of this module are 1) to avoid unnecessary -allocation, 2) to behave the same regardless of which flavor of path is being -used, and 3) to support paths that cannot be represented in UTF-8 (as Linux has -no restriction on paths beyond disallowing NUL). - -## Usage - -Usage of this module is fairly straightforward. Unless writing platform-specific -code, `Path` should be used to refer to the platform-native path. - -Creation of a path is typically done with either `Path::new(some_str)` or -`Path::new(some_vec)`. This path can be modified with `.push()` and -`.pop()` (and other setters). The resulting Path can either be passed to another -API that expects a path, or can be turned into a `&[u8]` with `.as_vec()` or a -`Option<&str>` with `.as_str()`. Similarly, attributes of the path can be queried -with methods such as `.filename()`. There are also methods that return a new -path instead of modifying the receiver, such as `.join()` or `.dir_path()`. - -Paths are always kept in normalized form. This means that creating the path -`Path::new("a/b/../c")` will return the path `a/c`. Similarly any attempt -to mutate the path will always leave it in normalized form. - -When rendering a path to some form of output, there is a method `.display()` -which is compatible with the `format!()` parameter `{}`. This will render the -path as a string, replacing all non-utf8 sequences with the Replacement -Character (U+FFFD). As such it is not suitable for passing to any API that -actually operates on the path; it is only intended for display. - -## Example - -```rust -use std::io::fs::PathExtensions; - -let mut path = Path::new("/tmp/path"); -println!("path: {}", path.display()); -path.set_filename("foo"); -path.push("bar"); -println!("new path: {}", path.display()); -println!("path exists: {}", path.exists()); -``` - -*/ +//! Cross-platform path support +//! +//! This module implements support for two flavors of paths. `PosixPath` represents a path on any +//! unix-like system, whereas `WindowsPath` represents a path on Windows. This module also exposes +//! a typedef `Path` which is equal to the appropriate platform-specific path variant. +//! +//! Both `PosixPath` and `WindowsPath` implement a trait `GenericPath`, which contains the set of +//! methods that behave the same for both paths. They each also implement some methods that could +//! not be expressed in `GenericPath`, yet behave identically for both path flavors, such as +//! `.components()`. +//! +//! 
The three main design goals of this module are 1) to avoid unnecessary allocation, 2) to behave +//! the same regardless of which flavor of path is being used, and 3) to support paths that cannot +//! be represented in UTF-8 (as Linux has no restriction on paths beyond disallowing NUL). +//! +//! ## Usage +//! +//! Usage of this module is fairly straightforward. Unless writing platform-specific code, `Path` +//! should be used to refer to the platform-native path. +//! +//! Creation of a path is typically done with either `Path::new(some_str)` or +//! `Path::new(some_vec)`. This path can be modified with `.push()` and `.pop()` (and other +//! setters). The resulting Path can either be passed to another API that expects a path, or can be +//! turned into a `&[u8]` with `.as_vec()` or a `Option<&str>` with `.as_str()`. Similarly, +//! attributes of the path can be queried with methods such as `.filename()`. There are also +//! methods that return a new path instead of modifying the receiver, such as `.join()` or +//! `.dir_path()`. +//! +//! Paths are always kept in normalized form. This means that creating the path +//! `Path::new("a/b/../c")` will return the path `a/c`. Similarly any attempt to mutate the path +//! will always leave it in normalized form. +//! +//! When rendering a path to some form of output, there is a method `.display()` which is +//! compatible with the `format!()` parameter `{}`. This will render the path as a string, +//! replacing all non-utf8 sequences with the Replacement Character (U+FFFD). As such it is not +//! suitable for passing to any API that actually operates on the path; it is only intended for +//! display. +//! +//! ## Example +//! +//! ```rust +//! use std::io::fs::PathExtensions; +//! +//! let mut path = Path::new("/tmp/path"); +//! println!("path: {}", path.display()); +//! path.set_filename("foo"); +//! path.push("bar"); +//! println!("new path: {}", path.display()); +//! println!("path exists: {}", path.exists()); +//! ``` #![experimental] diff --git a/src/libstd/rt/mod.rs b/src/libstd/rt/mod.rs index 872a545224184863f1cc990aaeadd367c8348b59..5ecd3ff04f1cd95c00ac9939f7b9ff0eb1cae64c 100644 --- a/src/libstd/rt/mod.rs +++ b/src/libstd/rt/mod.rs @@ -8,46 +8,38 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! Runtime services, including the task scheduler and I/O dispatcher - -The `rt` module provides the private runtime infrastructure necessary -to support core language features like the exchange and local heap, -logging, local data and unwinding. It also implements the default task -scheduler and task model. Initialization routines are provided for setting -up runtime resources in common configurations, including that used by -`rustc` when generating executables. - -It is intended that the features provided by `rt` can be factored in a -way such that the core library can be built with different 'profiles' -for different use cases, e.g. excluding the task scheduler. A number -of runtime features though are critical to the functioning of the -language and an implementation must be provided regardless of the -execution environment. - -Of foremost importance is the global exchange heap, in the module -`heap`. Very little practical Rust code can be written without -access to the global heap. Unlike most of `rt` the global heap is -truly a global resource and generally operates independently of the -rest of the runtime. 
- -All other runtime features are task-local, including the local heap, -local storage, logging and the stack unwinder. - -The relationship between `rt` and the rest of the core library is -not entirely clear yet and some modules will be moving into or -out of `rt` as development proceeds. - -Several modules in `core` are clients of `rt`: - -* `std::task` - The user-facing interface to the Rust task model. -* `std::local_data` - The interface to local data. -* `std::unstable::lang` - Miscellaneous lang items, some of which rely on `std::rt`. -* `std::cleanup` - Local heap destruction. -* `std::io` - In the future `std::io` will use an `rt` implementation. -* `std::logging` -* `std::comm` - -*/ +//! Runtime services, including the task scheduler and I/O dispatcher +//! +//! The `rt` module provides the private runtime infrastructure necessary to support core language +//! features like the exchange and local heap, logging, local data and unwinding. It also +//! implements the default task scheduler and task model. Initialization routines are provided for +//! setting up runtime resources in common configurations, including that used by `rustc` when +//! generating executables. +//! +//! It is intended that the features provided by `rt` can be factored in a way such that the core +//! library can be built with different 'profiles' for different use cases, e.g. excluding the task +//! scheduler. A number of runtime features though are critical to the functioning of the language +//! and an implementation must be provided regardless of the execution environment. +//! +//! Of foremost importance is the global exchange heap, in the module `heap`. Very little practical +//! Rust code can be written without access to the global heap. Unlike most of `rt` the global heap +//! is truly a global resource and generally operates independently of the rest of the runtime. +//! +//! All other runtime features are task-local, including the local heap, local storage, logging and +//! the stack unwinder. +//! +//! The relationship between `rt` and the rest of the core library is not entirely clear yet and +//! some modules will be moving into or out of `rt` as development proceeds. +//! +//! Several modules in `core` are clients of `rt`: +//! +//! * `std::task` - The user-facing interface to the Rust task model. +//! * `std::local_data` - The interface to local data. +//! * `std::unstable::lang` - Miscellaneous lang items, some of which rely on `std::rt`. +//! * `std::cleanup` - Local heap destruction. +//! * `std::io` - In the future `std::io` will use an `rt` implementation. +//! * `std::logging` +//! * `std::comm` #![experimental] diff --git a/src/libstd/sync/future.rs b/src/libstd/sync/future.rs index d6f413a082854eb08a25c857419a1ca9fe99bb46..f2f9351fd0d58542f2610fb820cfe17da58e49c6 100644 --- a/src/libstd/sync/future.rs +++ b/src/libstd/sync/future.rs @@ -8,21 +8,19 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * A type representing values that may be computed concurrently and - * operations for working with them. - * - * # Example - * - * ```rust - * use std::sync::Future; - * # fn fib(n: uint) -> uint {42}; - * # fn make_a_sandwich() {}; - * let mut delayed_fib = Future::spawn(proc() { fib(5000) }); - * make_a_sandwich(); - * println!("fib(5000) = {}", delayed_fib.get()) - * ``` - */ +//! A type representing values that may be computed concurrently and operations for working with +//! them. +//! +//! # Example +//! +//! ```rust +//! 
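// spawn starts fib(5000) on another task; get() blocks until the result is ready +//! 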
use std::sync::Future; +//! # fn fib(n: uint) -> uint {42}; +//! # fn make_a_sandwich() {}; +//! let mut delayed_fib = Future::spawn(proc() { fib(5000) }); +//! make_a_sandwich(); +//! println!("fib(5000) = {}", delayed_fib.get()) +//! ``` #![allow(missing_docs)] diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 3d33774aa55e1d922f5dde98082affd89dc9ef3a..26c049d267dfb6f955e4577f6bb9655a4de06af4 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -276,11 +276,9 @@ pub fn has_types(&self) -> bool { } } + /// Returns the types that the user wrote. Note that these do not necessarily map to the type + /// parameters in the parenthesized case. pub fn types(&self) -> Vec<&P<Ty>> { - /*! - * Returns the types that the user wrote. Note that these do not - * necessarily map to the type parameters in the parenthesized case. - */ match *self { AngleBracketedParameters(ref data) => { data.types.iter().collect() diff --git a/src/libsyntax/ast_util.rs b/src/libsyntax/ast_util.rs index 043e79bffd9e9cc7d574f5cc190ef66d631a88ad..4d35fbc143723c12d7ac19e26081c1312af6b80d 100644 --- a/src/libsyntax/ast_util.rs +++ b/src/libsyntax/ast_util.rs @@ -569,6 +569,7 @@ pub fn compute_id_range_for_inlined_item(item: &InlinedItem) -> IdRange { visitor.result } +/// Computes the id range for a single fn body, ignoring nested items. pub fn compute_id_range_for_fn_body(fk: visit::FnKind, decl: &FnDecl, body: &Block, @@ -576,11 +577,6 @@ pub fn compute_id_range_for_fn_body(fk: visit::FnKind, id: NodeId) -> IdRange { - /*! - * Computes the id range for a single fn body, - * ignoring nested items. - */ - let mut visitor = IdRangeComputingVisitor { result: IdRange::max() }; diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index b019b31de5f3f70fff4414d26b00450a51544bd7..1c1e1acab1c57ed40970484faf08882f60d515ca 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -10,18 +10,12 @@ // // ignore-lexer-test FIXME #15679 -/*! - -The CodeMap tracks all the source code used within a single crate, mapping -from integer byte positions to the original source code location. Each bit of -source parsed during crate parsing (typically files, in-memory strings, or -various bits of macro expansion) cover a continuous range of bytes in the -CodeMap and are represented by FileMaps. Byte positions are stored in `spans` -and used pervasively in the compiler. They are absolute positions within the -CodeMap, which upon request can be converted to line and column information, -source code snippets, etc. - -*/ +//! The CodeMap tracks all the source code used within a single crate, mapping from integer byte +//! positions to the original source code location. Each bit of source parsed during crate parsing +//! (typically files, in-memory strings, or various bits of macro expansion) covers a continuous +//! range of bytes in the CodeMap and is represented by a FileMap. Byte positions are stored in +//! `spans` and used pervasively in the compiler. They are absolute positions within the CodeMap, +//! which upon request can be converted to line and column information, source code snippets, etc. pub use self::MacroFormat::*; diff --git a/src/libsyntax/ext/deriving/decodable.rs b/src/libsyntax/ext/deriving/decodable.rs index d0a03658386049ef40ca748772a454acd87eae11..e3cf2b68752fd78e0f000242c7742c89324b82d3 100644 --- a/src/libsyntax/ext/deriving/decodable.rs +++ b/src/libsyntax/ext/deriving/decodable.rs @@ -8,10 +8,7 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. -/*! -The compiler code necessary for `#[deriving(Decodable)]`. See -encodable.rs for more. -*/ +//! The compiler code necessary for `#[deriving(Decodable)]`. See encodable.rs for more. use ast; use ast::{MetaItem, Item, Expr, MutMutable}; diff --git a/src/libsyntax/ext/deriving/generic/ty.rs b/src/libsyntax/ext/deriving/generic/ty.rs index 700ada8b4ad8fdd52000a2d5931ca6dcf065e259..f285d2cc2ff3ab71f21b2822d99d23cfb2988bce 100644 --- a/src/libsyntax/ext/deriving/generic/ty.rs +++ b/src/libsyntax/ext/deriving/generic/ty.rs @@ -8,10 +8,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! -A mini version of ast::Ty, which is easier to use, and features an -explicit `Self` type to use when specifying impls to be derived. -*/ +//! A mini version of ast::Ty, which is easier to use, and features an explicit `Self` type to use +//! when specifying impls to be derived. pub use self::PtrTy::*; pub use self::Ty::*; diff --git a/src/libsyntax/ext/deriving/mod.rs b/src/libsyntax/ext/deriving/mod.rs index b8cebd8ea201c74708090bd056e22f4225071738..fccef47d1ea2c875632f172315721fd71402c827 100644 --- a/src/libsyntax/ext/deriving/mod.rs +++ b/src/libsyntax/ext/deriving/mod.rs @@ -8,15 +8,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! -The compiler code necessary to implement the `#[deriving]` extensions. - - -FIXME (#2810): hygiene. Search for "__" strings (in other files too). -We also assume "extra" is the standard library, and "std" is the core -library. - -*/ +//! The compiler code necessary to implement the `#[deriving]` extensions. +//! +//! FIXME (#2810): hygiene. Search for "__" strings (in other files too). We also assume "extra" is +//! the standard library, and "std" is the core library. use ast::{Item, MetaItem, MetaList, MetaNameValue, MetaWord}; use ext::base::ExtCtxt; diff --git a/src/libsyntax/parse/obsolete.rs b/src/libsyntax/parse/obsolete.rs index e2dee607c6925f53353765d9a4cf9a1f2cba8ae0..86a96fc521642569d92922eaeb1b22b79e453bc5 100644 --- a/src/libsyntax/parse/obsolete.rs +++ b/src/libsyntax/parse/obsolete.rs @@ -8,14 +8,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! -Support for parsing unsupported, old syntaxes, for the -purpose of reporting errors. Parsing of these syntaxes -is tested by compile-test/obsolete-syntax.rs. - -Obsolete syntax that becomes too hard to parse can be -removed. -*/ +//! Support for parsing unsupported, old syntaxes, for the purpose of reporting errors. Parsing of +//! these syntaxes is tested by compile-test/obsolete-syntax.rs. +//! +//! Obsolete syntax that becomes too hard to parse can be removed. pub use self::ObsoleteSyntax::*; diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index c731a0005f8843228efff3f7ecf7942932baa771..b620799cc97e802a49f0536ba27ba41197eaff00 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1963,11 +1963,9 @@ pub fn parse_lifetime(&mut self) -> ast::Lifetime { } } + /// Parses `lifetime_defs = [ lifetime_defs { ',' lifetime_defs } ]` where `lifetime_def = + /// lifetime [':' lifetimes]` pub fn parse_lifetime_defs(&mut self) -> Vec<ast::LifetimeDef> { - /*! 
- * Parses `lifetime_defs = [ lifetime_defs { ',' lifetime_defs } ]` - * where `lifetime_def = lifetime [':' lifetimes]` - */ let mut res = Vec::new(); loop { @@ -2003,16 +2001,13 @@ pub fn parse_lifetime_defs(&mut self) -> Vec<ast::LifetimeDef> { } } - // matches lifetimes = ( lifetime ) | ( lifetime , lifetimes ) - // actually, it matches the empty one too, but putting that in there - // messes up the grammar.... + /// Matches lifetimes = ( lifetime ) | ( lifetime , lifetimes ). Actually, it matches the empty + /// one too, but putting that in there messes up the grammar.... + /// + /// Parses zero or more comma separated lifetimes. Expects each lifetime to be followed by + /// either a comma or `>`. Used when parsing type parameter lists, where we expect something + /// like `<'a, 'b, T>`. pub fn parse_lifetimes(&mut self, sep: token::Token) -> Vec<ast::Lifetime> { - /*! - * Parses zero or more comma separated lifetimes. - * Expects each lifetime to be followed by either - * a comma or `>`. Used when parsing type parameter - * lists, where we expect something like `<'a, 'b, T>`. - */ let mut res = Vec::new(); loop { diff --git a/src/libsyntax/visit.rs b/src/libsyntax/visit.rs index 3f87dbc0740ec6b648b31786c43a43a46c229725..84afa56b07d5eb28b18d1d9bced94cc1fddafee2 100644 --- a/src/libsyntax/visit.rs +++ b/src/libsyntax/visit.rs @@ -92,14 +92,12 @@ fn visit_struct_def(&mut self, s: &'v StructDef, _: Ident, _: &'v Generics, _: N } fn visit_struct_field(&mut self, s: &'v StructField) { walk_struct_field(self, s) } fn visit_variant(&mut self, v: &'v Variant, g: &'v Generics) { walk_variant(self, v, g) } + + /// Visits an optional reference to a lifetime. The `span` is the span of some surrounding + /// reference, should `opt_lifetime` be `None`. fn visit_opt_lifetime_ref(&mut self, _span: Span, opt_lifetime: &'v Option<Lifetime>) { - /*! - * Visits an optional reference to a lifetime. The `span` is - * the span of some surrounding reference should opt_lifetime - * be None. - */ match *opt_lifetime { Some(ref l) => self.visit_lifetime_ref(l), None => () diff --git a/src/libunicode/normalize.rs b/src/libunicode/normalize.rs index ad36215c11bcbd1e527f2b7586641b8504eee084..962be3d5acdc910b5b992b116ab59573f07bee6e 100644 --- a/src/libunicode/normalize.rs +++ b/src/libunicode/normalize.rs @@ -8,10 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - Functions for computing canonical and compatible decompositions - for Unicode characters. - */ +//! Functions for computing canonical and compatible decompositions for Unicode characters. use core::cmp::{Equal, Less, Greater}; use core::option::{Option, Some, None}; diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 369336639a7fe465b617b8c552cc0e42cffda939..a73dac1a6186678cb9f5562ec6e824aa3cadd8fc 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -8,12 +8,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * Unicode-intensive `char` methods. - * - * These methods implement functionality for `char` that requires knowledge of - * Unicode definitions, including normalization, categorization, and display information. - */ +//! Unicode-intensive `char` methods. +//! +//! These methods implement functionality for `char` that requires knowledge of +//! Unicode definitions, including normalization, categorization, and display information. 
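+//! +//! # Example +//! +//! A brief sketch (an addition, not from the original docs; assumes the era's `UnicodeChar` +//! methods are in scope via the prelude): +//! +//! ```rust +//! // these answers come from the Unicode tables, not ASCII rules +//! assert!('ß'.is_alphabetic()); +//! assert_eq!('Δ'.to_lowercase(), 'δ'); +//! ```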
use core::option::Option; use tables::{derived_property, property, general_category, conversions, charwidth}; diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 03a50409d7e520f621b740d517124fbdacc95c82..a5f761425759510a168fcf25601433ba1dd538ed 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -10,12 +10,10 @@ // // ignore-lexer-test FIXME #15679 -/*! - * Unicode-intensive string manipulations. - * - * This module provides functionality to `str` that requires the Unicode - * methods provided by the UnicodeChar trait. - */ +//! Unicode-intensive string manipulations. +//! +//! This module provides functionality to `str` that requires the Unicode methods provided by the +//! UnicodeChar trait. use self::GraphemeState::*; use core::cmp;