提交 73c13837 编写于 作者: V vasia

small corrections in refactored examples

上级 465a6a4d
......@@ -37,7 +37,7 @@ import eu.stratosphere.example.java.clustering.util.KMeansData;
* Each point is assigned to the cluster center which is closest to it.
* Subsequently, each cluster center is moved to the center (<i>mean</i>) of all points that have been assigned to it.
* The moved cluster centers are fed into the next iteration.
* The algorithm terminates after a fixed number of iteration (as in this implementation)
* The algorithm terminates after a fixed number of iterations (as in this implementation)
* or if cluster centers do not (significantly) move in an iteration.
*
* <p>
......@@ -46,7 +46,7 @@ import eu.stratosphere.example.java.clustering.util.KMeansData;
* each data point is annotated with the id of the final cluster (center) it belongs to.
*
* <p>
* Input files are plain text files must be formatted as follows:
* Input files are plain text files and must be formatted as follows:
* <ul>
* <li>Data points are represented as two double values separated by a blank character.
* Data points are separated by newline characters.<br>
......@@ -84,7 +84,7 @@ public class KMeans {
// set number of bulk iterations for KMeans algorithm
IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);
DataSet<Centroid> newCentriods = points
DataSet<Centroid> newCentroids = points
// compute closest centroid for each point
.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
// count and sum point coordinates for each centroid
......@@ -94,7 +94,7 @@ public class KMeans {
.map(new CentroidAverager());
// feed new centroids back into next iteration
DataSet<Centroid> finalCentroids = loop.closeWith(newCentriods);
DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);
DataSet<Tuple2<Integer, Point>> clusteredPoints = points
// assign points to final clusters
......
......@@ -29,7 +29,8 @@ import eu.stratosphere.util.Collector;
/**
* An implementation of the connected components algorithm, using a delta iteration.
* Initially, the algorithm assigns each vertex its own ID. After the algorithm has completed, all vertices in the
* same component will have the same id. In each step, a vertex
* same component will have the same id. In each step, a vertex picks the minimum of its own ID and its
* neighbors' IDs, as its new ID.
* <p>
* A vertex whose component did not change need not propagate its information in the next step. Because of that,
* the algorithm is easily expressible via a delta iteration. We here model the solution set as the vertices with
......@@ -38,9 +39,9 @@ import eu.stratosphere.util.Collector;
* is consequently also the next workset.
*
* <p>
* Input files are plain text files must be formatted as follows:
* Input files are plain text files and must be formatted as follows:
* <ul>
* <li>Vertexes represented as IDs and separated by new-line characters.<br>
* <li>Vertices are represented as IDs and separated by new-line characters.<br>
* For example <code>"1\n2\n12\n42\n63\n"</code> gives five vertices (1), (2), (12), (42), and (63).
* <li>Edges are represented as pairs of vertex IDs which are separated by space
* characters. Edges are separated by new-line characters.<br>
......@@ -79,7 +80,7 @@ public class ConnectedComponents implements ProgramDescription {
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
verticesWithInitialId.iterateDelta(verticesWithInitialId, maxIterations, 0);
// apply the step logic: join with the edges, select the minimum neighbor, update the component of the candidate is smaller
// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
.groupBy(0).aggregate(Aggregations.MIN, 1)
.join(iteration.getSolutionSet()).where(0).equalTo(0)
......
......@@ -32,7 +32,7 @@ import eu.stratosphere.util.Collector;
/**
* Triangle enumeration is a preprocessing step to find closely connected parts in graphs.
* A triangle are three edges that connect three vertices with each other.
* A triangle consists of three edges that connect three vertices with each other.
*
* <p>
* The algorithm works as follows:
......@@ -41,7 +41,7 @@ import eu.stratosphere.util.Collector;
* that closes the triangle.
*
* <p>
* Input files are plain text files must be formatted as follows:
* Input files are plain text files and must be formatted as follows:
* <ul>
* <li>Edges are represented as pairs of vertex IDs which are separated by space
* characters. Edges are separated by new-line characters.<br>
......@@ -128,7 +128,7 @@ public class EnumTrianglesBasic {
// flip vertices if necessary
if(inEdge.getFirstVertex() > inEdge.getSecondVertex()) {
inEdge.flipVertics();
inEdge.flipVertices();
}
return inEdge;
......
......@@ -35,7 +35,7 @@ import eu.stratosphere.util.Collector;
/**
* Triangle enumeration is a preprocessing step to find closely connected parts in graphs.
* A triangle are three edges that connect three vertices with each other.
* A triangle consists of three edges that connect three vertices with each other.
*
* <p>
* The basic algorithm works as follows:
......@@ -51,7 +51,7 @@ import eu.stratosphere.util.Collector;
* grouping on edges on the vertex with the smaller degree.
*
* <p>
* Input files are plain text files must be formatted as follows:
* Input files are plain text files and must be formatted as follows:
* <ul>
* <li>Edges are represented as pairs of vertex IDs which are separated by space
* characters. Edges are separated by new-line characters.<br>
......@@ -141,7 +141,7 @@ public class EnumTrianglesOpt {
@Override
public void flatMap(Edge edge, Collector<Edge> out) throws Exception {
out.collect(edge);
edge.flipVertics();
edge.flipVertices();
out.collect(edge);
}
}
......@@ -231,7 +231,7 @@ public class EnumTrianglesOpt {
// flip vertices if first degree is larger than second degree.
if(inEdge.getFirstDegree() > inEdge.getSecondDegree()) {
outEdge.flipVertics();
outEdge.flipVertices();
}
// return edge
......@@ -247,7 +247,7 @@ public class EnumTrianglesOpt {
// flip vertices if necessary
if(inEdge.getFirstVertex() > inEdge.getSecondVertex()) {
inEdge.flipVertics();
inEdge.flipVertices();
}
return inEdge;
......
......@@ -37,12 +37,12 @@ import eu.stratosphere.util.Collector;
* This implementation requires a set of pages (vertices) with associated ranks and a set
* of directed links (edges) as input and works as follows. <br>
* In each iteration, the rank of every page is evenly distributed to all pages it points to.
* Each page collects the partial ranks of all pages that point to it, sums them up, and apply a dampening factor to the sum.
* Each page collects the partial ranks of all pages that point to it, sums them up, and applies a dampening factor to the sum.
* The result is the new rank of the page. A new iteration is started with the new ranks of all pages.
* This implementation terminates after a fixed number of iterations.
*
* <p>
* Input files are plain text files must be formatted as follows:
* Input files are plain text files and must be formatted as follows:
* <ul>
* <li>Pages are represented as a (long) ID and a (double) rank, and are separated by new-line characters.<br>
* For example <code>"1 0.4\n2 0.3\n12 0.15\n42 0.05\n63 0.1\n"</code> gives five pages with associated ranks
......
......@@ -52,7 +52,7 @@ public class EnumTrianglesDataTypes {
this.setSecondVertex(ewd.getSecondVertex());
}
public void flipVertics() {
public void flipVertices() {
Integer tmp = this.getFirstVertex();
this.setFirstVertex(this.getSecondVertex());
this.setSecondVertex(tmp);
......
......@@ -157,7 +157,7 @@ public class TPCHQuery3 {
}
});
// Join the last join result with Orders
// Join the last join result with LineItems
DataSet<ShippingPriorityItem> joined =
customerWithOrders.join(li)
.where(4)
......
......@@ -23,11 +23,11 @@ import eu.stratosphere.example.java.wordcount.util.WordCountData;
import eu.stratosphere.util.Collector;
/**
* Implements a the "WordCount" program that computes a simple word occurrence histogram
* Implements the "WordCount" program that computes a simple word occurrence histogram
* over text files.
*
* <p>
* The input are plain text files.
* The input consists of plain text files.
*
* <p>
* This example shows how to:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册