提交 468bcb0f 编写于 作者: F Fabian Hueske

Improved comments in Java example jobs

上级 2bb3e982
......@@ -57,6 +57,10 @@ import eu.stratosphere.example.java.clustering.util.KMeansData;
* </ul>
*
* <p>
* Usage: <code>KMeans &lt;points path&gt; &lt;centers path&gt; &lt;result path&gt; &lt;num iterations&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link KMeansData} and 10 iterations.
*
* <p>
* This example shows how to use:
* <ul>
* <li>Bulk iterations
......@@ -103,7 +107,7 @@ public class KMeans {
// emit result
if(fileOutput) {
clusteredPoints.writeAsCsv(outputPath, "\n", ",");
clusteredPoints.writeAsCsv(outputPath, "\n", " ");
} else {
clusteredPoints.print();
}
......@@ -153,7 +157,7 @@ public class KMeans {
@Override
public String toString() {
return x + "," + y;
return x + " " + y;
}
}
......@@ -178,7 +182,7 @@ public class KMeans {
@Override
public String toString() {
return id + "," + super.toString();
return id + " " + super.toString();
}
}
......
......@@ -18,6 +18,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.example.java.clustering.KMeans.Centroid;
import eu.stratosphere.example.java.clustering.KMeans.Point;
/**
* Provides the default data sets used for the K-Means example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class KMeansData {
public static DataSet<Centroid> getDefaultCentroidDataSet(ExecutionEnvironment env) {
......
......@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector;
* </ul>
*
* <p>
* Usage: <code>ConnectedComponents &lt;vertices path&gt; &lt;edges path&gt; &lt;result path&gt; &lt;max number of iterations&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link ConnectedComponentsData} and 10 iterations.
*
* <p>
* This example shows how to use:
* <ul>
* <li>Delta Iterations
......@@ -79,7 +83,7 @@ public class ConnectedComponents implements ProgramDescription {
DataSet<Long> vertices = getVertexDataSet(env);
DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env);
// assign the initial components (equal to the vertex id.
// assign the initial components (equal to the vertex id)
DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());
// open a delta iteration
......
......@@ -54,6 +54,9 @@ import eu.stratosphere.util.Collector;
* (2)-(12)
* </pre>
*
* Usage: <code>EnumTrianglesBasic &lt;edge path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}.
*
* <p>
* This example shows how to use:
* <ul>
......
......@@ -64,6 +64,9 @@ import eu.stratosphere.util.Collector;
* (2)-(12)
* </pre>
*
* Usage: <code>EnumTrianglesOpt &lt;edge path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}.
*
* <p>
* This example shows how to use:
* <ul>
......
......@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector;
* </ul>
*
* <p>
* Usage: <code>PageRankBasic &lt;vertices with initial ranks path&gt; &lt;edges path&gt; &lt;output path&gt; &lt;num vertices&gt; &lt;num iterations&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link PageRankData} and 10 iterations.
*
* <p>
* This example shows how to use:
* <ul>
* <li>Bulk Iterations
......@@ -211,14 +215,14 @@ public class PageRankBasic {
numVertices = Integer.parseInt(args[3]);
maxIterations = Integer.parseInt(args[4]);
} else {
System.err.println("Usage: PageRankBasic <vertex with initial rank input> <edges path> <output path> <num vertices> <num iterations>");
System.err.println("Usage: PageRankBasic <vertices with initial ranks path> <edges path> <output path> <num vertices> <num iterations>");
System.exit(1);
}
} else {
System.out.println("Executing PageRank Basic example with default parameters and built-in default data.");
System.out.println(" Provide parameters to read input data from files.");
System.out.println(" See the documentation for the correct format of input files.");
System.out.println(" Usage: PageRankBasic <vertex with initial rank input> <edges path> <output path> <num vertices> <num iterations>");
System.out.println(" Usage: PageRankBasic <vertices with initial ranks path> <edges path> <output path> <num vertices> <num iterations>");
numVertices = PageRankData.getNumberOfPages();
}
......
......@@ -20,6 +20,11 @@ import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.tuple.Tuple2;
/**
* Provides the default data sets used for the Connected Components example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class ConnectedComponentsData {
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
......
......@@ -19,6 +19,11 @@ import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.example.java.graph.util.EnumTrianglesDataTypes.Edge;
/**
* Provides the default data sets used for the Triangle Enumeration example programs.
* The default data sets are used if no parameters are given to the program.
*
*/
public class EnumTrianglesData {
public static DataSet<Edge> getDefaultEdgeDataSet(ExecutionEnvironment env) {
......
......@@ -22,6 +22,11 @@ import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.tuple.Tuple2;
/**
* Provides the default data sets used for the PageRank example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class PageRankData {
private static int numPages = 15;
......
......@@ -71,6 +71,9 @@ import eu.stratosphere.api.java.tuple.Tuple6;
* as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>.
*
* <p>
* Usage: <code>TPCHQuery10 &lt;customer-csv path&gt; &lt;orders-csv path&gt; &lt;lineitem-csv path&gt; &lt;nation-csv path&gt; &lt;result path&gt;</code><br>
*
* <p>
* This example shows how to use:
* <ul>
* <li> tuple data types
......
......@@ -71,6 +71,9 @@ import eu.stratosphere.api.java.tuple.Tuple5;
* as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>.
*
* <p>
* Usage: <code>TPCHQuery3 &lt;lineitem-csv path&gt; &lt;customer-csv path&gt; &lt;orders-csv path&gt; &lt;result path&gt;</code><br>
*
* <p>
* This example shows how to use:
* <ul>
* <li> custom data type derived from tuple data types
......@@ -157,7 +160,7 @@ public class TPCHQuery3 {
}
});
// Join the last join result with LineItems
// Join the last join result with Lineitems
DataSet<ShippingPriorityItem> joined =
customerWithOrders.join(li)
.where(4)
......
......@@ -74,6 +74,10 @@ import eu.stratosphere.util.Collector;
* </pre></code>
*
* <p>
* Usage: <code>WebLogAnalysis &lt;documents path&gt; &lt;ranks path&gt; &lt;visits path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link WebLogData}.
*
* <p>
* This example shows how to use:
* <ul>
* <li> tuple data types
......@@ -100,29 +104,28 @@ public class WebLogAnalysis {
DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env);
DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env);
// Create DataSet for filtering the entries from the documents relation
// Retain documents with keywords
DataSet<Tuple1<String>> filterDocs = documents
.filter(new FilterDocs())
.filter(new FilterDocByKeyWords())
.project(0).types(String.class);
// Create DataSet for filtering the entries from the ranks relation
// Filter ranks by minimum rank
DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
.filter(new FilterRanks());
.filter(new FilterByRank());
// Create DataSet for filtering the entries from the visits relation
// Filter visits by visit date
DataSet<Tuple1<String>> filterVisits = visits
.filter(new FilterVisits())
.filter(new FilterVisitsByDate())
.project(0).types(String.class);
// Create DataSet to join the filtered documents and ranks relation
// Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
filterDocs.join(filterRanks)
.where(0).equalTo(1)
.projectSecond(0,1,2)
.types(Integer.class, String.class, Integer.class);
// Create DataSet to realize a anti join between the joined
// documents and ranks relation and the filtered visits relation
// Anti-join URLs with visits, i.e., retain all URLs which have NOT been visited in a certain time
DataSet<Tuple3<Integer, String, Integer>> result =
joinDocsRanks.coGroup(filterVisits)
.where(1).equalTo(0)
......@@ -148,7 +151,7 @@ public class WebLogAnalysis {
* FilterFunction that filters for documents that contain a certain set of
* keywords.
*/
public static class FilterDocs extends FilterFunction<Tuple2<String, String>> {
public static class FilterDocByKeyWords extends FilterFunction<Tuple2<String, String>> {
private static final String[] KEYWORDS = { " editors ", " oscillations " };
......@@ -176,7 +179,7 @@ public class WebLogAnalysis {
/**
* FilterFunction that filters for records where the rank exceeds a certain threshold.
*/
public static class FilterRanks extends FilterFunction<Tuple3<Integer, String, Integer>> {
public static class FilterByRank extends FilterFunction<Tuple3<Integer, String, Integer>> {
private static final int RANKFILTER = 40;
......@@ -199,7 +202,7 @@ public class WebLogAnalysis {
* FilterFunction that filters for records of the visits relation where the year
* (from the date string) is equal to a certain value.
*/
public static class FilterVisits extends FilterFunction<Tuple2<String, String>> {
public static class FilterVisitsByDate extends FilterFunction<Tuple2<String, String>> {
private static final int YEARFILTER = 2007;
......
......@@ -23,6 +23,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.tuple.Tuple3;
/**
* Provides the default data sets used for the Weblog Analysis example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class WebLogData {
public static DataSet<Tuple2<String, String>> getDocumentDataSet(ExecutionEnvironment env) {
......
......@@ -30,6 +30,10 @@ import eu.stratosphere.util.Collector;
* The input is a plain text file with lines separated by newline characters.
*
* <p>
* Usage: <code>WordCount &lt;text path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link WordCountData}.
*
* <p>
* This example shows how to:
* <ul>
* <li>write a simple Stratosphere program.
......
......@@ -17,6 +17,11 @@ package eu.stratosphere.example.java.wordcount.util;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
/**
* Provides the default data sets used for the WordCount example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class WordCountData {
public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册