提交 468bcb0f 编写于 作者: F Fabian Hueske

Improved comments in Java example jobs

上级 2bb3e982
...@@ -57,6 +57,10 @@ import eu.stratosphere.example.java.clustering.util.KMeansData; ...@@ -57,6 +57,10 @@ import eu.stratosphere.example.java.clustering.util.KMeansData;
* </ul> * </ul>
* *
* <p> * <p>
* Usage: <code>KMeans &lt;points path&gt; &lt;centers path&gt; &lt;result path&gt; &lt;num iterations&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link KMeansData} and 10 iterations.
*
* <p>
* This example shows how to use: * This example shows how to use:
* <ul> * <ul>
* <li>Bulk iterations * <li>Bulk iterations
...@@ -103,7 +107,7 @@ public class KMeans { ...@@ -103,7 +107,7 @@ public class KMeans {
// emit result // emit result
if(fileOutput) { if(fileOutput) {
clusteredPoints.writeAsCsv(outputPath, "\n", ","); clusteredPoints.writeAsCsv(outputPath, "\n", " ");
} else { } else {
clusteredPoints.print(); clusteredPoints.print();
} }
...@@ -153,7 +157,7 @@ public class KMeans { ...@@ -153,7 +157,7 @@ public class KMeans {
@Override @Override
public String toString() { public String toString() {
return x + "," + y; return x + " " + y;
} }
} }
...@@ -178,7 +182,7 @@ public class KMeans { ...@@ -178,7 +182,7 @@ public class KMeans {
@Override @Override
public String toString() { public String toString() {
return id + "," + super.toString(); return id + " " + super.toString();
} }
} }
......
...@@ -18,6 +18,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment; ...@@ -18,6 +18,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.example.java.clustering.KMeans.Centroid; import eu.stratosphere.example.java.clustering.KMeans.Centroid;
import eu.stratosphere.example.java.clustering.KMeans.Point; import eu.stratosphere.example.java.clustering.KMeans.Point;
/**
* Provides the default data sets used for the K-Means example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class KMeansData { public class KMeansData {
public static DataSet<Centroid> getDefaultCentroidDataSet(ExecutionEnvironment env) { public static DataSet<Centroid> getDefaultCentroidDataSet(ExecutionEnvironment env) {
......
...@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector; ...@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector;
* </ul> * </ul>
* *
* <p> * <p>
* Usage: <code>ConnectedComponents &lt;vertices path&gt; &lt;edges path&gt; &lt;result path&gt; &lt;max number of iterations&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link ConnectedComponentsData} and 10 iterations.
*
* <p>
* This example shows how to use: * This example shows how to use:
* <ul> * <ul>
* <li>Delta Iterations * <li>Delta Iterations
...@@ -79,7 +83,7 @@ public class ConnectedComponents implements ProgramDescription { ...@@ -79,7 +83,7 @@ public class ConnectedComponents implements ProgramDescription {
DataSet<Long> vertices = getVertexDataSet(env); DataSet<Long> vertices = getVertexDataSet(env);
DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env); DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env);
// assign the initial components (equal to the vertex id. // assign the initial components (equal to the vertex id)
DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>()); DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());
// open a delta iteration // open a delta iteration
......
...@@ -54,6 +54,9 @@ import eu.stratosphere.util.Collector; ...@@ -54,6 +54,9 @@ import eu.stratosphere.util.Collector;
* (2)-(12) * (2)-(12)
* </pre> * </pre>
* *
* Usage: <code>EnumTrianglesBasic &lt;edge path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}.
*
* <p> * <p>
* This example shows how to use: * This example shows how to use:
* <ul> * <ul>
......
...@@ -64,6 +64,9 @@ import eu.stratosphere.util.Collector; ...@@ -64,6 +64,9 @@ import eu.stratosphere.util.Collector;
* (2)-(12) * (2)-(12)
* </pre> * </pre>
* *
* Usage: <code>EnumTrianglesOpt &lt;edge path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}.
*
* <p> * <p>
* This example shows how to use: * This example shows how to use:
* <ul> * <ul>
......
...@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector; ...@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector;
* </ul> * </ul>
* *
* <p> * <p>
* Usage: <code>PageRankBasic &lt;vertices with initial ranks path&gt; &lt;edges path&gt; &lt;output path&gt; &lt;num vertices&gt; &lt;num iterations&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link PageRankData} and 10 iterations.
*
* <p>
* This example shows how to use: * This example shows how to use:
* <ul> * <ul>
* <li>Bulk Iterations * <li>Bulk Iterations
...@@ -211,14 +215,14 @@ public class PageRankBasic { ...@@ -211,14 +215,14 @@ public class PageRankBasic {
numVertices = Integer.parseInt(args[3]); numVertices = Integer.parseInt(args[3]);
maxIterations = Integer.parseInt(args[4]); maxIterations = Integer.parseInt(args[4]);
} else { } else {
System.err.println("Usage: PageRankBasic <vertex with initial rank input> <edges path> <output path> <num vertices> <num iterations>"); System.err.println("Usage: PageRankBasic <vertices with initial ranks path> <edges path> <output path> <num vertices> <num iterations>");
System.exit(1); System.exit(1);
} }
} else { } else {
System.out.println("Executing PageRank Basic example with default parameters and built-in default data."); System.out.println("Executing PageRank Basic example with default parameters and built-in default data.");
System.out.println(" Provide parameters to read input data from files."); System.out.println(" Provide parameters to read input data from files.");
System.out.println(" See the documentation for the correct format of input files."); System.out.println(" See the documentation for the correct format of input files.");
System.out.println(" Usage: PageRankBasic <vertex with initial rank input> <edges path> <output path> <num vertices> <num iterations>"); System.out.println(" Usage: PageRankBasic <vertices with initial ranks path> <edges path> <output path> <num vertices> <num iterations>");
numVertices = PageRankData.getNumberOfPages(); numVertices = PageRankData.getNumberOfPages();
} }
......
...@@ -20,6 +20,11 @@ import eu.stratosphere.api.java.DataSet; ...@@ -20,6 +20,11 @@ import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment; import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.tuple.Tuple2; import eu.stratosphere.api.java.tuple.Tuple2;
/**
* Provides the default data sets used for the Connected Components example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class ConnectedComponentsData { public class ConnectedComponentsData {
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) { public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
......
...@@ -19,6 +19,11 @@ import eu.stratosphere.api.java.DataSet; ...@@ -19,6 +19,11 @@ import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment; import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.example.java.graph.util.EnumTrianglesDataTypes.Edge; import eu.stratosphere.example.java.graph.util.EnumTrianglesDataTypes.Edge;
/**
* Provides the default data sets used for the Triangle Enumeration example programs.
* The default data sets are used if no parameters are given to the program.
*
*/
public class EnumTrianglesData { public class EnumTrianglesData {
public static DataSet<Edge> getDefaultEdgeDataSet(ExecutionEnvironment env) { public static DataSet<Edge> getDefaultEdgeDataSet(ExecutionEnvironment env) {
......
...@@ -22,6 +22,11 @@ import eu.stratosphere.api.java.DataSet; ...@@ -22,6 +22,11 @@ import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment; import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.tuple.Tuple2; import eu.stratosphere.api.java.tuple.Tuple2;
/**
* Provides the default data sets used for the PageRank example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class PageRankData { public class PageRankData {
private static int numPages = 15; private static int numPages = 15;
......
...@@ -71,6 +71,9 @@ import eu.stratosphere.api.java.tuple.Tuple6; ...@@ -71,6 +71,9 @@ import eu.stratosphere.api.java.tuple.Tuple6;
* as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>. * as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>.
* *
* <p> * <p>
* Usage: <code>TPCHQuery10 &lt;customer-csv path&gt; &lt;orders-csv path&gt; &lt;lineitem-csv path&gt; &lt;nation-csv path&gt; &lt;result path&gt;</code><br>
*
* <p>
* This example shows how to use: * This example shows how to use:
* <ul> * <ul>
* <li> tuple data types * <li> tuple data types
......
...@@ -69,6 +69,9 @@ import eu.stratosphere.api.java.tuple.Tuple5; ...@@ -69,6 +69,9 @@ import eu.stratosphere.api.java.tuple.Tuple5;
* <p> * <p>
* Input files are plain text CSV files using the pipe character ('|') as field separator * Input files are plain text CSV files using the pipe character ('|') as field separator
* as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>. * as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>.
*
* <p>
* Usage: <code>TPCHQuery3 &lt;lineitem-csv path&gt; &lt;customer-csv path&gt; &lt;orders-csv path&gt; &lt;result path&gt;</code><br>
* *
* <p> * <p>
* This example shows how to use: * This example shows how to use:
...@@ -157,7 +160,7 @@ public class TPCHQuery3 { ...@@ -157,7 +160,7 @@ public class TPCHQuery3 {
} }
}); });
// Join the last join result with LineItems // Join the last join result with Lineitems
DataSet<ShippingPriorityItem> joined = DataSet<ShippingPriorityItem> joined =
customerWithOrders.join(li) customerWithOrders.join(li)
.where(4) .where(4)
......
...@@ -74,6 +74,10 @@ import eu.stratosphere.util.Collector; ...@@ -74,6 +74,10 @@ import eu.stratosphere.util.Collector;
* </pre></code> * </pre></code>
* *
* <p> * <p>
* Usage: <code>WebLogAnalysis &lt;documents path&gt; &lt;ranks path&gt; &lt;visits path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link WebLogData}.
*
* <p>
* This example shows how to use: * This example shows how to use:
* <ul> * <ul>
* <li> tuple data types * <li> tuple data types
...@@ -100,29 +104,28 @@ public class WebLogAnalysis { ...@@ -100,29 +104,28 @@ public class WebLogAnalysis {
DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env); DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env);
DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env); DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env);
// Create DataSet for filtering the entries from the documents relation // Retain documents with keywords
DataSet<Tuple1<String>> filterDocs = documents DataSet<Tuple1<String>> filterDocs = documents
.filter(new FilterDocs()) .filter(new FilterDocByKeyWords())
.project(0).types(String.class); .project(0).types(String.class);
// Create DataSet for filtering the entries from the ranks relation // Filter ranks by minimum rank
DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
.filter(new FilterRanks()); .filter(new FilterByRank());
// Create DataSet for filtering the entries from the visits relation // Filter visits by visit date
DataSet<Tuple1<String>> filterVisits = visits DataSet<Tuple1<String>> filterVisits = visits
.filter(new FilterVisits()) .filter(new FilterVisitsByDate())
.project(0).types(String.class); .project(0).types(String.class);
// Create DataSet to join the filtered documents and ranks relation // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks = DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
filterDocs.join(filterRanks) filterDocs.join(filterRanks)
.where(0).equalTo(1) .where(0).equalTo(1)
.projectSecond(0,1,2) .projectSecond(0,1,2)
.types(Integer.class, String.class, Integer.class); .types(Integer.class, String.class, Integer.class);
// Create DataSet to realize a anti join between the joined // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
// documents and ranks relation and the filtered visits relation
DataSet<Tuple3<Integer, String, Integer>> result = DataSet<Tuple3<Integer, String, Integer>> result =
joinDocsRanks.coGroup(filterVisits) joinDocsRanks.coGroup(filterVisits)
.where(1).equalTo(0) .where(1).equalTo(0)
...@@ -148,7 +151,7 @@ public class WebLogAnalysis { ...@@ -148,7 +151,7 @@ public class WebLogAnalysis {
* FilterFunction that filters for documents that contain a certain set of * FilterFunction that filters for documents that contain a certain set of
* keywords. * keywords.
*/ */
public static class FilterDocs extends FilterFunction<Tuple2<String, String>> { public static class FilterDocByKeyWords extends FilterFunction<Tuple2<String, String>> {
private static final String[] KEYWORDS = { " editors ", " oscillations " }; private static final String[] KEYWORDS = { " editors ", " oscillations " };
...@@ -176,7 +179,7 @@ public class WebLogAnalysis { ...@@ -176,7 +179,7 @@ public class WebLogAnalysis {
/** /**
* FilterFunction that filters for records where the rank exceeds a certain threshold. * FilterFunction that filters for records where the rank exceeds a certain threshold.
*/ */
public static class FilterRanks extends FilterFunction<Tuple3<Integer, String, Integer>> { public static class FilterByRank extends FilterFunction<Tuple3<Integer, String, Integer>> {
private static final int RANKFILTER = 40; private static final int RANKFILTER = 40;
...@@ -199,7 +202,7 @@ public class WebLogAnalysis { ...@@ -199,7 +202,7 @@ public class WebLogAnalysis {
* FilterFunction that filters for records of the visits relation where the year * FilterFunction that filters for records of the visits relation where the year
* (from the date string) is equal to a certain value. * (from the date string) is equal to a certain value.
*/ */
public static class FilterVisits extends FilterFunction<Tuple2<String, String>> { public static class FilterVisitsByDate extends FilterFunction<Tuple2<String, String>> {
private static final int YEARFILTER = 2007; private static final int YEARFILTER = 2007;
......
...@@ -23,6 +23,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment; ...@@ -23,6 +23,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.tuple.Tuple2; import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.tuple.Tuple3; import eu.stratosphere.api.java.tuple.Tuple3;
/**
* Provides the default data sets used for the Weblog Analysis example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class WebLogData { public class WebLogData {
public static DataSet<Tuple2<String, String>> getDocumentDataSet(ExecutionEnvironment env) { public static DataSet<Tuple2<String, String>> getDocumentDataSet(ExecutionEnvironment env) {
......
...@@ -30,6 +30,10 @@ import eu.stratosphere.util.Collector; ...@@ -30,6 +30,10 @@ import eu.stratosphere.util.Collector;
* The input is a plain text file with lines separated by newline characters. * The input is a plain text file with lines separated by newline characters.
* *
* <p> * <p>
* Usage: <code>WordCount &lt;text path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from {@link WordCountData}.
*
* <p>
* This example shows how to: * This example shows how to:
* <ul> * <ul>
* <li>write a simple Stratosphere program. * <li>write a simple Stratosphere program.
......
...@@ -17,6 +17,11 @@ package eu.stratosphere.example.java.wordcount.util; ...@@ -17,6 +17,11 @@ package eu.stratosphere.example.java.wordcount.util;
import eu.stratosphere.api.java.DataSet; import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment; import eu.stratosphere.api.java.ExecutionEnvironment;
/**
* Provides the default data sets used for the WordCount example program.
* The default data sets are used if no parameters are given to the program.
*
*/
public class WordCountData { public class WordCountData {
public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) { public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册