Improved comments in Java example jobs

468bcb0f · Fabian Hueske · 2bb3e982 · 468bcb0f · 468bcb0f · 468bcb0f
15 changed files
--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/clustering/KMeans.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/clustering/KMeans.java
@@ -57,6 +57,10 @@ import eu.stratosphere.example.java.clustering.util.KMeansData;
 * </ul>
 * 
 * <p>
+ * Usage: <code>KMeans &lt;points path&gt; &lt;centers path&gt; &lt;result path&gt; &lt;num iterations&gt;</code><br>
+ * If no parameters are provided, the program is run with default data from {@link KMeansData} and 10 iterations. 
+ * 
+ * <p>
 * This example shows how to use:
 * <ul>
 * <li>Bulk iterations
@@ -103,7 +107,7 @@ public class KMeans {
 		
 		// emit result
 		if(fileOutput) {
-			clusteredPoints.writeAsCsv(outputPath, "\n", ",");
+			clusteredPoints.writeAsCsv(outputPath, "\n", " ");
 		} else {
 			clusteredPoints.print();
 		}
@@ -153,7 +157,7 @@ public class KMeans {
 		
 		@Override
 		public String toString() {
-			return x + "," + y;
+			return x + " " + y;
 		}
 	}
 	
@@ -178,7 +182,7 @@ public class KMeans {
 		
 		@Override
 		public String toString() {
-			return id + "," + super.toString();
+			return id + " " + super.toString();
 		}
 	}
 	

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/clustering/util/KMeansData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/clustering/util/KMeansData.java
@@ -18,6 +18,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment;
 import eu.stratosphere.example.java.clustering.KMeans.Centroid;
 import eu.stratosphere.example.java.clustering.KMeans.Point;

+/**
+ * Provides the default data sets used for the K-Means example program.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
 public class KMeansData {

 	public static DataSet<Centroid> getDefaultCentroidDataSet(ExecutionEnvironment env) {

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/ConnectedComponents.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/ConnectedComponents.java
@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector;
 * </ul>
 * 
 * <p>
+ * Usage: <code>ConnectedComponents &lt;vertices path&gt; &lt;edges path&gt; &lt;result path&gt; &lt;max number of iterations&gt;</code><br>
+ * If no parameters are provided, the program is run with default data from {@link ConnectedComponentsData} and 10 iterations. 
+ * 
+ * <p>
 * This example shows how to use:
 * <ul>
 * <li>Delta Iterations
@@ -79,7 +83,7 @@ public class ConnectedComponents implements ProgramDescription {
 		DataSet<Long> vertices = getVertexDataSet(env);
 		DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env);
 		
-		// assign the initial components (equal to the vertex id.
+		// assign the initial components (equal to the vertex id)
 		DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());
 		
 		// open a delta iteration

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/EnumTrianglesBasic.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/EnumTrianglesBasic.java
@@ -54,6 +54,9 @@ import eu.stratosphere.util.Collector;
 *   (2)-(12)
 * </pre>
 * 
+ * Usage: <code>EnumTrianglesBasic &lt;edge path&gt; &lt;result path&gt;</code><br>
+ * If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}. 
+ * 
 * <p>
 * This example shows how to use:
 * <ul>

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/EnumTrianglesOpt.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/EnumTrianglesOpt.java
@@ -64,6 +64,9 @@ import eu.stratosphere.util.Collector;
 *   (2)-(12)
 * </pre>
 * 
+ * Usage: <code>EnumTrianglesOpt &lt;edge path&gt; &lt;result path&gt;</code><br>
+ * If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}.
+ * 
 * <p>
 * This example shows how to use:
 * <ul>

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/PageRankBasic.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/PageRankBasic.java
@@ -55,6 +55,10 @@ import eu.stratosphere.util.Collector;
 * </ul>
 * 
 * <p>
+ * Usage: <code>PageRankBasic &lt;vertices with initial ranks path&gt; &lt;edges path&gt; &lt;output path&gt; &lt;num vertices&gt; &lt;num iterations&gt;</code><br>
+ * If no parameters are provided, the program is run with default data from {@link PageRankData} and 10 iterations.
+ * 
+ * <p>
 * This example shows how to use:
 * <ul>
 * <li>Bulk Iterations
@@ -211,14 +215,14 @@ public class PageRankBasic {
 				numVertices = Integer.parseInt(args[3]);
 				maxIterations = Integer.parseInt(args[4]);
 			} else {
-				System.err.println("Usage: PageRankBasic <vertex with initial rank input> <edges path> <output path> <num vertices> <num iterations>");
+				System.err.println("Usage: PageRankBasic <vertices with initial ranks path> <edges path> <output path> <num vertices> <num iterations>");
 				System.exit(1);
 			}
 		} else {
 			System.out.println("Executing PageRank Basic example with default parameters and built-in default data.");
 			System.out.println("  Provide parameters to read input data from files.");
 			System.out.println("  See the documentation for the correct format of input files.");
-			System.out.println("  Usage: PageRankBasic <vertex with initial rank input> <edges path> <output path> <num vertices> <num iterations>");
+			System.out.println("  Usage: PageRankBasic <vertices with initial ranks path> <edges path> <output path> <num vertices> <num iterations>");
 			
 			numVertices = PageRankData.getNumberOfPages();
 		}

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/util/ConnectedComponentsData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/util/ConnectedComponentsData.java
@@ -20,6 +20,11 @@ import eu.stratosphere.api.java.DataSet;
 import eu.stratosphere.api.java.ExecutionEnvironment;
 import eu.stratosphere.api.java.tuple.Tuple2;

+/**
+ * Provides the default data sets used for the Connected Components example program.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
 public class ConnectedComponentsData {

 	public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/util/EnumTrianglesData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/util/EnumTrianglesData.java
@@ -19,6 +19,11 @@ import eu.stratosphere.api.java.DataSet;
 import eu.stratosphere.api.java.ExecutionEnvironment;
 import eu.stratosphere.example.java.graph.util.EnumTrianglesDataTypes.Edge;

+/**
+ * Provides the default data sets used for the Triangle Enumeration example programs.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
 public class EnumTrianglesData {

 	public static DataSet<Edge> getDefaultEdgeDataSet(ExecutionEnvironment env) {

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/util/PageRankData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/graph/util/PageRankData.java
@@ -22,6 +22,11 @@ import eu.stratosphere.api.java.DataSet;
 import eu.stratosphere.api.java.ExecutionEnvironment;
 import eu.stratosphere.api.java.tuple.Tuple2;

+/**
+ * Provides the default data sets used for the PageRank example program.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
 public class PageRankData {

 	private static int numPages = 15;

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery10.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery10.java
@@ -71,6 +71,9 @@ import eu.stratosphere.api.java.tuple.Tuple6;
 * as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>.
 * 
 * <p>
+ * Usage: <code>TPCHQuery10 &lt;customer-csv path&gt; &lt;orders-csv path&gt; &lt;lineitem-csv path&gt; &lt;nation-csv path&gt; &lt;result path&gt;</code><br>
+ *  
+ * <p>
 * This example shows how to use:
 * <ul>
 * <li> tuple data types

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery3.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery3.java
@@ -71,6 +71,9 @@ import eu.stratosphere.api.java.tuple.Tuple5;
 * as generated by the TPC-H data generator which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>.
 *
 *  <p>
+ * Usage: <code>TPCHQuery3 &lt;lineitem-csv path&gt; &lt;customer-csv path&gt; &lt;orders-csv path&gt; &lt;result path&gt;</code><br>
+ *  
+ * <p>
 * This example shows how to use:
 * <ul>
 * <li> custom data type derived from tuple data types
@@ -157,7 +160,7 @@ public class TPCHQuery3 {
 								}
 							});
 		
-		// Join the last join result with LineItems
+		// Join the last join result with Lineitems
 		DataSet<ShippingPriorityItem> joined = 
 				customerWithOrders.join(li)
 									.where(4)

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/WebLogAnalysis.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/WebLogAnalysis.java
@@ -74,6 +74,10 @@ import eu.stratosphere.util.Collector;
 * </pre></code>
 * 
 * <p>
+ * Usage: <code>WebLogAnalysis &lt;documents path&gt; &lt;ranks path&gt; &lt;visits path&gt; &lt;result path&gt;</code><br>
+ * If no parameters are provided, the program is run with default data from {@link WebLogData}.
+ * 
+ * <p>
 * This example shows how to use:
 * <ul>
 * <li> tuple data types
@@ -100,29 +104,28 @@ public class WebLogAnalysis {
 		DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env);
 		DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env);
 		
-		// Create DataSet for filtering the entries from the documents relation
+		// Retain documents with keywords
 		DataSet<Tuple1<String>> filterDocs = documents
-				.filter(new FilterDocs())
+				.filter(new FilterDocByKeyWords())
 				.project(0).types(String.class);

-		// Create DataSet for filtering the entries from the ranks relation
+		// Filter ranks by minimum rank
 		DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
-				.filter(new FilterRanks());
+				.filter(new FilterByRank());

-		// Create DataSet for filtering the entries from the visits relation
+		// Filter visits by visit date
 		DataSet<Tuple1<String>> filterVisits = visits
-				.filter(new FilterVisits())
+				.filter(new FilterVisitsByDate())
 				.project(0).types(String.class);

-		// Create DataSet to join the filtered documents and ranks relation
+		// Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
 		DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks = 
 				filterDocs.join(filterRanks)
 							.where(0).equalTo(1)
 							.projectSecond(0,1,2)
 							.types(Integer.class, String.class, Integer.class);

-		// Create DataSet to realize a anti join between the joined
-		// documents and ranks relation and the filtered visits relation
+		// Anti-join URLs with visits, i.e., retain all URLs which have NOT been visited in a certain year
 		DataSet<Tuple3<Integer, String, Integer>> result = 
 				joinDocsRanks.coGroup(filterVisits)
 								.where(1).equalTo(0)
@@ -148,7 +151,7 @@ public class WebLogAnalysis {
 	 * MapFunction that filters for documents that contain a certain set of
 	 * keywords.
 	 */
-	public static class FilterDocs extends FilterFunction<Tuple2<String, String>> {
+	public static class FilterDocByKeyWords extends FilterFunction<Tuple2<String, String>> {

 		private static final String[] KEYWORDS = { " editors ", " oscillations " };

@@ -176,7 +179,7 @@ public class WebLogAnalysis {
 	/**
 	 * MapFunction that filters for records where the rank exceeds a certain threshold.
 	 */
-	public static class FilterRanks extends FilterFunction<Tuple3<Integer, String, Integer>> {
+	public static class FilterByRank extends FilterFunction<Tuple3<Integer, String, Integer>> {

 		private static final int RANKFILTER = 40;

@@ -199,7 +202,7 @@ public class WebLogAnalysis {
 	 * MapFunction that filters for records of the visits relation where the year
 	 * (from the date string) is equal to a certain value.
 	 */
-	public static class FilterVisits extends FilterFunction<Tuple2<String, String>> {
+	public static class FilterVisitsByDate extends FilterFunction<Tuple2<String, String>> {

 		private static final int YEARFILTER = 2007;


--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/util/WebLogData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/util/WebLogData.java
@@ -23,6 +23,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment;
 import eu.stratosphere.api.java.tuple.Tuple2;
 import eu.stratosphere.api.java.tuple.Tuple3;

+/**
+ * Provides the default data sets used for the Weblog Analysis example program.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
 public class WebLogData {

 	public static DataSet<Tuple2<String, String>> getDocumentDataSet(ExecutionEnvironment env) {

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/WordCount.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/WordCount.java
@@ -30,6 +30,10 @@ import eu.stratosphere.util.Collector;
 * The input is a plain text file with lines separated by newline characters.
 * 
 * <p>
+ * Usage: <code>WordCount &lt;text path&gt; &lt;result path&gt;</code><br>
+ * If no parameters are provided, the program is run with default data from {@link WordCountData}.
+ * 
+ * <p>
 * This example shows how to:
 * <ul>
 * <li>write a simple Stratosphere program.

--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/util/WordCountData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/util/WordCountData.java
@@ -17,6 +17,11 @@ package eu.stratosphere.example.java.wordcount.util;
 import eu.stratosphere.api.java.DataSet;
 import eu.stratosphere.api.java.ExecutionEnvironment;

+/**
+ * Provides the default data sets used for the WordCount example program.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
 public class WordCountData {

 	public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {