tuple data types
diff --git a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery3.java b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery3.java
index 7b86c3bd95ee5937322b123cbbf0b39e03ac78a3..d52722d6e2163fac64d51d899c45ccd28738ef19 100644
--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery3.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/TPCHQuery3.java
@@ -69,6 +69,9 @@ import eu.stratosphere.api.java.tuple.Tuple5;
*
* Input files are plain text CSV files using the pipe character ('|') as field separator
* as generated by the TPC-H data generator which is available at http://www.tpc.org/tpch/.
+ *
+ *
+ * Usage: TPCHQuery3 <lineitem-csv path> <customer-csv path> <orders-csv path> <result path>
*
*
* This example shows how to use:
@@ -157,7 +160,7 @@ public class TPCHQuery3 {
}
});
- // Join the last join result with LineItems
+ // Join the last join result with Lineitems
DataSet joined =
customerWithOrders.join(li)
.where(4)
diff --git a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/WebLogAnalysis.java b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/WebLogAnalysis.java
index 562f0f388c39142e9d8c72281e15ceece6e67b42..ebf1cf68a3fff6b42fe9a505aa2bc1800f2cc26c 100644
--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/WebLogAnalysis.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/WebLogAnalysis.java
@@ -74,6 +74,10 @@ import eu.stratosphere.util.Collector;
*
*
*
+ * Usage: WebLogAnalysis <documents path> <ranks path> <visits path> <result path>
+ * If no parameters are provided, the program is run with default data from {@link WebLogData}.
+ *
+ *
* This example shows how to use:
*
* - tuple data types
@@ -100,29 +104,28 @@ public class WebLogAnalysis {
DataSet> ranks = getRanksDataSet(env);
DataSet> visits = getVisitsDataSet(env);
- // Create DataSet for filtering the entries from the documents relation
+ // Retain documents with keywords
DataSet> filterDocs = documents
- .filter(new FilterDocs())
+ .filter(new FilterDocByKeyWords())
.project(0).types(String.class);
- // Create DataSet for filtering the entries from the ranks relation
+ // Filter ranks by minimum rank
DataSet> filterRanks = ranks
- .filter(new FilterRanks());
+ .filter(new FilterByRank());
- // Create DataSet for filtering the entries from the visits relation
+ // Filter visits by visit date
DataSet> filterVisits = visits
- .filter(new FilterVisits())
+ .filter(new FilterVisitsByDate())
.project(0).types(String.class);
- // Create DataSet to join the filtered documents and ranks relation
+ // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
DataSet> joinDocsRanks =
filterDocs.join(filterRanks)
.where(0).equalTo(1)
.projectSecond(0,1,2)
.types(Integer.class, String.class, Integer.class);
- // Create DataSet to realize a anti join between the joined
- // documents and ranks relation and the filtered visits relation
+ // Anti-join URLs with visits, i.e., retain all URLs which have NOT been visited during a certain time period
DataSet> result =
joinDocsRanks.coGroup(filterVisits)
.where(1).equalTo(0)
@@ -148,7 +151,7 @@ public class WebLogAnalysis {
* MapFunction that filters for documents that contain a certain set of
* keywords.
*/
- public static class FilterDocs extends FilterFunction> {
+ public static class FilterDocByKeyWords extends FilterFunction> {
private static final String[] KEYWORDS = { " editors ", " oscillations " };
@@ -176,7 +179,7 @@ public class WebLogAnalysis {
/**
* MapFunction that filters for records where the rank exceeds a certain threshold.
*/
- public static class FilterRanks extends FilterFunction> {
+ public static class FilterByRank extends FilterFunction> {
private static final int RANKFILTER = 40;
@@ -199,7 +202,7 @@ public class WebLogAnalysis {
* MapFunction that filters for records of the visits relation where the year
* (from the date string) is equal to a certain value.
*/
- public static class FilterVisits extends FilterFunction> {
+ public static class FilterVisitsByDate extends FilterFunction> {
private static final int YEARFILTER = 2007;
diff --git a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/util/WebLogData.java b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/util/WebLogData.java
index 00045967cd4130c5dba7466c7f3ddeb8408e7b40..803731f1d74ae169bfb76fc1cdbd37de8b67c6d5 100644
--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/util/WebLogData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/relational/util/WebLogData.java
@@ -23,6 +23,11 @@ import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.tuple.Tuple3;
+/**
+ * Provides the default data sets used for the WebLogAnalysis example program.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
public class WebLogData {
public static DataSet> getDocumentDataSet(ExecutionEnvironment env) {
diff --git a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/WordCount.java b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/WordCount.java
index 6db69863321df598fe8614876c54e6ca905fa960..f8ccfbe3ab974fe621d3576d6c49357cf6880a97 100644
--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/WordCount.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/WordCount.java
@@ -30,6 +30,10 @@ import eu.stratosphere.util.Collector;
* The input is a plain text file with lines separated by newline characters.
*
*
+ * Usage: WordCount <text path> <result path>
+ * If no parameters are provided, the program is run with default data from {@link WordCountData}.
+ *
+ *
* This example shows how to:
*
* - write a simple Stratosphere program.
diff --git a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/util/WordCountData.java b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/util/WordCountData.java
index 12535bd2d75084c6391241e529122b197371af1a..bad13a5bf99feb9cf0d00d130ad578d010ca00ef 100644
--- a/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/util/WordCountData.java
+++ b/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/wordcount/util/WordCountData.java
@@ -17,6 +17,11 @@ package eu.stratosphere.example.java.wordcount.util;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
+/**
+ * Provides the default data sets used for the WordCount example program.
+ * The default data sets are used if no parameters are given to the program.
+ *
+ */
public class WordCountData {
public static DataSet getDefaultTextLineDataSet(ExecutionEnvironment env) {