提交 f0d39267 编写于 作者: R Robert Metzger

Added links to programming guide / examples

remove reading from files from WC
上级 65602201
package ${package};
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.operators.FileDataSink;
import eu.stratosphere.api.common.operators.GenericDataSink;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.aggregation.Aggregations;
import eu.stratosphere.api.java.functions.FlatMapFunction;
import eu.stratosphere.api.java.record.io.DelimitedOutputFormat;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.client.LocalExecutor;
import eu.stratosphere.util.Collector;
/**
* Skeleton for a Stratosphere Job.
......@@ -30,7 +17,6 @@ import eu.stratosphere.util.Collector;
* target/stratosphere-quickstart-0.1-SNAPSHOT-Sample.jar
*
*/
@SuppressWarnings("serial")
public class Job {
public static void main(String[] args) throws Exception {
......@@ -51,12 +37,17 @@ public class Job {
* .join()
* .group()
* and many more.
* Have a look at the programming guide for the Java API:
*
* http://stratosphere.eu/docs/0.5/programming_guides/java.html
*
* and the examples
*
* Run it!
* http://stratosphere.eu/docs/0.5/programming_guides/examples.html
*
*/
// execute program
env.execute(" Example");
env.execute("Stratosphere Java API Skeleton");
}
}
\ No newline at end of file
......@@ -9,10 +9,7 @@ import eu.stratosphere.util.Collector;
/**
* Implements the "WordCount" program that computes a simple word occurrence histogram
* over text files.
*
* <p>
* The input is a plain text file with lines separated by newline characters.
* over some sample data
*
* <p>
* This example shows how to:
......@@ -26,48 +23,47 @@ import eu.stratosphere.util.Collector;
@SuppressWarnings("serial")
public class WordCountJob {
// *************************************************************************
// PROGRAM
// *************************************************************************
//
// Program.
//
public static void main(String[] args) throws Exception {
parseParameters(args);
// set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// get input data
DataSet<String> text = getTextDataSet(env);
DataSet<String> text = env.fromElements(
"To be, or not to be,--that is the question:--",
"Whether 'tis nobler in the mind to suffer",
"The slings and arrows of outrageous fortune",
"Or to take arms against a sea of troubles,"
);
DataSet<Tuple2<String, Integer>> counts =
// split up the lines in pairs (2-tuples) containing: (word,1)
text.flatMap(new Tokenizer())
text.flatMap(new LineSplitter())
// group by the tuple field "0" and sum up tuple field "1"
.groupBy(0)
.aggregate(Aggregations.SUM, 1);
// emit result
if(fileOutput) {
counts.writeAsCsv(outputPath, "\n", " ");
} else {
counts.print();
}
counts.print();
// execute program
env.execute("WordCount Example");
}
// *************************************************************************
// USER FUNCTIONS
// *************************************************************************
//
// User Functions
//
/**
* Implements the string tokenizer that splits sentences into words as a user-defined
* FlatMapFunction. The function takes a line (String) and splits it into
* multiple pairs in the form of "(word,1)" (Tuple2<String, Integer>).
*/
public static final class Tokenizer extends FlatMapFunction<String, Tuple2<String, Integer>> {
public static final class LineSplitter extends FlatMapFunction<String, Tuple2<String, Integer>> {
@Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
......@@ -82,50 +78,4 @@ public class WordCountJob {
}
}
}
// *************************************************************************
// UTIL METHODS
// *************************************************************************
private static boolean fileOutput = false;
private static String textPath;
private static String outputPath;
/**
 * Reads the command-line arguments and records them in the static
 * configuration fields of this class.
 *
 * <p>With no arguments the fields are left untouched and a short hint about
 * the expected parameters is printed to standard out. With any arguments
 * {@code fileOutput} is switched on; exactly two arguments are then required
 * (text path, result path), otherwise a usage line is printed to standard
 * error and the JVM exits with status 1.
 *
 * @param args the raw program arguments
 */
private static void parseParameters(String[] args) {
    if (args.length == 0) {
        // nothing supplied: explain how files could be passed in
        System.out.println("Executing WordCount example with built-in default data.");
        System.out.println(" Provide parameters to read input data from a file.");
        System.out.println(" Usage: WordCount <text path> <result path>");
        return;
    }

    // any argument at all switches the job to file mode
    fileOutput = true;

    if (args.length != 2) {
        System.err.println("Usage: WordCount <text path> <result path>");
        System.exit(1);
        return;
    }

    textPath = args[0];
    outputPath = args[1];
}
/**
 * Chooses the input of the job depending on the {@code fileOutput} flag.
 *
 * @param env the execution environment used to create the data set
 * @return the lines read from {@code textPath} when {@code fileOutput} is
 *         set, otherwise the built-in default text lines
 */
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        // no file arguments were given: use the default test text data
        return getDefaultTextLineDataSet(env);
    }
    // file mode: read the text file from the given input path
    return env.readTextFile(textPath);
}
/**
 * Creates a data set from four fixed lines of sample text.
 *
 * @param env the execution environment used to create the data set
 * @return a data set with one element per sample line
 */
public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
    final String[] sampleLines = {
        "To be, or not to be,--that is the question:--",
        "Whether 'tis nobler in the mind to suffer",
        "The slings and arrows of outrageous fortune",
        "Or to take arms against a sea of troubles,"
    };
    // the array is handed to the varargs factory as its element list
    return env.fromElements(sampleLines);
}
}
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册