diff --git a/pom.xml b/pom.xml index a52433cee115fe3a98af441c2fbedb12e4b15f5d..2eedea55c56efa3b6c75996fc87666592acae146 100644 --- a/pom.xml +++ b/pom.xml @@ -47,6 +47,7 @@ stratosphere-tests stratosphere-test-utils stratosphere-addons + stratosphere-quickstart stratosphere-dist @@ -354,7 +355,7 @@ **/stratosphere-bin/conf/slaves - README.md + **/README.md tools/checkstyle.xml CHANGELOG **/*.creole diff --git a/stratosphere-dist/pom.xml b/stratosphere-dist/pom.xml index 1f54ac991625506f38b74654bc972a8b6c5a2576..09f1aa752a6ffda11d21ace4da99f45a122f025c 100644 --- a/stratosphere-dist/pom.xml +++ b/stratosphere-dist/pom.xml @@ -346,6 +346,7 @@ ${project.basedir}/../.git true + false false src/main/stratosphere-bin/.version.properties diff --git a/stratosphere-dist/src/main/assemblies/bin.xml b/stratosphere-dist/src/main/assemblies/bin.xml index c97b507e1c7383241674015a96d54684150dd09f..2d9f4786214679acf02bebb85895d8511affb7c8 100644 --- a/stratosphere-dist/src/main/assemblies/bin.xml +++ b/stratosphere-dist/src/main/assemblies/bin.xml @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the @@ -109,7 +109,8 @@ 0644 *.txt - *.properties + LICENSE* + NOTICE* diff --git a/stratosphere-examples/stratosphere-scala-examples/src/main/java/eu/stratosphere/examples/scala/Dummy.java b/stratosphere-examples/stratosphere-scala-examples/src/main/java/eu/stratosphere/examples/scala/Dummy.java index 4d758c0f2c763c6a7bc3dc9fb337e0a17f84c476..8cf40736132cf0b972979d3930aba7a77bddf6aa 100644 --- a/stratosphere-examples/stratosphere-scala-examples/src/main/java/eu/stratosphere/examples/scala/Dummy.java +++ b/stratosphere-examples/stratosphere-scala-examples/src/main/java/eu/stratosphere/examples/scala/Dummy.java @@ -1,5 +1,5 @@ /*********************************************************************************************************************** - * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) + * Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at diff --git a/stratosphere-quickstart/README.md b/stratosphere-quickstart/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b679a3cde752dbac7e7d9cc4981e21fcac7b80e1 --- /dev/null +++ b/stratosphere-quickstart/README.md @@ -0,0 +1,89 @@ +stratosphere-quickstart +======================= + +Two simple quickstart maven archetypes for Stratosphere. 
+ +# Stratosphere Stable + +### Create an empty Java Stratosphere Job Project +Maven is required. + +``` +curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart.sh | bash +``` + +### Create a simple Scala Stratosphere Job Project +Maven is required. + +``` +curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart-scala.sh | bash +``` + +When you import the Scala project into Eclipse, you will also need the following plugins: + +Eclipse 4.x: + * scala-ide: http://download.scala-ide.org/sdk/e38/scala210/stable/site + * m2eclipse-scala: http://alchim31.free.fr/m2e-scala/update-site + * build-helper-maven-plugin: https://repository.sonatype.org/content/repositories/forge-sites/m2e-extras/0.15.0/N/0.15.0.201206251206/ + +Eclipse 3.7: + * scala-ide: http://download.scala-ide.org/sdk/e37/scala210/stable/site + * m2eclipse-scala: http://alchim31.free.fr/m2e-scala/update-site + * build-helper-maven-plugin: https://repository.sonatype.org/content/repositories/forge-sites/m2e-extras/0.14.0/N/0.14.0.201109282148/ + + + +### Generate a Java project manually: +Use this command. It will ask you to name your newly created Job. +```bash +mvn archetype:generate \ + -DarchetypeGroupId=eu.stratosphere \ + -DarchetypeArtifactId=quickstart-java \ + -DarchetypeVersion=0.4 +``` + +### Generate a Scala project manually: +Use this command. It will ask you to name your newly created Job. +```bash +mvn archetype:generate \ + -DarchetypeGroupId=eu.stratosphere \ + -DarchetypeArtifactId=quickstart-scala \ + -DarchetypeVersion=0.4 +``` + + +# Stratosphere SNAPSHOT Archetypes + + + +# Repository Organization + +The quickstart bash scripts do not necessarily point to the most recent version in the code. Since the archetypes are versioned, the quickstarts usually differ by pointing to a specific version. + +The `quickstart.sh` script always points to the current stable release (e.g. v0.4, v0.5). +The `-SNAPSHOT` scripts point to the current snapshot version. 
+ +Java: +``` +curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart-SNAPSHOT.sh | bash +``` + +Manually: +``` +mvn archetype:generate \ + -DarchetypeGroupId=eu.stratosphere \ + -DarchetypeArtifactId=quickstart-java-SNAPSHOT \ + -DarchetypeVersion=0.5-SNAPSHOT \ + -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/ +``` + + +Scala: + +``` +curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart-scala-SNAPSHOT.sh | bash +``` + +[![Build Status](https://travis-ci.org/stratosphere/stratosphere-quickstart.png?branch=master)](https://travis-ci.org/stratosphere/stratosphere-quickstart) + +(Use `-DarchetypeCatalog=local` for local testing during archetype development) diff --git a/stratosphere-quickstart/pom.xml b/stratosphere-quickstart/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..28b43640de397f229c662d648ba47b746bb336a3 --- /dev/null +++ b/stratosphere-quickstart/pom.xml @@ -0,0 +1,129 @@ + + 4.0.0 + + + eu.stratosphere + stratosphere + 0.5-SNAPSHOT + .. 
+ + + stratosphere-quickstart + pom + + stratosphere-quickstart + http://github.com/stratosphere/stratosphere + Parent project for different quickstart archetypes for Stratosphere.eu + 2013 + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + stratosphere + + + + + https://github.com/stratosphere/stratosphere + scm:git:git@github.com:stratosphere/stratosphere.git + scm:git:git@github.com:stratosphere/stratosphere.git + + + + + stratosphere-team + The Stratosphere Team + stratosphere-dev@googlegroups.com + + + + + + + + + sonatype-nexus-snapshots + Sonatype Nexus Snapshots + http://oss.sonatype.org/content/repositories/snapshots + + + sonatype-nexus-staging + Nexus Release Repository + http://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + quickstart-java + quickstart-scala + + + + + release + + + + + org.apache.maven.plugins + maven-source-plugin + 2.2.1 + + + attach-sources + + jar + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + + attach-javadocs + + jar + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.4 + + + sign-artifacts + verify + + sign + + + + + + + + + org.apache.maven.plugins + maven-release-plugin + 2.1 + + forked-path + false + ${arguments} -Psonatype-oss-release + + + + + + + + \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-SNAPSHOT.sh b/stratosphere-quickstart/quickstart-SNAPSHOT.sh new file mode 100755 index 0000000000000000000000000000000000000000..82459b9470a44060ca186004977537857380b773 --- /dev/null +++ b/stratosphere-quickstart/quickstart-SNAPSHOT.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +######################################################################################################################## +# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +# the 
License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +######################################################################################################################## + +PACKAGE=quickstart + +mvn archetype:generate \ + -DarchetypeGroupId=eu.stratosphere \ + -DarchetypeArtifactId=quickstart-java-SNAPSHOT \ + -DarchetypeVersion=0.5-SNAPSHOT \ + -DgroupId=eu.stratosphere \ + -DartifactId=$PACKAGE \ + -Dversion=0.1 \ + -Dpackage=eu.stratosphere.quickstart \ + -DinteractiveMode=false \ + -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/ + +# +# Give some guidance +# +echo -e "\\n\\n" +echo -e "\\tA sample quickstart Stratosphere Job has been created." 
+echo -e "\\tSwitch into the directory using" +echo -e "\\t\\t cd $PACKAGE" +echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)" +echo -e "\\tBuild a jar inside the directory using" +echo -e "\\t\\t mvn clean package" +echo -e "\\tYou will find the runnable jar in $PACKAGE/target" +echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev" +echo -e "\\n\\n" + + +# Use this command if you want to specify the coordinates of your generated artifact +# in an interactive menu: +# +# mvn archetype:generate \ +# -DarchetypeGroupId=eu.stratosphere \ +# -DarchetypeArtifactId=quickstart-java \ +# -DarchetypeVersion=0.4-SNAPSHOT \ +# -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/ diff --git a/stratosphere-quickstart/quickstart-java/pom.xml b/stratosphere-quickstart/quickstart-java/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..2191840aa3773c0a5881ab8c04402ae1c52675f4 --- /dev/null +++ b/stratosphere-quickstart/quickstart-java/pom.xml @@ -0,0 +1,19 @@ + + 4.0.0 + + + UTF-8 + + + + eu.stratosphere + stratosphere-quickstart + 0.5-SNAPSHOT + .. 
+ + + quickstart-java + jar + + \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-java/src/main/java/eu/stratosphere/quickstart/Dummy.java b/stratosphere-quickstart/quickstart-java/src/main/java/eu/stratosphere/quickstart/Dummy.java new file mode 100644 index 0000000000000000000000000000000000000000..a6ab48860ccae23924db1a072d82f6440aef5e6f --- /dev/null +++ b/stratosphere-quickstart/quickstart-java/src/main/java/eu/stratosphere/quickstart/Dummy.java @@ -0,0 +1,22 @@ +/*********************************************************************************************************************** + * Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu) + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + **********************************************************************************************************************/ + +package eu.stratosphere.quickstart; + +/** + * This class solely exists to generate + * javadocs for the "quickstart-java" project. 
+ **/ +public class Dummy { + // +} \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-java/src/main/resources/META-INF/maven/archetype.xml b/stratosphere-quickstart/quickstart-java/src/main/resources/META-INF/maven/archetype.xml new file mode 100644 index 0000000000000000000000000000000000000000..0e150fcecf8295bdb71be0e869802846f677980e --- /dev/null +++ b/stratosphere-quickstart/quickstart-java/src/main/resources/META-INF/maven/archetype.xml @@ -0,0 +1,8 @@ + + stratosphere-quickstart + + src/main/java/Job.java + src/main/java/WordCountJob.java + + \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/pom.xml b/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..e5788be2b8ce3fcb209186c88f7d2d3662fc523c --- /dev/null +++ b/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/pom.xml @@ -0,0 +1,59 @@ + + 4.0.0 + + ${groupId} + ${artifactId} + ${version} + jar + + Your Job's Name + http://www.myorganization.org + + + UTF-8 + + + + + + eu.stratosphere + stratosphere-java + 0.5-SNAPSHOT + + + eu.stratosphere + stratosphere-clients + 0.5-SNAPSHOT + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + ${package}.Job + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.6 + 1.6 + + + + + diff --git a/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/src/main/java/Job.java b/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/src/main/java/Job.java new file mode 100644 index 0000000000000000000000000000000000000000..1512186422c5f15a6697b27351e3619bb1b1dc1b --- /dev/null +++ b/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/src/main/java/Job.java @@ -0,0 +1,62 @@ +package ${package}; + +import eu.stratosphere.api.common.Plan; +import 
eu.stratosphere.api.common.Program; +import eu.stratosphere.api.common.operators.FileDataSink; +import eu.stratosphere.api.common.operators.GenericDataSink; +import eu.stratosphere.api.java.DataSet; +import eu.stratosphere.api.java.ExecutionEnvironment; +import eu.stratosphere.api.java.aggregation.Aggregations; +import eu.stratosphere.api.java.functions.FlatMapFunction; +import eu.stratosphere.api.java.record.io.DelimitedOutputFormat; +import eu.stratosphere.api.java.tuple.Tuple2; +import eu.stratosphere.client.LocalExecutor; +import eu.stratosphere.util.Collector; + + + +/** + * Skeleton for a Stratosphere Job. + * + * For a full example of a Stratosphere Job, see the WordCountJob.java file in the + * same package/directory or have a look at the website. + * + * You can also generate a .jar file that you can submit on your Stratosphere + * cluster. + * Just type + * mvn clean package + * in the projects root directory. + * You will find the jar in + * target/stratosphere-quickstart-0.1-SNAPSHOT-Sample.jar + * + */ +@SuppressWarnings("serial") +public class Job { + + public static void main(String[] args) throws Exception { + // set up the execution environment + final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); + + + /** + * Here, you can start creating your execution plan for Stratosphere. + * + * Start with getting some data from the environment, like + * env.readTextFile(textPath); + * + * then, transform the resulting DataSet using operations + * like + * .filter() + * .flatMap() + * .join() + * .group() + * and many more. + * + * Run it! 
+ * + */ + + // execute program + env.execute(" Example"); + } +} \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/src/main/java/WordCountJob.java b/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/src/main/java/WordCountJob.java new file mode 100644 index 0000000000000000000000000000000000000000..cb7eb28202fb15f3904438aba7bb03089b797904 --- /dev/null +++ b/stratosphere-quickstart/quickstart-java/src/main/resources/archetype-resources/src/main/java/WordCountJob.java @@ -0,0 +1,131 @@ +package ${package}; + +import eu.stratosphere.api.java.DataSet; +import eu.stratosphere.api.java.ExecutionEnvironment; +import eu.stratosphere.api.java.aggregation.Aggregations; +import eu.stratosphere.api.java.functions.FlatMapFunction; +import eu.stratosphere.api.java.tuple.Tuple2; +import eu.stratosphere.util.Collector; + +/** + * Implements the "WordCount" program that computes a simple word occurrence histogram + * over text files. + * + *

+ * The input is a plain text file with lines separated by newline characters. + * + *

+ * This example shows how to: + *

+ * + */ +@SuppressWarnings("serial") +public class WordCountJob { + + // ************************************************************************* + // PROGRAM + // ************************************************************************* + + public static void main(String[] args) throws Exception { + + parseParameters(args); + + // set up the execution environment + final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); + + // get input data + DataSet<String> text = getTextDataSet(env); + + DataSet<Tuple2<String, Integer>> counts = + // split up the lines in pairs (2-tuples) containing: (word,1) + text.flatMap(new Tokenizer()) + // group by the tuple field "0" and sum up tuple field "1" + .groupBy(0) + .aggregate(Aggregations.SUM, 1); + + // emit result + if(fileOutput) { + counts.writeAsCsv(outputPath, "\n", " "); + } else { + counts.print(); + } + + // execute program + env.execute("WordCount Example"); + } + + // ************************************************************************* + // USER FUNCTIONS + // ************************************************************************* + + /** + * Implements the string tokenizer that splits sentences into words as a user-defined + * FlatMapFunction. The function takes a line (String) and splits it into + * multiple pairs in the form of "(word,1)" (Tuple2<String, Integer>). 
+ */ + public static final class Tokenizer extends FlatMapFunction<String, Tuple2<String, Integer>> { + + @Override + public void flatMap(String value, Collector<Tuple2<String, Integer>> out) { + // normalize and split the line + String[] tokens = value.toLowerCase().split("\\W+"); + + // emit the pairs + for (String token : tokens) { + if (token.length() > 0) { + out.collect(new Tuple2<String, Integer>(token, 1)); + } + } + } + } + + // ************************************************************************* + // UTIL METHODS + // ************************************************************************* + + private static boolean fileOutput = false; + private static String textPath; + private static String outputPath; + + private static void parseParameters(String[] args) { + + if(args.length > 0) { + // parse input arguments + fileOutput = true; + if(args.length == 2) { + textPath = args[0]; + outputPath = args[1]; + } else { + System.err.println("Usage: WordCount <text path> <result path>"); + System.exit(1); + } + } else { + System.out.println("Executing WordCount example with built-in default data."); + System.out.println(" Provide parameters to read input data from a file."); + System.out.println(" Usage: WordCount <text path> <result path>"); + } + } + + private static DataSet<String> getTextDataSet(ExecutionEnvironment env) { + if(fileOutput) { + // read the text file from given input path + return env.readTextFile(textPath); + } else { + // get default test text data + return getDefaultTextLineDataSet(env); + } + } + + public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) { + return env.fromElements( + "To be, or not to be,--that is the question:--", + "Whether 'tis nobler in the mind to suffer", + "The slings and arrows of outrageous fortune", + "Or to take arms against a sea of troubles," + ); + } +} \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-scala-SNAPSHOT.sh b/stratosphere-quickstart/quickstart-scala-SNAPSHOT.sh new file mode 100755 index 0000000000000000000000000000000000000000..1716973d424b1040815bc461fc8d661d255151db --- 
/dev/null +++ b/stratosphere-quickstart/quickstart-scala-SNAPSHOT.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +######################################################################################################################## +# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +######################################################################################################################## + +PACKAGE=quickstart + +mvn archetype:generate \ + -DarchetypeGroupId=eu.stratosphere \ + -DarchetypeArtifactId=quickstart-scala-SNAPSHOT \ + -DarchetypeVersion=0.5-SNAPSHOT \ + -DgroupId=eu.stratosphere \ + -DartifactId=$PACKAGE \ + -Dversion=0.1 \ + -Dpackage=eu.stratosphere.quickstart \ + -DinteractiveMode=false \ + -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/ + +# +# Give some guidance +# +echo -e "\\n\\n" +echo -e "\\tA sample quickstart Stratosphere Job has been created. 
+echo -e "\\tSwitch into the directory using" +echo -e "\\t\\t cd $PACKAGE" +echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)" +echo -e "\\tBuild a jar inside the directory using" +echo -e "\\t\\t mvn clean package" +echo -e "\\tYou will find the runnable jar in $PACKAGE/target" +echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev" +echo -e "\\n\\n" + + +# Use this command if you want to specify the coordinates of your generated artifact +# in an interactive menu: +# +# mvn archetype:generate \ +# -DarchetypeGroupId=eu.stratosphere \ +# -DarchetypeArtifactId=quickstart-scala \ +# -DarchetypeVersion=0.4-SNAPSHOT \ +# -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/ diff --git a/stratosphere-quickstart/quickstart-scala.sh b/stratosphere-quickstart/quickstart-scala.sh new file mode 100755 index 0000000000000000000000000000000000000000..cc784877bb1671baff13278e6c0adee7f7b72482 --- /dev/null +++ b/stratosphere-quickstart/quickstart-scala.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +######################################################################################################################## +# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
+######################################################################################################################## + +PACKAGE=quickstart + +mvn archetype:generate \ + -DarchetypeGroupId=eu.stratosphere \ + -DarchetypeArtifactId=quickstart-scala \ + -DarchetypeVersion=0.4 \ + -DgroupId=eu.stratosphere \ + -DartifactId=$PACKAGE \ + -Dversion=0.1 \ + -Dpackage=eu.stratosphere.quickstart \ + -DinteractiveMode=false + +# +# Give some guidance +# +echo -e "\\n\\n" +echo -e "\\tA sample quickstart Stratosphere Job has been created." +echo -e "\\tSwitch into the directory using" +echo -e "\\t\\t cd $PACKAGE" +echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)" +echo -e "\\tBuild a jar inside the directory using" +echo -e "\\t\\t mvn clean package" +echo -e "\\tYou will find the runnable jar in $PACKAGE/target" +echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev" +echo -e "\\n\\n" + + +# Use this command if you want to specify the coordinates of your generated artifact +# in an interactive menu: +# +# mvn archetype:generate \ +# -DarchetypeGroupId=eu.stratosphere \ +# -DarchetypeArtifactId=quickstart-scala \ +# -DarchetypeVersion=0.4-SNAPSHOT \ +# -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/ diff --git a/stratosphere-quickstart/quickstart-scala/pom.xml b/stratosphere-quickstart/quickstart-scala/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..19daf8b75acaf88334d53ae02930387a5ec4c0d3 --- /dev/null +++ b/stratosphere-quickstart/quickstart-scala/pom.xml @@ -0,0 +1,19 @@ + + 4.0.0 + + + UTF-8 + + + + eu.stratosphere + stratosphere-quickstart + 0.5-SNAPSHOT + .. 
+ + + quickstart-scala + jar + + diff --git a/stratosphere-quickstart/quickstart-scala/src/main/java/eu/stratosphere/quickstart/Dummy.java b/stratosphere-quickstart/quickstart-scala/src/main/java/eu/stratosphere/quickstart/Dummy.java new file mode 100644 index 0000000000000000000000000000000000000000..a6ab48860ccae23924db1a072d82f6440aef5e6f --- /dev/null +++ b/stratosphere-quickstart/quickstart-scala/src/main/java/eu/stratosphere/quickstart/Dummy.java @@ -0,0 +1,22 @@ +/*********************************************************************************************************************** + * Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu) + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + **********************************************************************************************************************/ + +package eu.stratosphere.quickstart; + +/** + * This class solely exists to generate + * javadocs for the "quickstart-scala" project. 
+ **/ +public class Dummy { + // +} \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-scala/src/main/resources/META-INF/maven/archetype-metadata.xml b/stratosphere-quickstart/quickstart-scala/src/main/resources/META-INF/maven/archetype-metadata.xml new file mode 100644 index 0000000000000000000000000000000000000000..1fb0fd8f19a7e23ed24782bcde480a1774398bb1 --- /dev/null +++ b/stratosphere-quickstart/quickstart-scala/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -0,0 +1,13 @@ + + + + + src/main/scala + + **/*.scala + + + + \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-scala/src/main/resources/META-INF/maven/archetype.xml b/stratosphere-quickstart/quickstart-scala/src/main/resources/META-INF/maven/archetype.xml new file mode 100644 index 0000000000000000000000000000000000000000..45e7fd255f2e08bbf91e6e04ce8baa59757aef10 --- /dev/null +++ b/stratosphere-quickstart/quickstart-scala/src/main/resources/META-INF/maven/archetype.xml @@ -0,0 +1,7 @@ + + stratosphere-quickstart-scala + + src/main/scala/Job.scala + + \ No newline at end of file diff --git a/stratosphere-quickstart/quickstart-scala/src/main/resources/archetype-resources/pom.xml b/stratosphere-quickstart/quickstart-scala/src/main/resources/archetype-resources/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..4721a810709fb0a35033bc4af6e76d4517fada0c --- /dev/null +++ b/stratosphere-quickstart/quickstart-scala/src/main/resources/archetype-resources/pom.xml @@ -0,0 +1,140 @@ + + 4.0.0 + + ${groupId} + ${artifactId} + ${version} + jar + + Your Job's Name + http://www.myorganization.org + + + + UTF-8 + + + + + + eu.stratosphere + stratosphere-scala + 0.5-SNAPSHOT + + + eu.stratosphere + stratosphere-clients + 0.5-SNAPSHOT + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + ${package}.Job + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.6 + 1.6 + + + + net.alchim31.maven + 
scala-maven-plugin + 3.1.4 + + + + compile + testCompile + + + + + + + + org.apache.maven.plugins + maven-eclipse-plugin + 2.8 + + true + + org.scala-ide.sdt.core.scalanature + org.eclipse.jdt.core.javanature + + + org.scala-ide.sdt.core.scalabuilder + + + org.scala-ide.sdt.launching.SCALA_CONTAINER + org.eclipse.jdt.launching.JRE_CONTAINER + + + org.scala-lang:scala-library + org.scala-lang:scala-compiler + + + **/*.scala + **/*.java + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 1.7 + + + + add-source + generate-sources + + add-source + + + + src/main/scala + + + + + + add-test-source + generate-test-sources + + add-test-source + + + + src/test/scala + + + + + + + + diff --git a/stratosphere-quickstart/quickstart-scala/src/main/resources/archetype-resources/src/main/scala/Job.scala b/stratosphere-quickstart/quickstart-scala/src/main/resources/archetype-resources/src/main/scala/Job.scala new file mode 100644 index 0000000000000000000000000000000000000000..89a429fd23b8e05dbb4bd97c3680fcb2c853990f --- /dev/null +++ b/stratosphere-quickstart/quickstart-scala/src/main/resources/archetype-resources/src/main/scala/Job.scala @@ -0,0 +1,91 @@ +package ${package}; + + +import eu.stratosphere.api.common.Program +import eu.stratosphere.api.common.ProgramDescription +import eu.stratosphere.client.LocalExecutor +import eu.stratosphere.api.scala.TextFile +import eu.stratosphere.api.scala.ScalaPlan +import eu.stratosphere.api.scala._ +import eu.stratosphere.api.scala.operators._ +import eu.stratosphere.client.RemoteExecutor + +// You can run this locally using: +// mvn exec:exec -Dexec.executable="java" -Dexec.args="-cp %classpath ${package}.RunJobLocal 2 file:///some/path file:///some/other/path" +object RunJobLocal { + def main(args: Array[String]) { + val job = new Job + if (args.size < 3) { + println(job.getDescription) + return + } + val plan = job.getScalaPlan(args(0).toInt, args(1), args(2)) + LocalExecutor.execute(plan) + System.exit(0) + } +} + +// You 
can run this on a cluster using: +// mvn exec:exec -Dexec.executable="java" -Dexec.args="-cp %classpath ${package}.RunJobRemote 2 file:///some/path file:///some/other/path" +object RunJobRemote { + def main(args: Array[String]) { + val job = new Job + if (args.size < 3) { + println(job.getDescription) + return + } + val plan = job.getScalaPlan(args(0).toInt, args(1), args(2)) + // This will create an executor to run the plan on a cluster. We assume + // that the JobManager is running on the local machine on the default + // port. Change this according to your configuration. + // You will also need to change the name of the jar if you change the + // project name and/or version. Before running this you also need + // to run "mvn package" to create the jar. + val ex = new RemoteExecutor("localhost", 6123, "target/stratosphere-project-0.1-SNAPSHOT.jar"); + ex.executePlan(plan); + } +} + + +/** + * This is an outline for a Stratosphere Scala job. It is actually the WordCount + * example from the Stratosphere distribution. + * + * You can run it out of your IDE using the main() method of RunJobLocal. + * This will use the LocalExecutor to start a little Stratosphere instance + * out of your IDE. + * + * You can also generate a .jar file that you can submit on your Stratosphere + * cluster. + * Just type + * mvn clean package + * in the project's root directory. 
+ * You will find the jar in + * target/stratosphere-quickstart-0.1-SNAPSHOT-Sample.jar + * + */ +class Job extends Program with ProgramDescription with Serializable { + override def getDescription() = { + "Parameters: [numSubTasks] [input] [output]" + } + override def getPlan(args: String*) = { + getScalaPlan(args(0).toInt, args(1), args(2)) + } + + def formatOutput = (word: String, count: Int) => "%s %d".format(word, count) + + def getScalaPlan(numSubTasks: Int, textInput: String, wordsOutput: String) = { + val input = TextFile(textInput) + + val words = input flatMap { _.toLowerCase().split("""\W+""") filter { _ != "" } map { (_, 1) } } + val counts = words groupBy { case (word, _) => word } reduce { (w1, w2) => (w1._1, w1._2 + w2._2) } + + counts neglects { case (word, _) => word } + counts preserves({ case (word, _) => word }, { case (word, _) => word }) + val output = counts.write(wordsOutput, DelimitedOutputFormat(formatOutput.tupled)) + + val plan = new ScalaPlan(Seq(output), "Word Count (immutable)") + plan.setDefaultParallelism(numSubTasks) + plan + } +} diff --git a/stratosphere-quickstart/quickstart.sh b/stratosphere-quickstart/quickstart.sh new file mode 100755 index 0000000000000000000000000000000000000000..0455df5e505bee06e4fa9eda9bd9b02c19830e46 --- /dev/null +++ b/stratosphere-quickstart/quickstart.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +######################################################################################################################## +# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +######################################################################################################################## + +PACKAGE=quickstart + +mvn archetype:generate \ + -DarchetypeGroupId=eu.stratosphere \ + -DarchetypeArtifactId=quickstart-java \ + -DarchetypeVersion=0.4 \ + -DgroupId=eu.stratosphere \ + -DartifactId=$PACKAGE \ + -Dversion=0.1 \ + -Dpackage=eu.stratosphere.quickstart \ + -DinteractiveMode=false + +# +# Give some guidance +# +echo -e "\\n\\n" +echo -e "\\tA sample quickstart Stratosphere Job has been created." +echo -e "\\tSwitch into the directory using" +echo -e "\\t\\t cd $PACKAGE" +echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)" +echo -e "\\tBuild a jar inside the directory using" +echo -e "\\t\\t mvn clean package" +echo -e "\\tYou will find the runnable jar in $PACKAGE/target" +echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev" +echo -e "\\n\\n" + + +# Use this command if you want to specify the coordinates of your generated artifact +# in an interactive menu: +# +# mvn archetype:generate \ +# -DarchetypeGroupId=eu.stratosphere \ +# -DarchetypeArtifactId=quickstart-java \ +# -DarchetypeVersion=0.4 \ +# -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/ diff --git a/stratosphere-runtime/pom.xml b/stratosphere-runtime/pom.xml index dbd52c99347c3fe0484072002327fab372268b20..6bc96a3f47f06e9005a60da6c996c642f55e84dd 100644 --- a/stratosphere-runtime/pom.xml +++ b/stratosphere-runtime/pom.xml @@ -112,6 
+112,7 @@ ${project.basedir}/../../.git true false + false src/main/resources/.version.properties