提交 65602201 编写于 作者: R Robert Metzger

Fix binary distribution: add LICENSE and NOTICE

source should now build without the presence of a .git directory
included the quickstart-archetypes into the main project.
上级 468bcb0f
......@@ -47,6 +47,7 @@
<module>stratosphere-tests</module>
<module>stratosphere-test-utils</module>
<module>stratosphere-addons</module>
<module>stratosphere-quickstart</module>
<module>stratosphere-dist</module>
</modules>
......@@ -354,7 +355,7 @@
<!-- Configuration Files. -->
<exclude>**/stratosphere-bin/conf/slaves</exclude>
<!-- Administrative files in the main trunk. -->
<exclude>README.md</exclude>
<exclude>**/README.md</exclude>
<exclude>tools/checkstyle.xml</exclude>
<exclude>CHANGELOG</exclude>
<exclude>**/*.creole</exclude>
......
......@@ -346,6 +346,7 @@
<configuration>
<dotGitDirectory>${project.basedir}/../.git</dotGitDirectory>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<failOnNoGitDirectory>false</failOnNoGitDirectory>
<skipPoms>false</skipPoms>
<generateGitPropertiesFilename>src/main/stratosphere-bin/.version.properties</generateGitPropertiesFilename>
</configuration>
......
......@@ -4,7 +4,7 @@
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
......@@ -109,7 +109,8 @@
<fileMode>0644</fileMode>
<includes>
<include>*.txt</include>
<include>*.properties</include>
<include>LICENSE*</include>
<include>NOTICE*</include>
</includes>
</fileSet>
......
/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
* Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
......
stratosphere-quickstart
=======================
Two simple quickstart maven archetypes for Stratosphere.
# Stratosphere Stable
### Create an empty Java Stratosphere Job Project
Maven is required
```
curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart.sh | bash
```
### Create a simple Scala Stratosphere Job Project
Maven is required
```
curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart-scala.sh | bash
```
When you import the Scala project into Eclipse, you will also need the following plugins:
Eclipse 4.x:
* scala-ide: http://download.scala-ide.org/sdk/e38/scala210/stable/site
* m2eclipse-scala: http://alchim31.free.fr/m2e-scala/update-site
* build-helper-maven-plugin: https://repository.sonatype.org/content/repositories/forge-sites/m2e-extras/0.15.0/N/0.15.0.201206251206/
Eclipse 3.7:
* scala-ide: http://download.scala-ide.org/sdk/e37/scala210/stable/site
* m2eclipse-scala: http://alchim31.free.fr/m2e-scala/update-site
* build-helper-maven-plugin: https://repository.sonatype.org/content/repositories/forge-sites/m2e-extras/0.14.0/N/0.14.0.201109282148/
### Generate project manually:
Use the following command. It will ask you to name your newly created job.
```bash
mvn archetype:generate \
-DarchetypeGroupId=eu.stratosphere \
-DarchetypeArtifactId=quickstart-java \
-DarchetypeVersion=0.4
```
### Generate Scala project manually:
Use the following command. It will ask you to name your newly created job.
```bash
mvn archetype:generate \
-DarchetypeGroupId=eu.stratosphere \
-DarchetypeArtifactId=quickstart-scala \
-DarchetypeVersion=0.4
```
# Stratosphere SNAPSHOT Archetypes
# Repository Organization
The quickstart bash scripts do not necessarily point to the most recent version in the code. Since the archetypes are versioned, the quickstarts usually differ by pointing to a specific version.
The `quickstart.sh` script always points to the current stable release (v0.4, v0.5)
Scripts with the `-SNAPSHOT` suffix point to the current snapshot version.
Java:
```
curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart-SNAPSHOT.sh | bash
```
Manually:
```
mvn archetype:generate \
-DarchetypeGroupId=eu.stratosphere \
-DarchetypeArtifactId=quickstart-java-SNAPSHOT \
-DarchetypeVersion=0.5-SNAPSHOT \
-DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/
```
Scala:
```
curl https://raw.github.com/stratosphere/stratosphere-quickstart/master/quickstart-scala-SNAPSHOT.sh | bash
```
[![Build Status](https://travis-ci.org/stratosphere/stratosphere-quickstart.png?branch=master)](https://travis-ci.org/stratosphere/stratosphere-quickstart)
(Use `-DarchetypeCatalog=local` for local testing during archetype development)
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>eu.stratosphere</groupId>
    <artifactId>stratosphere</artifactId>
    <version>0.5-SNAPSHOT</version>
    <relativePath>..</relativePath>
  </parent>

  <!-- Aggregator (packaging "pom") for the quickstart archetype modules listed under <modules>. -->
  <artifactId>stratosphere-quickstart</artifactId>
  <packaging>pom</packaging>

  <name>stratosphere-quickstart</name>
  <url>http://github.com/stratosphere/stratosphere</url>
  <description>Parent project for different quickstart archetypes for Stratosphere.eu</description>
  <inceptionYear>2013</inceptionYear>

  <licenses>
    <license>
      <name>The Apache Software License, Version 2.0</name>
      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
      <distribution>stratosphere</distribution>
    </license>
  </licenses>

  <scm>
    <url>https://github.com/stratosphere/stratosphere</url>
    <connection>scm:git:git@github.com:stratosphere/stratosphere.git</connection>
    <developerConnection>scm:git:git@github.com:stratosphere/stratosphere.git</developerConnection>
  </scm>

  <developers>
    <developer>
      <id>stratosphere-team</id>
      <name>The Stratosphere Team</name>
      <email>stratosphere-dev@googlegroups.com</email>
    </developer>
  </developers>

  <!-- See http://www.imixs.org/jee/archetype/build.html -->
  <!-- Distribution management: oss.sonatype.org.
       The deployment endpoints use HTTPS so that uploads (and the credentials
       sent with them) do not travel over an unencrypted connection. -->
  <distributionManagement>
    <snapshotRepository>
      <id>sonatype-nexus-snapshots</id>
      <name>Sonatype Nexus Snapshots</name>
      <url>https://oss.sonatype.org/content/repositories/snapshots</url>
    </snapshotRepository>
    <repository>
      <id>sonatype-nexus-staging</id>
      <name>Nexus Release Repository</name>
      <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
    </repository>
  </distributionManagement>

  <modules>
    <module>quickstart-java</module>
    <module>quickstart-scala</module>
  </modules>

  <profiles>
    <!-- Activated explicitly for releases: attaches sources and javadocs and signs the artifacts. -->
    <profile>
      <id>release</id>
      <build>
        <plugins>
          <!-- source attachment -->
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-source-plugin</artifactId>
            <version>2.2.1</version>
            <executions>
              <execution>
                <id>attach-sources</id>
                <goals>
                  <goal>jar</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
          <!-- Javadocs -->
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-javadoc-plugin</artifactId>
            <version>2.9.1</version>
            <executions>
              <execution>
                <id>attach-javadocs</id>
                <goals>
                  <goal>jar</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
          <!-- signing (bound to the verify phase) -->
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-gpg-plugin</artifactId>
            <version>1.4</version>
            <executions>
              <execution>
                <id>sign-artifacts</id>
                <phase>verify</phase>
                <goals>
                  <goal>sign</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
        </plugins>
        <pluginManagement>
          <plugins>
            <plugin>
              <groupId>org.apache.maven.plugins</groupId>
              <artifactId>maven-release-plugin</artifactId>
              <version>2.1</version>
              <configuration>
                <mavenExecutorId>forked-path</mavenExecutorId>
                <useReleaseProfile>false</useReleaseProfile>
                <arguments>${arguments} -Psonatype-oss-release</arguments>
              </configuration>
            </plugin>
          </plugins>
        </pluginManagement>
      </build>
    </profile>
  </profiles>
</project>
\ No newline at end of file
#!/usr/bin/env bash

########################################################################################################################
# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
########################################################################################################################

# Generates a sample Java job project from the SNAPSHOT archetype, non-interactively,
# into a directory named after $PACKAGE, and then prints next-step guidance.

PACKAGE=quickstart

mvn archetype:generate \
  -DarchetypeGroupId=eu.stratosphere \
  -DarchetypeArtifactId=quickstart-java-SNAPSHOT \
  -DarchetypeVersion=0.5-SNAPSHOT \
  -DgroupId=eu.stratosphere \
  -DartifactId="$PACKAGE" \
  -Dversion=0.1 \
  -Dpackage=eu.stratosphere.quickstart \
  -DinteractiveMode=false \
  -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/

# Do not claim success below if Maven failed to generate the project.
status=$?
if [ "$status" -ne 0 ]; then
  echo "Project generation failed (mvn exited with status $status)." >&2
  exit "$status"
fi

#
# Give some guidance
#
echo -e "\\n\\n"
echo -e "\\tA sample quickstart Stratosphere Job has been created."
echo -e "\\tSwitch into the directory using"
echo -e "\\t\\t cd $PACKAGE"
echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)"
echo -e "\\tBuild a jar inside the directory using"
echo -e "\\t\\t mvn clean package"
echo -e "\\tYou will find the runnable jar in $PACKAGE/target"
echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev"
echo -e "\\n\\n"

# Use this command if you want to specify the coordinates of your generated artifact
# in an interactive menu (same archetype coordinates as the command above):
#
# mvn archetype:generate \
#   -DarchetypeGroupId=eu.stratosphere \
#   -DarchetypeArtifactId=quickstart-java-SNAPSHOT \
#   -DarchetypeVersion=0.5-SNAPSHOT \
#   -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <!-- Source files of this module are read and written as UTF-8. -->
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <!-- Child of the stratosphere-quickstart aggregator located one directory up. -->
  <parent>
    <groupId>eu.stratosphere</groupId>
    <artifactId>stratosphere-quickstart</artifactId>
    <version>0.5-SNAPSHOT</version>
    <relativePath>..</relativePath>
  </parent>

  <artifactId>quickstart-java</artifactId>
  <packaging>jar</packaging>

</project>
\ No newline at end of file
/***********************************************************************************************************************
* Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.quickstart;
/**
 * Placeholder type for the "quickstart-java" project.
 * Its only purpose is to give the javadoc generation something to document.
 */
public class Dummy {
	// intentionally empty
}
\ No newline at end of file
<archetype xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype/1.0.0"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype/1.0.0 http://maven.apache.org/xsd/archetype-1.0.0.xsd">

  <id>stratosphere-quickstart</id>

  <!-- Java source templates shipped with this archetype. -->
  <sources>
    <source>src/main/java/Job.java</source>
    <source>src/main/java/WordCountJob.java</source>
  </sources>

</archetype>
\ No newline at end of file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <!-- The ${...} placeholders below are filled in when the project is generated from the archetype. -->
  <groupId>${groupId}</groupId>
  <artifactId>${artifactId}</artifactId>
  <version>${version}</version>
  <packaging>jar</packaging>

  <name>Your Job's Name</name>
  <url>http://www.myorganization.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <!-- Minimal set of dependencies for writing and running a Stratosphere job.
       Add further modules here as needed (for example stratosphere-scala for Scala jobs). -->
  <dependencies>
    <dependency>
      <groupId>eu.stratosphere</groupId>
      <artifactId>stratosphere-java</artifactId>
      <version>0.5-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>eu.stratosphere</groupId>
      <artifactId>stratosphere-clients</artifactId>
      <version>0.5-SNAPSHOT</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Builds the jar you submit to a Stratosphere cluster; the manifest's
           program-class entry names the job's entry class. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.4</version>
        <configuration>
          <archive>
            <manifestEntries>
              <program-class>${package}.Job</program-class>
            </manifestEntries>
          </archive>
        </configuration>
      </plugin>
      <!-- Compile for Java 1.6 source and target level. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>
    </plugins>
  </build>

</project>
package ${package};
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.operators.FileDataSink;
import eu.stratosphere.api.common.operators.GenericDataSink;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.aggregation.Aggregations;
import eu.stratosphere.api.java.functions.FlatMapFunction;
import eu.stratosphere.api.java.record.io.DelimitedOutputFormat;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.client.LocalExecutor;
import eu.stratosphere.util.Collector;
/**
 * Starting point for your own Stratosphere job.
 *
 * <p>
 * A complete, runnable example is provided in WordCountJob.java in the same
 * package/directory; more material is available on the website.
 *
 * <p>
 * To build a jar that can be submitted to a Stratosphere cluster, run
 * 		mvn clean package
 * in the project's root directory. The jar is written to the target/ directory.
 */
@SuppressWarnings("serial")
public class Job {

	public static void main(String[] args) throws Exception {

		// obtain the environment the program will be executed in
		ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

		// Build your program here.
		//
		// Typically you first read some data, for example
		// 		environment.readTextFile(textPath);
		//
		// and then transform the resulting DataSet<String> with operations such as
		// 		.filter()
		// 		.flatMap()
		// 		.join()
		// 		.groupBy()
		// and many more.

		// run the program
		environment.execute(" Example");
	}
}
\ No newline at end of file
package ${package};
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.aggregation.Aggregations;
import eu.stratosphere.api.java.functions.FlatMapFunction;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.util.Collector;
/**
 * The classic "WordCount": builds a histogram of word occurrences over text input.
 *
 * <p>
 * Input is either a plain text file (lines separated by newline characters) or,
 * when no arguments are given, a small built-in sample.
 *
 * <p>
 * The example demonstrates how to:
 * <ul>
 * <li>write a simple Stratosphere program,</li>
 * <li>work with Tuple data types,</li>
 * <li>write and plug in user-defined functions.</li>
 * </ul>
 */
@SuppressWarnings("serial")
public class WordCountJob {

	// -------------------------------------------------------------------------
	//  Program
	// -------------------------------------------------------------------------

	public static void main(String[] args) throws Exception {

		parseParameters(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// input: one String per line of text
		DataSet<String> lines = getTextDataSet(env);

		// tokenize each line into (word, 1) pairs, group on tuple field 0
		// and sum up tuple field 1
		DataSet<Tuple2<String, Integer>> wordCounts = lines
				.flatMap(new Tokenizer())
				.groupBy(0)
				.aggregate(Aggregations.SUM, 1);

		// output: print to stdout unless an output path was supplied
		if (!fileOutput) {
			wordCounts.print();
		} else {
			wordCounts.writeAsCsv(outputPath, "\n", " ");
		}

		env.execute("WordCount Example");
	}

	// -------------------------------------------------------------------------
	//  User functions
	// -------------------------------------------------------------------------

	/**
	 * User-defined FlatMapFunction that lower-cases a line, splits it on runs of
	 * non-word characters and emits one {@code (word, 1)} pair
	 * ({@code Tuple2<String, Integer>}) per non-empty token.
	 */
	public static final class Tokenizer extends FlatMapFunction<String, Tuple2<String, Integer>> {

		@Override
		public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
			for (String word : value.toLowerCase().split("\\W+")) {
				// split() can yield an empty leading token; skip it
				if (word.isEmpty()) {
					continue;
				}
				out.collect(new Tuple2<String, Integer>(word, 1));
			}
		}
	}

	// -------------------------------------------------------------------------
	//  Utilities
	// -------------------------------------------------------------------------

	private static boolean fileOutput = false;
	private static String textPath;
	private static String outputPath;

	/**
	 * Interprets the command line: no arguments selects the built-in data and
	 * stdout output, exactly two arguments select input and output paths, and
	 * anything else prints the usage and terminates with exit code 1.
	 */
	private static void parseParameters(String[] args) {
		if (args.length == 0) {
			System.out.println("Executing WordCount example with built-in default data.");
			System.out.println(" Provide parameters to read input data from a file.");
			System.out.println(" Usage: WordCount <text path> <result path>");
			return;
		}

		// any argument at all switches to file mode
		fileOutput = true;

		if (args.length != 2) {
			System.err.println("Usage: WordCount <text path> <result path>");
			System.exit(1);
		} else {
			textPath = args[0];
			outputPath = args[1];
		}
	}

	/**
	 * Returns the text to count: the file at {@code textPath} in file mode,
	 * the built-in sample lines otherwise.
	 */
	private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
		if (!fileOutput) {
			return getDefaultTextLineDataSet(env);
		}
		return env.readTextFile(textPath);
	}

	/**
	 * Creates a DataSet from a few hard-coded sample lines.
	 */
	public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
		return env.fromElements(
				"To be, or not to be,--that is the question:--",
				"Whether 'tis nobler in the mind to suffer",
				"The slings and arrows of outrageous fortune",
				"Or to take arms against a sea of troubles,"
				);
	}
}
\ No newline at end of file
#!/usr/bin/env bash

########################################################################################################################
# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
########################################################################################################################

# Generates a sample Scala job project from the SNAPSHOT archetype, non-interactively,
# into a directory named after $PACKAGE, and then prints next-step guidance.

PACKAGE=quickstart

# Note: the groupId of the generated project is "eu.stratosphere", matching -Dpackage
# and the other quickstart scripts (it was previously misspelled "eu.stratoshere").
mvn archetype:generate \
  -DarchetypeGroupId=eu.stratosphere \
  -DarchetypeArtifactId=quickstart-scala-SNAPSHOT \
  -DarchetypeVersion=0.5-SNAPSHOT \
  -DgroupId=eu.stratosphere \
  -DartifactId="$PACKAGE" \
  -Dversion=0.1 \
  -Dpackage=eu.stratosphere.quickstart \
  -DinteractiveMode=false \
  -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/

# Do not claim success below if Maven failed to generate the project.
status=$?
if [ "$status" -ne 0 ]; then
  echo "Project generation failed (mvn exited with status $status)." >&2
  exit "$status"
fi

#
# Give some guidance
#
echo -e "\\n\\n"
echo -e "\\tA sample quickstart Stratosphere Job has been created."
echo -e "\\tSwitch into the directory using"
echo -e "\\t\\t cd $PACKAGE"
echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)"
echo -e "\\tBuild a jar inside the directory using"
echo -e "\\t\\t mvn clean package"
echo -e "\\tYou will find the runnable jar in $PACKAGE/target"
echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev"
echo -e "\\n\\n"

# Use this command if you want to specify the coordinates of your generated artifact
# in an interactive menu (same archetype coordinates as the command above):
#
# mvn archetype:generate \
#   -DarchetypeGroupId=eu.stratosphere \
#   -DarchetypeArtifactId=quickstart-scala-SNAPSHOT \
#   -DarchetypeVersion=0.5-SNAPSHOT \
#   -DarchetypeCatalog=https://oss.sonatype.org/content/repositories/snapshots/
#!/usr/bin/env bash

########################################################################################################################
# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
########################################################################################################################

# Generates a sample Scala job project from the stable (0.4) archetype, non-interactively,
# into a directory named after $PACKAGE, and then prints next-step guidance.

PACKAGE=quickstart

mvn archetype:generate \
  -DarchetypeGroupId=eu.stratosphere \
  -DarchetypeArtifactId=quickstart-scala \
  -DarchetypeVersion=0.4 \
  -DgroupId=eu.stratosphere \
  -DartifactId="$PACKAGE" \
  -Dversion=0.1 \
  -Dpackage=eu.stratosphere.quickstart \
  -DinteractiveMode=false

# Do not claim success below if Maven failed to generate the project.
status=$?
if [ "$status" -ne 0 ]; then
  echo "Project generation failed (mvn exited with status $status)." >&2
  exit "$status"
fi

#
# Give some guidance
#
echo -e "\\n\\n"
echo -e "\\tA sample quickstart Stratosphere Job has been created."
echo -e "\\tSwitch into the directory using"
echo -e "\\t\\t cd $PACKAGE"
echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)"
echo -e "\\tBuild a jar inside the directory using"
echo -e "\\t\\t mvn clean package"
echo -e "\\tYou will find the runnable jar in $PACKAGE/target"
echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev"
echo -e "\\n\\n"

# Use this command if you want to specify the coordinates of your generated artifact
# in an interactive menu (same archetype coordinates as the command above):
#
# mvn archetype:generate \
#   -DarchetypeGroupId=eu.stratosphere \
#   -DarchetypeArtifactId=quickstart-scala \
#   -DarchetypeVersion=0.4
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <!-- Source files of this module are read and written as UTF-8. -->
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <!-- Child of the stratosphere-quickstart aggregator located one directory up. -->
  <parent>
    <groupId>eu.stratosphere</groupId>
    <artifactId>stratosphere-quickstart</artifactId>
    <version>0.5-SNAPSHOT</version>
    <relativePath>..</relativePath>
  </parent>

  <artifactId>quickstart-scala</artifactId>
  <packaging>jar</packaging>

</project>
/***********************************************************************************************************************
* Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.quickstart;
/**
* This class solely exists to generate
* javadocs for the "quickstart-scala" project.
**/
public class Dummy {
//
}
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<archetype-descriptor
    xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
    name="prj-scala-only">

  <fileSets>
    <!-- All Scala sources under src/main/scala, marked as filtered and packaged. -->
    <fileSet filtered="true" packaged="true" encoding="UTF-8">
      <directory>src/main/scala</directory>
      <includes>
        <include>**/*.scala</include>
      </includes>
    </fileSet>
  </fileSets>

</archetype-descriptor>
\ No newline at end of file
<archetype xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype/1.0.0"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype/1.0.0 http://maven.apache.org/xsd/archetype-1.0.0.xsd">

  <id>stratosphere-quickstart-scala</id>

  <!-- Scala source template shipped with this archetype. -->
  <sources>
    <source>src/main/scala/Job.scala</source>
  </sources>

</archetype>
\ No newline at end of file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <!-- The ${...} placeholders below are filled in when the project is generated from the archetype. -->
  <groupId>${groupId}</groupId>
  <artifactId>${artifactId}</artifactId>
  <version>${version}</version>
  <packaging>jar</packaging>

  <name>Your Job's Name</name>
  <url>http://www.myorganization.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <!-- Minimal set of dependencies for writing and running a Stratosphere job in Scala.
       Add further Stratosphere modules here as needed. -->
  <dependencies>
    <dependency>
      <groupId>eu.stratosphere</groupId>
      <artifactId>stratosphere-scala</artifactId>
      <version>0.5-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>eu.stratosphere</groupId>
      <artifactId>stratosphere-clients</artifactId>
      <version>0.5-SNAPSHOT</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Builds the jar you submit to a Stratosphere cluster; the manifest's
           program-class entry names the job's entry class. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.4</version>
        <configuration>
          <archive>
            <manifestEntries>
              <program-class>${package}.Job</program-class>
            </manifestEntries>
          </archive>
        </configuration>
      </plugin>

      <!-- Compile Java sources for Java 1.6 source and target level. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>

      <!-- Scala compilation of main and test sources. -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>3.1.4</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- Eclipse integration: Scala and Java natures, builder and classpath containers. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <version>2.8</version>
        <configuration>
          <downloadSources>true</downloadSources>
          <projectnatures>
            <projectnature>org.scala-ide.sdt.core.scalanature</projectnature>
            <projectnature>org.eclipse.jdt.core.javanature</projectnature>
          </projectnatures>
          <buildcommands>
            <buildcommand>org.scala-ide.sdt.core.scalabuilder</buildcommand>
          </buildcommands>
          <classpathContainers>
            <classpathContainer>org.scala-ide.sdt.launching.SCALA_CONTAINER</classpathContainer>
            <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
          </classpathContainers>
          <excludes>
            <exclude>org.scala-lang:scala-library</exclude>
            <exclude>org.scala-lang:scala-compiler</exclude>
          </excludes>
          <sourceIncludes>
            <sourceInclude>**/*.scala</sourceInclude>
            <sourceInclude>**/*.java</sourceInclude>
          </sourceIncludes>
        </configuration>
      </plugin>

      <!-- Registers the Scala source directories with the build path. -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>build-helper-maven-plugin</artifactId>
        <version>1.7</version>
        <executions>
          <!-- src/main/scala becomes a source root -->
          <execution>
            <id>add-source</id>
            <phase>generate-sources</phase>
            <goals>
              <goal>add-source</goal>
            </goals>
            <configuration>
              <sources>
                <source>src/main/scala</source>
              </sources>
            </configuration>
          </execution>
          <!-- src/test/scala becomes a test source root -->
          <execution>
            <id>add-test-source</id>
            <phase>generate-test-sources</phase>
            <goals>
              <goal>add-test-source</goal>
            </goals>
            <configuration>
              <sources>
                <source>src/test/scala</source>
              </sources>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

</project>
package ${package};
import eu.stratosphere.api.common.Program
import eu.stratosphere.api.common.ProgramDescription
import eu.stratosphere.client.LocalExecutor
import eu.stratosphere.api.scala.TextFile
import eu.stratosphere.api.scala.ScalaPlan
import eu.stratosphere.api.scala._
import eu.stratosphere.api.scala.operators._
import eu.stratosphere.client.RemoteExecutor
// Local launcher: executes the job's plan with the LocalExecutor. Example invocation:
// mvn exec:exec -Dexec.executable="java" -Dexec.args="-cp %classpath ${package}.RunJobLocal 2 file:///some/path file:///some/other/path"
object RunJobLocal {

  // Expects three arguments (see Job.getDescription); prints the description otherwise.
  def main(args: Array[String]): Unit = {
    val job = new Job
    if (args.size >= 3) {
      val localPlan = job.getScalaPlan(args(0).toInt, args(1), args(2))
      LocalExecutor.execute(localPlan)
      System.exit(0)
    } else {
      println(job.getDescription)
    }
  }
}
// Cluster launcher: ships the job's plan to a remote JobManager. Example invocation:
// mvn exec:exec -Dexec.executable="java" -Dexec.args="-cp %classpath ${package}.RunJobRemote 2 file:///some/path file:///some/other/path"
object RunJobRemote {

  // Expects three arguments (see Job.getDescription); prints the description otherwise.
  def main(args: Array[String]): Unit = {
    val job = new Job
    if (args.size < 3) {
      println(job.getDescription)
    } else {
      val remotePlan = job.getScalaPlan(args(0).toInt, args(1), args(2))

      // The executor below targets a JobManager on "localhost" at port 6123;
      // adjust host and port to your setup. The jar path must match your
      // project name and version, and the jar has to exist before this runs,
      // so build it first with "mvn package".
      val executor = new RemoteExecutor("localhost", 6123, "target/stratosphere-project-0.1-SNAPSHOT.jar")
      executor.executePlan(remotePlan)
    }
  }
}
/**
 * This is an outline for a Stratosphere Scala job. It is actually the WordCount
 * example from the Stratosphere distribution.
 *
 * You can run it out of your IDE using the main() method of RunJobLocal.
 * This will use the LocalExecutor to start a little Stratosphere instance
 * out of your IDE.
 *
 * You can also generate a .jar file that you can submit on your Stratosphere
 * cluster.
 * Just type
 *     mvn clean package
 * in the project's root directory.
 * You will find the jar in the target/ directory.
 */
class Job extends Program with ProgramDescription with Serializable {

  /** Describes the three expected parameters, in the order getPlan reads them. */
  override def getDescription() = {
    "Parameters: [numSubTasks] [input] [output]"
  }

  /**
   * Builds the plan from positional arguments:
   * args(0) = number of sub tasks (parsed as Int), args(1) = input, args(2) = output.
   */
  override def getPlan(args: String*) = {
    getScalaPlan(args(0).toInt, args(1), args(2))
  }

  /** Renders one (word, count) result as "word count". */
  def formatOutput = (word: String, count: Int) => "%s %d".format(word, count)

  /**
   * Assembles the WordCount plan.
   *
   * @param numSubTasks default parallelism set on the resulting plan
   * @param textInput   location of the text input
   * @param wordsOutput location the formatted counts are written to
   */
  def getScalaPlan(numSubTasks: Int, textInput: String, wordsOutput: String) = {
    val input = TextFile(textInput)

    // lower-case each line, split on runs of non-word characters, drop empty
    // tokens and emit (word, 1)
    val words = input flatMap { _.toLowerCase().split("""\W+""") filter { _ != "" } map { (_, 1) } }

    // group on the word and add up the counts
    val counts = words groupBy { case (word, _) => word } reduce { (w1, w2) => (w1._1, w1._2 + w2._2) }

    // NOTE(review): these look like field annotations on the reduce for the
    // optimizer - confirm their exact meaning against the Scala API docs.
    counts neglects { case (word, _) => word }
    counts preserves({ case (word, _) => word }, { case (word, _) => word })

    val output = counts.write(wordsOutput, DelimitedOutputFormat(formatOutput.tupled))

    val plan = new ScalaPlan(Seq(output), "Word Count (immutable)")
    plan.setDefaultParallelism(numSubTasks)
    plan
  }
}
#!/usr/bin/env bash

########################################################################################################################
# Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
########################################################################################################################

# Generates a sample Java job project from the stable (0.4) archetype, non-interactively,
# into a directory named after $PACKAGE, and then prints next-step guidance.

PACKAGE=quickstart

mvn archetype:generate \
  -DarchetypeGroupId=eu.stratosphere \
  -DarchetypeArtifactId=quickstart-java \
  -DarchetypeVersion=0.4 \
  -DgroupId=eu.stratosphere \
  -DartifactId="$PACKAGE" \
  -Dversion=0.1 \
  -Dpackage=eu.stratosphere.quickstart \
  -DinteractiveMode=false

# Do not claim success below if Maven failed to generate the project.
status=$?
if [ "$status" -ne 0 ]; then
  echo "Project generation failed (mvn exited with status $status)." >&2
  exit "$status"
fi

#
# Give some guidance
#
echo -e "\\n\\n"
echo -e "\\tA sample quickstart Stratosphere Job has been created."
echo -e "\\tSwitch into the directory using"
echo -e "\\t\\t cd $PACKAGE"
echo -e "\\tImport the project there using your favorite IDE (Import it as a maven project)"
echo -e "\\tBuild a jar inside the directory using"
echo -e "\\t\\t mvn clean package"
echo -e "\\tYou will find the runnable jar in $PACKAGE/target"
echo -e "\\tConsult our mailing list if you have any troubles: https://groups.google.com/forum/#!forum/stratosphere-dev"
echo -e "\\n\\n"

# Use this command if you want to specify the coordinates of your generated artifact
# in an interactive menu (same archetype coordinates as the command above):
#
# mvn archetype:generate \
#   -DarchetypeGroupId=eu.stratosphere \
#   -DarchetypeArtifactId=quickstart-java \
#   -DarchetypeVersion=0.4
......@@ -112,6 +112,7 @@
<dotGitDirectory>${project.basedir}/../../.git</dotGitDirectory>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<skipPoms>false</skipPoms>
<failOnNoGitDirectory>false</failOnNoGitDirectory>
<generateGitPropertiesFilename>src/main/resources/.version.properties</generateGitPropertiesFilename>
</configuration>
</plugin>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册