From 9656e83f3f8ba9307d1d91a1ab97b4cf6446c4d1 Mon Sep 17 00:00:00 2001 From: Robert Metzger Date: Tue, 11 Nov 2014 11:00:26 +0100 Subject: [PATCH] [FLINK-1234] Activate hadoop2 profile by default This closes #232 --- .travis.yml | 10 +-- docs/building.md | 33 ++++---- .../flink-hadoop-compatibility/pom.xml | 5 +- flink-addons/flink-yarn/pom.xml | 5 -- flink-addons/pom.xml | 10 +-- flink-dist/pom.xml | 15 ++-- flink-runtime/pom.xml | 6 +- pom.xml | 78 ++++++++++--------- tools/change-version | 2 +- tools/deploy_to_maven.sh | 48 ++++-------- tools/generate_specific_pom.sh | 4 +- 11 files changed, 102 insertions(+), 114 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3021c94a66b..7c745e7c37c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,15 +7,15 @@ language: java matrix: include: - jdk: "openjdk6" - env: PROFILE= + env: PROFILE="-Dhadoop.profile=1" - jdk: "oraclejdk7" - env: PROFILE= + env: PROFILE="-Dhadoop.profile=1" - jdk: "openjdk6" - env: PROFILE="-P!include-yarn -Dhadoop.profile=2 -Dhadoop.version=2.0.0-alpha" + env: PROFILE="-P!include-yarn -Dhadoop.version=2.0.0-alpha" - jdk: "oraclejdk7" - env: PROFILE="-Dhadoop.profile=2 -Dhadoop.version=2.2.0" + env: PROFILE="-Dhadoop.version=2.2.0" - jdk: "oraclejdk8" - env: PROFILE="-Dhadoop.profile=2 -Dhadoop.version=2.5.0" + env: PROFILE="-Dhadoop.version=2.5.1" git: diff --git a/docs/building.md b/docs/building.md index 544dfba99e6..815bd4d4425 100644 --- a/docs/building.md +++ b/docs/building.md @@ -14,16 +14,18 @@ The simplest way of building Flink is by running: ~~~bash cd incubator-flink -mvn clean package -DskipTests +mvn clean install -DskipTests ~~~ -This instructs Maven (`mvn`) to first remove all existing builds (`clean`) and then create a new Flink binary (`package`). The `-DskipTests` command prevents Maven from executing the unit tests. +This instructs Maven (`mvn`) to first remove all existing builds (`clean`) and then create a new Flink binary (`install`). 
The `-DskipTests` command prevents Maven from executing the unit tests. + +[Read more](http://maven.apache.org/) about Apache Maven. ## Build Flink for a specific Hadoop Version -This section covers building Flink for a specific Hadoop version. Most users do not need to do this manually. +This section covers building Flink for a specific Hadoop version. Most users do not need to do this manually. The download page of Flink contains binary packages for common setups. The problem is that Flink uses HDFS and YARN which are both dependencies from Apache Hadoop. There exist many different versions of Hadoop (from both the upstream project and the different Hadoop distributions). If a user is using a wrong combination of versions, exceptions like this one occur: @@ -39,45 +41,48 @@ There are two main versions of Hadoop that we need to differentiate: - Hadoop 2, with all versions starting with 2, like 2.2.0. The main differentiation between Hadoop 1 and Hadoop 2 is the availability of Hadoop YARN (Hadoops cluster resource manager). -**To build Flink for Hadoop 2**, issue the following command: +By default, Flink is using the Hadoop 2 dependencies. + +**To build Flink for Hadoop 1**, issue the following command: ~~~bash -mvn clean package -DskipTests -Dhadoop.profile=2 +mvn clean install -DskipTests -Dhadoop.profile=1 ~~~ -The `-Dhadoop.profile=2` flag instructs Maven to build Flink with YARN support and the Hadoop 2 HDFS client. +The `-Dhadoop.profile=1` flag instructs Maven to build Flink for Hadoop 1. Note that the features included in Flink change when using a different Hadoop profile. In particular the support for YARN and the built-in HBase support are not available in Hadoop 1 builds. -Usually, this flag is sufficient for full support of Flink for Hadoop 2-versions. 
-However, you can also **specify a specific Hadoop version to build against**: + +You can also **specify a specific Hadoop version to build against**: ~~~bash -mvn clean package -DskipTests -Dhadoop.profile=2 -Dhadoop.version=2.4.1 +mvn clean install -DskipTests -Dhadoop.version=2.4.1 ~~~ **To build Flink against a vendor specific Hadoop version**, issue the following command: ~~~bash -mvn clean package -DskipTests -Pvendor-repos -Dhadoop.profile=2 -Dhadoop.version=2.2.0-cdh5.0.0-beta-2 +mvn clean install -DskipTests -Pvendor-repos -Dhadoop.version=2.2.0-cdh5.0.0-beta-2 ~~~ The `-Pvendor-repos` activates a Maven [build profile](http://maven.apache.org/guides/introduction/introduction-to-profiles.html) that includes the repositories of popular Hadoop vendors such as Cloudera, Hortonworks, or MapR. **Build Flink for `hadoop2` versions before 2.2.0** -Maven will automatically build Flink with its YARN client if the `-Dhadoop.profile=2` is set. But there were some changes in Hadoop versions before the 2.2.0 Hadoop release that are not supported by Flink's YARN client. Therefore, you can disable building the YARN client with the following string: `-P\!include-yarn`. +Maven will automatically build Flink with its YARN client. But there were some changes in Hadoop versions before the 2.2.0 Hadoop release that are not supported by Flink's YARN client. Therefore, you can disable building the YARN client with the following string: `-P\!include-yarn`. So if you are building Flink for Hadoop `2.0.0-alpha`, use the following command: ~~~bash --P\!include-yarn -Dhadoop.profile=2 -Dhadoop.version=2.0.0-alpha +-P\!include-yarn -Dhadoop.version=2.0.0-alpha ~~~ ## Background The builds with Maven are controlled by [properties](http://maven.apache.org/pom.html#Properties) and build profiles. -There are two profiles, one for hadoop1 and one for hadoop2. When the hadoop2 profile is enabled, the system will also build the YARN client. -The hadoop1 profile is used by default. 
To enable the hadoop2 profile, set `-Dhadoop.profile=2` when building. +There are two profiles, one for hadoop1 and one for hadoop2. When the hadoop2 profile is enabled (default), the system will also build the YARN client. + +To enable the hadoop1 profile, set `-Dhadoop.profile=1` when building. Depending on the profile, there are two Hadoop versions, set via properties. For "hadoop1", we use 1.2.1 by default, for "hadoop2" it is 2.2.0. You can change these versions with the `hadoop-two.version` (or `hadoop-one.version`) property. For example `-Dhadoop-two.version=2.4.0`. diff --git a/flink-addons/flink-hadoop-compatibility/pom.xml b/flink-addons/flink-hadoop-compatibility/pom.xml index 79810e831bd..c494afe9d19 100644 --- a/flink-addons/flink-hadoop-compatibility/pom.xml +++ b/flink-addons/flink-hadoop-compatibility/pom.xml @@ -66,15 +66,14 @@ under the License. hadoop-2 - - hadoop.profile2 + + !hadoop.profile org.apache.hadoop hadoop-mapreduce-client-core - ${hadoop.version} asm diff --git a/flink-addons/flink-yarn/pom.xml b/flink-addons/flink-yarn/pom.xml index 862fc182aa7..1d16e4866d8 100644 --- a/flink-addons/flink-yarn/pom.xml +++ b/flink-addons/flink-yarn/pom.xml @@ -32,7 +32,6 @@ under the License. jar - org.apache.flink flink-runtime @@ -62,7 +61,6 @@ under the License. org.apache.hadoop hadoop-yarn-client - ${hadoop.version} asm @@ -102,7 +100,6 @@ under the License. org.apache.hadoop hadoop-common - ${hadoop.version} asm @@ -142,7 +139,6 @@ under the License. org.apache.hadoop hadoop-hdfs - ${hadoop.version} asm @@ -182,7 +178,6 @@ under the License. org.apache.hadoop hadoop-mapreduce-client-core - ${hadoop.version} asm diff --git a/flink-addons/pom.xml b/flink-addons/pom.xml index c76247d6aab..6b4083e9454 100644 --- a/flink-addons/pom.xml +++ b/flink-addons/pom.xml @@ -48,7 +48,7 @@ under the License. - !hadoop.profile + hadoop.profile1 @@ -59,8 +59,8 @@ under the License. 
hadoop-2 - - hadoop.profile2 + + !hadoop.profile @@ -71,8 +71,8 @@ under the License. include-yarn - - hadoop.profile2 + + !hadoop.profile diff --git a/flink-dist/pom.xml b/flink-dist/pom.xml index e9c67d1afb5..29b9a61174f 100644 --- a/flink-dist/pom.xml +++ b/flink-dist/pom.xml @@ -133,9 +133,12 @@ under the License. - !hadoop.profile + hadoop.profile1 + + ${hadoop-one.version} + @@ -145,8 +148,8 @@ under the License. include-yarn - - hadoop.profile2 + + !hadoop.profile @@ -214,8 +217,8 @@ under the License. hadoop-2 - - hadoop.profile2 + + !hadoop.profile @@ -357,8 +360,6 @@ under the License. - - diff --git a/flink-runtime/pom.xml b/flink-runtime/pom.xml index d09517dc471..4480e6fe486 100644 --- a/flink-runtime/pom.xml +++ b/flink-runtime/pom.xml @@ -178,7 +178,7 @@ under the License. - !hadoop.profile + hadoop.profile1 @@ -227,8 +227,8 @@ under the License. hadoop-2 - - hadoop.profile2 + + !hadoop.profile diff --git a/pom.xml b/pom.xml index 3b2b18d7d4f..ec921333bbe 100644 --- a/pom.xml +++ b/pom.xml @@ -79,9 +79,9 @@ under the License. true 1.7.7 17.0 - 2.10.4 - 2.10 - 2.0.1 + 2.10.4 + 2.10 + 2.0.1 @@ -202,7 +202,7 @@ under the License. 3.2.1 - + org.javassist javassist @@ -222,6 +222,39 @@ under the License. stax-api 1.0.1 + + + + org.apache.hadoop + hadoop-core + ${hadoop.version} + + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + + + org.apache.hadoop + hadoop-yarn-client + ${hadoop.version} + @@ -231,54 +264,24 @@ under the License. 
- !hadoop.profile + hadoop.profile1 ${hadoop-one.version} - - - - - org.apache.hadoop - hadoop-core - ${hadoop.version} - - - hadoop-2 - - hadoop.profile2 + + !hadoop.profile ${hadoop-two.version} - - - - - org.apache.hadoop - hadoop-common - ${hadoop.version} - - - org.apache.hadoop - hadoop-hdfs - ${hadoop.version} - - - org.apache.hadoop - hadoop-client - ${hadoop.version} - - - @@ -498,7 +501,6 @@ under the License. - org.apache.rat apache-rat-plugin diff --git a/tools/change-version b/tools/change-version index 9f2530655a7..34b0e542126 100755 --- a/tools/change-version +++ b/tools/change-version @@ -17,4 +17,4 @@ # limitations under the License. ################################################################################ -find .. -name 'pom.xml' -type f -exec sed -i 's#0.7-incubating-SNAPSHOT#0.8-incubating-SNAPSHOT#' {} \; +find .. -name 'pom.xml' -type f -exec sed -i 's#0.8-hadoop2-incubating-SNAPSHOT#0.8-incubating-SNAPSHOT#' {} \; diff --git a/tools/deploy_to_maven.sh b/tools/deploy_to_maven.sh index 0c1c04801ef..4525147410b 100755 --- a/tools/deploy_to_maven.sh +++ b/tools/deploy_to_maven.sh @@ -21,12 +21,11 @@ #Please ask @rmetzger (on GitHub) before changing anything here. It contains some magic. # Build Responsibilities -# 1. Deploy to sonatype (old hadoop) -# 2. Nothing -# 3. Deploy to s3 (old hadoop) -# 4. deploy to sonatype (yarn hadoop) (this build will also generate specific poms for yarn hadoop) -# 5. Nothing (formerly: Deploy Javadocs.) -# 6. deploy to s3 (yarn hadoop) +# 1. Deploy snapshot (hadoop1) +# 2. Deploy to s3 (hadoop1) +# 3. Nothing (hadoop200alpha) +# 4. deploy snapshot and s3 (hadoop2 - 2.2.0) +# 5. Nothing (hadoop2 - 2.5.1) # Changes (since travis changed the id assignment) # switched 2. with 3. 
@@ -54,12 +53,12 @@ function getVersion() { # this will take a while CURRENT_FLINK_VERSION=`getVersion` if [[ "$CURRENT_FLINK_VERSION" == *-SNAPSHOT ]]; then - CURRENT_FLINK_VERSION_YARN=${CURRENT_FLINK_VERSION/-incubating-SNAPSHOT/-hadoop2-incubating-SNAPSHOT} + CURRENT_FLINK_VERSION_HADOOP1=${CURRENT_FLINK_VERSION/-incubating-SNAPSHOT/-hadoop1-incubating-SNAPSHOT} else - CURRENT_FLINK_VERSION_YARN="$CURRENT_FLINK_VERSION-hadoop2" + CURRENT_FLINK_VERSION_HADOOP1="$CURRENT_FLINK_VERSION-hadoop1" fi -echo "detected current version as: '$CURRENT_FLINK_VERSION' ; yarn: $CURRENT_FLINK_VERSION_YARN " +echo "detected current version as: '$CURRENT_FLINK_VERSION' ; hadoop1: $CURRENT_FLINK_VERSION_HADOOP1 " # Check if push/commit is eligible for pushing echo "Job: $TRAVIS_JOB_NUMBER ; isPR: $TRAVIS_PULL_REQUEST" @@ -71,18 +70,16 @@ if [[ $TRAVIS_PULL_REQUEST == "false" ]] ; then # if [[ $TRAVIS_JOB_NUMBER == *1 ]] && [[ $TRAVIS_PULL_REQUEST == "false" ]] && [[ $CURRENT_FLINK_VERSION == *SNAPSHOT* ]] ; then - # Deploy regular hadoop v1 to maven - mvn -Pdocs-and-source -DskipTests -Drat.ignoreErrors=true deploy --settings deploysettings.xml; + # Deploy hadoop v1 to maven + echo "Generating poms for hadoop1" + ./tools/generate_specific_pom.sh $CURRENT_FLINK_VERSION $CURRENT_FLINK_VERSION_HADOOP1 pom.hadoop1.xml + mvn -B -f pom.hadoop1.xml -Pdocs-and-source -DskipTests -Drat.ignoreErrors=true deploy --settings deploysettings.xml; fi if [[ $TRAVIS_JOB_NUMBER == *4 ]] && [[ $TRAVIS_PULL_REQUEST == "false" ]] && [[ $CURRENT_FLINK_VERSION == *SNAPSHOT* ]] ; then # deploy hadoop v2 (yarn) - echo "Generating poms for hadoop-yarn." - ./tools/generate_specific_pom.sh $CURRENT_FLINK_VERSION $CURRENT_FLINK_VERSION_YARN - # all these tweaks assume a yarn build. - # performance tweaks here: no "clean deploy" so that actually nothing is being rebuild (could cause wrong poms inside the jars?) 
- # skip tests (they were running already) - mvn -B -f pom.hadoop2.xml -DskipTests -Pdocs-and-source -Drat.ignoreErrors=true deploy --settings deploysettings.xml; + echo "deploy standard version (hadoop2)" + mvn -B -DskipTests -Pdocs-and-source -Drat.ignoreErrors=true deploy --settings deploysettings.xml; fi # The block below took care of deploying javadoc to github.io. We now host the javadocs on the website. @@ -105,21 +102,11 @@ if [[ $TRAVIS_PULL_REQUEST == "false" ]] ; then # cd .. # fi - # - # Deploy binaries to S3 - # The TRAVIS_JOB_NUMBER here is kinda hacked. - # Currently, there are Builds 1-6. Build 1 is deploying to maven sonatype - # Build 2 has no special meaning, it is the openjdk7, hadoop 1.2.1 build - # Build 5 is openjdk7, hadoop yarn (2.0.5-beta) build. - # Please be sure not to use Build 1 as it will always be the yarn build. - # - - - if [[ $TRAVIS_JOB_NUMBER == *3 ]] || [[ $TRAVIS_JOB_NUMBER == *6 ]] ; then + if [[ $TRAVIS_JOB_NUMBER == *2 ]] || [[ $TRAVIS_JOB_NUMBER == *4 ]] ; then echo "Uploading build to amazon s3. Job Number: $TRAVIS_JOB_NUMBER" HD="hadoop1" - # job nr 6 is YARN - if [[ $TRAVIS_JOB_NUMBER == *6 ]] ; then + # job nr 4 is YARN + if [[ $TRAVIS_JOB_NUMBER == *4 ]] ; then # move to current dir mkdir flink-$CURRENT_FLINK_VERSION cp -r flink-dist/target/flink-*-bin/flink-yarn*/* flink-$CURRENT_FLINK_VERSION/ @@ -136,7 +123,6 @@ if [[ $TRAVIS_PULL_REQUEST == "false" ]] ; then echo "doing a ls -lisah:" ls -lisah fi - fi # pull request check diff --git a/tools/generate_specific_pom.sh b/tools/generate_specific_pom.sh index bcd2ae97e9f..cd7f9d1d76f 100755 --- a/tools/generate_specific_pom.sh +++ b/tools/generate_specific_pom.sh @@ -25,7 +25,7 @@ function usage { echo "Usage: $0 CURRENT_VERSION NEW_VERSION [POM_NAME]" - echo "For example, $0 0.4-SNAPSHOT 0.4-hadoop2-SNAPSHOT" + echo "For example, $0 0.8-incubating-SNAPSHOT 0.8-hadoop1-incubating-SNAPSHOT" echo "Presumes VERSION has hadoop1 or hadoop2 in it. 
POM_NAME is optional and" echo "allows to specify a different name for the generated pom." exit 1 @@ -96,7 +96,7 @@ for p in $poms; do -e "s/\([^<]*\)/\1\/${nupom}/" \ -e "s/\(relativePath>\.\.\)/\1\/${nupom}/" \ -e "s/.*name>.*/${hadoop1}/" \ - -e "s/.*.*/${hadoop2}/" \ + -e "s/.*name>.*/${hadoop2}/" \ $p > "$tmp_nuname" mv $tmp_nuname $nuname done -- GitLab