diff --git a/flink-container/docker/Dockerfile b/flink-container/docker/Dockerfile
deleted file mode 100644
index a0d3d8fb1bce2c146cd4dfe4eb55179bc9e2ee1a..0000000000000000000000000000000000000000
--- a/flink-container/docker/Dockerfile
+++ /dev/null
@@ -1,68 +0,0 @@
-################################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-FROM openjdk:8-jre-alpine
-
-# Install requirements
-RUN apk add --no-cache bash snappy libc6-compat
-
-# Flink environment variables
-ENV FLINK_INSTALL_PATH=/opt
-ENV FLINK_HOME $FLINK_INSTALL_PATH/flink
-ENV FLINK_LIB_DIR $FLINK_HOME/lib
-ENV FLINK_PLUGINS_DIR $FLINK_HOME/plugins
-ENV FLINK_OPT_DIR $FLINK_HOME/opt
-ENV FLINK_JOB_ARTIFACTS_DIR $FLINK_INSTALL_PATH/artifacts
-ENV FLINK_USR_LIB_DIR $FLINK_HOME/usrlib
-ENV PATH $PATH:$FLINK_HOME/bin
-
-# flink-dist can point to a directory or a tarball on the local system
-ARG flink_dist=NOT_SET
-ARG job_artifacts=NOT_SET
-ARG python_version=NOT_SET
-# hadoop jar is optional
-ARG hadoop_jar=NOT_SET*
-
-# Install Python
-RUN \
-  if [ "$python_version" = "2" ]; then \
-    apk add --no-cache python; \
-  elif [ "$python_version" = "3" ]; then \
-    apk add --no-cache python3 && ln -s /usr/bin/python3 /usr/bin/python; \
-  fi
-
-# Install build dependencies and flink
-ADD $flink_dist $hadoop_jar $FLINK_INSTALL_PATH/
-ADD $job_artifacts/* $FLINK_JOB_ARTIFACTS_DIR/
-
-RUN set -x && \
-  ln -s $FLINK_INSTALL_PATH/flink-[0-9]* $FLINK_HOME && \
-  ln -s $FLINK_JOB_ARTIFACTS_DIR $FLINK_USR_LIB_DIR && \
-  if [ -n "$python_version" ]; then ln -s $FLINK_OPT_DIR/flink-python*.jar $FLINK_LIB_DIR; fi && \
-  if [ -f ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* ]; then ln -s ${FLINK_INSTALL_PATH}/flink-shaded-hadoop* $FLINK_LIB_DIR; fi && \
-  addgroup -S flink && adduser -D -S -H -G flink -h $FLINK_HOME flink && \
-  chown -R flink:flink ${FLINK_INSTALL_PATH}/flink-* && \
-  chown -R flink:flink ${FLINK_JOB_ARTIFACTS_DIR}/ && \
-  chown -h flink:flink $FLINK_HOME
-
-COPY docker-entrypoint.sh /
-
-USER flink
-EXPOSE 8081 6123
-ENTRYPOINT ["/docker-entrypoint.sh"]
-CMD ["--help"]
diff --git a/flink-container/docker/README.md b/flink-container/docker/README.md
deleted file mode 100644
index 42e49723909f7ca035bd4ec76ab7dc4b23e274ea..0000000000000000000000000000000000000000
--- a/flink-container/docker/README.md
+++ /dev/null
@@ -1,84 +0,0 @@
-# Apache Flink job cluster Docker image
-
-In order to deploy a job cluster on Docker, one needs to create an image which contains the Flink binaries as well as the user code jars of the job to execute.
-This directory contains a `build.sh` which facilitates the process.
-The script takes a Flink distribution either from an official release, an archive or a local distribution and combines it with the specified job jar.
-
-## Installing Docker
-
-Install the most recent stable version of [Docker](https://docs.docker.com/installation/).
-
-## Building the Docker image
-
-Images are based on the official Java Alpine (OpenJDK 8) image.
-
-Before building the image, one needs to build the user code jars for the job.
-A Flink job can consist of multiple artifacts. In order to specify the required artifacts, they need to be passed to `--job-artifacts` of the build script. The individual paths are comma separated.
-
-If you want to build the Flink image from the version you have checked out locally run:
-
-    build.sh --from-local-dist --job-artifacts <comma-separated-paths-of-job-artifacts> [--with-python2|--with-python3] --image-name <image-name>
-
-Note that you first need to call `mvn package -pl flink-dist -am` to build the Flink binaries.
-
-If you want to build the Flink image from an archive stored under `<path-to-archive>` run:
-
-    build.sh --from-archive <path-to-archive> --job-artifacts <comma-separated-paths-of-job-artifacts> [--with-python2|--with-python3] --image-name <image-name>
-
-If you want to build the Flink image for a specific version of Flink/Hadoop/Scala run:
-
-    build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 --job-artifacts <comma-separated-paths-of-job-artifacts> [--with-python2|--with-python3] --image-name <image-name>
-
-Please note that from Flink-1.8, hadoop version is optional and you could build the Flink image without providing any hadoop version.
-
-The script will try to download the released version from the Apache archive.
-
-The artifacts specified in `<comma-separated-paths-of-job-artifacts>` will be copied to directory /opt/artifacts of the built image.
-
-## Deploying via Docker compose
-
-The `docker-compose.yml` contains the following parameters:
-
-* `FLINK_DOCKER_IMAGE_NAME` - Image name to use for the deployment (default: `flink-job:latest`)
-* `FLINK_JOB` - Name of the Flink job to execute (default: none)
-* `DEFAULT_PARALLELISM` - Default parallelism with which to start the job (default: 1)
-* `FLINK_JOB_ARGUMENTS` - Additional arguments which will be passed to the job cluster (default: none)
-* `SAVEPOINT_OPTIONS` - Savepoint options to start the cluster with (default: none)
-
-The parameters can be set by exporting the corresponding environment variable.
-
-Deploy cluster and see config/setup log output (best run in a screen session)
-
-    FLINK_DOCKER_IMAGE_NAME=<image-name> FLINK_JOB=<job-class-name> docker-compose up
-
-Deploy as a daemon (and return)
-
-    FLINK_DOCKER_IMAGE_NAME=<image-name> FLINK_JOB=<job-class-name> docker-compose up -d
-
-In order to start the job with a different default parallelism set `DEFAULT_PARALLELISM`.
-This will automatically start `DEFAULT_PARALLELISM` TaskManagers:
-
-    FLINK_DOCKER_IMAGE_NAME=<image-name> FLINK_JOB=<job-class-name> DEFAULT_PARALLELISM=<parallelism> docker-compose up
-
-In order to resume the job from a savepoint set `SAVEPOINT_OPTIONS`.
-Supported options are `--fromSavepoint <savepoint-path>` and `--allowNonRestoredState` where `<savepoint-path>` is accessible from all containers.
-
-    FLINK_DOCKER_IMAGE_NAME=<image-name> FLINK_JOB=<job-class-name> SAVEPOINT_OPTIONS="--fromSavepoint <savepoint-path> --allowNonRestoredState" docker-compose up
-
-One can also provide additional job arguments via `FLINK_JOB_ARGUMENTS` which are passed to the job:
-
-    FLINK_DOCKER_IMAGE_NAME=<image-name> FLINK_JOB=<job-class-name> FLINK_JOB_ARGUMENTS=<job-arguments> docker-compose up
-
-Scale the cluster up or down to *N* TaskManagers
-
-    docker-compose scale taskmanager=<N>
-
-Access the Job Manager container
-
-    docker exec -it $(docker ps --filter name=flink_jobmanager --format={{.ID}}) /bin/sh
-
-Access the web UI by going to `<host>:8081` in your web browser.
-
-Kill the cluster
-
-    docker-compose kill
diff --git a/flink-container/docker/build.sh b/flink-container/docker/build.sh
deleted file mode 100755
index b08a7f208e6ebfb8e25f444d03f691e0a01b3826..0000000000000000000000000000000000000000
--- a/flink-container/docker/build.sh
+++ /dev/null
@@ -1,196 +0,0 @@
-#!/usr/bin/env bash
-################################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-usage() {
-  cat <<HERE
-Usage:
-  build.sh --job-artifacts <comma-separated-paths-of-job-artifacts> [--with-python2|--with-python3] --from-local-dist [--image-name <image-name>]
-  build.sh --job-artifacts <comma-separated-paths-of-job-artifacts> [--with-python2|--with-python3] --from-archive <path-to-archive> [--image-name <image-name>]
-  build.sh --job-artifacts <comma-separated-paths-of-job-artifacts> [--with-python2|--with-python3] --from-release --flink-version <flink-version> --scala-version <scala-version> [--hadoop-version <hadoop-version>] [--image-name <image-name>]
-  build.sh --help
-
-  If the --image-name flag is not used the built image name will be 'flink-job'.
-  Before Flink-1.8, the hadoop-version is required. And from Flink-1.8, the hadoop-version is optional and would download pre-bundled shaded Hadoop jar package if provided.
-HERE
-  exit 1
-}
-
-while [[ $# -ge 1 ]]
-do
-key="$1"
-  case $key in
-    --job-artifacts)
-    JOB_ARTIFACTS_PATH="$2"
-    shift
-    ;;
-    --with-python2)
-    PYTHON_VERSION="2"
-    ;;
-    --with-python3)
-    PYTHON_VERSION="3"
-    ;;
-    --from-local-dist)
-    FROM_LOCAL="true"
-    ;;
-    --from-archive)
-    FROM_ARCHIVE="$2"
-    shift
-    ;;
-    --from-release)
-    FROM_RELEASE="true"
-    ;;
-    --image-name)
-    IMAGE_NAME="$2"
-    shift
-    ;;
-    --flink-version)
-    FLINK_VERSION="$2"
-    shift
-    ;;
-    --hadoop-version)
-    HADOOP_VERSION="$2"
-    HADOOP_MAJOR_VERSION="$(echo ${HADOOP_VERSION} | sed 's/\.//')"
-    shift
-    ;;
-    --scala-version)
-    SCALA_VERSION="$2"
-    shift
-    ;;
-    --kubernetes-certificates)
-    CERTIFICATES_DIR="$2"
-    shift
-    ;;
-    --help)
-    usage
-    ;;
-    *)
-    # unknown option
-    ;;
-  esac
-  shift
-done
-
-IMAGE_NAME=${IMAGE_NAME:-flink-job}
-
-# TMPDIR must be contained within the working directory so it is part of the
-# Docker context. (i.e. it can't be mktemp'd in /tmp)
-TMPDIR=_TMP_
-
-cleanup() {
-    rm -rf "${TMPDIR}"
-}
-trap cleanup EXIT
-
-mkdir -p "${TMPDIR}"
-
-JOB_ARTIFACTS_TARGET="${TMPDIR}/artifacts"
-mkdir -p ${JOB_ARTIFACTS_TARGET}
-
-OLD_IFS="$IFS"
-IFS=","
-job_artifacts_array=(${JOB_ARTIFACTS_PATH})
-IFS="$OLD_IFS"
-for artifact in ${job_artifacts_array[@]}; do
-  cp ${artifact} ${JOB_ARTIFACTS_TARGET}/
-done
-
-checkUrlAvailable() {
-    curl --output /dev/null --silent --head --fail $1
-    ret=$?
-    if [[ ${ret} -ne 0 ]]; then
-      echo "The url $1 not available, please check your parameters, exit..."
-      usage
-      exit 2
-    fi
-}
-
-if [ -n "${FROM_RELEASE}" ]; then
-
-  [[ -n "${FLINK_VERSION}" ]] && [[ -n "${SCALA_VERSION}" ]] || usage
-
-  FLINK_BASE_URL="$(curl -s https://www.apache.org/dyn/closer.cgi\?preferred\=true)flink/flink-${FLINK_VERSION}/"
-
-  FLINK_MAJOR_VERSION=$(echo "$FLINK_VERSION" | sed -e 's/\.//;s/\(..\).*/\1/')
-
-  if [[ $FLINK_MAJOR_VERSION -ge 18 ]]; then
-
-    # After Flink-1.8 we would let release pre-built package with hadoop
-    if [[ -n "${HADOOP_VERSION}" ]]; then
-      echo "After Flink-1.8, we would download pre-bundle hadoop jar package."
-      # list to get target pre-bundle package
-      SHADED_HADOOP_BASE_URL="https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop2-uber/"
-      SHADED_HADOOP_VERSION="$(curl -s ${SHADED_HADOOP_BASE_URL} | grep -o "title=\"[0-9.-]*/\"" | sed 's/title=\"//g; s/\/"//g' | grep ${HADOOP_VERSION} | head -1)"
-      SHADED_HADOOP_FILE_NAME="flink-shaded-hadoop2-uber-${SHADED_HADOOP_VERSION}.jar"
-
-      CURL_OUTPUT_SHADED_HADOOP="${TMPDIR}/${SHADED_HADOOP_FILE_NAME}"
-
-      DOWNLOAD_SHADED_HADOOP_URL=${SHADED_HADOOP_BASE_URL}${SHADED_HADOOP_VERSION}/${SHADED_HADOOP_FILE_NAME}
-      checkUrlAvailable ${DOWNLOAD_SHADED_HADOOP_URL}
-
-      echo "Downloading ${SHADED_HADOOP_FILE_NAME} from ${DOWNLOAD_SHADED_HADOOP_URL}"
-
-      curl -# ${DOWNLOAD_SHADED_HADOOP_URL} --output ${CURL_OUTPUT_SHADED_HADOOP}
-      SHADED_HADOOP="${CURL_OUTPUT_SHADED_HADOOP}"
-    fi
-    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-scala_${SCALA_VERSION}.tgz"
-  elif [[ -z "${HADOOP_VERSION}" ]]; then
-    usage
-  else
-    FLINK_DIST_FILE_NAME="flink-${FLINK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala_${SCALA_VERSION}.tgz"
-  fi
-
-
-  CURL_OUTPUT="${TMPDIR}/${FLINK_DIST_FILE_NAME}"
-
-  DOWNLOAD_FLINK_URL=${FLINK_BASE_URL}${FLINK_DIST_FILE_NAME}
-  checkUrlAvailable ${DOWNLOAD_FLINK_URL}
-
-  echo "Downloading ${FLINK_DIST_FILE_NAME} from ${DOWNLOAD_FLINK_URL}"
-
-  curl -# ${DOWNLOAD_FLINK_URL} --output ${CURL_OUTPUT}
-
-  FLINK_DIST="${CURL_OUTPUT}"
-
-elif [ -n "${FROM_LOCAL}" ]; then
-
-  DIST_DIR="../../flink-dist/target/flink-*-bin"
-  FLINK_DIST="${TMPDIR}/flink.tgz"
-  echo "Using flink dist: ${DIST_DIR}"
-  tar -C ${DIST_DIR} -cvzf "${FLINK_DIST}" .
-
-elif [ -n "${FROM_ARCHIVE}" ]; then
-  FLINK_DIST="${TMPDIR}/flink.tgz"
-  cp "${FROM_ARCHIVE}" "${FLINK_DIST}"
-
-else
-
-  usage
-
-fi
-
-if [[ `uname -i` == 'aarch64' ]]; then
-  # openjdk:8-jre-alpine doesn't work on ARM. See bug: https://icedtea.classpath.org/bugzilla/show_bug.cgi?id=3740
-  cp Dockerfile Dockerfile-ARM
-  sed -i 's/openjdk:8-jre-alpine/arm64v8\/openjdk:8u201-jdk-alpine/' Dockerfile-ARM
-  DOCKERFILE="Dockerfile-ARM"
-else
-  DOCKERFILE="Dockerfile"
-fi
-
-docker build --build-arg flink_dist="${FLINK_DIST}" --build-arg job_artifacts="${JOB_ARTIFACTS_TARGET}" --build-arg hadoop_jar="${SHADED_HADOOP}" --build-arg python_version="${PYTHON_VERSION}" -t "${IMAGE_NAME}" -f $DOCKERFILE .
diff --git a/flink-container/docker/docker-compose.yml b/flink-container/docker/docker-compose.yml
deleted file mode 100644
index a5e9b49f60c72bc5435e191d9fad7e07191e8c1d..0000000000000000000000000000000000000000
--- a/flink-container/docker/docker-compose.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-################################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-# Docker compose file for a Flink job cluster deployment.
-#
-# Parameters:
-# * FLINK_DOCKER_IMAGE_NAME - Image name to use for the deployment (default: flink-job:latest)
-# * FLINK_JOB - Name of the Flink job to execute (default: none)
-# * DEFAULT_PARALLELISM - Default parallelism with which to start the job (default: 1)
-# * FLINK_JOB_ARGUMENTS - Additional arguments which will be passed to the job cluster (default: none)
-# * SAVEPOINT_OPTIONS - Savepoint options to start the cluster with (default: none)
-
-version: "2.2"
-services:
-  job-cluster:
-    image: ${FLINK_DOCKER_IMAGE_NAME:-flink-job}
-    ports:
-      - "8081:8081"
-    command: job-cluster --job-classname ${FLINK_JOB} -Djobmanager.rpc.address=job-cluster -Dparallelism.default=${DEFAULT_PARALLELISM:-1} ${SAVEPOINT_OPTIONS} ${FLINK_JOB_ARGUMENTS}
-
-  taskmanager:
-    image: ${FLINK_DOCKER_IMAGE_NAME:-flink-job}
-    command: task-manager -Djobmanager.rpc.address=job-cluster
-    scale: ${DEFAULT_PARALLELISM:-1}
diff --git a/flink-container/docker/docker-entrypoint.sh b/flink-container/docker/docker-entrypoint.sh
deleted file mode 100755
index 0c1df000acb9b6ff3a4c3956745df5d4e5c9b2c4..0000000000000000000000000000000000000000
--- a/flink-container/docker/docker-entrypoint.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/sh
-
-################################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-### If unspecified, the hostname of the container is taken as the JobManager address
-FLINK_HOME=${FLINK_HOME:-"/opt/flink"}
-
-JOB_CLUSTER="job-cluster"
-TASK_MANAGER="task-manager"
-
-CMD="$1"
-shift;
-
-if [ "${CMD}" == "--help" -o "${CMD}" == "-h" ]; then
-  echo "Usage: $(basename $0) (${JOB_CLUSTER}|${TASK_MANAGER})"
-  exit 0
-elif [ "${CMD}" == "${JOB_CLUSTER}" -o "${CMD}" == "${TASK_MANAGER}" ]; then
-  echo "Starting the ${CMD}"
-
-  if [ "${CMD}" == "${TASK_MANAGER}" ]; then
-    exec $FLINK_HOME/bin/taskmanager.sh start-foreground "$@"
-  else
-    exec $FLINK_HOME/bin/standalone-job.sh start-foreground "$@"
-  fi
-fi
-
-exec "$@"
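
For reference, the deleted README drives these five files together roughly as in the sketch below. This is a minimal illustration only: the job jar path `./my-job.jar`, the image name `my-flink-job`, and the entry class `org.example.MyJob` are hypothetical placeholders rather than values from this patch, and the Flink/Hadoop/Scala versions simply mirror the README's own example.

    cd flink-container/docker

    # Build a job-specific image from an official release.
    ./build.sh --from-release --flink-version 1.6.0 --hadoop-version 2.8 --scala-version 2.11 \
        --job-artifacts ./my-job.jar --image-name my-flink-job

    # Start the job cluster and its TaskManagers as defined in docker-compose.yml.
    FLINK_DOCKER_IMAGE_NAME=my-flink-job FLINK_JOB=org.example.MyJob DEFAULT_PARALLELISM=2 docker-compose up -d

    # Scale the TaskManagers, then tear the cluster down.
    docker-compose scale taskmanager=4
    docker-compose kill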