Commit d106da1c authored by Chesnay Schepler

[FLINK-17656][tests] Migrate docker e2e tests to flink-docker

Parent 0341b4a8
@@ -33,8 +33,40 @@ function containers_health_check() {
done
}
function build_image_with_jar() {
local job_artifacts=$1
local image_name=${2:-flink-job}
./build.sh --from-local-dist --job-artifacts ${job_artifacts} --image-name ${image_name}
function build_image() {
local image_name=${1:-flink-job}
echo "Starting fileserver for Flink distribution"
pushd ${FLINK_DIR}/..
tar -czf "${TEST_DATA_DIR}/flink.tgz" flink-*
popd
pushd ${TEST_DATA_DIR}
start_file_server
local server_pid=$!
echo "Preparing Dockeriles"
git clone https://github.com/apache/flink-docker.git --branch dev-master --single-branch
cd flink-docker
./add-custom.sh -u localhost:9999/flink.tgz -n ${image_name}
echo "Building images"
docker build --no-cache --network="host" -t ${image_name} dev/${image_name}-debian
popd
}
function start_file_server() {
command -v python >/dev/null 2>&1
if [[ $? -eq 0 ]]; then
python ${TEST_INFRA_DIR}/python2_fileserver.py &
return
fi
command -v python3 >/dev/null 2>&1
if [[ $? -eq 0 ]]; then
python3 ${TEST_INFRA_DIR}/python3_fileserver.py &
return
fi
echo "Could not find python(3) installation for starting fileserver."
exit 1
}
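For illustration, a minimal usage sketch (not part of this commit; the image name is an assumption) of driving the new helper from a test script, assuming common.sh has already set FLINK_DIR, TEST_DATA_DIR and TEST_INFRA_DIR:

source "$(dirname "$0")"/common_docker.sh

# Package the local distribution, serve it once on localhost:9999, and
# build a "my-flink-image" image from the flink-docker Dockerfiles.
build_image my-flink-image

# Confirm the image landed in the local image store.
docker images my-flink-image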
@@ -20,8 +20,6 @@
source "$(dirname "$0")"/common.sh
source "$(dirname "$0")"/common_docker.sh
DOCKER_MODULE_DIR=${END_TO_END_DIR}/../flink-container/docker
KUBERNETES_MODULE_DIR=${END_TO_END_DIR}/../flink-container/kubernetes
CONTAINER_SCRIPTS=${END_TO_END_DIR}/test-scripts/container-scripts
MINIKUBE_START_RETRIES=3
MINIKUBE_START_BACKOFF=5
......
@@ -56,9 +56,10 @@ services:
- "8081:8081"
- ${JM_RPC_EX_PORT}:${JM_RPC_IN_PORT}
volumes:
- ${USER_LIB}:/opt/flink/usrlib
- ${INPUT_VOLUME}:${INPUT_PATH}
- ${OUTPUT_VOLUME}:${OUTPUT_PATH}
command: job-cluster --job-classname ${FLINK_JOB} -Dparallelism.default=2 -Djobmanager.rpc.address=${JM_EX_HOSTNAME} -Djobmanager.bind-host=0.0.0.0 -Djobmanager.rpc.port=${JM_RPC_EX_PORT} -Djobmanager.rpc.bind-port=${JM_RPC_IN_PORT} ${FLINK_JOB_ARGUMENTS}
command: standalone-job --job-classname ${FLINK_JOB} -Dparallelism.default=2 -Djobmanager.rpc.address=${JM_EX_HOSTNAME} -Djobmanager.bind-host=0.0.0.0 -Djobmanager.rpc.port=${JM_RPC_EX_PORT} -Djobmanager.rpc.bind-port=${JM_RPC_IN_PORT} ${FLINK_JOB_ARGUMENTS}
extra_hosts:
- ${TM_1_EX_HOSTNAME}:${HOST_IP}
- ${TM_2_EX_HOSTNAME}:${HOST_IP}
@@ -69,9 +70,10 @@ services:
- ${TM_1_RPC_EX_PORT}:${TM_RPC_IN_PORT}
- ${TM_1_DATA_EX_PORT}:${TM_DATA_IN_PORT}
volumes:
- ${USER_LIB}:/opt/flink/usrlib
- ${INPUT_VOLUME}:${INPUT_PATH}
- ${OUTPUT_VOLUME}:${OUTPUT_PATH}
command: task-manager -Djobmanager.rpc.address=${JM_EX_HOSTNAME} -Djobmanager.rpc.port=${JM_RPC_EX_PORT} -Dtaskmanager.host=${TM_1_EX_HOSTNAME} -Dtaskmanager.bind-host=0.0.0.0 -Dtaskmanager.rpc.port=${TM_1_RPC_EX_PORT} -Dtaskmanager.rpc.bind-port=${TM_RPC_IN_PORT} -Dtaskmanager.data.port=${TM_1_DATA_EX_PORT} -Dtaskmanager.data.bind-port=${TM_DATA_IN_PORT}
command: taskmanager -Djobmanager.rpc.address=${JM_EX_HOSTNAME} -Djobmanager.rpc.port=${JM_RPC_EX_PORT} -Dtaskmanager.host=${TM_1_EX_HOSTNAME} -Dtaskmanager.bind-host=0.0.0.0 -Dtaskmanager.rpc.port=${TM_1_RPC_EX_PORT} -Dtaskmanager.rpc.bind-port=${TM_RPC_IN_PORT} -Dtaskmanager.data.port=${TM_1_DATA_EX_PORT} -Dtaskmanager.data.bind-port=${TM_DATA_IN_PORT}
extra_hosts:
- ${JM_EX_HOSTNAME}:${HOST_IP}
- ${TM_2_EX_HOSTNAME}:${HOST_IP}
@@ -82,9 +84,10 @@ services:
- ${TM_2_RPC_EX_PORT}:${TM_RPC_IN_PORT}
- ${TM_2_DATA_EX_PORT}:${TM_DATA_IN_PORT}
volumes:
- ${USER_LIB}:/opt/flink/usrlib
- ${INPUT_VOLUME}:${INPUT_PATH}
- ${OUTPUT_VOLUME}:${OUTPUT_PATH}
command: task-manager -Djobmanager.rpc.address=${JM_EX_HOSTNAME} -Djobmanager.rpc.port=${JM_RPC_EX_PORT} -Dtaskmanager.host=${TM_2_EX_HOSTNAME} -Dtaskmanager.bind-host=0.0.0.0 -Dtaskmanager.rpc.port=${TM_2_RPC_EX_PORT} -Dtaskmanager.rpc.bind-port=${TM_RPC_IN_PORT} -Dtaskmanager.data.port=${TM_2_DATA_EX_PORT} -Dtaskmanager.data.bind-port=${TM_DATA_IN_PORT}
command: taskmanager -Djobmanager.rpc.address=${JM_EX_HOSTNAME} -Djobmanager.rpc.port=${JM_RPC_EX_PORT} -Dtaskmanager.host=${TM_2_EX_HOSTNAME} -Dtaskmanager.bind-host=0.0.0.0 -Dtaskmanager.rpc.port=${TM_2_RPC_EX_PORT} -Dtaskmanager.rpc.bind-port=${TM_RPC_IN_PORT} -Dtaskmanager.data.port=${TM_2_DATA_EX_PORT} -Dtaskmanager.data.bind-port=${TM_DATA_IN_PORT}
extra_hosts:
- ${JM_EX_HOSTNAME}:${HOST_IP}
- ${TM_1_EX_HOSTNAME}:${HOST_IP}
@@ -16,15 +16,31 @@
# limitations under the License.
################################################################################
# Extensions to flink-container/docker/docker-compose.yml that mounts volumes needed for tests
# Docker compose file for a Flink job cluster deployment.
#
# Parameters:
# * FLINK_DOCKER_IMAGE_NAME - Image name to use for the deployment (default: flink-job:latest)
# * FLINK_JOB - Name of the Flink job to execute (default: none)
# * DEFAULT_PARALLELISM - Default parallelism with which to start the job (default: 1)
# * FLINK_JOB_ARGUMENTS - Additional arguments which will be passed to the job cluster (default: none)
# * SAVEPOINT_OPTIONS - Savepoint options to start the cluster with (default: none)
version: "2.2"
services:
job-cluster:
image: ${FLINK_DOCKER_IMAGE_NAME:-flink-job}
ports:
- "8081:8081"
command: standalone-job --job-classname ${FLINK_JOB} -Djobmanager.rpc.address=job-cluster -Dparallelism.default=${DEFAULT_PARALLELISM:-1} ${SAVEPOINT_OPTIONS} ${FLINK_JOB_ARGUMENTS}
volumes:
- ${USER_LIB}:/opt/flink/usrlib
- ${INPUT_VOLUME}:${INPUT_PATH}
- ${OUTPUT_VOLUME}:${OUTPUT_PATH}
taskmanager:
image: ${FLINK_DOCKER_IMAGE_NAME:-flink-job}
command: taskmanager -Djobmanager.rpc.address=job-cluster
scale: ${DEFAULT_PARALLELISM:-1}
volumes:
- ${USER_LIB}:/opt/flink/usrlib
- ${INPUT_VOLUME}:${INPUT_PATH}
- ${OUTPUT_VOLUME}:${OUTPUT_PATH}
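As a hedged invocation sketch (the paths and job class below are placeholders, not part of this commit), a test would export the variables documented in the header before bringing the cluster up; SAVEPOINT_OPTIONS is simply left unset for its default:

export FLINK_DOCKER_IMAGE_NAME=flink-job
export FLINK_JOB=org.apache.flink.examples.java.wordcount.WordCount
export DEFAULT_PARALLELISM=2
export USER_LIB=${FLINK_DIR}/examples/batch
export INPUT_VOLUME=/tmp/wc-in  INPUT_PATH=/data/in
export OUTPUT_VOLUME=/tmp/wc-out OUTPUT_PATH=/data/out
export FLINK_JOB_ARGUMENTS="--input ${INPUT_PATH} --output ${OUTPUT_PATH}/wc_out"
docker-compose -f docker-compose.test.yml up --abort-on-container-exit --exit-code-from job-cluster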
@@ -32,9 +32,12 @@ spec:
- name: flink-job-cluster
image: ${FLINK_IMAGE_NAME}
imagePullPolicy: Never
args: ["job-cluster", "--job-classname", "${FLINK_JOB}", "-Djobmanager.rpc.address=flink-job-cluster",
args: ["standalone-job", "--job-classname", "${FLINK_JOB}", "-Djobmanager.rpc.address=flink-job-cluster",
"-Dparallelism.default=${FLINK_JOB_PARALLELISM}", "-Dblob.server.port=6124", "-Dqueryable-state.server.ports=6125",
${FLINK_JOB_ARGUMENTS}]
volumeMounts:
- name: job-artifacts-volume
mountPath: /opt/flink/usrlib
ports:
- containerPort: 6123
name: rpc
@@ -44,3 +47,7 @@ spec:
name: query
- containerPort: 8081
name: ui
volumes:
- name: job-artifacts-volume
hostPath:
path: ${USER_LIB}
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
apiVersion: v1
kind: Service
metadata:
name: flink-job-cluster
labels:
app: flink
component: job-cluster
spec:
ports:
- name: rpc
port: 6123
- name: blob
port: 6124
- name: query
port: 6125
nodePort: 30025
- name: ui
port: 8081
nodePort: 30081
type: NodePort
selector:
app: flink
component: job-cluster
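A quick hedged check (assuming the minikube cluster these tests start) is that the Flink web UI answers on the 30081 NodePort once the service above is applied:

kubectl create -f job-cluster-service.yaml
curl -s "http://$(minikube ip):30081/overview"   # REST overview endpoint behind the "ui" port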
@@ -36,10 +36,15 @@ spec:
- name: flink-task-manager
image: ${FLINK_IMAGE_NAME}
imagePullPolicy: Never
args: ["task-manager", "-Djobmanager.rpc.address=flink-job-cluster"]
args: ["taskmanager", "-Djobmanager.rpc.address=flink-job-cluster"]
volumeMounts:
- mountPath: /cache
name: cache-volume
- mountPath: /cache
name: cache-volume
- name: job-artifacts-volume
mountPath: /opt/flink/usrlib
volumes:
- name: cache-volume
emptyDir: {}
- name: job-artifacts-volume
hostPath:
path: ${USER_LIB}
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import SimpleHTTPServer
import SocketServer
handler = SimpleHTTPServer.SimpleHTTPRequestHandler
# Azure reports the port as still in use if this is not set
SocketServer.TCPServer.allow_reuse_address = True
httpd = SocketServer.TCPServer(("", 9999), handler)
try:
httpd.handle_request()
except:
httpd.shutdown()
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import http.server
import socketserver
handler = http.server.SimpleHTTPRequestHandler
# Azure reports the port as still in use if this is not set
socketserver.TCPServer.allow_reuse_address = True
httpd = socketserver.TCPServer(("", 9999), handler)
try:
httpd.handle_request()
except:
httpd.shutdown()
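Both fileserver variants serve their working directory on port 9999 and handle exactly one request before exiting, which matches the single flink.tgz download during the Docker build. A hedged standalone smoke test (file names assumed):

cd "${TEST_DATA_DIR}"                                # directory that holds flink.tgz
python3 "${TEST_INFRA_DIR}/python3_fileserver.py" &
curl -s http://localhost:9999/flink.tgz -o /tmp/flink.tgz
wait                                                 # server exits after serving the one request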
@@ -20,7 +20,6 @@
source "$(dirname "$0")"/common.sh
source "$(dirname "$0")"/common_docker.sh
DOCKER_MODULE_DIR=${END_TO_END_DIR}/../flink-container/docker
DOCKER_SCRIPTS=${END_TO_END_DIR}/test-scripts/container-scripts
DOCKER_IMAGE_BUILD_RETRIES=3
BUILD_BACKOFF_TIME=5
@@ -52,23 +51,18 @@ esac
export FLINK_JOB_ARGUMENTS="${INPUT_ARGS} --output ${OUTPUT_PATH}/docker_wc_out"
build_image() {
build_image_with_jar ${FLINK_DIR}/examples/batch/WordCount.jar ${FLINK_DOCKER_IMAGE_NAME}
}
# The user inside the container must be able to create files; this is a workaround for in-container permissions
mkdir -p $OUTPUT_VOLUME
chmod 777 $OUTPUT_VOLUME
pushd "$DOCKER_MODULE_DIR"
if ! retry_times $DOCKER_IMAGE_BUILD_RETRIES ${BUILD_BACKOFF_TIME} build_image; then
if ! retry_times $DOCKER_IMAGE_BUILD_RETRIES ${BUILD_BACKOFF_TIME} "build_image ${FLINK_DOCKER_IMAGE_NAME}"; then
echo "Failed to build docker image. Aborting..."
exit 1
fi
popd
docker-compose -f ${DOCKER_MODULE_DIR}/docker-compose.yml -f ${DOCKER_SCRIPTS}/docker-compose.test.yml up --abort-on-container-exit --exit-code-from job-cluster &> /dev/null
docker-compose -f ${DOCKER_MODULE_DIR}/docker-compose.yml -f ${DOCKER_SCRIPTS}/docker-compose.test.yml logs job-cluster > ${FLINK_DIR}/log/jobmanager.log
docker-compose -f ${DOCKER_MODULE_DIR}/docker-compose.yml -f ${DOCKER_SCRIPTS}/docker-compose.test.yml logs taskmanager > ${FLINK_DIR}/log/taskmanager.log
export USER_LIB=${FLINK_DIR}/examples/batch
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.test.yml up --abort-on-container-exit --exit-code-from job-cluster &> /dev/null
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.test.yml logs job-cluster > ${FLINK_DIR}/log/jobmanager.log
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.test.yml logs taskmanager > ${FLINK_DIR}/log/taskmanager.log
check_result_hash "WordCount" $OUTPUT_VOLUME/docker_wc_out "${RESULT_HASH}"
@@ -33,8 +33,7 @@ setConsoleLogging
start_kubernetes
cd "$DOCKER_MODULE_DIR"
build_image_with_jar ${FLINK_DIR}/examples/batch/WordCount.jar ${FLINK_IMAGE_NAME}
build_image ${FLINK_IMAGE_NAME}
kubectl create clusterrolebinding ${CLUSTER_ROLE_BINDING} --clusterrole=edit --serviceaccount=default:default --namespace=default
@@ -49,7 +48,7 @@ mkdir -p "$LOCAL_LOGS_PATH"
-Dkubernetes.taskmanager.cpu=0.5 \
-Dkubernetes.container-start-command-template="%java% %classpath% %jvmmem% %jvmopts% %logging% %class% %args%" \
-Dkubernetes.rest-service.exposed.type=NodePort \
local:///opt/flink/usrlib/WordCount.jar
local:///opt/flink/examples/batch/WordCount.jar
kubectl wait --for=condition=Available --timeout=30s deploy/${CLUSTER_ID} || exit 1
jm_pod_name=$(kubectl get pods --selector="app=${CLUSTER_ID},component=jobmanager" -o jsonpath='{..metadata.name}')
......
@@ -36,14 +36,12 @@ start_kubernetes
mkdir -p $OUTPUT_VOLUME
cd "$DOCKER_MODULE_DIR"
build_image_with_jar ${FLINK_DIR}/examples/batch/WordCount.jar ${FLINK_IMAGE_NAME}
cd "$END_TO_END_DIR"
build_image ${FLINK_IMAGE_NAME}
kubectl create -f ${KUBERNETES_MODULE_DIR}/job-cluster-service.yaml
envsubst '${FLINK_IMAGE_NAME} ${FLINK_JOB} ${FLINK_JOB_PARALLELISM} ${FLINK_JOB_ARGUMENTS}' < ${CONTAINER_SCRIPTS}/job-cluster-job.yaml.template | kubectl create -f -
envsubst '${FLINK_IMAGE_NAME} ${FLINK_JOB_PARALLELISM}' < ${CONTAINER_SCRIPTS}/task-manager-deployment.yaml.template | kubectl create -f -
export USER_LIB=${FLINK_DIR}/examples/batch
kubectl create -f ${CONTAINER_SCRIPTS}/job-cluster-service.yaml
envsubst '${FLINK_IMAGE_NAME} ${FLINK_JOB} ${FLINK_JOB_PARALLELISM} ${FLINK_JOB_ARGUMENTS} ${USER_LIB}' < ${CONTAINER_SCRIPTS}/job-cluster-job.yaml.template | kubectl create -f -
envsubst '${FLINK_IMAGE_NAME} ${FLINK_JOB_PARALLELISM} ${USER_LIB}' < ${CONTAINER_SCRIPTS}/task-manager-deployment.yaml.template | kubectl create -f -
kubectl wait --for=condition=complete job/flink-job-cluster --timeout=1h
kubectl cp `kubectl get pods | awk '/task-manager/ {print $1}'`:/cache/${OUTPUT_FILE} ${OUTPUT_VOLUME}/${OUTPUT_FILE}
......
@@ -47,9 +47,7 @@ setConsoleLogging
start_kubernetes
cd "$DOCKER_MODULE_DIR"
# Build a Flink image without any user jars
build_image_with_jar ${TEST_INFRA_DIR}/test-data/words ${FLINK_IMAGE_NAME}
build_image ${FLINK_IMAGE_NAME}
kubectl create clusterrolebinding ${CLUSTER_ROLE_BINDING} --clusterrole=edit --serviceaccount=default:default --namespace=default
......
@@ -20,7 +20,6 @@
source "$(dirname "$0")"/common.sh
source "$(dirname "$0")"/common_docker.sh
DOCKER_MODULE_DIR=${END_TO_END_DIR}/../flink-container/docker
DOCKER_SCRIPTS=${END_TO_END_DIR}/test-scripts/container-scripts
DOCKER_IMAGE_BUILD_RETRIES=3
BUILD_BACKOFF_TIME=5
@@ -54,24 +53,21 @@ OUTPUT_PREFIX="docker_wc_out"
export FLINK_JOB_ARGUMENTS="${INPUT_ARGS} --output ${OUTPUT_PATH}/${OUTPUT_PREFIX}"
build_image() {
build_image_with_jar ${FLINK_DIR}/examples/batch/WordCount.jar ${FLINK_DOCKER_IMAGE_NAME}
}
# The user inside the container must be able to create files; this is a workaround for in-container permissions
mkdir -p $OUTPUT_VOLUME
chmod 777 $OUTPUT_VOLUME
pushd "$DOCKER_MODULE_DIR"
if ! retry_times $DOCKER_IMAGE_BUILD_RETRIES ${BUILD_BACKOFF_TIME} build_image; then
if ! retry_times $DOCKER_IMAGE_BUILD_RETRIES ${BUILD_BACKOFF_TIME} "build_image ${FLINK_DOCKER_IMAGE_NAME}"; then
echo "Failed to build docker image. Aborting..."
exit 1
fi
popd
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.nat.yml up --abort-on-container-exit --exit-code-from job-cluster &> /dev/null
export USER_LIB=${FLINK_DIR}/examples/batch
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.nat.yml up --force-recreate --abort-on-container-exit --exit-code-from job-cluster &> /dev/null
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.nat.yml logs job-cluster > ${FLINK_DIR}/log/jobmanager.log
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.nat.yml logs taskmanager1 > ${FLINK_DIR}/log/taskmanager1.log
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.nat.yml logs taskmanager2 > ${FLINK_DIR}/log/taskmanager2.log
docker-compose -f ${DOCKER_SCRIPTS}/docker-compose.nat.yml rm -f
check_result_hash "WordCount" ${OUTPUT_VOLUME}/${OUTPUT_PREFIX}/ "${RESULT_HASH}"