提交 af41f018 编写于 作者: Q Qiyang Min 提交者: minqiyang

Fix kill fail bug (#11635)

* 1. Remove PYTHON_FLAGS from paddle_build.sh in paddlepaddle/paddle:latest-dev

* 1. Add PYTHON_FLAGS back
2. Change SIGKILL to SIGINT and SIGTERM

* 1. Add setup.py.in back

* 1. add pip install open-cv in Dockerfile to avoid libusb_exit hanging up which is caused by the opencv-python package missing

* 1. Add the && \ to line above

* 1. Remove the notice comment
上级 57780401
...@@ -76,7 +76,8 @@ RUN easy_install -U pip && \ ...@@ -76,7 +76,8 @@ RUN easy_install -U pip && \
pip install sphinx-rtd-theme==0.1.9 recommonmark pip install sphinx-rtd-theme==0.1.9 recommonmark
RUN pip install pre-commit 'ipython==5.3.0' && \ RUN pip install pre-commit 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install opencv-python
#For docstring checker #For docstring checker
RUN pip install pylint pytest astroid isort RUN pip install pylint pytest astroid isort
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
function print_usage() { function print_usage() {
echo -e "\n${RED}Usage${NONE}: echo -e "\n${RED}Usage${NONE}:
${BOLD}${SCRIPT_NAME}${NONE} [OPTION]" ${BOLD}${SCRIPT_NAME}${NONE} [OPTION]"
echo -e "\n${RED}Options${NONE}: echo -e "\n${RED}Options${NONE}:
${BLUE}build${NONE}: run build for x86 platform ${BLUE}build${NONE}: run build for x86 platform
${BLUE}build_android${NONE}: run build for android platform ${BLUE}build_android${NONE}: run build for android platform
...@@ -198,7 +198,7 @@ function build_android() { ...@@ -198,7 +198,7 @@ function build_android() {
fi fi
ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API
cat <<EOF cat <<EOF
============================================ ============================================
Generating the standalone toolchain ... Generating the standalone toolchain ...
...@@ -212,13 +212,13 @@ EOF ...@@ -212,13 +212,13 @@ EOF
--arch=$ANDROID_ARCH \ --arch=$ANDROID_ARCH \
--platform=android-$ANDROID_API \ --platform=android-$ANDROID_API \
--install-dir=$ANDROID_STANDALONE_TOOLCHAIN --install-dir=$ANDROID_STANDALONE_TOOLCHAIN
BUILD_ROOT=${PADDLE_ROOT}/build_android BUILD_ROOT=${PADDLE_ROOT}/build_android
DEST_ROOT=${PADDLE_ROOT}/install_android DEST_ROOT=${PADDLE_ROOT}/install_android
mkdir -p $BUILD_ROOT mkdir -p $BUILD_ROOT
cd $BUILD_ROOT cd $BUILD_ROOT
if [ $ANDROID_ABI == "armeabi-v7a" ]; then if [ $ANDROID_ABI == "armeabi-v7a" ]; then
cmake -DCMAKE_SYSTEM_NAME=Android \ cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
...@@ -286,7 +286,7 @@ function build_ios() { ...@@ -286,7 +286,7 @@ function build_ios() {
-DWITH_TESTING=OFF \ -DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \ -DWITH_SWIG_PY=OFF \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
make -j 2 make -j 2
} }
...@@ -331,14 +331,14 @@ EOF ...@@ -331,14 +331,14 @@ EOF
function bind_test() { function bind_test() {
# the number of process to run tests # the number of process to run tests
NUM_PROC=6 NUM_PROC=6
# calculate and set the memory usage for each process # calculate and set the memory usage for each process
MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`) MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE
# get the CUDA device count # get the CUDA device count
CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l) CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
for (( i = 0; i < $NUM_PROC; i++ )); do for (( i = 0; i < $NUM_PROC; i++ )); do
cuda_list=() cuda_list=()
for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do
......
...@@ -15,7 +15,7 @@ if(NOT WITH_DISTRIBUTE) ...@@ -15,7 +15,7 @@ if(NOT WITH_DISTRIBUTE)
endif(NOT WITH_DISTRIBUTE) endif(NOT WITH_DISTRIBUTE)
list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184 list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185 list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185
list(REMOVE_ITEM TEST_OPS test_nce) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778 list(REMOVE_ITEM TEST_OPS test_nce) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778
list(REMOVE_ITEM TEST_OPS test_recurrent_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/6152 list(REMOVE_ITEM TEST_OPS test_recurrent_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/6152
...@@ -43,8 +43,6 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op) ...@@ -43,8 +43,6 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op)
list(REMOVE_ITEM TEST_OPS test_dist_train) list(REMOVE_ITEM TEST_OPS test_dist_train)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
# TODO(wuyi): this test hungs on CI, will add it back later
list(REMOVE_ITEM TEST_OPS test_listen_and_serv_op)
foreach(TEST_OP ${TEST_OPS}) foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP}) py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP) endforeach(TEST_OP)
...@@ -52,3 +50,4 @@ py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=$ ...@@ -52,3 +50,4 @@ py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=$
py_test_modules(test_dist_train MODULES test_dist_train SERIAL) py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL) py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
...@@ -94,7 +94,7 @@ class TestListenAndServOp(OpTest): ...@@ -94,7 +94,7 @@ class TestListenAndServOp(OpTest):
self._wait_ps_ready(p1.pid) self._wait_ps_ready(p1.pid)
# raise SIGTERM to pserver # raise SIGTERM to pserver
os.kill(p1.pid, signal.SIGKILL) os.kill(p1.pid, signal.SIGINT)
p1.join() p1.join()
# run pserver on CPU in async mode # run pserver on CPU in async mode
...@@ -102,7 +102,7 @@ class TestListenAndServOp(OpTest): ...@@ -102,7 +102,7 @@ class TestListenAndServOp(OpTest):
self._wait_ps_ready(p2.pid) self._wait_ps_ready(p2.pid)
# raise SIGTERM to pserver # raise SIGTERM to pserver
os.kill(p2.pid, signal.SIGKILL) os.kill(p2.pid, signal.SIGTERM)
p2.join() p2.join()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册