diff --git a/paddle/fluid/operators/distributed/collective_server_test.cc b/paddle/fluid/operators/distributed/collective_server_test.cc index 90f2f9fd65bf1b8c1edda6a2ebe0ce5288ddcb5d..be8c7a7dd40697d4abb8e53215ce09ae6619f18e 100644 --- a/paddle/fluid/operators/distributed/collective_server_test.cc +++ b/paddle/fluid/operators/distributed/collective_server_test.cc @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include +#include #include #include // NOLINT @@ -98,6 +100,9 @@ void Gather(const std::vector& vars, } TEST(CollectiveServer, GPU) { + setenv("http_proxy", "", 1); + setenv("https_proxy", "", 1); + platform::CUDAPlace place; platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); diff --git a/paddle/fluid/operators/distributed/rpc_server_test.cc b/paddle/fluid/operators/distributed/rpc_server_test.cc index 089ea623f18a27d14342d1d69700ef624477eba4..45e97d966fc9d469d24e40f8c77784d618280461 100644 --- a/paddle/fluid/operators/distributed/rpc_server_test.cc +++ b/paddle/fluid/operators/distributed/rpc_server_test.cc @@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include +#include #include #include // NOLINT +#include #include "gtest/gtest.h" #include "paddle/fluid/framework/block_desc.h" @@ -122,6 +125,8 @@ void StartServer(const std::string& rpc_name) { } TEST(PREFETCH, CPU) { + setenv("http_proxy", "", 1); + setenv("https_proxy", "", 1); g_req_handler.reset(new distributed::RequestPrefetchHandler(true)); g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 1)); distributed::RPCClient* client = @@ -162,6 +167,8 @@ TEST(PREFETCH, CPU) { } TEST(COMPLETE, CPU) { + setenv("http_proxy", "", 1); + setenv("https_proxy", "", 1); g_req_handler.reset(new distributed::RequestSendHandler(true)); g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 2)); distributed::RPCClient* client = diff --git a/paddle/fluid/operators/distributed_ops/listen_and_serv_op.cc b/paddle/fluid/operators/distributed_ops/listen_and_serv_op.cc index a672fb2a9141a81383d947dcc961a112aee3f7ac..14b53086d1c848bd313f5dee85cf1db851d63bd1 100644 --- a/paddle/fluid/operators/distributed_ops/listen_and_serv_op.cc +++ b/paddle/fluid/operators/distributed_ops/listen_and_serv_op.cc @@ -511,6 +511,8 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { void SignalHandler::StopAndExit(int signal_num) { // Do not use VLOG here for the device for printing maybe already released. // exit will release interal allocated resoureces. + auto file_path = string::Sprintf("/tmp/paddle.%d.port", ::getpid()); + remove(file_path.c_str()); exit(0); } diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 410c853cda05a23fed33d738533b0306058131a1..933121ee3f5d08802fdb178c42e065e7fd920cbe 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1,6 +1,7 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0 FLAGS_fast_eager_deletion_mode=1 FLAGS_memory_fraction_of_eager_deletion=1.0) +set(dist_ENVS http_proxy="" https_proxy="") if(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_OPS test_recv_op) @@ -25,6 +26,7 @@ if(NOT WITH_DISTRIBUTE) LIST(REMOVE_ITEM TEST_OPS test_dist_fleet_ctr) endif(NOT WITH_DISTRIBUTE) + if(NOT WITH_GPU OR WIN32) LIST(REMOVE_ITEM TEST_OPS test_allgather) LIST(REMOVE_ITEM TEST_OPS test_allreduce) @@ -136,7 +138,12 @@ function(bash_test_modules TARGET_NAME) endfunction() list(REMOVE_ITEM TEST_OPS test_warpctc_op) -list(REMOVE_ITEM TEST_OPS test_dist_train) + +LIST(REMOVE_ITEM TEST_OPS test_lookup_remote_table_op) +LIST(REMOVE_ITEM TEST_OPS test_hsigmoid_remote_table_op) +LIST(REMOVE_ITEM TEST_OPS test_nce_remote_table_op) +LIST(REMOVE_ITEM TEST_OPS test_dist_train) +LIST(REMOVE_ITEM TEST_OPS test_listen_and_serv_op) list(REMOVE_ITEM TEST_OPS test_dist_transpiler) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf_auto_growth) @@ -229,8 +236,14 @@ py_test_modules(test_install_check MODULES test_install_check ENVS FLAGS_cudnn_deterministic=1 SERIAL) set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST") if(WITH_DISTRIBUTE) - py_test_modules(test_dist_train MODULES test_dist_train) + py_test_modules(test_dist_train MODULES test_dist_train ENVS ${dist_ENVS}) + py_test_modules(test_lookup_remote_table_op MODULES test_lookup_remote_table_op ENVS ${dist_ENVS}) + py_test_modules(test_hsigmoid_remote_table_op MODULES test_hsigmoid_remote_table_op ENVS ${dist_ENVS}) + py_test_modules(test_nce_remote_table_op MODULES test_nce_remote_table_op ENVS ${dist_ENVS}) + py_test_modules(test_listen_and_serv_op MODULES test_listen_and_serv_op ENVS ${dist_ENVS}) set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20 LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_listen_and_serv_op test_nce_remote_table_op test_hsigmoid_remote_table_op + PROPERTIES LABELS "RUN_TYPE=DIST") if(WITH_DGC) py_test_modules(test_dgc_op MODULES test_dgc_op) endif() @@ -292,11 +305,6 @@ if (WITH_MKLDNN) add_subdirectory(mkldnn) endif() -if(WITH_DISTRIBUTE) - set_tests_properties(test_listen_and_serv_op test_nce_remote_table_op test_hsigmoid_remote_table_op - PROPERTIES LABELS "RUN_TYPE=DIST") -endif() - set_tests_properties(test_recordio_reader test_parallel_executor_test_while_train test_parallel_executor_mnist test_parallel_executor_seresnext test_parallel_executor_crf test_sync_batch_norm_op test_parallel_executor_crf_auto_growth test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass