From 03b42d9fa7d5d2814c9a2b6d46aed0f750476b67 Mon Sep 17 00:00:00 2001
From: LoveAn <mr.avin0323@gmail.com>
Date: Tue, 8 Dec 2020 14:36:15 +0800
Subject: [PATCH] fix unittest on windows, test=develop (#29365)

---
 .../details/broadcast_op_handle_test.cc        |  2 +-
 .../details/fused_broadcast_op_handle_test.cc  |  2 +-
 paddle/fluid/framework/parallel_executor.cc    | 16 +++++++++-------
 paddle/scripts/paddle_build.bat                | 14 +++++++-------
 .../fluid/contrib/slim/tests/CMakeLists.txt    | 18 ++++++++++++++++++
 .../fluid/tests/unittests/CMakeLists.txt       | 16 ++++++++++------
 .../test_gpu_package_without_gpu_device.py     |  3 ++-
 7 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.cc b/paddle/fluid/framework/details/broadcast_op_handle_test.cc
index 650de5a48de..94ae3349a50 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle_test.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle_test.cc
@@ -34,7 +34,7 @@ TEST(BroadcastTester, TestCPUBroadcastTestSelectedRows) {
   test_op.TestBroadcastSelectedRows(input_scope_idx);
 }
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_NCCL)
 TEST(BroadcastTester, TestGPUBroadcastTestLodTensor) {
   TestBroadcastOpHandle test_op;
   size_t input_scope_idx = 0;
diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
index ce7621d4e35..8b1fb4c7996 100644
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
@@ -160,7 +160,7 @@ TEST(FusedBroadcastTester, CPUSelectedRows) {
   test_op.TestFusedBroadcastSelectedRows(input_scope_idxes);
 }
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_NCCL)
 TEST(FusedBroadcastTester, GPULodTensor) {
   TestFusedBroadcastOpHandle test_op;
   std::vector<size_t> input_scope_idxes = {0, 1};
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index d9ddf49f46b..579733c2a3a 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -537,13 +537,15 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
 #endif
 
 #if defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_NCCL)
-  PADDLE_ENFORCE_EQ(
-      places.size(), 1,
-      platform::errors::PermissionDenied(
-          "Your machine has multiple cards, "
-          "but the WITH_NCCL option is not turned on during compilation, "
-          "and you cannot use multi-card training or prediction. "
-          "Please recompile and turn on the WITH_NCCL option."));
+  if (member_->use_cuda_) {
+    PADDLE_ENFORCE_EQ(
+        places.size(), 1,
+        platform::errors::PermissionDenied(
+            "Your machine has multiple cards, "
+            "but the WITH_NCCL option is not turned on during compilation, "
+            "and you cannot use multi-card training or prediction. "
+            "Please recompile and turn on the WITH_NCCL option."));
+  }
 #endif
 
   VLOG(1) << string::Sprintf(
diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat
index 79c2fad3a9e..375a691e933 100644
--- a/paddle/scripts/paddle_build.bat
+++ b/paddle/scripts/paddle_build.bat
@@ -404,18 +404,18 @@ set CUDA_DEVICE_COUNT=1
 
 rem TODO: fix these unittest that is bound to fail
 rem /*==================Disabled Windows unite==============================*/
-set diable_wingpu_test=broadcast_op_test^|fused_broadcast_op_test^|test_analysis_predictor^|test_model^|test_add_reader_dependency^|test_bilateral_slice_op^|^
+set diable_wingpu_test=test_analysis_predictor^|test_model^|test_add_reader_dependency^|test_bilateral_slice_op^|^
 test_cholesky_op^|test_dataloader_early_reset^|test_decoupled_py_reader^|test_decoupled_py_reader_data_check^|test_eager_deletion_delete_vars^|^
 test_eager_deletion_while_op^|test_feed_data_check_shape_type^|test_fetch_lod_tensor_array^|test_fleet_base_single^|test_fuse_all_reduce_pass^|test_fuse_elewise_add_act_pass^|^
-test_fuse_optimizer_pass^|test_generator_dataloader^|test_gpu_package_without_gpu_device^|test_ir_memory_optimize_ifelse_op^|test_ir_memory_optimize_nlp^|test_lr_scheduler^|^
-test_multiprocess_dataloader_iterable_dataset_dynamic^|test_multiprocess_dataloader_iterable_dataset_static^|test_nvprof^|test_parallel_dygraph_sync_batch_norm^|test_parallel_executor_drop_scope^|^
+test_fuse_optimizer_pass^|test_generator_dataloader^|test_ir_memory_optimize_ifelse_op^|test_lr_scheduler^|^
+test_multiprocess_dataloader_iterable_dataset_dynamic^|test_multiprocess_dataloader_iterable_dataset_static^|test_parallel_dygraph_sync_batch_norm^|test_parallel_executor_drop_scope^|^
 test_parallel_executor_dry_run^|test_partial_eager_deletion_transformer^|test_prune^|test_py_reader_combination^|test_py_reader_pin_memory^|^
 test_py_reader_push_pop^|test_py_reader_using_executor^|test_reader_reset^|test_update_loss_scaling_op^|test_imperative_static_runner_while^|^
-test_parallel_executor_transformer^|test_parallel_executor_transformer_auto_growth^|test_flags_use_mkldnn^|test_optimizer_in_control_flow^|test_fuse_bn_act_pass^|^
+test_flags_use_mkldnn^|test_optimizer_in_control_flow^|test_fuse_bn_act_pass^|^
 test_fuse_bn_add_act_pass^|test_activation_mkldnn_op^|test_tsm^|test_gru_rnn_op^|test_rnn_op^|test_simple_rnn_op^|test_pass_builder^|test_lstm_cudnn_op^|test_inplace_addto_strategy^|^
 test_ir_inplace_pass^|test_ir_memory_optimize_pass^|test_memory_reuse_exclude_feed_var^|test_mix_precision_all_reduce_fuse^|test_parallel_executor_pg^|test_print_op^|test_py_func_op^|^
-test_weight_decay^|test_mobile_net^|test_graph^|test_imperative_out_scale^|test_imperative_qat^|test_imperative_qat_channelwise^|test_moving_average_abs_max_scale_op^|^
-test_quantization_pass^|test_quantization_scale_pass^|test_user_defined_quantization^|test_conv2d_int8_mkldnn_op^|^
+test_weight_decay^|test_mobile_net^|^
+test_conv2d_int8_mkldnn_op^|^
 test_crypto^|test_callbacks^|test_program_prune_backward^|test_imperative_ocr_attention_model
 rem /*===============================================================*/
 
@@ -430,7 +430,7 @@ test_imperative_save_load^|test_imperative_selected_rows_to_lod_tensor^|test_imp
 test_masked_select_op^|test_multiclass_nms_op^|test_naive_best_fit_gpu_memory_limit^|test_nearest_interp_v2_op^|test_nn_grad^|test_norm_nn_grad^|^
 test_normal^|test_pool3d_op^|test_pool2d_op^|test_prroi_pool_op^|test_regularizer^|test_regularizer_api^|test_sgd_op^|test_softmax_with_cross_entropy_op^|test_static_save_load^|^
 test_trilinear_interp_op^|test_trilinear_interp_v2_op^|test_bilinear_interp_op^|test_nearest_interp_op^|test_sequence_conv^|test_transformer^|^
-test_beam_search_decoder^|test_argsort_op^|test_eager_deletion_gru_net^|test_lstmp_op^|test_label_semantic_roles^|test_user_defined_quantization^|^
+test_beam_search_decoder^|test_argsort_op^|test_eager_deletion_gru_net^|test_lstmp_op^|test_label_semantic_roles^|^
 test_machine_translation^|test_row_conv_op^|test_deformable_conv_op^|test_inplace_softmax_with_cross_entropy^|test_conv2d_transpose_op^|test_conv3d_transpose_op^|^
 test_cyclic_cifar_dataset^|test_deformable_psroi_pooling^|test_elementwise_mul_op^|test_imperative_auto_mixed_precision^|test_imperative_optimizer_v2^|test_imperative_ptb_rnn_sorted_gradient^|^
 test_imperative_save_load_v2^|test_nan_inf^|test_norm_op^|test_reduce_op^|test_sigmoid_cross_entropy_with_logits_op^|test_stack_op^|test_strided_slice_op^|test_transpose_op
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index c3379a9a573..00d78adc28b 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -269,6 +269,24 @@ list(REMOVE_ITEM TEST_OPS
 LIST(REMOVE_ITEM TEST_OPS test_auto_pruning)
 LIST(REMOVE_ITEM TEST_OPS test_filter_pruning)
 
+# On Windows, register these tests separately so they run with CUDA_VISIBLE_DEVICES=0 (single GPU).
+if(WIN32)
+    SET(SINGLE_CARD_TEST_OPS
+        test_user_defined_quantization
+        test_quantization_scale_pass
+        test_quantization_pass
+        test_moving_average_abs_max_scale_op
+        test_imperative_qat_channelwise
+        test_imperative_qat
+        test_imperative_out_scale
+        test_graph)
+    LIST(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS})
+    foreach(src ${SINGLE_CARD_TEST_OPS})
+        py_test(${src} SRCS ${src}.py ENVS CUDA_VISIBLE_DEVICES=0)
+    endforeach()
+endif()
+
+
 foreach(src ${TEST_OPS})
     py_test(${src} SRCS ${src}.py)
 endforeach()
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 3dae35ad863..10fb99dd971 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -81,10 +81,6 @@ if(NOT WITH_GPU OR WIN32)
     LIST(REMOVE_ITEM TEST_OPS test_collective_allgather_api)
 endif()
 
-if(WIN32)
-    LIST(REMOVE_ITEM TEST_OPS test_multiprocess_reader_exception)
-endif()
-
 if(WIN32)
     LIST(REMOVE_ITEM TEST_OPS test_multiprocess_reader_exception)
     LIST(REMOVE_ITEM TEST_OPS test_trainer_desc)
@@ -96,6 +92,8 @@ if(WIN32)
     LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_3)
     LIST(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor)
     LIST(REMOVE_ITEM TEST_OPS test_ps_dispatcher)
+    LIST(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_nlp)
+    LIST(REMOVE_ITEM TEST_OPS test_nvprof)
 
     # TODO: Fix these unittests failed on Windows
     LIST(REMOVE_ITEM TEST_OPS test_debugger)
@@ -558,7 +556,11 @@ endif()
 py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf)
 py_test_modules(test_parallel_executor_profiler MODULES test_parallel_executor_profiler)
 py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer)
-py_test_modules(test_parallel_executor_transformer_auto_growth MODULES test_parallel_executor_transformer_auto_growth ENVS FLAGS_allocator_strategy=auto_growth)
+if(WIN32)
+    py_test_modules(test_parallel_executor_transformer_auto_growth MODULES test_parallel_executor_transformer_auto_growth ENVS FLAGS_allocator_strategy=auto_growth CUDA_VISIBLE_DEVICES=0)
+else()
+    py_test_modules(test_parallel_executor_transformer_auto_growth MODULES test_parallel_executor_transformer_auto_growth ENVS FLAGS_allocator_strategy=auto_growth)
+endif()
 
 py_test_modules(test_data_norm_op MODULES test_data_norm_op)
 py_test_modules(test_fuse_bn_act_pass MODULES test_fuse_bn_act_pass ENVS FLAGS_cudnn_deterministic=1 FLAGS_cudnn_batchnorm_spatial_persistent=1 FLAGS_conv_workspace_size_limit=1000)
@@ -696,7 +698,9 @@ set_tests_properties(test_parallel_executor_transformer PROPERTIES TIMEOUT 120)
 set_tests_properties(test_elementwise_div_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 150)
 set_tests_properties(test_multiclass_nms_op PROPERTIES TIMEOUT 120)
-set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 120)
+if(NOT WIN32)
+    set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 120)
+endif()
 set_tests_properties(test_add_reader_dependency PROPERTIES TIMEOUT 120)
 set_tests_properties(test_bilateral_slice_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_buffer_shared_memory_reuse_pass PROPERTIES TIMEOUT 120)
diff --git a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py
index cef77cc5f84..d854372bbc6 100644
--- a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py
+++ b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py
@@ -42,7 +42,8 @@ assert x.place.is_gpu_place() is False, "There is no CUDA device, but Tensor's p
             ps_proc = subprocess.Popen(
                 ps_cmd.strip().split(" "),
                 stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE)
+                stderr=subprocess.PIPE,
+                env=os.environ)
             stdout, stderr = ps_proc.communicate()
 
             assert 'CPU device will be used by default' in str(
-- 
GitLab