Commit 86e912c5 authored by qingqing01, committed by wopeizl

Fix windows compiling (#16230)

test=develop
Parent 38898c28
@@ -102,7 +102,9 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g
 cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
 cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto)
 cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass)
-cc_test(test_sync_batch_norm_pass SRCS sync_batch_norm_pass_tester.cc DEPS sync_batch_norm_pass)
+if(NOT WIN32)
+  cc_test(test_sync_batch_norm_pass SRCS sync_batch_norm_pass_tester.cc DEPS sync_batch_norm_pass)
+endif()
 cc_test(test_cpu_quantize_squash_pass SRCS cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
 if (WITH_MKLDNN)
   cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass)
......
@@ -58,8 +58,10 @@ if (WITH_GPU)
     op_library(conv_fusion_op)
     file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
   endif()
-  op_library(sync_batch_norm_op)
-  file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(sync_batch_norm);\n")
+  if (NOT WIN32)
+    op_library(sync_batch_norm_op)
+    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(sync_batch_norm);\n")
+  endif()
 else()
   op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
 endif()
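
Both CMake hunks above make the same fix at the build-system level: `sync_batch_norm` depends on NCCL, which is not available on Windows, so the operator target, its generated pybind registration line, and the pass test are all wrapped in `NOT WIN32`. A self-contained sketch of why the registration must be guarded together with the operator library (the function name here is hypothetical, not Paddle's actual macro expansion):

```cpp
#include <iostream>

#if !defined(_WIN32)
// Stand-in for a symbol that exists only when the operator library is built,
// i.e. only when op_library(sync_batch_norm_op) ran in CMake.
void TouchSyncBatchNormOp() { std::cout << "sync_batch_norm registered\n"; }
#endif

int main() {
#if !defined(_WIN32)
  // Guarded reference: left unguarded, the Windows build would fail at link
  // time, because the symbol above is never compiled there.
  TouchSyncBatchNormOp();
#endif
  return 0;
}
```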
......
@@ -316,7 +316,9 @@ CUDADeviceContext::~CUDADeviceContext() {
   eigen_stream_.reset();
   eigen_device_.reset();
   PADDLE_ENFORCE(cudaStreamDestroy(stream_));
+#if !defined(_WIN32)
   PADDLE_ENFORCE(dynload::ncclCommDestroy(nccl_comm_));
+#endif
 }

 Place CUDADeviceContext::GetPlace() const { return place_; }
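
The destructor hunk applies the same guard at the source level: `dynload::ncclCommDestroy` is only available where NCCL is, so the teardown call is removed by the preprocessor in Windows builds. A minimal runnable sketch of that pattern, with a plain print standing in for the NCCL call (hypothetical names, no NCCL dependency):

```cpp
#include <cstdio>

#if !defined(_WIN32)
// Stand-in for the NCCL teardown; on Windows this function does not exist.
void DestroyCommSketch() { std::puts("ncclCommDestroy would run here"); }
#endif

struct ContextSketch {
  ~ContextSketch() {
#if !defined(_WIN32)
    DestroyCommSketch();  // mirrors the guarded PADDLE_ENFORCE(...) above
#endif
  }
};

int main() {
  ContextSketch ctx;  // destructor runs at end of scope
  return 0;
}
```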
......
@@ -265,11 +265,13 @@ class CUDADeviceContext : public DeviceContext {
   /*! \brief Return cuda stream in the device context. */
   cudaStream_t stream() const;

+#if !defined(_WIN32)
   /*! \brief Return nccl communicators. */
   ncclComm_t nccl_comm() const { return nccl_comm_; }

   /*! \brief Set nccl communicators. */
   void set_nccl_comm(ncclComm_t comm) { nccl_comm_ = comm; }
+#endif

   template <typename Callback>
   void RecordEvent(cudaEvent_t ev, Callback callback) {
@@ -295,12 +297,14 @@ class CUDADeviceContext : public DeviceContext {
   std::unique_ptr<CublasHandleHolder> cublas_handle_;
   std::unique_ptr<CublasHandleHolder> cublas_tensor_core_handle_;

+#if !defined(_WIN32)
   // NCCL communicator (single process version) for NCCL collective operations.
   // NCCL collective operations provides fast collectives over multiple GPUs
   // both within and across nodes.
   // But, this collectives is used for collectives over multiple GPUs within
   // nodes.
   ncclComm_t nccl_comm_{nullptr};
+#endif

   int compute_capability_;
   int runtime_version_;
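
In the header, both the accessors and the `nccl_comm_` member are guarded, so `CUDADeviceContext` exposes no NCCL surface at all in Windows builds; any caller of `nccl_comm()` or `set_nccl_comm()` must sit under the same guard. A condensed, self-contained sketch of this conditional-member pattern (a `void*` stands in for `ncclComm_t`; this is an assumption of the sketch, not Paddle code):

```cpp
// Conditional-member sketch: the handle and its accessors exist only in
// non-Windows builds, matching the guarded class in the diff above.
using CommHandle = void*;  // stand-in for ncclComm_t in this sketch

class CUDADeviceContextSketch {
 public:
#if !defined(_WIN32)
  CommHandle nccl_comm() const { return nccl_comm_; }
  void set_nccl_comm(CommHandle comm) { nccl_comm_ = comm; }
#endif

 private:
#if !defined(_WIN32)
  CommHandle nccl_comm_{nullptr};  // default-initialized, set externally
#endif
};

int main() {
  CUDADeviceContextSketch ctx;
#if !defined(_WIN32)
  ctx.set_nccl_comm(nullptr);  // callers must share the guard
#endif
  return 0;
}
```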
......