From 75d1571995edd5efdd31288563fc43bce4cd458b Mon Sep 17 00:00:00 2001
From: Tao Luo
Date: Tue, 3 Sep 2019 21:14:32 +0800
Subject: [PATCH] refine PADDLE_ENFORCE codes for unify PADDLE_ASSERT_MSG
 (#19603)

test=develop
---
 paddle/fluid/framework/ir/node.h              |  4 +-
 paddle/fluid/framework/tensor_util.cc         |  6 +--
 paddle/fluid/framework/tensor_util.h          |  2 +-
 paddle/fluid/operators/detail/safe_ref.h      |  2 +-
 paddle/fluid/operators/dgc_op.h               |  7 +--
 paddle/fluid/operators/dropout_op.cu          |  7 +--
 paddle/fluid/operators/math/im2col.cu         | 16 +++----
 paddle/fluid/operators/math/sample_prob.cu    |  6 +--
 .../operators/math/selected_rows_functor.cu   | 10 ++--
 paddle/fluid/operators/math/vol2col.cu        |  8 ++--
 .../operators/optimizers/lars_momentum_op.h   |  2 +-
 paddle/fluid/operators/sample_logits_op.h     | 22 +++++----
 paddle/fluid/operators/sum_op.h               |  6 +--
 paddle/fluid/operators/sync_batch_norm_op.cu  |  4 +-
 paddle/fluid/platform/cuda_helper.h           | 10 ++--
 paddle/fluid/platform/cudnn_helper.h          | 46 +++++++++++--------
 paddle/fluid/platform/device_context.h        |  4 +-
 paddle/fluid/platform/nccl_helper.h           | 24 +++++-----
 paddle/fluid/platform/profiler.cu             |  8 ++--
 19 files changed, 103 insertions(+), 91 deletions(-)

diff --git a/paddle/fluid/framework/ir/node.h b/paddle/fluid/framework/ir/node.h
index 55fedadc837..fbc0d7599ea 100644
--- a/paddle/fluid/framework/ir/node.h
+++ b/paddle/fluid/framework/ir/node.h
@@ -66,12 +66,12 @@ class Node {
   std::string Name() const { return name_; }
 
   VarDesc* Var() const {
-    PADDLE_ENFORCE(IsVar());
+    PADDLE_ENFORCE_EQ(IsVar(), true);
     return var_desc_.get();
   }
 
   OpDesc* Op() const {
-    PADDLE_ENFORCE(IsOp());
+    PADDLE_ENFORCE_EQ(IsOp(), true);
     return op_desc_.get();
   }
 
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index 3bc24bc1e5d..fb6cc1f210b 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -53,7 +53,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
     auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
     auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
     auto ctx_place = ctx.GetPlace();
-    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
     auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
     PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
     auto stream =
@@ -64,7 +64,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
     auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
     auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
     auto ctx_place = ctx.GetPlace();
-    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
    auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
     PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
     auto stream =
@@ -75,7 +75,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
     auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
     auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
     auto ctx_place = ctx.GetPlace();
-    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     if (platform::is_same_place(src_place, dst_place)) {
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index e382f920399..bb7bbc4cefb 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -146,7 +146,7 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
   dst->resize(src.numel());
   auto dst_ptr = static_cast<void*>(dst->data());
 
-  PADDLE_ENFORCE(platform::is_cpu_place(src.place()));
+  PADDLE_ENFORCE_EQ(platform::is_cpu_place(src.place()), true);
 
   memory::Copy(dst_place, dst_ptr,
                boost::get<platform::CPUPlace>(src.place()), src_ptr, size);
 
diff --git a/paddle/fluid/operators/detail/safe_ref.h b/paddle/fluid/operators/detail/safe_ref.h
index 8660bc219c1..c56329d9ee5 100644
--- a/paddle/fluid/operators/detail/safe_ref.h
+++ b/paddle/fluid/operators/detail/safe_ref.h
@@ -25,7 +25,7 @@ namespace detail {
  */
 template <typename T, typename... ARGS>
 inline T& Ref(T* ptr, ARGS&&... args) {
-  PADDLE_ENFORCE(ptr != nullptr, ::paddle::string::Sprintf(args...));
+  PADDLE_ENFORCE_NOT_NULL(ptr, ::paddle::string::Sprintf(args...));
   return *ptr;
 }
 
diff --git a/paddle/fluid/operators/dgc_op.h b/paddle/fluid/operators/dgc_op.h
index 8d1683bdb2d..a1dcc2bcc13 100644
--- a/paddle/fluid/operators/dgc_op.h
+++ b/paddle/fluid/operators/dgc_op.h
@@ -23,14 +23,14 @@ namespace operators {
 
 inline float get_period_sparcity(const std::vector<float>& sparsity,
                                  float cur_step, float rampup_steps) {
-  PADDLE_ENFORCE(static_cast<int>(cur_step) >= 0);
+  PADDLE_ENFORCE_GE(static_cast<int>(cur_step), 0);
 
   size_t idx = static_cast<size_t>(cur_step * sparsity.size() / rampup_steps);
   if (idx >= sparsity.size()) {
     return 0.999;
   }
 
-  PADDLE_ENFORCE(idx < sparsity.size());
+  PADDLE_ENFORCE_LT(idx, sparsity.size());
   return sparsity[idx];
 }
 
@@ -63,7 +63,8 @@ class DGCOpKernel : public framework::OpKernel<T> {
     float ratio =
         1 - get_period_sparcity(sparsity, static_cast<float>(*current_step),
                                 rampup_step);
-    PADDLE_ENFORCE(ratio > 0.0 && ratio < 1.0);
+    PADDLE_ENFORCE_GE(ratio, 0.0);
+    PADDLE_ENFORCE_LT(ratio, 1.0);
     int k = static_cast<int>(g->numel() * ratio);
 
     VLOG(10) << "m:" << m << ", use_nesterov:" << use_nesterov
diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu
index e3d758c3a24..3e0cb76d043 100644
--- a/paddle/fluid/operators/dropout_op.cu
+++ b/paddle/fluid/operators/dropout_op.cu
@@ -86,9 +86,10 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
     auto* x_data = x->data<T>();
     auto* y_data = y->mutable_data<T>(context.GetPlace());
     if (dropout_prob == 1.0f) {
-      PADDLE_ENFORCE(cudaMemsetAsync(y_data, 0, x_numel * sizeof(T), stream));
-      PADDLE_ENFORCE(cudaMemsetAsync(mask_data, 0,
-                                     x_numel * sizeof(*mask_data), stream));
+      PADDLE_ENFORCE_CUDA_SUCCESS(
+          cudaMemsetAsync(y_data, 0, x_numel * sizeof(T), stream));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemsetAsync(
+          mask_data, 0, x_numel * sizeof(*mask_data), stream));
       return;
     }
 
diff --git a/paddle/fluid/operators/math/im2col.cu b/paddle/fluid/operators/math/im2col.cu
index 4897767f4d8..809014ea3d6 100644
--- a/paddle/fluid/operators/math/im2col.cu
+++ b/paddle/fluid/operators/math/im2col.cu
@@ -66,8 +66,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
                   const std::vector<int>& dilation,
                   const std::vector<int>& stride,
                   const std::vector<int>& padding, framework::Tensor* col) {
-    PADDLE_ENFORCE(im.dims().size() == 3);
-    PADDLE_ENFORCE(col->dims().size() == 5);
+    PADDLE_ENFORCE_EQ(im.dims().size(), 3);
+    PADDLE_ENFORCE_EQ(col->dims().size(), 5);
 
     int im_channels = im.dims()[0];
     int im_height = im.dims()[1];
@@ -152,8 +152,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
                   const std::vector<int>& dilation,
                   const std::vector<int>& stride,
                   const std::vector<int>& padding, framework::Tensor* im) {
-    PADDLE_ENFORCE(im->dims().size() == 3);
-    PADDLE_ENFORCE(col.dims().size() == 5);
+    PADDLE_ENFORCE_EQ(im->dims().size(), 3);
+    PADDLE_ENFORCE_EQ(col.dims().size(), 5);
 
     int im_channels = im->dims()[0];
     int im_height = im->dims()[1];
@@ -249,8 +249,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
                   const std::vector<int>& dilation,
                   const std::vector<int>& stride,
                   const std::vector<int>& padding, framework::Tensor* col) {
-    PADDLE_ENFORCE(im.dims().size() == 3);
-    PADDLE_ENFORCE(col->dims().size() == 5);
+    PADDLE_ENFORCE_EQ(im.dims().size(), 3);
+    PADDLE_ENFORCE_EQ(col->dims().size(), 5);
 
     int im_channels = im.dims()[0];
     int im_height = im.dims()[1];
     int im_width = im.dims()[2];
@@ -331,8 +331,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
                   const std::vector<int>& dilation,
                   const std::vector<int>& stride,
                   const std::vector<int>& padding, framework::Tensor* im) {
-    PADDLE_ENFORCE(im->dims().size() == 3);
-    PADDLE_ENFORCE(col.dims().size() == 5);
+    PADDLE_ENFORCE_EQ(im->dims().size(), 3);
+    PADDLE_ENFORCE_EQ(col.dims().size(), 5);
 
     int im_channels = im->dims()[0];
     int im_height = im->dims()[1];
     int im_width = im->dims()[2];
diff --git a/paddle/fluid/operators/math/sample_prob.cu b/paddle/fluid/operators/math/sample_prob.cu
index 8f939159156..6aabfb06945 100644
--- a/paddle/fluid/operators/math/sample_prob.cu
+++ b/paddle/fluid/operators/math/sample_prob.cu
@@ -142,9 +142,9 @@ void GPUSampleWithProb<T>::operator()(
   int num_tries = UniqSampler<T>(sampler, num_samples, s_data);
   VLOG(1) << "num_tries: " << num_tries;
-  PADDLE_ENFORCE(cudaMemcpy(samples_data + num_true, s_data,
-                            sizeof(int64_t) * num_samples,
-                            cudaMemcpyHostToDevice));
+  PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpy(samples_data + num_true, s_data,
+                                         sizeof(int64_t) * num_samples,
+                                         cudaMemcpyHostToDevice));
 
   int threads = 512;
   const size_t size = batch_size * num_sampled_classes;
diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu
index 0d63f641c86..b3e2c8a6674 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.cu
+++ b/paddle/fluid/operators/math/selected_rows_functor.cu
@@ -55,11 +55,11 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
     auto* in1_data = in1_value.data<T>();
 
     auto in1_place = input1.place();
-    PADDLE_ENFORCE(platform::is_gpu_place(in1_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in1_place), true);
     auto in2_place = input2.place();
-    PADDLE_ENFORCE(platform::is_gpu_place(in2_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in2_place), true);
     auto out_place = context.GetPlace();
-    PADDLE_ENFORCE(platform::is_gpu_place(out_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_place), true);
 
     memory::Copy(boost::get<platform::CUDAPlace>(out_place), out_data,
                  boost::get<platform::CUDAPlace>(in1_place), in1_data,
@@ -162,9 +162,9 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
     }
 
     auto in1_place = input1.place();
-    PADDLE_ENFORCE(platform::is_gpu_place(in1_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in1_place), true);
     auto in2_place = input2->place();
-    PADDLE_ENFORCE(platform::is_gpu_place(in2_place));
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in2_place), true);
 
     auto* in1_data = in1_value.data<T>();
     auto* in2_data = in2_value->data<T>();
diff --git a/paddle/fluid/operators/math/vol2col.cu b/paddle/fluid/operators/math/vol2col.cu
index 28e1a752e34..25d8a247bca 100644
--- a/paddle/fluid/operators/math/vol2col.cu
+++ b/paddle/fluid/operators/math/vol2col.cu
@@ -78,8 +78,8 @@ class Vol2ColFunctor<platform::CUDADeviceContext, T> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   framework::Tensor* col) const {
-    PADDLE_ENFORCE(vol.dims().size() == 4);
-    PADDLE_ENFORCE(col->dims().size() == 7);
+    PADDLE_ENFORCE_EQ(vol.dims().size(), 4);
+    PADDLE_ENFORCE_EQ(col->dims().size(), 7);
 
     int input_channels = vol.dims()[0];
     int input_depth = vol.dims()[1];
@@ -204,8 +204,8 @@ class Col2VolFunctor<platform::CUDADeviceContext, T> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   framework::Tensor* vol) const {
-    PADDLE_ENFORCE(vol->dims().size() == 4);
-    PADDLE_ENFORCE(col.dims().size() == 7);
+    PADDLE_ENFORCE_EQ(vol->dims().size(), 4);
+    PADDLE_ENFORCE_EQ(col.dims().size(), 7);
 
     int input_channels = vol->dims()[0];
     int input_depth = vol->dims()[1];
 
diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.h b/paddle/fluid/operators/optimizers/lars_momentum_op.h
index e85be99fc42..e0064c20182 100644
--- a/paddle/fluid/operators/optimizers/lars_momentum_op.h
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op.h
@@ -30,7 +30,7 @@ class LarsMomentumOpKernel : public framework::OpKernel<T> {
     auto learning_rate = ctx.Input<framework::LoDTensor>("LearningRate");
     auto* grad_var = ctx.InputVar("Grad");
     // only support dense for now.
-    PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>());
+    PADDLE_ENFORCE_EQ(grad_var->IsType<framework::LoDTensor>(), true);
     auto grad = ctx.Input<framework::LoDTensor>("Grad");
 
     param_out->mutable_data<T>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h
index 7e78fca714d..6da533da718 100644
--- a/paddle/fluid/operators/sample_logits_op.h
+++ b/paddle/fluid/operators/sample_logits_op.h
@@ -49,11 +49,12 @@ static void CPUTakeAlongD1(const platform::DeviceContext& ctx,
                            const framework::Tensor& array,
                            const framework::Tensor& index,
                            framework::Tensor* value) {
-  PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()));
+  PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true);
   // UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K)
-  PADDLE_ENFORCE(index.dims().size() == 2 && array.dims().size() == 2 &&
-                 index.dims()[0] == array.dims()[0] &&
-                 index.dims() == value->dims());
+  PADDLE_ENFORCE_EQ(index.dims().size(), 2);
+  PADDLE_ENFORCE_EQ(array.dims().size(), 2);
+  PADDLE_ENFORCE_EQ(index.dims()[0], array.dims()[0]);
+  PADDLE_ENFORCE_EQ(index.dims(), value->dims());
 
   const auto batch_size = index.dims()[0];
   const auto num_take = index.dims()[1];
@@ -88,11 +89,12 @@ static void CPUPutAlongD1(const platform::DeviceContext& ctx,
                           framework::Tensor* array,
                           const framework::Tensor& index,
                           const framework::Tensor& value) {
-  PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()));
+  PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true);
   // UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K)
-  PADDLE_ENFORCE(index.dims().size() == 2 && array->dims().size() == 2 &&
-                 index.dims()[0] == array->dims()[0] &&
-                 index.dims() == value.dims());
+  PADDLE_ENFORCE_EQ(index.dims().size(), 2);
+  PADDLE_ENFORCE_EQ(array->dims().size(), 2);
+  PADDLE_ENFORCE_EQ(index.dims()[0], array->dims()[0]);
+  PADDLE_ENFORCE_EQ(index.dims(), value.dims());
 
   const auto batch_size = index.dims()[0];
   const auto num_put = index.dims()[1];
   auto array_dims = array->dims();
@@ -147,8 +149,8 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
  public:
   using Tensor = framework::Tensor;
   void Compute(const framework::ExecutionContext& context) const override {
-    PADDLE_ENFORCE(platform::is_cpu_place(context.GetPlace()),
-                   "This kernel only runs on CPU.");
+    PADDLE_ENFORCE_EQ(platform::is_cpu_place(context.GetPlace()), true,
+                      "This kernel only runs on CPU.");
     VLOG(3) << "Enter SampleLogitsKernel";
     // get necessary inputs
     const Tensor* logits = context.Input<Tensor>("Logits");
diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h
index 1f9f9486145..3b7f4292791 100644
--- a/paddle/fluid/operators/sum_op.h
+++ b/paddle/fluid/operators/sum_op.h
@@ -92,8 +92,8 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
   bool in_place = out_var == in_vars[0];
   auto &out_array = *out_var->GetMutable<framework::LoDTensorArray>();
   for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
-    PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
-                   "Only support all inputs are TensorArray");
+    PADDLE_ENFORCE_EQ(in_vars[i]->IsType<framework::LoDTensorArray>(), true,
+                      "Only support all inputs are TensorArray");
     auto &in_array = in_vars[i]->Get<framework::LoDTensorArray>();
 
     for (size_t i = 0; i < in_array.size(); ++i) {
@@ -106,7 +106,7 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
                         context.device_context(), &out_array[i]);
           out_array[i].set_lod(in_array[i].lod());
         } else {
-          PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
+          PADDLE_ENFORCE_EQ(out_array[i].lod(), in_array[i].lod());
           auto in = EigenVector<T>::Flatten(in_array[i]);
           auto result = EigenVector<T>::Flatten(out_array[i]);
           result.device(*context.template device_context<DeviceContext>()
diff --git a/paddle/fluid/operators/sync_batch_norm_op.cu b/paddle/fluid/operators/sync_batch_norm_op.cu
index a5984bfaaaf..8c57b0c9dd9 100644
--- a/paddle/fluid/operators/sync_batch_norm_op.cu
+++ b/paddle/fluid/operators/sync_batch_norm_op.cu
@@ -178,7 +178,7 @@ class SyncBatchNormKernel : public framework::OpKernel<T> {
       int dtype = platform::ToNCCLDataType(x->type());
       // In-place operation
-      PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
+      PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce(
           stats, stats, 2 * C + 1, static_cast<ncclDataType_t>(dtype), ncclSum,
           comm, stream));
@@ -398,7 +398,7 @@ class SyncBatchNormGradKernel : public framework::OpKernel<T> {
     }
     int dtype = platform::ToNCCLDataType(x->type());
     // In-place operation
-    PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce(
         stats, stats, 2 * C + 1, static_cast<ncclDataType_t>(dtype), ncclSum,
         comm, stream));
 
diff --git a/paddle/fluid/platform/cuda_helper.h b/paddle/fluid/platform/cuda_helper.h
index 122de72e15d..c21924ae483 100644
--- a/paddle/fluid/platform/cuda_helper.h
+++ b/paddle/fluid/platform/cuda_helper.h
@@ -29,17 +29,19 @@ namespace platform {
 class CublasHandleHolder {
  public:
   CublasHandleHolder(cudaStream_t stream, cublasMath_t math_type) {
-    PADDLE_ENFORCE(dynload::cublasCreate(&handle_));
-    PADDLE_ENFORCE(dynload::cublasSetStream(handle_, stream));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasCreate(&handle_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasSetStream(handle_, stream));
 #if CUDA_VERSION >= 9000
     if (math_type == CUBLAS_TENSOR_OP_MATH) {
-      PADDLE_ENFORCE(
+      PADDLE_ENFORCE_CUDA_SUCCESS(
           dynload::cublasSetMathMode(handle_, CUBLAS_TENSOR_OP_MATH));
     }
 #endif
   }
 
-  ~CublasHandleHolder() { PADDLE_ENFORCE(dynload::cublasDestroy(handle_)); }
+  ~CublasHandleHolder() {
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cublasDestroy(handle_));
+  }
 
   template <typename Callback>
   inline void Call(Callback &&callback) const {
diff --git a/paddle/fluid/platform/cudnn_helper.h b/paddle/fluid/platform/cudnn_helper.h
index 61a25064d17..8c124e71583 100644
--- a/paddle/fluid/platform/cudnn_helper.h
+++ b/paddle/fluid/platform/cudnn_helper.h
@@ -221,10 +221,10 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
 class ScopedTensorDescriptor {
  public:
   ScopedTensorDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
   }
   ~ScopedTensorDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
   }
 
   inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
@@ -243,7 +243,7 @@ class ScopedTensorDescriptor {
     if (groups > 1) {
       dims_with_group[1] = dims_with_group[1] / groups;
     }
-    PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
         desc_, type, dims_with_group.size(), dims_with_group.data(),
         strides.data()));
     return desc_;
@@ -265,10 +265,10 @@ class ScopedTensorDescriptor {
 class ScopedFilterDescriptor {
  public:
   ScopedFilterDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnCreateFilterDescriptor(&desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
   }
   ~ScopedFilterDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnDestroyFilterDescriptor(desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
   }
 
   inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
@@ -284,7 +284,7 @@ class ScopedFilterDescriptor {
       kernel_with_group[0] /= groups;
       // NOTE: input filter(C) of the filter is already asserted to be C/groups.
     }
-    PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetFilterNdDescriptor(
         desc_, type, format, kernel_with_group.size(),
         kernel_with_group.data()));
     return desc_;
@@ -306,10 +306,12 @@ class ScopedFilterDescriptor {
 class ScopedConvolutionDescriptor {
  public:
   ScopedConvolutionDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnCreateConvolutionDescriptor(&desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        dynload::cudnnCreateConvolutionDescriptor(&desc_));
   }
   ~ScopedConvolutionDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnDestroyConvolutionDescriptor(desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        dynload::cudnnDestroyConvolutionDescriptor(desc_));
   }
 
   inline cudnnConvolutionDescriptor_t descriptor(
@@ -332,7 +334,7 @@ class ScopedConvolutionDescriptor {
     cudnnDataType_t compute_type =
         (type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
-    PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor(
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetConvolutionNdDescriptor(
         desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
         CUDNN_CROSS_CORRELATION, compute_type));
     return desc_;
@@ -353,10 +355,10 @@ class ScopedConvolutionDescriptor {
 class ScopedPoolingDescriptor {
  public:
   ScopedPoolingDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnCreatePoolingDescriptor(&desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
   }
   ~ScopedPoolingDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnDestroyPoolingDescriptor(desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
   }
 
   inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
                                              const std::vector<int>& kernel,
                                              const std::vector<int>& pads,
                                              const std::vector<int>& strides) {
     PADDLE_ENFORCE_EQ(kernel.size(), pads.size());
     PADDLE_ENFORCE_EQ(kernel.size(), strides.size());
-    PADDLE_ENFORCE(dynload::cudnnSetPoolingNdDescriptor(
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
         desc_, (GetPoolingMode(mode)),
         CUDNN_PROPAGATE_NAN,  // Always propagate nans.
         kernel.size(), kernel.data(), pads.data(), strides.data()));
     return desc_;
@@ -380,16 +382,18 @@ class ScopedPoolingDescriptor {
 class ScopedSpatialTransformerDescriptor {
  public:
   ScopedSpatialTransformerDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
   }
   ~ScopedSpatialTransformerDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
   }
 
   template <typename T>
   inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                         const int dimA[]) {
-    PADDLE_ENFORCE(dynload::cudnnSetSpatialTransformerNdDescriptor(
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
         desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
     return desc_;
   }
@@ -402,10 +406,12 @@ class ScopedSpatialTransformerDescriptor {
 class ScopedActivationDescriptor {
  public:
   ScopedActivationDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        dynload::cudnnCreateActivationDescriptor(&desc_));
   }
   ~ScopedActivationDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        dynload::cudnnDestroyActivationDescriptor(desc_));
   }
 
   template <typename T>
@@ -467,15 +473,15 @@ inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
 class ScopedCTCLossDescriptor {
  public:
   ScopedCTCLossDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnCreateCTCLossDescriptor(&desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
   }
   ~ScopedCTCLossDescriptor() {
-    PADDLE_ENFORCE(dynload::cudnnDestroyCTCLossDescriptor(desc_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
   }
 
   template <typename T>
   inline cudnnCTCLossDescriptor_t descriptor() {
-    PADDLE_ENFORCE(
+    PADDLE_ENFORCE_CUDA_SUCCESS(
         dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
     return desc_;
   }
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index a17a0bdfb9a..8ad9f14a786 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -167,7 +167,7 @@ class CudnnHolder {
   inline void ResetWorkspace() {
     if (workspace_) {
       // Maybe someone is using the current workspace
-      PADDLE_ENFORCE(cudaStreamSynchronize(*stream_));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(*stream_));
       workspace_ = nullptr;
     }
   }
@@ -306,7 +306,7 @@ class CUDADeviceContext : public DeviceContext {
   template <typename Callback>
   void RecordEvent(cudaEvent_t ev, Callback callback) {
     callback();
-    PADDLE_ENFORCE(cudaEventRecord(ev, stream_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventRecord(ev, stream_));
   }
 
   template <typename Callback>
diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h
index 4680b070bb9..a843a7680f3 100644
--- a/paddle/fluid/platform/nccl_helper.h
+++ b/paddle/fluid/platform/nccl_helper.h
@@ -63,11 +63,11 @@ class NCCLGroupGuard {
 
   inline NCCLGroupGuard() {
     NCCLMutex().lock();
-    PADDLE_ENFORCE(dynload::ncclGroupStart());
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::ncclGroupStart());
   }
 
   inline ~NCCLGroupGuard() {
-    PADDLE_ENFORCE(dynload::ncclGroupEnd());
+    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::ncclGroupEnd());
     NCCLMutex().unlock();
   }
 };
@@ -94,7 +94,7 @@ struct NCCLContextMap {
   explicit NCCLContextMap(const std::vector<platform::Place> &places,
                           ncclUniqueId *nccl_id = nullptr,
                           size_t num_trainers = 1, size_t trainer_id = 0) {
-    PADDLE_ENFORCE(!places.empty());
+    PADDLE_ENFORCE_EQ(!places.empty(), true);
     order_.reserve(places.size());
     for (auto &p : places) {
       int dev_id = boost::get<platform::CUDAPlace>(p).device;
@@ -109,7 +109,7 @@ struct NCCLContextMap {
     // if num_trainers == 1, should create a new nccl id for local comms.
     if (num_trainers == 1 && nccl_id == nullptr) {
       std::lock_guard<std::mutex> guard(NCCLGroupGuard::NCCLMutex());
-      PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
+      PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitAll(
           comms.get(), static_cast<int>(order_.size()), order_.data()));
     } else {
       PADDLE_ENFORCE_NOT_NULL(nccl_id);
@@ -126,8 +126,8 @@ struct NCCLContextMap {
         }
         VLOG(1) << "init nccl rank:" << rank << ", nranks:" << nranks
                 << ", gpu_id:" << gpu_id << ", dev_id:" << order_[i];
-        PADDLE_ENFORCE(cudaSetDevice(gpu_id));
-        PADDLE_ENFORCE(platform::dynload::ncclCommInitRank(
+        PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(gpu_id));
+        PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitRank(
             comms.get() + i, nranks, *nccl_id, rank));
       }
     }
@@ -249,13 +249,13 @@ class NCCLCommunicator {
                       size_t trainers_num, size_t trainer_id,
                       size_t inter_trainers_num,
                       size_t exter_trainers_num) {
-    PADDLE_ENFORCE(trainers_num == inter_trainers_num * exter_trainers_num,
-                   "trainers_num:%llu != inter_trainers_num:%llu * "
-                   "exter_trainers_num:%llu",
-                   trainers_num, inter_trainers_num, exter_trainers_num);
+    PADDLE_ENFORCE_EQ(trainers_num, inter_trainers_num * exter_trainers_num,
+                      "trainers_num:%llu != inter_trainers_num:%llu * "
+                      "exter_trainers_num:%llu",
+                      trainers_num, inter_trainers_num, exter_trainers_num);
 
-    PADDLE_ENFORCE(inter_trainers_num > 1, "inter_trainers_num:%llu must > 1",
-                   inter_trainers_num);
+    PADDLE_ENFORCE_GT(inter_trainers_num, 1, "inter_trainers_num:%llu must > 1",
+                      inter_trainers_num);
 
     int inter_trainer_id = trainer_id % inter_trainers_num;
     for (size_t i = 0; i < inter_nccl_ids.size(); i++) {
diff --git a/paddle/fluid/platform/profiler.cu b/paddle/fluid/platform/profiler.cu
index aed276b16e9..d4db65060bb 100644
--- a/paddle/fluid/platform/profiler.cu
+++ b/paddle/fluid/platform/profiler.cu
@@ -35,13 +35,13 @@ void DummyKernelAndEvent() {
     ForEachDevice([](int d) {
       platform::SetDeviceId(d);
      cudaStream_t stream;
-      PADDLE_ENFORCE(cudaStreamCreate(&stream));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamCreate(&stream));
       Mark("_cuda_startup_");
       int *ptr;
-      PADDLE_ENFORCE(cudaMalloc(&ptr, sizeof(int)));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaMalloc(&ptr, sizeof(int)));
       DummyKernel<<<1, 1, 0, stream>>>(ptr);
-      PADDLE_ENFORCE(cudaStreamSynchronize(stream));
-      PADDLE_ENFORCE(cudaFree(ptr));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaFree(ptr));
     });
   }
 }
-- 
GitLab
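Editor's note (not part of the patch): a minimal sketch of the migration pattern
this commit applies throughout, assuming the PADDLE_ENFORCE_* comparison macros
and PADDLE_ENFORCE_CUDA_SUCCESS that the changed files already use (defined in
paddle/fluid/platform/enforce.h). The variables below are hypothetical.

    // Before: a failed check reports only the stringified boolean expression.
    PADDLE_ENFORCE(im.dims().size() == 3);

    // After: a failed comparison also reports both operand values, so the
    // error message shows what the actual rank was, not just that it differed.
    PADDLE_ENFORCE_EQ(im.dims().size(), 3);

    // Plain boolean predicates are compared against true explicitly,
    PADDLE_ENFORCE_EQ(platform::is_gpu_place(place), true);
    // pointer checks use the dedicated null-check macro,
    PADDLE_ENFORCE_NOT_NULL(ptr);
    // and CUDA/cuDNN/cuBLAS/NCCL calls use a macro that decodes the
    // library's status code instead of collapsing it to a boolean.
    PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));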