From 1cb883dadcbf98b774b502f1c6b811135d850a12 Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Fri, 12 Aug 2022 11:29:31 +0800
Subject: [PATCH] fix extra output of kernels for inference (#45048)

---
 paddle/fluid/operators/dropout_impl.cu.h      |  2 +-
 paddle/phi/infermeta/ternary.cc               | 18 +++------
 paddle/phi/kernels/cpu/dropout_kernel.cc      |  4 +-
 .../phi/kernels/cpu/instance_norm_kernel.cc   | 24 +++++++++---
 paddle/phi/kernels/gpu/dropout_kernel.cu      | 10 +++--
 .../phi/kernels/gpu/instance_norm_kernel.cu   | 37 ++++++++++++++-----
 6 files changed, 61 insertions(+), 34 deletions(-)

diff --git a/paddle/fluid/operators/dropout_impl.cu.h b/paddle/fluid/operators/dropout_impl.cu.h
index c7a55273560..fb046481e20 100644
--- a/paddle/fluid/operators/dropout_impl.cu.h
+++ b/paddle/fluid/operators/dropout_impl.cu.h
@@ -307,7 +307,7 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx,
   auto* x_data = x.data<T>();
   auto* y_data = y->data<T>();
 
-  if (!is_test) {
+  if (!is_test && mask) {
     auto* mask_data = mask->data<uint8_t>();
     size_t size = phi::product(mask->dims());
 
diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc
index 342c9e46023..013a58e30ed 100644
--- a/paddle/phi/infermeta/ternary.cc
+++ b/paddle/phi/infermeta/ternary.cc
@@ -317,16 +317,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
                     nullptr,
                     phi::errors::InvalidArgument(
                         "The y in InstanceNormInferMeta can't be nullptr."));
-  PADDLE_ENFORCE_NE(
-      saved_mean,
-      nullptr,
-      phi::errors::InvalidArgument(
-          "The saved_mean in InstanceNormInferMeta can't be nullptr."));
-  PADDLE_ENFORCE_NE(
-      saved_variance,
-      nullptr,
-      phi::errors::InvalidArgument(
-          "The saved_variance in InstanceNormInferMeta can't be nullptr."));
   const auto x_dims = x.dims();
   PADDLE_ENFORCE_NE(phi::product(x_dims),
                     0,
@@ -401,11 +391,15 @@ void InstanceNormInferMeta(const MetaTensor& x,
     }
   }
   y->set_dims(x_dims);
-  saved_mean->set_dims({NxC});
-  saved_variance->set_dims({NxC});
   y->share_lod(x);
   y->set_dtype(x.dtype());
   y->set_layout(x.layout());
+  if (saved_mean) {
+    saved_mean->set_dims({NxC});
+  }
+  if (saved_variance) {
+    saved_variance->set_dims({NxC});
+  }
 }
 
 void GraphSendRecvInferMeta(const MetaTensor& x,
diff --git a/paddle/phi/kernels/cpu/dropout_kernel.cc b/paddle/phi/kernels/cpu/dropout_kernel.cc
index 41c33fcf5dd..6f1dc161798 100644
--- a/paddle/phi/kernels/cpu/dropout_kernel.cc
+++ b/paddle/phi/kernels/cpu/dropout_kernel.cc
@@ -63,7 +63,7 @@ void DropoutRawKernel(const Context& dev_ctx,
   auto& dropout_implementation = mode;
   bool upscale_in_train = (dropout_implementation == "upscale_in_train");
 
-  if (!is_test) {
+  if (!is_test && mask) {
     auto* mask_data = dev_ctx.template Alloc<uint8_t>(mask);
     size_t size = phi::product(mask->dims());
 
@@ -124,7 +124,7 @@ void DropoutNdKernel(const Context& dev_ctx,
   auto& dropout_implementation = mode;
   bool upscale_in_train = (dropout_implementation == "upscale_in_train");
 
-  if (!is_test) {
+  if (!is_test && mask) {
     DenseTensor t_mask;
     t_mask.Resize(mask->dims());
     T* t_mask_data = dev_ctx.template Alloc<T>(&t_mask);
diff --git a/paddle/phi/kernels/cpu/instance_norm_kernel.cc b/paddle/phi/kernels/cpu/instance_norm_kernel.cc
index 4deced5499e..c428e7e6f89 100644
--- a/paddle/phi/kernels/cpu/instance_norm_kernel.cc
+++ b/paddle/phi/kernels/cpu/instance_norm_kernel.cc
@@ -21,6 +21,7 @@
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
"paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/eigen/extensions.h" @@ -63,14 +64,25 @@ void InstanceNormKernel(const Context& dev_ctx, #endif phi::funcs::SetConstant set_constant; - dev_ctx.template Alloc(saved_mean); - dev_ctx.template Alloc(saved_variance); - set_constant(dev_ctx, saved_mean, static_cast(0)); - set_constant(dev_ctx, saved_variance, static_cast(0)); + DenseTensor saved_mean_tmp, saved_variance_tmp; + if (saved_mean) { + dev_ctx.template Alloc(saved_mean); + set_constant(dev_ctx, saved_mean, static_cast(0)); + } else { + saved_mean_tmp = phi::Full(dev_ctx, {NxC}, 0); + } + if (saved_variance) { + dev_ctx.template Alloc(saved_variance); + set_constant(dev_ctx, saved_variance, static_cast(0)); + } else { + saved_variance_tmp = phi::Full(dev_ctx, {NxC}, 0); + } - auto saved_mean_a = EigenVector::Flatten(*saved_mean); + auto saved_mean_a = + EigenVector::Flatten(saved_mean ? *saved_mean : saved_mean_tmp); auto saved_mean_e = saved_mean_a.reshape(NxC_shape); - auto saved_variance_a = EigenVector::Flatten(*saved_variance); + auto saved_variance_a = EigenVector::Flatten( + saved_variance ? *saved_variance : saved_variance_tmp); auto saved_variance_e = saved_variance_a.reshape(NxC_shape); auto x_e = EigenVector::Flatten(x); diff --git a/paddle/phi/kernels/gpu/dropout_kernel.cu b/paddle/phi/kernels/gpu/dropout_kernel.cu index 0f2a8d9c938..34707bfd665 100644 --- a/paddle/phi/kernels/gpu/dropout_kernel.cu +++ b/paddle/phi/kernels/gpu/dropout_kernel.cu @@ -32,8 +32,10 @@ void DropoutRawKernel(const Context& dev_ctx, DenseTensor* out, DenseTensor* mask) { bool upscale_in_train = (mode == "upscale_in_train"); - out->mutable_data(dev_ctx.GetPlace()); - mask->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(out); + if (mask) { + dev_ctx.template Alloc(mask); + } paddle::operators::DropoutFwGPUKernelDriver(dev_ctx, is_test, p.to(), @@ -61,7 +63,9 @@ void DropoutNdKernel(const Context& dev_ctx, DenseTensor* mask) { bool upscale_in_train = (mode == "upscale_in_train"); dev_ctx.template Alloc(out); - dev_ctx.template Alloc(mask); + if (mask) { + dev_ctx.template Alloc(mask); + } paddle::operators::DropoutFwGPUKernelDriver(dev_ctx, is_test, p.to(), diff --git a/paddle/phi/kernels/gpu/instance_norm_kernel.cu b/paddle/phi/kernels/gpu/instance_norm_kernel.cu index b7292236898..803d218c804 100644 --- a/paddle/phi/kernels/gpu/instance_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/instance_norm_kernel.cu @@ -17,6 +17,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/norm_utils.h" #include "paddle/phi/kernels/gpu/instance_norm_utils.h" @@ -143,11 +144,29 @@ void InstanceNormKernel(const Context &dev_ctx, auto handle = dev_ctx.cudnn_handle(); + DenseTensor saved_mean_tmp, saved_variance_tmp; phi::funcs::SetConstant> functor; - dev_ctx.template Alloc>(saved_mean); - dev_ctx.template Alloc>(saved_variance); - functor(dev_ctx, saved_mean, static_cast>(0)); - functor(dev_ctx, saved_variance, static_cast>(0)); + if (saved_mean) { + dev_ctx.template Alloc>(saved_mean); + functor(dev_ctx, saved_mean, static_cast>(0)); + } else { + saved_mean_tmp = phi::Full>( + dev_ctx, {NxC}, static_cast>(0)); + } + if (saved_variance) { + dev_ctx.template Alloc>(saved_variance); + functor(dev_ctx, saved_variance, static_cast>(0)); + } else { + saved_variance_tmp 
+        dev_ctx, {NxC}, static_cast<BatchNormParamType<T>>(0));
+  }
+
+  auto *saved_mean_data = saved_mean
+                              ? saved_mean->data<BatchNormParamType<T>>()
+                              : saved_mean_tmp.data<BatchNormParamType<T>>();
+  auto *saved_variance_data =
+      saved_variance ? saved_variance->data<BatchNormParamType<T>>()
+                     : saved_variance_tmp.data<BatchNormParamType<T>>();
 
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(
@@ -171,10 +190,8 @@ void InstanceNormKernel(const Context &dev_ctx,
           nullptr,
           nullptr,
           epsilon,
-          static_cast<void *>(
-              saved_mean->template data<BatchNormParamType<T>>()),
-          static_cast<void *>(
-              saved_variance->template data<BatchNormParamType<T>>())));
+          static_cast<void *>(saved_mean_data),
+          static_cast<void *>(saved_variance_data)));
 
   PADDLE_ENFORCE_GPU_SUCCESS(
       paddle::platform::dynload::miopenDestroyTensorDescriptor(data_desc_));
@@ -198,8 +215,8 @@ void InstanceNormKernel(const Context &dev_ctx,
           nullptr,
           nullptr,
           epsilon,
-          saved_mean->template data<BatchNormParamType<T>>(),
-          saved_variance->template data<BatchNormParamType<T>>()));
+          saved_mean_data,
+          saved_variance_data));
 
   PADDLE_ENFORCE_GPU_SUCCESS(
       paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
-- 
GitLab
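
Reviewer note: every hunk above applies the same "optional output" pattern.
Below is a minimal standalone C++ sketch of that pattern for reference; the
names (`Tensor`, `InstanceNormLikeKernel`) are hypothetical stand-ins, not
phi APIs, and only the control flow mirrors the diff.

  #include <cstddef>
  #include <vector>

  // Hypothetical stand-in for phi::DenseTensor.
  struct Tensor {
    std::vector<float> buf;
    void Resize(std::size_t n) { buf.assign(n, 0.0f); }
    float* data() { return buf.data(); }
  };

  // `saved_mean` may be nullptr when the caller (e.g. an inference graph
  // after this patch) does not request that output. The statistic is still
  // needed internally, so fall back to a kernel-local temporary instead of
  // dereferencing the null output pointer.
  void InstanceNormLikeKernel(std::size_t NxC, Tensor* y, Tensor* saved_mean) {
    Tensor saved_mean_tmp;  // scratch, used only when saved_mean == nullptr
    if (saved_mean) {
      saved_mean->Resize(NxC);  // caller-visible output
    } else {
      saved_mean_tmp.Resize(NxC);  // private buffer, freed on return
    }
    float* mean_data = saved_mean ? saved_mean->data() : saved_mean_tmp.data();
    // ... compute per-instance means into mean_data, normalize into y ...
    (void)y;
    (void)mean_data;
  }

For outputs the computation itself never reads (dropout's `mask`), no
fallback buffer is needed at all: the guarded branch `if (!is_test && mask)`
simply skips writing it.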