Unverified · Commit 1cb883da · authored by zyfncg, committed by GitHub

fix extra output of kernels for inference (#45048)

Parent 615b15a3
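In short: per the commit title, inference does not need the training-only outputs of these kernels (dropout's `mask`, instance_norm's `saved_mean` and `saved_variance`), so each hunk below either guards an allocation or dereference of such an output with a null check or, where the computation still needs a buffer, substitutes a kernel-local temporary. A minimal sketch of the convention, using a hypothetical `Tensor` stand-in rather than the real `phi::DenseTensor`:

```cpp
#include <cstdio>
#include <vector>

// Hypothetical stand-in for phi::DenseTensor, for illustration only.
struct Tensor {
  std::vector<float> data;
};

// Optional outputs are passed as pointers; a null pointer means
// "the caller does not need this output" (e.g. at inference time).
void DropoutLike(const Tensor& x, bool is_test, Tensor* out, Tensor* mask) {
  out->data = x.data;     // the required output is always produced
  if (!is_test && mask) { // guard every touch of the optional output
    mask->data.assign(x.data.size(), 1.0f);
  }
}

int main() {
  Tensor x{{1.f, 2.f, 3.f}}, out;
  DropoutLike(x, /*is_test=*/true, &out, /*mask=*/nullptr);  // inference: no mask
  std::printf("out[0] = %f\n", out.data[0]);
}
```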
@@ -307,7 +307,7 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx,
   auto* x_data = x.data<T>();
   auto* y_data = y->data<T>();
-  if (!is_test) {
+  if (!is_test && mask) {
     auto* mask_data = mask->data<uint8_t>();
     size_t size = phi::product(mask->dims());
......
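Note the guard is `!is_test && mask` rather than a separate null check: `&&` short-circuits, so the body that dereferences `mask` is skipped whenever the output is unbound, in either mode. A trivial standalone check of that property:

```cpp
#include <cassert>

// Illustrative only: && short-circuits left to right, so when mask is null
// the branch body that would dereference it is never entered.
bool would_touch_mask(bool is_test, const void* mask) {
  return !is_test && mask != nullptr;
}

int main() {
  int dummy = 0;
  assert(would_touch_mask(false, &dummy));    // training with a bound mask
  assert(!would_touch_mask(false, nullptr));  // the case this commit fixes
  assert(!would_touch_mask(true, &dummy));    // test mode never writes the mask
}
```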
@@ -317,16 +317,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
       nullptr,
       phi::errors::InvalidArgument(
           "The y in InstanceNormInferMeta can't be nullptr."));
-  PADDLE_ENFORCE_NE(
-      saved_mean,
-      nullptr,
-      phi::errors::InvalidArgument(
-          "The saved_mean in InstanceNormInferMeta can't be nullptr."));
-  PADDLE_ENFORCE_NE(
-      saved_variance,
-      nullptr,
-      phi::errors::InvalidArgument(
-          "The saved_variance in InstanceNormInferMeta can't be nullptr."));
   const auto x_dims = x.dims();
   PADDLE_ENFORCE_NE(phi::product(x_dims),
                     0,
@@ -401,11 +391,15 @@ void InstanceNormInferMeta(const MetaTensor& x,
     }
   }
   y->set_dims(x_dims);
-  saved_mean->set_dims({NxC});
-  saved_variance->set_dims({NxC});
   y->share_lod(x);
   y->set_dtype(x.dtype());
   y->set_layout(x.layout());
+  if (saved_mean) {
+    saved_mean->set_dims({NxC});
+  }
+  if (saved_variance) {
+    saved_variance->set_dims({NxC});
+  }
 }
 
 void GraphSendRecvInferMeta(const MetaTensor& x,
......
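The InferMeta change mirrors the kernels: `y` is still required, but shape metadata for the two statistics is set only when the corresponding output exists. A toy version of the same control flow (the `MetaTensor` here is a hypothetical stand-in holding only dims):

```cpp
#include <cassert>
#include <vector>

// Hypothetical MetaTensor stand-in: shape metadata only, no data.
struct MetaTensor {
  std::vector<long> dims;
};

// Shape inference in the style of InstanceNormInferMeta: the required
// output y must exist; training-only stats are set only when requested.
void InstanceNormMetaLike(const std::vector<long>& x_dims, long NxC,
                          MetaTensor* y, MetaTensor* saved_mean,
                          MetaTensor* saved_variance) {
  assert(y != nullptr);  // still enforced, as in the real code
  y->dims = x_dims;
  if (saved_mean) saved_mean->dims = {NxC};
  if (saved_variance) saved_variance->dims = {NxC};
}

int main() {
  MetaTensor y;
  InstanceNormMetaLike({2, 3, 4, 4}, 2 * 3, &y, nullptr, nullptr);  // inference
  assert(y.dims.size() == 4);
}
```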
@@ -63,7 +63,7 @@ void DropoutRawKernel(const Context& dev_ctx,
   auto& dropout_implementation = mode;
   bool upscale_in_train = (dropout_implementation == "upscale_in_train");
-  if (!is_test) {
+  if (!is_test && mask) {
     auto* mask_data = dev_ctx.template Alloc<uint8_t>(mask);
     size_t size = phi::product(mask->dims());
@@ -124,7 +124,7 @@ void DropoutNdKernel(const Context& dev_ctx,
   auto& dropout_implementation = mode;
   bool upscale_in_train = (dropout_implementation == "upscale_in_train");
-  if (!is_test) {
+  if (!is_test && mask) {
     DenseTensor t_mask;
     t_mask.Resize(mask->dims());
     T* t_mask_data = dev_ctx.template Alloc<T>(&t_mask);
......
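Skipping the mask is safe because inference-time dropout is a deterministic scale and needs no per-element record of what was dropped. A standalone sketch of the two modes, using the same mode string checked above (with "upscale_in_train" the scaling happened during training, so inference is the identity; otherwise inference scales by 1 - p):

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Inference-time dropout: no mask, just a deterministic scale factor.
std::vector<float> DropoutInfer(const std::vector<float>& x, float p,
                                const std::string& mode) {
  float scale = (mode == "upscale_in_train") ? 1.0f : 1.0f - p;
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] * scale;
  return out;
}

int main() {
  auto out = DropoutInfer({2.0f, 4.0f}, 0.5f, "downscale_in_infer");
  std::printf("%f %f\n", out[0], out[1]);  // 1.0 2.0
}
```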
@@ -21,6 +21,7 @@
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
 #include "paddle/phi/kernels/funcs/eigen/extensions.h"
@@ -63,14 +64,25 @@ void InstanceNormKernel(const Context& dev_ctx,
 #endif
   phi::funcs::SetConstant<CPUContext, T> set_constant;
-  dev_ctx.template Alloc<T>(saved_mean);
-  dev_ctx.template Alloc<T>(saved_variance);
-  set_constant(dev_ctx, saved_mean, static_cast<T>(0));
-  set_constant(dev_ctx, saved_variance, static_cast<T>(0));
+  DenseTensor saved_mean_tmp, saved_variance_tmp;
+  if (saved_mean) {
+    dev_ctx.template Alloc<T>(saved_mean);
+    set_constant(dev_ctx, saved_mean, static_cast<T>(0));
+  } else {
+    saved_mean_tmp = phi::Full<T>(dev_ctx, {NxC}, 0);
+  }
+  if (saved_variance) {
+    dev_ctx.template Alloc<T>(saved_variance);
+    set_constant(dev_ctx, saved_variance, static_cast<T>(0));
+  } else {
+    saved_variance_tmp = phi::Full<T>(dev_ctx, {NxC}, 0);
+  }
 
-  auto saved_mean_a = EigenVector<T>::Flatten(*saved_mean);
+  auto saved_mean_a =
+      EigenVector<T>::Flatten(saved_mean ? *saved_mean : saved_mean_tmp);
   auto saved_mean_e = saved_mean_a.reshape(NxC_shape);
-  auto saved_variance_a = EigenVector<T>::Flatten(*saved_variance);
+  auto saved_variance_a = EigenVector<T>::Flatten(
+      saved_variance ? *saved_variance : saved_variance_tmp);
   auto saved_variance_e = saved_variance_a.reshape(NxC_shape);
   auto x_e = EigenVector<T>::Flatten(x);
......
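When the caller did not bind `saved_mean`/`saved_variance`, the kernel still needs somewhere to accumulate the statistics, hence the `phi::Full` temporaries and the `saved_mean ? *saved_mean : saved_mean_tmp` selection. The lifetime-and-selection pattern in isolation (std::vector standing in for DenseTensor):

```cpp
#include <cstdio>
#include <vector>

// Pattern used above: if the caller did not request an output, compute into
// a function-local temporary so the math below stays branch-free.
void StatsLike(const std::vector<float>& x, std::vector<float>* saved_mean) {
  std::vector<float> saved_mean_tmp;  // lives for the whole call
  std::vector<float>& mean = saved_mean ? *saved_mean : saved_mean_tmp;
  mean.assign(1, 0.0f);
  for (float v : x) mean[0] += v / x.size();  // same code path either way
  std::printf("mean = %f\n", mean[0]);
}

int main() {
  StatsLike({1.0f, 2.0f, 3.0f}, nullptr);  // caller skipped the output
}
```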
@@ -32,8 +32,10 @@ void DropoutRawKernel(const Context& dev_ctx,
                       DenseTensor* out,
                       DenseTensor* mask) {
   bool upscale_in_train = (mode == "upscale_in_train");
-  out->mutable_data<T>(dev_ctx.GetPlace());
-  mask->mutable_data<uint8_t>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<T>(out);
+  if (mask) {
+    dev_ctx.template Alloc<uint8_t>(mask);
+  }
   paddle::operators::DropoutFwGPUKernelDriver<T>(dev_ctx,
                                                  is_test,
                                                  p.to<float>(),
@@ -61,7 +63,9 @@ void DropoutNdKernel(const Context& dev_ctx,
                     DenseTensor* mask) {
   bool upscale_in_train = (mode == "upscale_in_train");
   dev_ctx.template Alloc<T>(out);
-  dev_ctx.template Alloc<uint8_t>(mask);
+  if (mask) {
+    dev_ctx.template Alloc<uint8_t>(mask);
+  }
   paddle::operators::DropoutFwGPUKernelDriver<T>(dev_ctx,
                                                  is_test,
                                                  p.to<float>(),
......
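Besides the null guard, the first hunk also replaces the old `mutable_data<T>(place)` calls with the `dev_ctx.template Alloc<T>(...)` style already used in the second kernel. A toy version of the guarded allocation (the `Ctx` type and its `Alloc` are stand-ins, not the real phi context API):

```cpp
#include <cstdio>
#include <vector>

// Toy stand-in for a device context exposing an Alloc<T>(tensor) method.
struct Ctx {
  template <typename T>
  T* Alloc(std::vector<T>* t) const {
    t->resize(8);
    return t->data();
  }
};

// Allocate an optional output only when the caller actually bound a tensor.
template <typename T>
T* AllocIfRequested(const Ctx& ctx, std::vector<T>* t) {
  return t ? ctx.Alloc(t) : nullptr;
}

int main() {
  Ctx ctx;
  std::vector<unsigned char> mask;
  std::vector<unsigned char>* no_mask = nullptr;  // inference: mask pruned
  std::printf("train: %p  infer: %p\n",
              static_cast<void*>(AllocIfRequested(ctx, &mask)),
              static_cast<void*>(AllocIfRequested(ctx, no_mask)));
}
```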
@@ -17,6 +17,7 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/norm_utils.h"
 #include "paddle/phi/kernels/gpu/instance_norm_utils.h"
@@ -143,11 +144,29 @@ void InstanceNormKernel(const Context &dev_ctx,
   auto handle = dev_ctx.cudnn_handle();
 
+  DenseTensor saved_mean_tmp, saved_variance_tmp;
   phi::funcs::SetConstant<GPUContext, BatchNormParamType<T>> functor;
-  dev_ctx.template Alloc<BatchNormParamType<T>>(saved_mean);
-  dev_ctx.template Alloc<BatchNormParamType<T>>(saved_variance);
-  functor(dev_ctx, saved_mean, static_cast<BatchNormParamType<T>>(0));
-  functor(dev_ctx, saved_variance, static_cast<BatchNormParamType<T>>(0));
+  if (saved_mean) {
+    dev_ctx.template Alloc<BatchNormParamType<T>>(saved_mean);
+    functor(dev_ctx, saved_mean, static_cast<BatchNormParamType<T>>(0));
+  } else {
+    saved_mean_tmp = phi::Full<BatchNormParamType<T>>(
+        dev_ctx, {NxC}, static_cast<BatchNormParamType<T>>(0));
+  }
+  if (saved_variance) {
+    dev_ctx.template Alloc<BatchNormParamType<T>>(saved_variance);
+    functor(dev_ctx, saved_variance, static_cast<BatchNormParamType<T>>(0));
+  } else {
+    saved_variance_tmp = phi::Full<BatchNormParamType<T>>(
+        dev_ctx, {NxC}, static_cast<BatchNormParamType<T>>(0));
+  }
+  auto *saved_mean_data = saved_mean
+                              ? saved_mean->data<BatchNormParamType<T>>()
+                              : saved_mean_tmp.data<BatchNormParamType<T>>();
+  auto *saved_variance_data =
+      saved_variance ? saved_variance->data<BatchNormParamType<T>>()
+                     : saved_variance_tmp.data<BatchNormParamType<T>>();
 
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(
@@ -171,10 +190,8 @@ void InstanceNormKernel(const Context &dev_ctx,
           nullptr,
           nullptr,
           epsilon,
-          static_cast<void *>(
-              saved_mean->template data<BatchNormParamType<T>>()),
-          static_cast<void *>(
-              saved_variance->template data<BatchNormParamType<T>>())));
+          static_cast<void *>(saved_mean_data),
+          static_cast<void *>(saved_variance_data)));
   PADDLE_ENFORCE_GPU_SUCCESS(
       paddle::platform::dynload::miopenDestroyTensorDescriptor(data_desc_));
@@ -198,8 +215,8 @@ void InstanceNormKernel(const Context &dev_ctx,
           nullptr,
           nullptr,
           epsilon,
-          saved_mean->template data<BatchNormParamType<T>>(),
-          saved_variance->template data<BatchNormParamType<T>>()));
+          saved_mean_data,
+          saved_variance_data));
   PADDLE_ENFORCE_GPU_SUCCESS(
       paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
......
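Hoisting the null checks into `saved_mean_data` and `saved_variance_data` lets both the MIOpen and cuDNN call sites below pass one pre-selected pointer instead of repeating the ternary at each argument. The selection in isolation, with std::vector as a stand-in buffer:

```cpp
#include <cstdio>
#include <vector>

// The vendor-library call takes raw pointers, so the kernel picks them up
// front: the caller's buffer when present, otherwise a local temporary.
int main() {
  std::vector<float>* saved_mean = nullptr;    // inference: output not bound
  std::vector<float> saved_mean_tmp(6, 0.0f);  // scratch kept alive locally
  float* saved_mean_data =
      saved_mean ? saved_mean->data() : saved_mean_tmp.data();
  std::printf("using temp: %d\n", saved_mean_data == saved_mean_tmp.data());
}
```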