Unverified commit 1cb883da, authored by zyfncg, committed by GitHub

fix extra output of kernels for inference (#45048)

Parent 615b15a3
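The diff below makes the auxiliary kernel outputs optional: dropout's mask and instance_norm's saved_mean / saved_variance are produced only when the caller actually wires them up, so inference graphs can drop them. A minimal standalone sketch of the guard pattern, using stand-in types rather than the real phi API:

    #include <cassert>
    #include <vector>

    // Stand-in for phi::DenseTensor, just to make the sketch runnable.
    struct Tensor {
      std::vector<float> data;
    };

    // Before the fix the mask was written unconditionally; now a null
    // pointer simply skips the auxiliary output.
    void DropoutLike(const Tensor& x, bool is_test, Tensor* out, Tensor* mask) {
      assert(out != nullptr);   // the primary output stays mandatory
      out->data = x.data;       // identity stands in for the dropout math
      if (!is_test && mask) {   // auxiliary output is now optional
        mask->data.assign(x.data.size(), 1.0f);
      }
    }

    int main() {
      Tensor x{{1.f, 2.f, 3.f}}, out;
      DropoutLike(x, /*is_test=*/true, &out, /*mask=*/nullptr);  // inference
    }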
@@ -307,7 +307,7 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx,
auto* x_data = x.data<T>();
auto* y_data = y->data<T>();
- if (!is_test) {
+ if (!is_test && mask) {
auto* mask_data = mask->data<uint8_t>();
size_t size = phi::product(mask->dims());
@@ -317,16 +317,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
nullptr,
phi::errors::InvalidArgument(
"The y in InstanceNormInferMeta can't be nullptr."));
- PADDLE_ENFORCE_NE(
-     saved_mean,
-     nullptr,
-     phi::errors::InvalidArgument(
-         "The saved_mean in InstanceNormInferMeta can't be nullptr."));
- PADDLE_ENFORCE_NE(
-     saved_variance,
-     nullptr,
-     phi::errors::InvalidArgument(
-         "The saved_variance in InstanceNormInferMeta can't be nullptr."));
const auto x_dims = x.dims();
PADDLE_ENFORCE_NE(phi::product(x_dims),
0,
@@ -401,11 +391,15 @@ void InstanceNormInferMeta(const MetaTensor& x,
}
}
y->set_dims(x_dims);
- saved_mean->set_dims({NxC});
- saved_variance->set_dims({NxC});
y->share_lod(x);
y->set_dtype(x.dtype());
y->set_layout(x.layout());
+ if (saved_mean) {
+   saved_mean->set_dims({NxC});
+ }
+ if (saved_variance) {
+   saved_variance->set_dims({NxC});
+ }
}
void GraphSendRecvInferMeta(const MetaTensor& x,
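The shape-inference side mirrors the kernels: the hard non-null checks on saved_mean / saved_variance are dropped, and their dims are set only when the tensors exist. A hedged sketch of that conditional propagation (Meta is a stand-in, not phi::MetaTensor):

    #include <cstdint>

    // Stand-in meta holder; the real code calls MetaTensor::set_dims.
    struct Meta {
      int64_t dim0 = 0;
      void set_dims(int64_t d) { dim0 = d; }
    };

    void InstanceNormMetaLike(int64_t NxC, Meta* y,
                              Meta* saved_mean, Meta* saved_variance) {
      y->set_dims(NxC);                                    // y stays required
      if (saved_mean) saved_mean->set_dims(NxC);           // optional outputs
      if (saved_variance) saved_variance->set_dims(NxC);   // filled if present
    }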
@@ -63,7 +63,7 @@ void DropoutRawKernel(const Context& dev_ctx,
auto& dropout_implementation = mode;
bool upscale_in_train = (dropout_implementation == "upscale_in_train");
- if (!is_test) {
+ if (!is_test && mask) {
auto* mask_data = dev_ctx.template Alloc<uint8_t>(mask);
size_t size = phi::product(mask->dims());
@@ -124,7 +124,7 @@ void DropoutNdKernel(const Context& dev_ctx,
auto& dropout_implementation = mode;
bool upscale_in_train = (dropout_implementation == "upscale_in_train");
- if (!is_test) {
+ if (!is_test && mask) {
DenseTensor t_mask;
t_mask.Resize(mask->dims());
T* t_mask_data = dev_ctx.template Alloc<T>(&t_mask);
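With the extra `&& mask` guard, both CPU dropout kernels skip all mask work in the test path and also tolerate a null mask during training. A self-contained sketch of the two call shapes (stand-in types, not the phi kernels):

    #include <cstdint>
    #include <vector>

    // Stand-in mirroring the guarded phi CPU kernels.
    void DropoutRawLike(const std::vector<float>& x, bool is_test,
                        std::vector<float>* out, std::vector<uint8_t>* mask) {
      *out = x;  // the real kernel scales or zeroes elements here
      if (!is_test && mask) {
        mask->assign(x.size(), 1);  // real code fills Bernoulli keep-flags
      }
    }

    int main() {
      std::vector<float> x{1.f, 2.f}, out;
      std::vector<uint8_t> mask;
      DropoutRawLike(x, /*is_test=*/true, &out, nullptr);  // inference: no mask
      DropoutRawLike(x, /*is_test=*/false, &out, &mask);   // training: mask kept
    }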
@@ -21,6 +21,7 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
@@ -63,14 +64,25 @@ void InstanceNormKernel(const Context& dev_ctx,
#endif
phi::funcs::SetConstant<CPUContext, T> set_constant;
- dev_ctx.template Alloc<T>(saved_mean);
- dev_ctx.template Alloc<T>(saved_variance);
- set_constant(dev_ctx, saved_mean, static_cast<T>(0));
- set_constant(dev_ctx, saved_variance, static_cast<T>(0));
+ DenseTensor saved_mean_tmp, saved_variance_tmp;
+ if (saved_mean) {
+   dev_ctx.template Alloc<T>(saved_mean);
+   set_constant(dev_ctx, saved_mean, static_cast<T>(0));
+ } else {
+   saved_mean_tmp = phi::Full<T>(dev_ctx, {NxC}, 0);
+ }
+ if (saved_variance) {
+   dev_ctx.template Alloc<T>(saved_variance);
+   set_constant(dev_ctx, saved_variance, static_cast<T>(0));
+ } else {
+   saved_variance_tmp = phi::Full<T>(dev_ctx, {NxC}, 0);
+ }
- auto saved_mean_a = EigenVector<T>::Flatten(*saved_mean);
+ auto saved_mean_a =
+     EigenVector<T>::Flatten(saved_mean ? *saved_mean : saved_mean_tmp);
auto saved_mean_e = saved_mean_a.reshape(NxC_shape);
- auto saved_variance_a = EigenVector<T>::Flatten(*saved_variance);
+ auto saved_variance_a = EigenVector<T>::Flatten(
+     saved_variance ? *saved_variance : saved_variance_tmp);
auto saved_variance_e = saved_variance_a.reshape(NxC_shape);
auto x_e = EigenVector<T>::Flatten(x);
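Unlike dropout's mask, the instance_norm statistics are needed internally even when the caller discards them, so the CPU kernel falls back to zero-filled temporaries (phi::Full) and flattens whichever tensor exists. The select-or-allocate pattern in isolation (stand-ins, not the real kernel):

    #include <cstddef>
    #include <vector>

    struct Tensor {
      std::vector<float> data;
    };

    // Stand-in for phi::Full: a zero-filled tensor of length n.
    Tensor Full(std::size_t n, float v) {
      return Tensor{std::vector<float>(n, v)};
    }

    void InstanceNormLike(std::size_t NxC,
                          Tensor* saved_mean, Tensor* saved_variance) {
      Tensor mean_tmp, var_tmp;
      if (saved_mean) saved_mean->data.assign(NxC, 0.f);
      else mean_tmp = Full(NxC, 0.f);  // scratch the caller never sees
      if (saved_variance) saved_variance->data.assign(NxC, 0.f);
      else var_tmp = Full(NxC, 0.f);
      Tensor& mean = saved_mean ? *saved_mean : mean_tmp;
      Tensor& var = saved_variance ? *saved_variance : var_tmp;
      // ... accumulate per-instance statistics into mean / var ...
      (void)mean;
      (void)var;
    }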
@@ -32,8 +32,10 @@ void DropoutRawKernel(const Context& dev_ctx,
DenseTensor* out,
DenseTensor* mask) {
bool upscale_in_train = (mode == "upscale_in_train");
- out->mutable_data<T>(dev_ctx.GetPlace());
- mask->mutable_data<uint8_t>(dev_ctx.GetPlace());
+ dev_ctx.template Alloc<T>(out);
+ if (mask) {
+   dev_ctx.template Alloc<uint8_t>(mask);
+ }
paddle::operators::DropoutFwGPUKernelDriver<T>(dev_ctx,
is_test,
p.to<float>(),
@@ -61,7 +63,9 @@ void DropoutNdKernel(const Context& dev_ctx,
DenseTensor* mask) {
bool upscale_in_train = (mode == "upscale_in_train");
dev_ctx.template Alloc<T>(out);
- dev_ctx.template Alloc<uint8_t>(mask);
+ if (mask) {
+   dev_ctx.template Alloc<uint8_t>(mask);
+ }
paddle::operators::DropoutFwGPUKernelDriver<T>(dev_ctx,
is_test,
p.to<float>(),
@@ -17,6 +17,7 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/norm_utils.h"
#include "paddle/phi/kernels/gpu/instance_norm_utils.h"
@@ -143,11 +144,29 @@ void InstanceNormKernel(const Context &dev_ctx,
auto handle = dev_ctx.cudnn_handle();
+ DenseTensor saved_mean_tmp, saved_variance_tmp;
phi::funcs::SetConstant<GPUContext, BatchNormParamType<T>> functor;
- dev_ctx.template Alloc<BatchNormParamType<T>>(saved_mean);
- dev_ctx.template Alloc<BatchNormParamType<T>>(saved_variance);
- functor(dev_ctx, saved_mean, static_cast<BatchNormParamType<T>>(0));
- functor(dev_ctx, saved_variance, static_cast<BatchNormParamType<T>>(0));
+ if (saved_mean) {
+   dev_ctx.template Alloc<BatchNormParamType<T>>(saved_mean);
+   functor(dev_ctx, saved_mean, static_cast<BatchNormParamType<T>>(0));
+ } else {
+   saved_mean_tmp = phi::Full<BatchNormParamType<T>>(
+       dev_ctx, {NxC}, static_cast<BatchNormParamType<T>>(0));
+ }
+ if (saved_variance) {
+   dev_ctx.template Alloc<BatchNormParamType<T>>(saved_variance);
+   functor(dev_ctx, saved_variance, static_cast<BatchNormParamType<T>>(0));
+ } else {
+   saved_variance_tmp = phi::Full<BatchNormParamType<T>>(
+       dev_ctx, {NxC}, static_cast<BatchNormParamType<T>>(0));
+ }
+ auto *saved_mean_data = saved_mean
+                             ? saved_mean->data<BatchNormParamType<T>>()
+                             : saved_mean_tmp.data<BatchNormParamType<T>>();
+ auto *saved_variance_data =
+     saved_variance ? saved_variance->data<BatchNormParamType<T>>()
+                    : saved_variance_tmp.data<BatchNormParamType<T>>();
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(
@@ -171,10 +190,8 @@ void InstanceNormKernel(const Context &dev_ctx,
nullptr,
nullptr,
epsilon,
- static_cast<void *>(
-     saved_mean->template data<BatchNormParamType<T>>()),
- static_cast<void *>(
-     saved_variance->template data<BatchNormParamType<T>>())));
+ static_cast<void *>(saved_mean_data),
+ static_cast<void *>(saved_variance_data)));
PADDLE_ENFORCE_GPU_SUCCESS(
paddle::platform::dynload::miopenDestroyTensorDescriptor(data_desc_));
@@ -198,8 +215,8 @@ void InstanceNormKernel(const Context &dev_ctx,
nullptr,
nullptr,
epsilon,
- saved_mean->template data<BatchNormParamType<T>>(),
- saved_variance->template data<BatchNormParamType<T>>()));
+ saved_mean_data,
+ saved_variance_data));
PADDLE_ENFORCE_GPU_SUCCESS(
paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
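On the GPU path the same fallback reduces to choosing a raw pointer once (saved_mean_data / saved_variance_data) and handing it to the cuDNN/MIOpen batch-norm training call, so the library never sees a null statistics buffer. The selection step in isolation (stand-ins, not the real descriptors):

    #include <cstddef>
    #include <vector>

    struct Tensor {
      std::vector<float> data;
    };

    // Pick the destination for the library's resultSaveMean-style argument:
    // the caller's tensor if present, otherwise a zeroed scratch buffer.
    float* SelectStatPtr(Tensor* user_out, Tensor& scratch, std::size_t n) {
      Tensor& dst = user_out ? *user_out : scratch;
      dst.data.assign(n, 0.f);
      return dst.data.data();
    }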