Unverified commit 52be62c5 authored by ceci3, committed by GitHub

fix instance norm in dygraph mode (#24717)

* fix batch_norm & instance_norm in dygraph mode, test=develop

* update instance_norm,test=develop

* fix bugs,test=develop

* add more case in unittest,test=develop

* fix,test=develop

* fix,test=develop
Parent 619848fa
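In short, this change makes the Scale and Bias inputs of the instance_norm op dispensable, so the dygraph InstanceNorm layer (and fluid.layers.instance_norm) can run without learnable affine parameters. A minimal dygraph usage sketch of the new behaviour, mirroring the added unit tests (shapes and values are illustrative only, not part of the commit):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable

x = np.random.random((2, 3, 5, 5)).astype('float32')
with fluid.dygraph.guard():
    # With param_attr=False and bias_attr=False no scale/bias parameters are
    # created; the op falls back to scale=1 and bias=0 internally.
    instance_norm = fluid.dygraph.InstanceNorm(3, param_attr=False, bias_attr=False)
    y = instance_norm(to_variable(x))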
......@@ -24,8 +24,6 @@ namespace operators {
void InstanceNormOp::InferShape(framework::InferShapeContext *ctx) const {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "InstanceNorm");
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "InstanceNorm");
OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", "InstanceNorm");
OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "InstanceNorm");
OP_INOUT_CHECK(ctx->HasOutput("SavedMean"), "Output", "SavedMean",
"InstanceNorm");
......@@ -51,37 +49,45 @@ void InstanceNormOp::InferShape(framework::InferShapeContext *ctx) const {
auto C = x_dims[1];
auto NxC = N * C;
auto scale_dim = ctx->GetInputDim("Scale");
auto bias_dim = ctx->GetInputDim("Bias");
PADDLE_ENFORCE_EQ(
scale_dim.size(), 1UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of scale must equal to 1."
"But received: the shape of scale is [%s], the dimension "
"of scale is [%d]",
scale_dim, scale_dim.size()));
PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of bias must equal to 1."
"But received: the shape of bias is [%s],the dimension "
"of bias is [%d]",
bias_dim, bias_dim.size()));
bool check = !((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 ||
framework::product(bias_dim) <= 0));
if (check) {
PADDLE_ENFORCE_EQ(scale_dim[0], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of scale must equal to [%d]"
"But received: the shape of scale is [%d]",
C, scale_dim[0]));
PADDLE_ENFORCE_EQ(bias_dim[0], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of bias must equal to [%d]"
"But received: the shape of bias is [%d]",
C, bias_dim[0]));
if (ctx->HasInput("Scale")) {
auto scale_dim = ctx->GetInputDim("Scale");
PADDLE_ENFORCE_EQ(
scale_dim.size(), 1UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of scale must equal to 1."
"But received: the shape of scale is [%s], the dimension "
"of scale is [%d]",
scale_dim, scale_dim.size()));
bool check = !((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0));
if (check) {
PADDLE_ENFORCE_EQ(scale_dim[0], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of scale must equal to [%d]"
"But received: the shape of scale is [%d]",
C, scale_dim[0]));
}
}
if (ctx->HasInput("Bias")) {
auto bias_dim = ctx->GetInputDim("Bias");
PADDLE_ENFORCE_EQ(
bias_dim.size(), 1UL,
platform::errors::InvalidArgument(
"ShapeError: the dimension of bias must equal to 1."
"But received: the shape of bias is [%s],the dimension "
"of bias is [%d]",
bias_dim, bias_dim.size()));
bool check = !((!ctx->IsRuntime()) && (framework::product(bias_dim) <= 0));
if (check) {
PADDLE_ENFORCE_EQ(bias_dim[0], C,
platform::errors::InvalidArgument(
"ShapeError: the shape of bias must equal to [%d]"
"But received: the shape of bias is [%d]",
C, bias_dim[0]));
}
}
ctx->SetOutputDim("Y", x_dims);
......@@ -100,12 +106,16 @@ framework::OpKernelType InstanceNormOp::GetExpectedKernelType(
if (input_data_type == framework::proto::VarType::FP64) {
in_param_type = framework::proto::VarType::FP64;
}
PADDLE_ENFORCE_EQ(
in_param_type, ctx.Input<Tensor>("Scale")->type(),
platform::errors::InvalidArgument("Scale input should be of float type"));
PADDLE_ENFORCE_EQ(
in_param_type, ctx.Input<Tensor>("Bias")->type(),
platform::errors::InvalidArgument("Bias input should be of float type"));
if (ctx.HasInput("Scale")) {
PADDLE_ENFORCE_EQ(in_param_type, ctx.Input<Tensor>("Scale")->type(),
platform::errors::InvalidArgument(
"Scale input should be of float type"));
}
if (ctx.HasInput("Bias")) {
PADDLE_ENFORCE_EQ(in_param_type, ctx.Input<Tensor>("Bias")->type(),
platform::errors::InvalidArgument(
"Bias input should be of float type"));
}
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
......@@ -121,10 +131,12 @@ void InstanceNormOpMaker::Make() {
AddInput("X", "The input tensor");
AddInput("Scale",
"Scale is a 1-dimensional tensor of size C "
"that is applied to the output");
"that is applied to the output")
.AsDispensable();
AddInput("Bias",
"Bias is a 1-dimensional tensor of size C "
"that is applied to the output");
"that is applied to the output")
.AsDispensable();
AddOutput("Y", "result after normalization");
AddOutput("SavedMean",
"Mean of the current mini batch, "
......@@ -199,9 +211,26 @@ class InstanceNormKernel<platform::CPUDeviceContext, T>
const auto *scale = ctx.Input<Tensor>("Scale");
const auto *bias = ctx.Input<Tensor>("Bias");
auto scale_e = framework::EigenVector<T>::Flatten(*scale);
Tensor scale_data;
Tensor bias_data;
if (!scale) {
scale_data.mutable_data<T>({C}, ctx.GetPlace());
set_constant(dev_ctx, &scale_data, static_cast<T>(1));
}
if (!bias) {
bias_data.mutable_data<T>({C}, ctx.GetPlace());
set_constant(dev_ctx, &bias_data, static_cast<T>(0));
}
auto scale_e = scale
? framework::EigenVector<T>::Flatten(*scale)
: framework::EigenVector<T>::Flatten(
const_cast<const framework::Tensor &>(scale_data));
auto scale_arr = scale_e.reshape(C_shape);
auto bias_e = framework::EigenVector<T>::Flatten(*bias);
auto bias_e = bias ? framework::EigenVector<T>::Flatten(*bias)
: framework::EigenVector<T>::Flatten(
const_cast<const framework::Tensor &>(bias_data));
auto bias_arr = bias_e.reshape(C_shape);
y->mutable_data<T>(ctx.GetPlace());
......@@ -219,7 +248,6 @@ class InstanceNormKernel<platform::CPUDeviceContext, T>
void InstanceNormGradOp::InferShape(framework::InferShapeContext *ctx) const {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "InstanceNormGrad");
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "InstanceNormGrad");
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), "Input",
framework::GradVarName("Y"), "InstanceNormGrad");
OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean",
......@@ -230,15 +258,13 @@ void InstanceNormGradOp::InferShape(framework::InferShapeContext *ctx) const {
// check output
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output",
framework::GradVarName("X"), "InstanceNormGrad");
if (ctx->HasOutput(framework::GradVarName("Scale"))) {
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Bias")), "Output",
framework::GradVarName("Bias"), "InstanceNormGrad");
}
const auto x_dims = ctx->GetInputDim("X");
const int C = x_dims[1];
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
if (ctx->HasOutput(framework::GradVarName("Scale"))) {
ctx->SetOutputDim(framework::GradVarName("Scale"), {C});
}
if (ctx->HasOutput(framework::GradVarName("Bias"))) {
ctx->SetOutputDim(framework::GradVarName("Bias"), {C});
}
}
......@@ -299,7 +325,18 @@ class InstanceNormGradKernel<platform::CPUDeviceContext, T>
Eigen::DSizes<int, 2> param_shape(N, C);
Eigen::DSizes<int, 2> shape(NxC, sample_size);
auto scale_e = framework::EigenVector<T>::Flatten(*scale);
math::SetConstant<platform::CPUDeviceContext, T> set_constant;
Tensor scale_data;
if (!scale) {
scale_data.mutable_data<T>({C}, ctx.GetPlace());
set_constant(dev_ctx, &scale_data, static_cast<T>(1));
}
auto scale_e = scale
? framework::EigenVector<T>::Flatten(*scale)
: framework::EigenVector<T>::Flatten(
const_cast<const framework::Tensor &>(scale_data));
auto mean_e = framework::EigenVector<T>::Flatten(*saved_mean);
auto inv_var_e = framework::EigenVector<T>::Flatten(*saved_inv_variance);
auto dy_e = framework::EigenVector<T>::Flatten(*d_y);
......@@ -314,7 +351,6 @@ class InstanceNormGradKernel<platform::CPUDeviceContext, T>
auto tmp = (x_arr - mean_arr.eval().broadcast(bcast)) *
inv_var_arr.eval().broadcast(bcast);
math::SetConstant<platform::CPUDeviceContext, T> set_constant;
// math: d_bias = np.sum(d_y, axis=(n,h,w))
// math: d_scale = np.sum((X-mean) / inv_std * dy, axis=(n, h,w))
if (d_scale && d_bias) {
......@@ -324,8 +360,8 @@ class InstanceNormGradKernel<platform::CPUDeviceContext, T>
set_constant(dev_ctx, d_bias, static_cast<T>(0));
auto d_scale_e = framework::EigenVector<T>::Flatten(*d_scale);
auto d_bias_e = framework::EigenVector<T>::Flatten(*d_bias);
auto d_scale_data = d_scale_e.reshape(C_shape);
auto d_bias_e = framework::EigenVector<T>::Flatten(*d_bias);
auto d_bias_data = d_bias_e.reshape(C_shape);
d_bias_data.device(*place) =
dy_arr.sum(mean_rdims).reshape(param_shape).sum(rdims);
......@@ -360,8 +396,6 @@ class InstanceNormGradKernel<platform::CPUDeviceContext, T>
void InstanceNormDoubleGradOp::InferShape(
framework::InferShapeContext *ctx) const {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "InstanceNormDoubleGrad");
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale",
"InstanceNormDoubleGrad");
OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean",
"InstanceNormDoubleGrad");
OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance",
......@@ -426,6 +460,9 @@ class InstanceNormDoubleGradKernel<platform::CPUDeviceContext, T>
auto *dScale = ctx.Output<Tensor>("DScale");
auto *ddY = ctx.Output<Tensor>("DDY");
auto &dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
math::SetConstant<platform::CPUDeviceContext, T> set_constant;
const auto &x_dims = X->dims();
int N, C, H, W, D;
ExtractNCWHD(x_dims, DataLayout::kNCHW, &N, &C, &H, &W, &D);
......@@ -455,7 +492,13 @@ class InstanceNormDoubleGradKernel<platform::CPUDeviceContext, T>
mean_tile_data = mean_arr.transpose().replicate(sample_size, 1);
inv_var_tile_data = inv_var_arr.transpose().replicate(sample_size, 1);
ConstEigenVectorArrayMap<T> scale_arr(Scale->data<T>(), C);
Tensor Scale_data;
if (!Scale) {
Scale_data.mutable_data<T>({C}, ctx.GetPlace());
set_constant(dev_ctx, &Scale_data, static_cast<T>(1));
}
ConstEigenVectorArrayMap<T> scale_arr(
Scale ? Scale->data<T>() : Scale_data.data<T>(), C);
Tensor scale_tile;
scale_tile.Resize({sample_size, NxC});
......@@ -483,9 +526,6 @@ class InstanceNormDoubleGradKernel<platform::CPUDeviceContext, T>
// inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean),
// axis=(h,w))))
auto &dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
math::SetConstant<platform::CPUDeviceContext, T> set_constant;
Tensor x_sub_mean_mul_invstd;
x_sub_mean_mul_invstd.Resize({sample_size, NxC});
x_sub_mean_mul_invstd.mutable_data<T>(ctx.GetPlace());
......
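For context: throughout the CPU kernels above, a missing Scale is filled with ones and a missing Bias with zeros, so the forward result reduces to plain instance normalization. A rough NumPy sketch of that computation (a hypothetical reference helper for illustration, not the kernel code itself):

import numpy as np

def instance_norm_ref(x, epsilon=1e-5, scale=None, bias=None):
    # x has layout (N, C, H, W); statistics are computed per (n, c) over the spatial dims.
    n, c = x.shape[0], x.shape[1]
    mean = x.reshape(n, c, -1).mean(axis=2).reshape(n, c, 1, 1)
    var = x.reshape(n, c, -1).var(axis=2).reshape(n, c, 1, 1)
    # Missing affine parameters default to scale=1 and bias=0, as in the kernels above.
    scale = np.ones(c, dtype=x.dtype) if scale is None else scale
    bias = np.zeros(c, dtype=x.dtype) if bias is None else bias
    y = (x - mean) / np.sqrt(var + epsilon)
    return y * scale.reshape(1, c, 1, 1) + bias.reshape(1, c, 1, 1)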
......@@ -146,10 +146,19 @@ class InstanceNormKernel<platform::CUDADeviceContext, T>
const int max_blocks = std::max(max_threads / block, 1);
const int grid = std::min((NxC + block - 1) / block, max_blocks);
repeat_param<T><<<grid, block, 0, dev_ctx.stream()>>>(
scale->data<T>(), scale_tmp.data<T>(), N, C);
repeat_param<T><<<grid, block, 0, dev_ctx.stream()>>>(
bias->data<T>(), bias_tmp.data<T>(), N, C);
math::SetConstant<platform::CUDADeviceContext, T> set_constant;
if (scale) {
repeat_param<T><<<grid, block, 0, dev_ctx.stream()>>>(
scale->data<T>(), scale_tmp.data<T>(), N, C);
} else {
set_constant(dev_ctx, &scale_tmp, static_cast<T>(1));
}
if (bias) {
repeat_param<T><<<grid, block, 0, dev_ctx.stream()>>>(
bias->data<T>(), bias_tmp.data<T>(), N, C);
} else {
set_constant(dev_ctx, &bias_tmp, static_cast<T>(0));
}
auto handle = dev_ctx.cudnn_handle();
......@@ -267,24 +276,27 @@ class InstanceNormGradKernel<platform::CUDADeviceContext, T>
d_scale->mutable_data<T>(ctx.GetPlace());
d_bias->mutable_data<T>(ctx.GetPlace());
}
PADDLE_ENFORCE_EQ(
scale->dims().size(), 1UL,
platform::errors::InvalidArgument(
"The `shape` in InstanceNormOp is invalid: "
"the size of scale's dimensions must be equal to 1. But "
"received: the size of scale's dimensions"
"is [%d]",
scale->dims().size()));
PADDLE_ENFORCE_EQ(scale->dims()[0], C,
platform::errors::InvalidArgument(
"The `shape` in InstanceNormOp is invalid: "
"the first dimension of scale must be equal to "
"Channels([%d]). But received: "
"the first dimension of scale is [%d],"
"the dimensions of scale is [%s], ",
C, scale->dims()[0], scale->dims()));
if (scale) {
PADDLE_ENFORCE_EQ(
scale->dims().size(), 1UL,
platform::errors::InvalidArgument(
"The `shape` in InstanceNormOp is invalid: "
"the size of scale's dimensions must be equal to 1. But "
"received: the size of scale's dimensions"
"is [%d]",
scale->dims().size()));
PADDLE_ENFORCE_EQ(scale->dims()[0], C,
platform::errors::InvalidArgument(
"The `shape` in InstanceNormOp is invalid: "
"the first dimension of scale must be equal to "
"Channels([%d]). But received: "
"the first dimension of scale is [%d],"
"the dimensions of scale is [%s], ",
C, scale->dims()[0], scale->dims()));
}
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
math::SetConstant<platform::CUDADeviceContext, T> set_constant;
const int n = x->numel();
const int block = 512;
......@@ -300,8 +312,12 @@ class InstanceNormGradKernel<platform::CUDADeviceContext, T>
ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({NxC}, dev_ctx);
Tensor d_bias_tmp =
ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({NxC}, dev_ctx);
repeat_param<T><<<grid, block, 0, dev_ctx.stream()>>>(
scale->data<T>(), scale_tmp.data<T>(), N, C);
if (scale) {
repeat_param<T><<<grid, block, 0, dev_ctx.stream()>>>(
scale->data<T>(), scale_tmp.data<T>(), N, C);
} else {
set_constant(dev_ctx, &scale_tmp, static_cast<T>(1));
}
std::vector<int> dims;
std::vector<int> strides;
......@@ -361,7 +377,7 @@ class InstanceNormGradKernel<platform::CUDADeviceContext, T>
} else {
if (d_x) {
GradComputeDX<T, block><<<NxC, block, 0, dev_ctx.stream()>>>(
d_y->data<T>(), scale->data<BatchNormParamType<T>>(),
d_y->data<T>(), scale_tmp.data<BatchNormParamType<T>>(),
saved_mean_data, x->data<T>(), saved_var_data, C, H * W * D,
d_x->data<T>());
}
......@@ -610,7 +626,6 @@ class InstanceNormDoubleGradKernel<platform::CUDADeviceContext, T>
auto *ddY = ctx.Output<Tensor>("DDY");
const T *x_data = X->data<T>();
const T *scale_data = Scale->data<T>();
const T *dy_data = dY->data<T>();
const T *ddx_data = (ddX == nullptr ? nullptr : ddX->data<T>());
......@@ -620,6 +635,9 @@ class InstanceNormDoubleGradKernel<platform::CUDADeviceContext, T>
const T *mean_data = Saved_mean->data<T>();
const T *variance_data = Saved_variance->data<T>();
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
math::SetConstant<platform::CUDADeviceContext, T> set_zero;
auto &x_dims = X->dims();
int N, C, H, W, D;
ExtractNCWHD(x_dims, DataLayout::kNCHW, &N, &C, &H, &W, &D);
......@@ -627,15 +645,19 @@ class InstanceNormDoubleGradKernel<platform::CUDADeviceContext, T>
const int n = X->numel();
int sample_size = n / N / C;
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
Tensor scale_tmp;
if (!Scale) {
scale_tmp.mutable_data<T>({C}, ctx.GetPlace());
set_zero(dev_ctx, &scale_tmp, static_cast<T>(1));
}
const T *scale_data = Scale ? Scale->data<T>() : scale_tmp.data<T>();
const int block = 512;
int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
const int max_blocks = std::max(max_threads / block, 1);
const int grid = NxC;
const int grid1 = (C + block - 1) / block;
math::SetConstant<platform::CUDADeviceContext, T> set_zero;
if (dX) {
T *dx_data = dX->mutable_data<T>(ctx.GetPlace());
set_zero(dev_ctx, dX, static_cast<T>(0));
......
......@@ -34,6 +34,7 @@
// need to manually specify them in this map.
std::map<std::string, std::set<std::string>> op_ins_map = {
{"layer_norm", {"X", "Scale", "Bias"}},
{"instance_norm", {"X", "Scale", "Bias"}},
{"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}},
{"label_smooth", {"X", "PriorDist"}},
{"assign", {"X"}},
......
......@@ -1028,16 +1028,16 @@ class InstanceNorm(layers.Layer):
num_channels(int): Indicate the number of channels of the input ``Tensor``.
epsilon(float, optional): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale`
param_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale`
of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as param_attr, the name of scale can be set in ParamAttr.
If the Initializer of the param_attr is not set, the parameter is initialized
one. Default: None.
bias_attr(ParamAttr, optional): The parameter attribute for the bias of instance_norm.
one. If it is set to False, instance_norm will not create param_attr. Default: None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm.
If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized zero.
Default: None.
If it is set to False, instance_norm will not create bias_attr. Default: None.
dtype(str, optional): Indicate the data type of the input ``Tensor``,
which can be float32 or float64. Default: float32.
......@@ -1071,25 +1071,30 @@ class InstanceNorm(layers.Layer):
bias_attr=None,
dtype='float32'):
super(InstanceNorm, self).__init__()
assert bias_attr is not False, "bias_attr should not be False in InstanceNorm."
if param_attr == False or bias_attr == False:
assert bias_attr == param_attr, "param_attr and bias_attr must be set to False at the same time in InstanceNorm"
self._epsilon = epsilon
self._param_attr = param_attr
self._bias_attr = bias_attr
self._dtype = dtype
self.scale = self.create_parameter(
attr=self._param_attr,
shape=[num_channels],
dtype=self._dtype,
default_initializer=Constant(1.0),
is_bias=False)
self.bias = self.create_parameter(
attr=self._bias_attr,
shape=[num_channels],
dtype=self._dtype,
default_initializer=Constant(0.0),
is_bias=True)
if param_attr != False and bias_attr != False:
self.scale = self.create_parameter(
attr=self._param_attr,
shape=[num_channels],
dtype=self._dtype,
default_initializer=Constant(1.0),
is_bias=False)
self.bias = self.create_parameter(
attr=self._bias_attr,
shape=[num_channels],
dtype=self._dtype,
default_initializer=Constant(0.0),
is_bias=True)
else:
self.scale = None
self.bias = None
def forward(self, input):
if in_dygraph_mode():
......@@ -1102,7 +1107,10 @@ class InstanceNorm(layers.Layer):
attrs = {"epsilon": self._epsilon}
inputs = {"X": [input], "Scale": [self.scale], "Bias": [self.bias]}
if self.scale and self.bias:
inputs = {"X": [input], "Scale": [self.scale], "Bias": [self.bias]}
else:
inputs = {"X": [input]}
saved_mean = self._helper.create_variable_for_type_inference(
dtype=self._dtype, stop_gradient=True)
......
......@@ -3114,15 +3114,17 @@ def instance_norm(input,
The data type is float32 or float64.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
param_attr(ParamAttr|None|bool, optional): The parameter attribute for Parameter `scale`
of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as param_attr, the name of scale can be set in ParamAttr.
If the Initializer of the param_attr is not set, the parameter is initialized
with Xavier. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the bias of instance_norm.
with Xavier. If the param_attr is set to False, instance_norm will not create param_attr.
Default: None.
bias_attr(ParamAttr|None|bool, optional): The parameter attribute for the bias of instance_norm.
If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized zero.
If the bias_attr is set to False, instance_norm will not create bias_attr.
Default: None.
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
......@@ -3142,7 +3144,9 @@ def instance_norm(input,
"""
check_variable_and_dtype(input, 'input', ['float32', 'float64'],
'instance_norm')
assert bias_attr is not False, "bias_attr should not be False in instance_norm."
if param_attr is False:
assert bias_attr is False, "param_attr and bias_attr must be set to False at the same time in instance_norm"
helper = LayerHelper('instance_norm', **locals())
dtype = helper.input_dtype()
......@@ -3155,18 +3159,19 @@ def instance_norm(input,
param_shape = [channel_num]
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0))
bias = helper.create_parameter(
attr=helper.bias_attr,
shape=param_shape,
dtype=dtype,
is_bias=True,
default_initializer=Constant(0.0))
if param_attr and bias_attr:
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0))
bias = helper.create_parameter(
attr=helper.bias_attr,
shape=param_shape,
dtype=dtype,
is_bias=True,
default_initializer=Constant(0.0))
# create output
saved_mean = helper.create_variable_for_type_inference(
......@@ -3176,13 +3181,14 @@ def instance_norm(input,
instance_norm_out = helper.create_variable_for_type_inference(dtype)
inputs = {"X": input}
if param_attr and bias_attr:
inputs["Scale"] = scale
inputs["Bias"] = bias
helper.append_op(
type="instance_norm",
inputs={
"X": input,
"Scale": scale,
"Bias": bias,
},
inputs=inputs,
outputs={
"Y": instance_norm_out,
"SavedMean": saved_mean,
......
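A minimal static-graph sketch of the updated fluid.layers.instance_norm API (program setup and shapes are illustrative, assuming the defaults documented above):

import numpy as np
import paddle.fluid as fluid

prog = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(prog, startup):
    x = fluid.data(name='x', shape=[2, 3, 4, 5], dtype='float32')
    # param_attr=False / bias_attr=False: only "X" is fed to the op and
    # no Scale/Bias parameters are created.
    y = fluid.layers.instance_norm(input=x, param_attr=False, bias_attr=False)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)
out, = exe.run(prog,
               feed={'x': np.random.random((2, 3, 4, 5)).astype('float32')},
               fetch_list=[y])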
......@@ -20,6 +20,7 @@ import paddle.fluid as fluid
from paddle.fluid.op import Operator
from op_test import OpTest
from paddle.fluid import Program, program_guard
from paddle.fluid.dygraph import to_variable
def _reference_instance_norm_naive(x, scale, bias, epsilon, mean, var):
......@@ -214,5 +215,63 @@ class TestInstanceNormOpError(unittest.TestCase):
self.assertRaises(TypeError, fluid.layers.instance_norm, x2)
class TestElasticNormOp(unittest.TestCase):
def init_test_case(self):
self.epsilon = 1e-5
self.places = [core.CPUPlace()]
if core.is_compiled_with_cuda() and core.op_support_gpu(
"instance_norm"):
self.places.append(core.CUDAPlace(0))
def test_norm(self):
self.init_test_case()
inputs = np.random.random((2, 3, 5, 5)).astype(np.float32)
shape = inputs.shape
n, c, h, w = shape[0], shape[1], shape[2], shape[3]
scale_shape = [c]
mean_shape = [n * c]
scale = np.ones(scale_shape).astype(np.float32)
bias = np.zeros(scale_shape).astype(np.float32)
mean, variance = _cal_mean_variance(inputs, self.epsilon, mean_shape)
out_np, _, _ = _reference_instance_norm_naive(
inputs, scale, bias, self.epsilon, mean, variance)
for place in self.places:
with fluid.dygraph.guard(place):
instance_norm = fluid.dygraph.InstanceNorm(
5, param_attr=False, bias_attr=False)
outputs = instance_norm(to_variable(inputs))
self.assertTrue(np.allclose(outputs.numpy(), out_np, atol=1e-6))
class TestElasticNormOpCase2(unittest.TestCase):
def init_test_case(self):
self.epsilon = 1e-5
self.places = [core.CPUPlace()]
if core.is_compiled_with_cuda() and core.op_support_gpu(
"instance_norm"):
self.places.append(core.CUDAPlace(0))
def test_norm(self):
self.init_test_case()
inputs = np.random.random((2, 3, 5, 5)).astype(np.float32)
shape = inputs.shape
n, c, h, w = shape[0], shape[1], shape[2], shape[3]
scale_shape = [c]
mean_shape = [n * c]
scale = np.ones(scale_shape).astype(np.float32)
bias = np.zeros(scale_shape).astype(np.float32)
mean, variance = _cal_mean_variance(inputs, self.epsilon, mean_shape)
out_np, _, _ = _reference_instance_norm_naive(
inputs, scale, bias, self.epsilon, mean, variance)
for place in self.places:
with fluid.dygraph.guard(place):
instance_norm = fluid.dygraph.InstanceNorm(
3, param_attr=True, bias_attr=True)
outputs = instance_norm(to_variable(inputs))
self.assertTrue(np.allclose(outputs.numpy(), out_np, atol=1e-6))
if __name__ == '__main__':
unittest.main()
......@@ -49,5 +49,24 @@ class TestInstanceNormDoubleGradCheck(unittest.TestCase):
self.func(p)
class TestInstanceNormDoubleGradCheckWithoutParamBias(
TestInstanceNormDoubleGradCheck):
@prog_scope()
def func(self, place):
prog = fluid.Program()
with fluid.program_guard(prog):
np.random.seed()
shape = [2, 3, 4, 5]
dtype = "float32"
eps = 0.005
atol = 1e-4
x = layers.create_parameter(dtype=dtype, shape=shape, name='x')
z = fluid.layers.instance_norm(
input=x, param_attr=False, bias_attr=False)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check(
[x], z, x_init=x_arr, atol=atol, place=place, eps=eps)
if __name__ == "__main__":
unittest.main()