[论文复现] 通过creater_parameter创建的层中参数在反向传播的过程之中没有更新
Created by: Costwen
`
class adaILN(fluid.dygraph.Layer):
def __init__(self, in_channels, eps=1e-5):
super(adaILN, self).__init__()
self.eps = eps
self.rho = fluid.layers.create_parameter((1, in_channels, 1, 1), dtype='float32', is_bias=True,
default_initializer=fluid.initializer.ConstantInitializer(0.9))
def var(self, input, dim):
mean = fluid.layers.reduce_mean(input, dim, keep_dim=True)
tmp = fluid.layers.reduce_mean((input - mean)**2, dim, keep_dim=True)
return tmp
def forward(self, input, gamma, beta):
in_mean, in_var = fluid.layers.reduce_mean(input, dim=[2,3], keep_dim=True), self.var(input, dim =[2,3])
ln_mean, ln_var = fluid.layers.reduce_mean(input, dim=[1,2,3], keep_dim=True), self.var(input, dim=[1,2,3])
out_in = (input - in_mean) / fluid.layers.sqrt(in_var + self.eps)
out_ln = (input - ln_mean) / fluid.layers.sqrt(ln_var + self.eps)
ex_rho = fluid.layers.expand(self.rho, expand_times = [input.shape[0], 1, 1, 1])
print("rho",fluid.layers.reduce_mean(self.rho).numpy())
out = ex_rho * out_in + (1 - ex_rho)*out_ln
out = out * gamma + beta
return out
class ILN(fluid.dygraph.Layer):
def __init__(self, in_channels, eps=1e-5):
super(ILN, self).__init__()
self.eps = eps
self.rho = fluid.layers.create_parameter((1, in_channels, 1, 1), dtype='float32', is_bias=True,
default_initializer=fluid.initializer.ConstantInitializer(0.0))
self.gamma = fluid.layers.create_parameter((1, in_channels, 1, 1), dtype='float32', is_bias=True,
default_initializer=fluid.initializer.ConstantInitializer(1.0))
self.beta = fluid.layers.create_parameter((1, in_channels, 1, 1), dtype='float32', is_bias=True,
default_initializer=fluid.initializer.ConstantInitializer(0.0))
def var(self, input, dim):
mean = fluid.layers.reduce_mean(input, dim, keep_dim=True)
tmp = fluid.layers.reduce_mean((input - mean)**2, dim, keep_dim=True)
return tmp
def forward(self, input):
in_mean, in_var = fluid.layers.reduce_mean(input, dim=[2,3], keep_dim=True), self.var(input, dim =[2,3])
ln_mean, ln_var = fluid.layers.reduce_mean(input, dim=[1,2,3], keep_dim=True), self.var(input, dim=[1,2,3])
out_in = (input - in_mean) / fluid.layers.sqrt(in_var + self.eps)
out_ln = (input - ln_mean) / fluid.layers.sqrt(ln_var + self.eps)
ex_rho = fluid.layers.expand(self.rho, expand_times = [input.shape[0], 1, 1, 1])
ex_gamma = fluid.layers.expand(self.gamma, expand_times = [input.shape[0], 1, 1, 1])
ex_beta = fluid.layers.expand(self.beta, expand_times = [input.shape[0], 1, 1, 1])
print("rho",fluid.layers.reduce_mean(self.rho).numpy())
print("gamma",fluid.layers.reduce_mean(self.gamma).numpy())
print("beta",fluid.layers.reduce_mean(self.beta).numpy())
out = ex_rho * out_in + (1 - ex_rho) * out_ln
out = out * ex_gamma + ex_beta
return out
` 通过上述代码创建的rho beta gamma 在反向传播的过程之中参数一直都维持着初始化时候的值
是我的用法不对吗