Error during backpropagation
Created by: luwanglin
Environment: AI Studio (advanced environment). Hardware: 8 CPU cores, 32 GB RAM, V100 GPU with 16 GB VRAM, 100 GB disk. Configuration: Python 3.7, PaddlePaddle 1.7.1.
1. Model implementation is shown below; the LRN operator is implemented by hand.

LRN implementation:
```python
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as nn


class LocalResponseNorm(fluid.dygraph.Layer):
    __constants__ = ['size', 'alpha', 'beta', 'k']

    def __init__(self, size, alpha=1e-4, beta=0.75, k=1.):
        super(LocalResponseNorm, self).__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input):
        dim = len(input.shape)
        if dim < 3:
            raise ValueError('Expected 3D or higher dimensionality input '
                             '(got {} dimensions)'.format(dim))
        # Square the input and insert a singleton axis so the channel
        # neighbourhood can be averaged with a pooling op.
        div = fluid.layers.unsqueeze(input * input, axes=[1])
        if dim == 3:
            # 4-D tensor after unsqueeze; pad the former channel axis (axis 2).
            div = fluid.layers.pad(div, (0, 0, 0, 0,
                                         self.size // 2, (self.size - 1) // 2,
                                         0, 0))
            div = fluid.layers.pool2d(div, pool_size=(self.size, 1),
                                      pool_stride=1, pool_type='avg')
            div_shape = div.shape
            div_shape.pop(1)  # drop the singleton axis again
            div = fluid.layers.reshape(div, shape=div_shape)
        else:
            sizes = input.shape
            # Collapse trailing spatial dims so a 3-D window can slide
            # over the channel axis.
            div = fluid.layers.reshape(div, (sizes[0], 1, sizes[1], sizes[2], -1))
            div = fluid.layers.pad(div, (0, 0, 0, 0,
                                         self.size // 2, (self.size - 1) // 2,
                                         0, 0, 0, 0))
            div = fluid.layers.pool3d(div, (self.size, 1, 1),
                                      pool_stride=1, pool_type='avg')
            div_shape = div.shape
            div_shape.pop(1)
            div = fluid.layers.reshape(div, shape=div_shape)
            div = fluid.layers.reshape(div, sizes)  # restore the original layout
        div = fluid.layers.pow(div * self.alpha + self.k, factor=self.beta)
        return input / div
```
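
For reference, the custom operator can be exercised in isolation; a minimal sketch (my own smoke test with arbitrary shapes, assuming dygraph's default `stop_gradient=True` for inputs) that runs forward and backward through just `LocalResponseNorm`:

```python
# Standalone smoke test of the custom LRN (sketch). If this backward pass
# succeeds, the LRN gradient path on its own is not the problem.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    lrn = LocalResponseNorm(5, 0.0001, 0.75, 2)
    x = fluid.dygraph.to_variable(np.random.rand(2, 16, 30, 30).astype('float32'))
    x.stop_gradient = False  # the layer has no parameters, so ask for dx instead
    loss = fluid.layers.reduce_mean(lrn(x))
    loss.backward()
    print('dx shape:', x.gradient().shape)
```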
```python
class LBNet_highwayThree(fluid.dygraph.Layer):  # SHT network
    def __init__(self, nc=2):
        super(LBNet_highwayThree, self).__init__()
        self.convolutions1 = fluid.dygraph.Sequential(
            nn.Conv2D(nc, 16, filter_size=7, stride=1),
            nn.BatchNorm(16, act='relu'),
            nn.Conv2D(16, 16, filter_size=1, stride=1),
            nn.BatchNorm(16, act='relu'),
            nn.Conv2D(16, 16, filter_size=3, stride=1, padding=1),
            nn.BatchNorm(16, act='relu'),
            nn.Conv2D(16, 16, filter_size=1, stride=1),
        )
        # Highway branch: a single 7x7 conv so its output matches the main
        # branch's 16-channel feature map and the two can be summed.
        self.high_way = fluid.dygraph.Sequential(
            nn.BatchNorm(2, act='relu'),
            nn.Conv2D(2, 16, filter_size=7, stride=1),
        )
        self.convolutions2 = fluid.dygraph.Sequential(
            nn.BatchNorm(16, act='relu'),
            LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.Pool2D(pool_size=2, pool_stride=2),
            nn.Conv2D(16, 64, filter_size=7, stride=1),
            nn.BatchNorm(64, act='relu'),
            LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.Pool2D(pool_size=2, pool_stride=2),
            nn.Conv2D(64, 256, filter_size=7, stride=1),
        )
        self.mlp = fluid.dygraph.Sequential(
            nn.Linear(21 * 21 * 256, 1)
        )

    def forward(self, x):
        x1 = self.convolutions1(x)
        x2 = self.high_way(x)
        x = x1 + x2
        x = self.convolutions2(x)
        x = fluid.layers.reshape(x, (-1, 21 * 21 * 256))
        x = fluid.layers.relu(x)
        x = fluid.layers.dropout(x, dropout_prob=0.5)
        x = self.mlp(x)
        return x
```
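
The Linear layer's input size follows from the convolution arithmetic: with 126×126 inputs, the unpadded 7×7 convolutions and the two 2×2 poolings give 126 → 120 → 60 → 54 → 27 → 21, hence 21·21·256 flattened features. A quick sketch to confirm that figure (forward only, which is the part that already works):

```python
# Shape check for the flattened feature size (sketch; forward pass only).
with fluid.dygraph.guard():
    lb = LBNet_highwayThree()
    x = fluid.dygraph.to_variable(np.ones((2, 2, 126, 126)).astype('float32'))
    feats = lb.convolutions2(lb.convolutions1(x) + lb.high_way(x))
    print(feats.shape)  # expect [2, 256, 21, 21], i.e. 21 * 21 * 256 per sample
```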
2. Reproduction: the forward- and backward-pass test is as follows:
```python
if __name__ == '__main__':
    test_data = np.ones((2, 2, 126, 126)).astype('float32')
    label = np.array([[0], [1]]).astype('float32')
    with fluid.dygraph.guard():
        lb = LBNet_highwayThree()
        # lrn = LocalResponseNorm(5, 0.0001, 0.75, 2)
        x = fluid.dygraph.base.to_variable(test_data)
        label = fluid.dygraph.base.to_variable(label)
        output = lb(x)
        print(output)
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(output, label)
        print('loss:', loss)
        loss.backward()
```
3. Problem description: the forward pass runs normally, but the error below is raised when the backward pass computes gradients:
```
Traceback (most recent call last):
  File "work/SHT/model.py", line 104, in <module>
    loss.backward()
  File "</opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/decorator.py:decorator-gen-60>", line 2, in backward
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/wrapped_decorator.py", line 25, in __impl__
    return wrapped_func(*args, **kwargs)
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/framework.py", line 207, in __impl__
    return func(*args, **kwargs)
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/varbase_patch_methods.py", line 116, in backward
    self._run_backward(backward_strategy, framework._dygraph_tracer())
paddle.fluid.core_avx.EnforceNotMet:

----------------------
Error Message Summary:
----------------------
Error: at (/paddle/paddle/fluid/operators/batch_norm_op.cc:445)
```
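
Since the summary points at the gradient of batch_norm_op, one way to narrow this down is to re-run backward with the custom LRN layers removed: if the sketch below succeeds, the crash involves the combination of BatchNorm with the LRN's reshape/pad/pool ops; if it still fails, BatchNorm alone reproduces it. This is a debugging sketch, not a fix; the stack mirrors convolutions2 and assumes its 16×120×120 input.

```python
# Bisection sketch: convolutions2 without the LocalResponseNorm layers.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as nn

with fluid.dygraph.guard():
    net = fluid.dygraph.Sequential(
        nn.BatchNorm(16, act='relu'),
        nn.Pool2D(pool_size=2, pool_stride=2),
        nn.Conv2D(16, 64, filter_size=7, stride=1),
        nn.BatchNorm(64, act='relu'),
        nn.Pool2D(pool_size=2, pool_stride=2),
        nn.Conv2D(64, 256, filter_size=7, stride=1),
    )
    x = fluid.dygraph.to_variable(np.ones((2, 16, 120, 120)).astype('float32'))
    loss = fluid.layers.reduce_mean(net(x))
    loss.backward()  # if this succeeds, the LRN layers are implicated
```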