dygraph 显存问题
Created by: wwjjy
- 环境信息 1)PaddlePaddle版本:paddle 1.5
- 训练信息 1)单机,单卡 2)显存信息
- 复现信息:模型部分代码大致如下:
# -*- coding: utf-8 -*-
import paddle.fluid as fluid
import numpy as np
class ConvBnLayer(fluid.dygraph.Layer):
    """Conv2D followed by BatchNorm; the activation (if any) is applied by BN."""

    def __init__(self, name_scope, num_filters, filter_size,
                 stride=1, groups=1, act=None):
        super(ConvBnLayer, self).__init__(name_scope)
        # "same"-style padding for an odd kernel size.
        same_pad = (filter_size - 1) // 2
        self.conv2d = fluid.dygraph.Conv2D(
            'conv2d',
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=same_pad,
            groups=groups,
            bias_attr=False,
            param_attr=fluid.ParamAttr(name="weights"))
        self.batch_norm = fluid.dygraph.BatchNorm(self.full_name(), num_filters, act=act)

    def forward(self, inputs):
        # conv -> bn(+act)
        return self.batch_norm(self.conv2d(inputs))
class ShortCut(fluid.dygraph.Layer):
    """Residual shortcut: identity when shapes match, 1x1 conv projection otherwise."""

    def __init__(self, name_scope, ch_out, stride):
        super(ShortCut, self).__init__(name_scope)
        self.ch_out = ch_out
        self.stride = stride
        # Projection conv; only invoked when channels or stride differ.
        self.conv = ConvBnLayer(self.full_name(), ch_out, 1, stride)

    def forward(self, inputs):
        # Input channel count is only known at call time (NCHW dim 1).
        needs_projection = inputs.shape[1] != self.ch_out or self.stride != 1
        if needs_projection:
            return self.conv(inputs)
        return inputs
class BottleneckBlock(fluid.dygraph.Layer):
    """ResNet bottleneck: 1x1 reduce -> 3x3 -> 1x1 expand (x4), plus shortcut."""

    def __init__(self, name_scope, num_filters, stride):
        super(BottleneckBlock, self).__init__(name_scope)
        scope = self.full_name()
        self.conv0 = ConvBnLayer(scope, num_filters, filter_size=1, act='relu')
        self.conv1 = ConvBnLayer(scope, num_filters, filter_size=3,
                                 stride=stride, act='relu')
        self.conv2 = ConvBnLayer(scope, num_filters * 4, filter_size=1, act=None)
        self.short = ShortCut(scope, num_filters * 4, stride)

    def forward(self, inputs):
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        shortcut = self.short(inputs)
        # Fused add + relu.
        return fluid.layers.elementwise_add(shortcut, residual, act='relu')
class DecoderBlock(fluid.dygraph.Layer):
    """Decoder step: halve channels, 2x bilinear upsample, concat with an
    encoder feature map, then two 3x3 convs."""

    def __init__(self, name_scope, num_filters):
        super(DecoderBlock, self).__init__(name_scope)
        half = num_filters // 2
        self.dimension_reduction = ConvBnLayer(self.full_name(), half,
                                               filter_size=1, act='relu')
        self.conv1 = ConvBnLayer(self.full_name(), half,
                                 filter_size=3, stride=1, act='relu')
        self.conv2 = ConvBnLayer(self.full_name(), half,
                                 filter_size=3, stride=1, act='relu')

    def forward(self, inputs, feature_map):
        reduced = self.dimension_reduction(inputs)
        # NCHW layout: dims 2 and 3 are the spatial sizes; double both.
        height, width = reduced.shape[2], reduced.shape[3]
        upsampled = fluid.layers.resize_bilinear(
            reduced, out_shape=[height * 2, width * 2])
        # Concatenate with the skip connection along the channel axis.
        merged = fluid.layers.concat([upsampled, feature_map], axis=1)
        return self.conv2(self.conv1(merged))
class Decoder(fluid.dygraph.Layer):
    """Three decoder blocks consuming encoder feature maps deepest-first,
    followed by a 1x1 conv-bn down to 3 output channels."""

    def __init__(self, name_scope):
        super(Decoder, self).__init__(name_scope)
        scope = self.full_name()
        self.decode_1 = DecoderBlock(scope, 2048)
        self.decode_2 = DecoderBlock(scope, 1024)
        self.decode_3 = DecoderBlock(scope, 512)
        self.decode_4 = ConvBnLayer(scope, 3, 1)

    def forward(self, inputs, feature_map):
        out = inputs
        # Pair each decoder block with its skip feature, deepest first.
        for block, skip in ((self.decode_1, feature_map[2]),
                            (self.decode_2, feature_map[1]),
                            (self.decode_3, feature_map[0])):
            out = block(out, skip)
        return self.decode_4(out)
class DisResNet(fluid.dygraph.Layer):
    """ResNet-style encoder (50/101/152) followed by a Decoder.

    Fixes versus the original snippet:
    - ResNet-101 stage depths corrected to [3, 4, 23, 3] (was [1, 4, 23, 3]).
    - Bottleneck blocks are registered via add_sublayer(); sublayers held
      only in plain Python lists are invisible to dygraph, so their
      parameters would never appear in .parameters() for an optimizer.
    - The feature-map list is rebuilt on every forward() instead of being
      appended to a persistent attribute; the ever-growing list kept
      references to every past iteration's activations (and their backward
      graphs), which is the reported GPU-memory leak.
    """

    def __init__(self, name_scope, layers):
        super(DisResNet, self).__init__(name_scope)
        self.layers = layers
        support_layers = [50, 101, 152]
        assert layers in support_layers, \
            "supported layers are {} but input layer is {}".format(support_layers, layers)
        if layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            # Standard ResNet-101 stage depths (bug fix: first stage was 1).
            depth = [3, 4, 23, 3]
        else:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]
        self.bottleneck_deep_list = []
        for block in range(len(depth)):
            bottleneck_block_list = []
            for i in range(depth[block]):
                # Downsample at the first block of every stage except stage 0.
                bottleneck_block = BottleneckBlock(
                    self.full_name(),
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1)
                # Register the sublayer so dygraph tracks its parameters.
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i), bottleneck_block)
                bottleneck_block_list.append(bottleneck_block)
            self.bottleneck_deep_list.append(bottleneck_block_list)
        self.decoder = Decoder(self.full_name())
        # Kept for backward compatibility; reassigned on every forward().
        self.feature_map = []

    def forward(self, inputs):
        out = inputs
        # Fresh per-call list: no references retained across iterations.
        feature_map = []
        for bottleneck_block_list in self.bottleneck_deep_list:
            for bottleneck_block in bottleneck_block_list:
                out = bottleneck_block(out)
            feature_map.append(out)
        self.feature_map = feature_map
        out = self.decoder(out, feature_map)
        return out
if __name__ == '__main__':
    with fluid.dygraph.guard():
        model = DisResNet('seresnext', 50)

        # Dummy image and dense label, NCHW.
        img_np = np.zeros([2, 3, 224, 224]).astype('float32')
        label_np = np.zeros([2, 3, 224, 224]).astype('float32')

        # Crop box: [[row0, col0], [row1, col1]].
        gt_box = [[30, 30], [60, 60]]
        (row0, col0), (row1, col1) = gt_box
        local_label_np = label_np[:, :, row0:row1, col0:col1]

        img = fluid.dygraph.to_variable(img_np)
        label = fluid.dygraph.to_variable(label_np)
        local_label = fluid.dygraph.to_variable(local_label_np)

        outs = model(img)
        local_out = outs[:, :, row0:row1, col0:col1]

        # Full-image and ROI MSE losses, weighted 0.3 / 0.7.
        loss = fluid.layers.square_error_cost(outs, label)
        local_loss = fluid.layers.square_error_cost(local_out, local_label)
        mean_loss = fluid.layers.mean(loss)
        mean_local_loss = fluid.layers.mean(local_loss)
        total_loss = 0.7 * mean_local_loss + 0.3 * mean_loss

        total_loss.backward()
        print(total_loss)