提交 f715c740 编写于 作者: C chengduoZH

Add_config_parser_for_Conv3D_DeConv3D

上级 d5768ebc
......@@ -489,6 +489,7 @@ message LayerConfig {
// to indicate rectangle image data
optional uint64 height = 50;
optional uint64 width = 51;
optional uint64 depth = 57 [ default = 1 ];
// blank label used in ctc loss
optional uint32 blank = 52 [ default = 0 ];
......
......@@ -881,6 +881,42 @@ class Conv(Cfg):
config_assert(output_x <= 0)
# please refer to the comments in proto/ModelConfig.proto
@config_class
class Conv3D(Cfg):
def __init__(self,
filter_size,
channels,
padding=None,
stride=None,
groups=None,
filter_channels=None,
output_x=None,
img_size=None,
caffe_mode=True,
filter_size_y=None,
padding_y=None,
stride_y=None,
filter_size_z=None,
padding_z=None,
stride_z=None):
self.add_keys(locals())
if filter_size_y is None:
self.filter_size_y = filter_size
if padding_y is None:
self.padding_y = padding
if stride_y is None:
self.stride_y = stride
if output_x is not None:
config_assert(output_x <= 0)
if filter_size_z is None:
self.filter_size_z = filter_size
if padding_z is None:
self.padding_z = padding
if stride_z is None:
self.stride_z = stride
@config_class
class BilinearInterp(Cfg):
def __init__(self, out_size_x=None, out_size_y=None, channels=None):
......@@ -1167,6 +1203,20 @@ def get_img_size(input_layer_name, channels):
return img_size, img_size_y
def get_img3d_size(input_layer_name, channels):
input = g_layer_map[input_layer_name]
img_pixels = input.size / channels
img_size = input.width if input.width > 0 else int(img_pixels**0.5)
img_size_y = input.height if input.height > 0 else int(img_pixels /
img_size)
img_size_z = input.depth if input.depth > 1 else 1
config_assert(
img_size * img_size_y * img_size_z == img_pixels,
"Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d"
% (input_layer_name, img_size, img_size_y, img_size_z, img_pixels))
return img_size, img_size_y, img_size_z
def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
parse_image(bilinear, input_layer_name, bilinear_conf.image_conf)
bilinear_conf.out_size_x = bilinear.out_size_x
......@@ -1277,6 +1327,50 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.stride_y, conv_conf.caffe_mode)
#caffe_mode: compute the output size using floor instead of ceil,
# which is consistent of caffe and CuDNN's convention.
def parse_conv3d(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.filter_size = conv.filter_size
conv_conf.filter_size_y = conv.filter_size_y
conv_conf.filter_size_z = conv.filter_size_z
conv_conf.channels = conv.channels
conv_conf.padding = conv.padding
conv_conf.padding_y = conv.padding_y
conv_conf.padding_z = conv.padding_z
conv_conf.stride = conv.stride
conv_conf.stride_y = conv.stride_y
conv_conf.stride_z = conv.stride_z
conv_conf.groups = conv.groups
conv_conf.caffe_mode = conv.caffe_mode
if not trans:
conv_conf.filter_channels = conv.channels / conv.groups
conv_conf.img_size, conv_conf.img_size_y, conv_conf.img_size_z = \
get_img3d_size(input_layer_name, conv.channels)
conv_conf.output_x = cnn_output_size(
conv_conf.img_size, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode)
conv_conf.output_y = cnn_output_size(
conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode)
conv_conf.output_z = cnn_output_size(
conv_conf.img_size_z, conv_conf.filter_size_z, conv_conf.padding_z,
conv_conf.stride_z, conv_conf.caffe_mode)
else:
conv_conf.filter_channels = num_filters / conv.groups
conv_conf.output_x, conv_conf.output_y, conv_conf.output_z = \
get_img3d_size(input_layer_name, conv.channels)
conv_conf.img_size = cnn_image_size(
conv_conf.output_x, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode)
conv_conf.img_size_y = cnn_image_size(
conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode)
conv_conf.img_size_z = cnn_image_size(
conv_conf.output_z, conv_conf.filter_size_z, conv_conf.padding_z,
conv_conf.stride_z, conv_conf.caffe_mode)
def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand_conf.channels = block_expand.channels
block_expand_conf.stride_x = block_expand.stride_x
......@@ -1580,6 +1674,9 @@ class LayerBase(object):
self.config.height = height
self.config.width = width
def set_layer_depth(self, depth):
self.config.depth = depth
def set_cnn_layer(self,
input_layer_name,
height,
......@@ -1763,11 +1860,19 @@ class DetectionOutputLayer(LayerBase):
@config_layer('data')
class DataLayer(LayerBase):
def __init__(self, name, size, height=None, width=None, device=None):
def __init__(self,
name,
size,
height=None,
width=None,
depth=None,
device=None):
super(DataLayer, self).__init__(
name, 'data', size, inputs=[], device=device)
if height and width:
self.set_layer_height_width(height, width)
if depth:
self.set_layer_depth(depth)
'''
......@@ -1895,6 +2000,163 @@ class ConvLayer(ConvLayerBase):
layer_type = 'cudnn_conv'
@config_layer('conv_3d')
class Conv3DLayerBase(LayerBase):
def __init__(self,
name,
inputs=[],
bias=True,
num_filters=None,
shared_biases=False,
**xargs):
super(Conv3DLayerBase, self).__init__(
name, self.layer_type, 0, inputs=inputs, **xargs)
if num_filters is not None:
self.config.num_filters = num_filters
use_gpu = int(g_command_config_args.get("use_gpu", 0))
parallel_nn = int(g_command_config_args.get("parallel_nn", 0))
# Automatically select cudnn_type for GPU and exconv for CPU
# if set type=conv, but still reserve the way user specify
# exconv or cudnn_conv manually.
if self.layer_type == "cudnn_conv3d":
config_assert(use_gpu, "cudnn_conv3d only support GPU")
# need to specify layer in config
self.config.type = self.layer_type
if shared_biases is not None:
self.config.shared_biases = shared_biases
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
conv_conf = self.config.inputs[input_index].conv_conf
parse_conv3d(
self.inputs[input_index].conv, input_layer.name, conv_conf,
num_filters
) # for z-axis pad:0, strid:1, filter_size:1, img_size:1
psize = self.calc_parameter_size(conv_conf)
self.create_input_parameter(input_index, psize)
self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y,
conv_conf.output_x, self.config.num_filters)
psize = self.config.size
if shared_biases:
psize = self.config.num_filters
self.create_bias_parameter(bias, psize, [psize, 1])
def calc_parameter_size(self, conv_conf):
return self.config.num_filters * conv_conf.filter_channels \
* (conv_conf.filter_size * conv_conf.filter_size_y \
* conv_conf.filter_size_z)
def set_layer_height_width(self, depth, height, width):
self.config.depth = depth
self.config.height = height
self.config.width = width
def set_cnn_layer(self,
input_layer_name,
depth,
height,
width,
channels,
is_print=True):
size = depth * height * width * channels
self.set_layer_size(size)
self.set_layer_height_width(depth, height, width)
if is_print:
print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" %
(input_layer_name, channels, depth, height, width, size))
@config_layer('conv3d')
class Conv3DLayer(Conv3DLayerBase):
layer_type = 'conv3d'
@config_layer('convt_3d')
class Conv3DTransLayerBase(LayerBase):
def __init__(self,
name,
inputs=[],
bias=True,
num_filters=None,
shared_biases=False,
**xargs):
super(Conv3DTransLayerBase, self).__init__(
name, self.layer_type, 0, inputs=inputs, **xargs)
if num_filters is not None:
self.config.num_filters = num_filters
use_gpu = int(g_command_config_args.get("use_gpu", 0))
parallel_nn = int(g_command_config_args.get("parallel_nn", 0))
# Automatically select cudnn_type for GPU and exconv for CPU
# if set type=conv, but still reserve the way user specify
# exconv or cudnn_conv manually.
if self.layer_type == "cudnn_deconv3d":
config_assert(use_gpu, "cudnn_conv3d only support GPU")
# need to specify layer in config
self.config.type = self.layer_type
if shared_biases is not None:
self.config.shared_biases = shared_biases
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
conv_conf = self.config.inputs[input_index].conv_conf
parse_conv3d(
self.inputs[input_index].conv,
input_layer.name,
conv_conf,
num_filters,
trans=True
) # for z-axis pad:0, strid:1, filter_size:1, img_size:1
psize = self.calc_parameter_size(conv_conf)
self.create_input_parameter(input_index, psize)
self.set_cnn_layer(name, conv_conf.img_size_z, conv_conf.img_size_y,
conv_conf.img_size, self.config.num_filters)
psize = self.config.size
if shared_biases:
psize = self.config.num_filters
self.create_bias_parameter(bias, psize, [psize, 1])
def calc_parameter_size(self, conv_conf):
return self.config.num_filters * conv_conf.filter_channels \
* (conv_conf.filter_size * conv_conf.filter_size_y \
* conv_conf.filter_size_z)
def set_layer_height_width(self, depth, height, width):
self.config.depth = depth
self.config.height = height
self.config.width = width
def set_cnn_layer(self,
input_layer_name,
depth,
height,
width,
channels,
is_print=True):
size = depth * height * width * channels
self.set_layer_size(size)
self.set_layer_height_width(depth, height, width)
if is_print:
print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" %
(input_layer_name, channels, depth, height, width, size))
@config_layer('deconv3d')
class DeConv3DLayer(Conv3DTransLayerBase):
layer_type = 'deconv3d'
@config_layer('convt')
class ConvTransLayerBase(LayerBase):
layer_type = 'convt'
......
文件模式从 100755 更改为 100644
......@@ -31,108 +31,34 @@ except ImportError:
import copy
__all__ = [
'full_matrix_projection',
'AggregateLevel',
'ExpandLevel',
'identity_projection',
'dotmul_projection',
'dotmul_operator',
'repeat_layer',
'seq_reshape_layer',
'table_projection',
'mixed_layer',
'data_layer',
'embedding_layer',
'fc_layer',
'grumemory',
'pooling_layer',
'lstmemory',
'last_seq',
'first_seq',
'cos_sim',
'hsigmoid',
'conv_projection',
'mse_cost',
'regression_cost',
'classification_cost',
'LayerOutput',
'img_conv_layer',
'img_pool_layer',
'batch_norm_layer',
'img_cmrnorm_layer',
'addto_layer',
'concat_layer',
'seq_concat_layer',
'lstm_step_layer',
'recurrent_group',
'memory',
'StaticInput',
'expand_layer',
'scaling_layer',
'scaling_projection',
'power_layer',
'interpolation_layer',
'bilinear_interp_layer',
'trans_layer',
'rotate_layer',
'sum_to_one_norm_layer',
'row_l2_norm_layer',
'get_output_layer',
'LayerType',
'context_projection',
'beam_search',
'maxid_layer',
'GeneratedInput',
'SubsequenceInput',
'gru_step_layer',
'gru_step_naive_layer',
'recurrent_layer',
'BaseGeneratedInput',
'conv_operator',
'conv_shift_layer',
'tensor_layer',
'selective_fc_layer',
'sampling_id_layer',
'slope_intercept_layer',
'trans_full_matrix_projection',
'linear_comb_layer',
'convex_comb_layer',
'ctc_layer',
'warp_ctc_layer',
'crf_layer',
'crf_decoding_layer',
'nce_layer',
'cross_entropy_with_selfnorm',
'cross_entropy',
'multi_binary_label_cross_entropy',
'sum_cost',
'rank_cost',
'lambda_cost',
'huber_cost',
'block_expand_layer',
'maxout_layer',
'out_prod_layer',
'printer_layer',
'print_layer',
'priorbox_layer',
'cross_channel_norm_layer',
'multibox_loss_layer',
'detection_output_layer',
'spp_layer',
'pad_layer',
'eos_layer',
'smooth_l1_cost',
'layer_support',
'multiplex_layer',
'row_conv_layer',
'dropout_layer',
'prelu_layer',
'gated_unit_layer',
'crop_layer',
'sub_nested_seq_layer',
'clip_layer',
'slice_projection',
'kmax_sequence_score_layer',
'full_matrix_projection', 'AggregateLevel', 'ExpandLevel',
'identity_projection', 'dotmul_projection', 'dotmul_operator',
'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer',
'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer',
'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid',
'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost',
'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer',
'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer',
'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput',
'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer',
'interpolation_layer', 'bilinear_interp_layer', 'trans_layer',
'rotate_layer', 'sum_to_one_norm_layer', 'row_l2_norm_layer',
'get_output_layer', 'LayerType', 'context_projection', 'beam_search',
'maxid_layer', 'GeneratedInput', 'SubsequenceInput', 'gru_step_layer',
'gru_step_naive_layer', 'recurrent_layer', 'BaseGeneratedInput',
'conv_operator', 'conv_shift_layer', 'tensor_layer', 'selective_fc_layer',
'sampling_id_layer', 'slope_intercept_layer',
'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer',
'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer',
'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy',
'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost',
'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer',
'printer_layer', 'print_layer', 'priorbox_layer',
'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer',
'spp_layer', 'pad_layer', 'eos_layer', 'smooth_l1_cost', 'layer_support',
'multiplex_layer', 'row_conv_layer', 'dropout_layer', 'prelu_layer',
'gated_unit_layer', 'crop_layer', 'sub_nested_seq_layer', 'clip_layer',
'slice_projection', 'kmax_sequence_score_layer', 'img_conv3d_layer'
]
......@@ -214,6 +140,9 @@ class LayerType(object):
CRF_DECODING_LAYER = 'crf_decoding'
NCE_LAYER = 'nce'
CONV3D_LAYER = 'conv3d'
DECONV3D_LAYER = 'deconv3d'
RANK_COST = 'rank-cost'
LAMBDA_COST = 'lambda_cost'
HUBER = 'huber'
......@@ -878,7 +807,8 @@ def mixed_layer(size=0,
@layer_support()
def data_layer(name, size, height=None, width=None, layer_attr=None):
def data_layer(name, size, height=None, width=None, depth=None,
layer_attr=None):
"""
Define DataLayer For NeuralNetwork.
......@@ -907,6 +837,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
size=size,
height=height,
width=width,
depth=depth,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.DATA, size=size)
......@@ -6210,3 +6141,182 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
return LayerOutput(
name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)
@wrap_name_default("conv3d")
@wrap_param_attr_default()
@wrap_bias_attr_default()
@wrap_act_default(act=ReluActivation())
@layer_support(DROPOUT)
def img_conv3d_layer(input,
filter_size,
num_filters,
name=None,
num_channels=None,
act=None,
groups=1,
stride=1,
padding=0,
bias_attr=None,
param_attr=None,
shared_biases=True,
layer_attr=None,
filter_size_y=None,
stride_y=None,
padding_y=None,
filter_size_z=None,
stride_z=None,
padding_z=None,
trans=False,
layer_type=None):
"""
The example usage is:
.. code-block:: python
conv = img_conv3d_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=16, stride=1,
bias_attr=False,
act=ReluActivation())
:param name: Layer name.
:type name: basestring
:param input: Layer Input.
:type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. Or input a tuple for
two image dimension.
:type filter_size: int|tuple|list
:param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle
currently supports rectangular filters, the filter's
shape will be (filter_size, filter_size_y).
:type filter_size_y: int|None
:param num_filters: Each filter group's number of filter
:param act: Activation type. Default is tanh
:type act: BaseActivation
:param groups: Group size of filters.
:type groups: int
:param stride: The x dimension of the stride. Or input a tuple for two image
dimension.
:type stride: int|tuple|list
:param stride_y: The y dimension of the stride.
:type stride_y: int
:param padding: The x dimension of the padding. Or input a tuple for two
image dimension
:type padding: int|tuple|list
:param padding_y: The y dimension of the padding.
:type padding_y: int
:param bias_attr: Convolution bias attribute. None means default bias.
False means no bias.
:type bias_attr: ParameterAttribute|False
:param num_channels: number of input channels. If None will be set
automatically from previous output.
:type num_channels: int
:param param_attr: Convolution param attribute. None means default attribute
:type param_attr: ParameterAttribute
:param shared_biases: Is biases will be shared between filters or not.
:type shared_biases: bool
:param layer_attr: Layer Extra Attribute.
:type layer_attr: ExtraLayerAttribute
:param trans: true if it is a convTransLayer, false if it is a convLayer
:type trans: bool
:param layer_type: specify the layer_type, default is None. If trans=True,
layer_type has to be "exconvt" or "cudnn_convt",
otherwise layer_type has to be either "exconv" or
"cudnn_conv"
:type layer_type: String
:return: LayerOutput object.
:rtype: LayerOutput
"""
if num_channels is None:
assert input.num_filters is not None
num_channels = input.num_filters
if filter_size_y is None:
if isinstance(filter_size, collections.Sequence):
assert len(filter_size) == 2
filter_size, filter_size_y = filter_size
else:
filter_size_y = filter_size
if filter_size_z is None:
if isinstance(filter_size, collections.Sequence):
assert len(filter_size) == 2
filter_size, filter_size_z = filter_size
else:
filter_size_z = filter_size
if stride_y is None:
if isinstance(stride, collections.Sequence):
assert len(stride) == 2
stride, stride_y = stride
else:
stride_y = stride
if stride_z is None:
if isinstance(stride, collections.Sequence):
assert len(stride) == 2
stride, stride_z = stride
else:
stride_z = stride
if padding_y is None:
if isinstance(padding, collections.Sequence):
assert len(padding) == 2
padding, padding_y = padding
else:
padding_y = padding
if padding_z is None:
if isinstance(padding, collections.Sequence):
assert len(padding) == 2
padding, padding_z = padding
else:
padding_z = padding
if param_attr.attr.get('initial_smart'):
# special initial for conv layers.
init_w = (2.0 / (filter_size**2 * num_channels))**0.5
param_attr.attr["initial_mean"] = 0.0
param_attr.attr["initial_std"] = init_w
param_attr.attr["initial_strategy"] = 0
param_attr.attr["initial_smart"] = False
if layer_type:
if trans:
assert layer_type in ["deconv3d"]
lt = layer_type
else:
lt = LayerType.DECONV3D_LAYER if trans else LayerType.CONV3D_LAYER
l = Layer(
name=name,
inputs=Input(
input.name,
conv=Conv3D(
filter_size=filter_size,
padding=padding,
stride=stride,
channels=num_channels,
groups=groups,
filter_size_y=filter_size_y,
padding_y=padding_y,
stride_y=stride_y,
filter_size_z=filter_size_z,
padding_z=padding_z,
stride_z=stride_z),
**param_attr.attr),
active_type=act.name,
num_filters=num_filters,
bias=ParamAttr.to_bias(bias_attr),
shared_biases=shared_biases,
type=lt,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name,
lt,
parents=[input],
activation=act,
num_filters=num_filters,
size=l.config.size)
......@@ -1406,7 +1406,7 @@ def inputs(layers, *args):
if len(args) != 0:
layers.extend(args)
Inputs(*[l.name for l in layers])
Inputs(* [l.name for l in layers])
def outputs(layers, *args):
......@@ -1456,7 +1456,7 @@ def outputs(layers, *args):
assert len(layers) > 0
if HasInputsSet(): # input already set
Outputs(*[l.name for l in layers])
Outputs(* [l.name for l in layers])
return # just return outputs.
if len(layers) != 1:
......
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
num_channels = 3
filter_size = 3
filter_size_y = 3
filter_size_z = 3
stride = 2
stride_y = 2
stride_z = 2
padding = 1
padding_y = 1
padding_z = 1
groups = 1
data = data_layer(
name='data1', size=12096 * num_channels, height=48, width=42, depth=6)
conv3d = img_conv3d_layer(
input=data,
name='conv3d_1',
num_filters=16,
num_channels=num_channels,
filter_size=filter_size,
filter_size_y=filter_size,
filter_size_z=filter_size,
stride=stride,
stride_y=stride_y,
stride_z=stride_z,
padding=padding,
padding_y=padding_y,
padding_z=padding_z,
groups=groups,
bias_attr=True,
shared_biases=True,
trans=False,
layer_type="conv3d",
act=LinearActivation())
deconv3d = img_conv3d_layer(
input=data,
name='deconv3d_1',
num_filters=16,
num_channels=num_channels,
filter_size=filter_size,
filter_size_y=filter_size,
filter_size_z=filter_size,
stride=stride,
stride_y=stride_y,
stride_z=stride_z,
padding=padding,
padding_y=padding_y,
padding_z=padding_z,
groups=groups,
bias_attr=True,
shared_biases=True,
trans=True,
layer_type="deconv3d",
act=LinearActivation())
data = data_layer(name="input", size=8 * 16 * 16)
conv1 = img_conv_layer(
input=data,
filter_size=1,
filter_size_y=1,
num_channels=8,
num_filters=16,
stride=1,
bias_attr=False,
act=ReluActivation(),
layer_type="exconv")
conv2 = img_conv_layer(
input=data,
filter_size=1,
filter_size_y=1,
num_channels=8,
num_filters=16,
stride=1,
bias_attr=False,
act=ReluActivation(),
layer_type="exconv")
concat = concat_layer(input=[conv1, conv2])
conv = img_conv_layer(
input=data,
filter_size=1,
filter_size_y=1,
num_channels=8,
num_filters=16,
stride=1,
bias_attr=True,
act=LinearActivation(),
groups=2,
layer_type="exconv")
outputs(concat, conv)
......@@ -16,4 +16,6 @@ from paddle.trainer.config_parser import parse_config_and_serialize
if __name__ == '__main__':
parse_config_and_serialize(
'trainer_config_helpers/tests/layers_test_config.py', '')
'trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py',
'')
# layers_test_config.py
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册