@config_layer('depthwise_conv')
class DepthwiseConvLayer(LayerBase):
    """Config-parser definition of the ``depthwise_conv`` layer.

    For every input the convolution's ``groups`` is forced to equal its
    ``channels`` before the conv config is parsed, one weight parameter is
    created per input, and a single bias parameter is created for the layer.

    Args:
        name: Layer name, forwarded to ``LayerBase`` and ``set_cnn_layer``.
        inputs: Layer inputs; each one is expected to carry a ``conv``
            description. The default list is only forwarded, never mutated
            in this class.
        bias: Bias specification handed to ``create_bias_parameter``.
        num_filters: Number of output filters. When ``None`` the value
            already present in ``self.config.num_filters`` is used.
        shared_biases: When truthy the bias has ``num_filters`` entries,
            otherwise it has ``self.config.size`` entries.
        **xargs: Extra keyword arguments forwarded to ``LayerBase``.
    """
    layer_type = 'depthwise_conv'

    def __init__(self,
                 name,
                 inputs=[],
                 bias=True,
                 num_filters=None,
                 shared_biases=False,
                 **xargs):
        super(DepthwiseConvLayer, self).__init__(
            name, self.layer_type, 0, inputs=inputs, **xargs)

        if num_filters is not None:
            self.config.num_filters = num_filters

        # The layer type is fixed: unlike a generic 'conv' layer there is no
        # GPU/CPU implementation to choose between, so it is written to the
        # config unconditionally.
        self.layer_type = "depthwise_conv"
        self.config.type = self.layer_type

        if shared_biases is not None:
            self.config.shared_biases = shared_biases

        for input_index, layer_input in enumerate(self.inputs):
            input_layer = self.get_input_layer(input_index)
            conv_conf = self.config.inputs[input_index].conv_conf
            # Depthwise convolution: one group per input channel. This must
            # happen before parse_conv so the parsed config sees the groups.
            layer_input.conv.groups = layer_input.conv.channels
            parse_conv(layer_input.conv, input_layer.name, conv_conf,
                       num_filters)
            weight_size = self.calc_parameter_size(conv_conf)
            self.create_input_parameter(input_index, weight_size)
            self.set_cnn_layer(name, conv_conf.output_y, conv_conf.output_x,
                               self.config.num_filters)

        # One bias per filter when shared, otherwise one per output element.
        bias_size = self.config.size
        if shared_biases:
            bias_size = self.config.num_filters
        self.create_bias_parameter(bias, bias_size, [bias_size, 1])

    def calc_parameter_size(self, conv_conf):
        """Return the weight count for one input.

        Computed as ``num_filters * filter_channels * filter_size *
        filter_size_y`` from the layer config and the parsed ``conv_conf``.
        """
        return self.config.num_filters * conv_conf.filter_channels \
            * (conv_conf.filter_size * conv_conf.filter_size_y)
def _expand_conv_xy(value, value_y):
    """Resolve a conv hyper-parameter into its ``(x, y)`` components.

    When ``value_y`` is ``None``, a two-element sequence ``value`` is split
    into ``(x, y)`` and a scalar ``value`` is used for both axes. When
    ``value_y`` is given, both arguments are returned unchanged.
    """
    if value_y is None:
        if isinstance(value, collections.Sequence):
            assert len(value) == 2
            value, value_y = value
        else:
            value_y = value
    return value, value_y


@wrap_name_default("depthwise_conv")
@wrap_param_attr_default()
@wrap_bias_attr_default()
@wrap_act_default(act=ReluActivation())
@layer_support(DROPOUT)
def img_depthwise_conv_layer(input,
                             filter_size,
                             num_filters,
                             name=None,
                             num_channels=None,
                             act=None,
                             groups=1,
                             stride=1,
                             padding=0,
                             bias_attr=None,
                             param_attr=None,
                             shared_biases=True,
                             layer_attr=None,
                             filter_size_y=None,
                             stride_y=None,
                             padding_y=None,
                             trans=False,
                             layer_type=None):
    """Declare a ``depthwise_conv`` image layer and return its LayerOutput.

    :param input: Input layer; its ``num_filters`` supplies ``num_channels``
                  when that argument is omitted.
    :param filter_size: Filter size along x, or an ``(x, y)`` pair when
                        ``filter_size_y`` is not given.
    :param num_filters: Number of output filters.
    :param name: Layer name.
    :param num_channels: Number of input channels.
    :param act: Activation; its ``name`` is written to the layer config.
    :param groups: Passed to ``Conv``.
                   NOTE(review): the config parser's DepthwiseConvLayer
                   overwrites groups with the channel count, so this value
                   appears to have no effect - confirm.
    :param stride: Stride along x, or an ``(x, y)`` pair.
    :param padding: Padding along x, or an ``(x, y)`` pair.
    :param bias_attr: Bias attribute, converted with ``ParamAttr.to_bias``.
    :param param_attr: Weight parameter attribute.
    :param shared_biases: Forwarded to the layer config.
    :param layer_attr: Extra layer attributes.
    :param filter_size_y: Filter size along y; defaults from ``filter_size``.
    :param stride_y: Stride along y; defaults from ``stride``.
    :param padding_y: Padding along y; defaults from ``padding``.
    :param trans: Accepted for signature compatibility; not used here.
    :param layer_type: Accepted for signature compatibility; not used here.
    :return: LayerOutput of type ``LayerType.DEPTHWISE_CONV_LAYER``.
    """
    if num_channels is None:
        assert input.num_filters is not None
        num_channels = input.num_filters

    filter_size, filter_size_y = _expand_conv_xy(filter_size, filter_size_y)
    stride, stride_y = _expand_conv_xy(stride, stride_y)
    padding, padding_y = _expand_conv_xy(padding, padding_y)

    if param_attr.attr.get('initial_smart'):
        # special initial for conv layers: std = sqrt(2 / fan_in). The
        # receptive field is filter_size x filter_size_y, so non-square
        # kernels must not be treated as filter_size ** 2.
        init_w = (2.0 / (filter_size * filter_size_y * num_channels))**0.5
        param_attr.attr["initial_mean"] = 0.0
        param_attr.attr["initial_std"] = init_w
        param_attr.attr["initial_strategy"] = 0
        param_attr.attr["initial_smart"] = False

    lt = LayerType.DEPTHWISE_CONV_LAYER

    l = Layer(
        name=name,
        inputs=Input(
            input.name,
            conv=Conv(
                filter_size=filter_size,
                padding=padding,
                stride=stride,
                channels=num_channels,
                groups=groups,
                filter_size_y=filter_size_y,
                padding_y=padding_y,
                stride_y=stride_y),
            **param_attr.attr),
        active_type=act.name,
        num_filters=num_filters,
        bias=ParamAttr.to_bias(bias_attr),
        shared_biases=shared_biases,
        type=lt,
        **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(
        name,
        lt,
        parents=[input],
        activation=act,
        num_filters=num_filters,
        size=l.config.size)