From 70e44732c2c1a2186d26a076c3b3be69b6a91bc4 Mon Sep 17 00:00:00 2001
From: wangyang59 <wangyang59@baidu.com>
Date: Tue, 25 Oct 2016 13:55:40 -0700
Subject: [PATCH] added convTrans test and python components

---
 .gitignore                                    |   2 +
 paddle/gserver/tests/CMakeLists.txt           |   8 +
 paddle/gserver/tests/test_ConvTrans.cpp       | 139 ++++++++++++++++++
 python/paddle/trainer/config_parser.py        |  95 ++++++++++++
 .../paddle/trainer_config_helpers/layers.py   | 123 ++++++++++++++++
 5 files changed, 367 insertions(+)
 create mode 100644 paddle/gserver/tests/test_ConvTrans.cpp
diff --git a/.gitignore b/.gitignore
index 65ba217de3..ee8489c1d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@ build/
 .vscode
 .idea
 .project
+.cproject
 .pydevproject
+Makefile
diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index 26ee2b3aae..0651d0b473 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -26,6 +26,14 @@ add_unittest_without_exec(test_ActivationGrad
     TestUtil.cpp)
 add_test(NAME test_ActivationGrad
     COMMAND test_ActivationGrad)
+################# test_ConvTrans #######################
+add_unittest_without_exec(test_ConvTrans
+    test_ConvTrans.cpp
+    LayerGradUtil.cpp
+    TestUtil.cpp)
+
+add_test(NAME test_ConvTrans
+    COMMAND test_ConvTrans)
 
 ################## test_Evaluator #######################
 add_unittest(test_Evaluator
diff --git a/paddle/gserver/tests/test_ConvTrans.cpp b/paddle/gserver/tests/test_ConvTrans.cpp
new file mode 100644
index 0000000000..e7cbe2614f
--- /dev/null
+++ b/paddle/gserver/tests/test_ConvTrans.cpp
@@ -0,0 +1,139 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <vector>
+#include <string>
+#include "paddle/gserver/layers/DataLayer.h"
+#include "ModelConfig.pb.h"
+#include "paddle/trainer/Trainer.h"
+#include "paddle/utils/GlobalConstants.h"
+#include "paddle/gserver/layers/ExpandConvTransLayer.h"
+
+#include "TestUtil.h"
+#include "LayerGradUtil.h"
+
+using namespace paddle;  // NOLINT
+using namespace std;     // NOLINT
+
+P_DECLARE_bool(use_gpu);
+P_DECLARE_int32(gpu_id);
+P_DECLARE_double(checkgrad_eps);
+P_DECLARE_bool(thread_local_rand_use_global_seed);
+P_DECLARE_bool(prev_batch_state);
+
+TEST(Layer, convTransLayerFwd) {
+    // Forward of "exconvt" must equal the input gradient that "exconv"
+    // (same filter weights, zero bias) produces in its backward pass.
+    TestConfig configt;
+    configt.biasSize = 3;
+    configt.layerConfig.set_type("exconvt");
+    configt.layerConfig.set_num_filters(3);
+    configt.layerConfig.set_partial_sum(1);
+    configt.layerConfig.set_shared_biases(true);
+
+    configt.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288});
+    LayerInputConfig* input = configt.layerConfig.add_inputs();
+    ConvConfig* conv = input->mutable_conv_conf();
+    conv->set_filter_size(2);
+    conv->set_filter_size_y(3);
+    conv->set_channels(16);
+    conv->set_padding(0);
+    conv->set_padding_y(1);
+    conv->set_stride(2);
+    conv->set_stride_y(2);
+    conv->set_groups(1);
+    conv->set_filter_channels(3 / conv->groups());  // 3 == num_filters above
+    conv->set_img_size(16);
+    conv->set_output_x(
+        (2 * conv->padding() + conv->img_size() - conv->filter_size()) /
+            static_cast<float>(conv->stride()) + 1.5);
+
+    configt.layerConfig.set_size(conv->img_size() * conv->img_size() *
+                                configt.layerConfig.num_filters());
+    configt.layerConfig.set_name("convTrans");
+
+    // Build the data layer that feeds the transposed-conv layer.
+    std::vector<DataLayerPtr> dataLayers;
+    LayerMap layerMap;
+    vector<Argument> datas;
+    initDataLayer(configt, &dataLayers, &datas, &layerMap, "convTrans",
+                  100, false, FLAGS_use_gpu);
+    // Build the exconvt layer under test and run it with a zeroed bias.
+    std::vector<ParameterPtr> parameters;
+    LayerPtr convtLayer;
+    initTestLayer(configt, &layerMap, &parameters, &convtLayer);
+    convtLayer->getBiasParameter()->zeroMem();
+    convtLayer->forward(PASS_GC);
+
+    TestConfig config;
+    config.biasSize = 16;
+    config.layerConfig.set_type("exconv");
+    config.layerConfig.set_num_filters(16);
+    config.layerConfig.set_partial_sum(1);
+    config.layerConfig.set_shared_biases(true);
+
+    config.inputDefs.push_back({INPUT_DATA, "layer_1", 768, 288});
+    input = config.layerConfig.add_inputs();
+    conv = input->mutable_conv_conf();
+    conv->set_filter_size(2);
+    conv->set_filter_size_y(3);
+    conv->set_channels(3);
+    conv->set_padding(0);
+    conv->set_padding_y(1);
+    conv->set_stride(2);
+    conv->set_stride_y(2);
+    conv->set_groups(1);
+    conv->set_filter_channels(conv->channels() / conv->groups());
+    conv->set_img_size(16);
+    conv->set_output_x(
+        (2 * conv->padding() + conv->img_size() - conv->filter_size()) /
+            static_cast<float>(conv->stride()) + 1.5);
+    config.layerConfig.set_size(conv->output_x() * conv->output_x() *
+                                config.layerConfig.num_filters());
+    config.layerConfig.set_name("conv");
+
+    // Build the data layer that feeds the reference conv layer.
+    std::vector<DataLayerPtr> dataLayers2;
+    LayerMap layerMap2;
+    vector<Argument> datas2;
+    initDataLayer(config, &dataLayers2, &datas2, &layerMap2, "conv",
+                  100, false, FLAGS_use_gpu);
+    // Reference exconv layer; bias zeroed, weights copied from exconvt.
+    std::vector<ParameterPtr> parameters2;
+    LayerPtr convLayer;
+    initTestLayer(config, &layerMap2, &parameters2, &convLayer);
+
+    convLayer->getBiasParameter()->zeroMem();
+    convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom(
+            *(convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)));
+    // Use the exconvt input as the conv output gradient, then backprop.
+    convLayer->forward(PASS_GC);
+    convLayer->getOutput().grad->copyFrom(*(dataLayers[0]->getOutputValue()));
+
+    vector<int> callbackFlags(parameters2.size(), 0);
+    auto callback = [&](Parameter* para) { ++callbackFlags[para->getID()]; };
+    convLayer->backward(callback);
+
+    checkMatrixEqual(convtLayer->getOutputValue(),
+                     dataLayers2[0]->getOutputGrad());
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);  // gtest sees argv before initMain
+  initMain(argc, argv);
+  FLAGS_thread_local_rand_use_global_seed = true;
+  srand(1);  // fixed seed so rand() yields the same sequence on every run
+  return RUN_ALL_TESTS();  // test result becomes the process exit code
+}
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 73631602a9..2d28b34999 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1106,6 +1106,37 @@ def parse_conv(conv, input_layer_name, conv_conf):
                                          conv_conf.padding, conv_conf.stride,
                                          conv_conf.caffe_mode)
 
+
+def parse_convt(conv, input_layer_name, conv_conf):
+    """Fill conv_conf for a transposed conv: the input layer is treated as a
+    square conv output (output_x) and img_size is derived from it (x only)."""
+    conv_conf.filter_size = conv.filter_size
+    conv_conf.filter_size_y = conv.filter_size_y
+    conv_conf.channels = conv.channels
+    conv_conf.padding = conv.padding
+    conv_conf.padding_y = conv.padding_y
+    conv_conf.stride = conv.stride
+    conv_conf.stride_y = conv.stride_y
+    conv_conf.groups = conv.groups
+    # NOTE(review): uses input channels, not num_filters -- confirm intended.
+    conv_conf.filter_channels = conv.channels / conv.groups
+    conv_conf.caffe_mode = conv.caffe_mode
+
+    outputSize = g_layer_map[input_layer_name].size / conv.channels
+    print('channels=%d size=%d'%(conv.channels,
+      g_layer_map[input_layer_name].size))
+    conv_conf.output_x = int(outputSize ** 0.5)
+    config_assert((conv_conf.output_x ** 2) == outputSize,
+                  ("Input layer %s: Incorrect input image size %d for input "
+                   + "image pixels %d")
+                  % (input_layer_name, conv_conf.output_x, outputSize))
+    img_size = (conv_conf.output_x - 1) * conv.stride \
+        + conv.filter_size - 2 * conv.padding
+    if not conv.caffe_mode:
+        img_size += 1
+    conv_conf.img_size = img_size
+
+
 def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
     block_expand_conf.channels = block_expand.channels
     block_expand_conf.stride_x = block_expand.stride_x
@@ -1612,6 +1643,70 @@ class ConvLayer(ConvLayerBase):
 class ConvLayer(ConvLayerBase):
     layer_type = 'cudnn_conv'
 
+
+@config_layer('convt')
+class ConvTransLayerBase(LayerBase):
+    """Config for a transposed-convolution layer. __init__ resolves the
+    concrete type to "exconvt" or "cudnn_convt" and sets the layer size from
+    each input's img_size ** 2 * num_filters."""
+    layer_type = 'convt'
+
+    def __init__(self, name, inputs=[], bias=True, num_filters=None,
+                 shared_biases=False, **xargs):
+        super(ConvTransLayerBase, self).__init__(
+            name, self.layer_type, 0, inputs=inputs, **xargs)
+
+        if num_filters is not None:
+            self.config.num_filters = num_filters
+
+        use_gpu = int(g_command_config_args.get("use_gpu", 0))
+        parallel_nn = int(g_command_config_args.get("parallel_nn", 0))
+
+        # Automatically select cudnn_convt for GPU and exconvt for CPU
+        # when type=convt is given, but still honor an explicitly
+        # requested exconvt or cudnn_convt.
+        if self.layer_type == "cudnn_convt":
+            config_assert(use_gpu, "cudnn_convt only support GPU")
+
+        if (use_gpu == 1 and self.layer_type != "exconvt" and
+           (parallel_nn == 0 or self.config.device > -1)):
+            self.layer_type = "cudnn_convt"
+        else:
+            self.layer_type = "exconvt"
+        # need to specify layer in config
+        self.config.type = self.layer_type
+
+        if shared_biases is not None:
+            self.config.shared_biases = shared_biases
+
+        for input_index in xrange(len(self.inputs)):
+            input_layer = self.get_input_layer(input_index)
+            parse_convt(
+                self.inputs[input_index].conv,
+                input_layer.name,
+                self.config.inputs[input_index].conv_conf)
+            conv_conf = self.config.inputs[input_index].conv_conf
+            psize = self.calc_parameter_size(conv_conf)
+            print("output size for %s is %d " % (name, conv_conf.output_x))
+            self.create_input_parameter(input_index, psize)
+            self.set_layer_size(
+                (conv_conf.img_size ** 2) * self.config.num_filters)
+
+        psize = self.config.size
+        if shared_biases:
+            psize = self.config.num_filters
+        self.create_bias_parameter(bias, psize, [psize, 1])
+
+    def calc_parameter_size(self, conv_conf):
+        # Weight count for one input:
+        # channels * filter_channels * filter_size * filter_size_y.
+        return conv_conf.channels * conv_conf.filter_channels \
+                    * (conv_conf.filter_size * conv_conf.filter_size_y)
+
+@config_layer('exconvt')
+class ConvTransLayer(ConvTransLayerBase):
+    layer_type = 'exconvt'  # base __init__ keeps an explicit exconvt as-is
+
 @config_layer('norm')
 class NormLayer(LayerBase):
     def __init__(
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 49f0ff3289..853df8b837 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -78,6 +78,7 @@ class LayerType(object):
     COSINE_SIM = 'cos'
     HSIGMOID = 'hsigmoid'
     CONV_LAYER = "conv"
+    CONVTRANS_LAYER = "convt"
     POOL_LAYER = "pool"
     BATCH_NORM_LAYER = 'batch_norm'
     NORM_LAYER = 'norm'
@@ -1625,6 +1626,128 @@ def img_conv_layer(input, filter_size, num_filters,
     return LayerOutput(name, LayerType.CONV_LAYER, parents=[input],
                        activation=act, num_filters=num_filters)
 
+@wrap_name_default("convt")
+@wrap_param_attr_default()
+@wrap_bias_attr_default()
+@wrap_act_default(act=ReluActivation())
+@layer_support(DROPOUT)
+def img_convTrans_layer(input, filter_size, num_filters,
+                   name=None, num_channels=None,
+                   act=None, groups=1, stride=1, padding=0, bias_attr=None,
+                   param_attr=None, shared_biases=True, layer_attr=None,
+                   filter_size_y=None, stride_y=None, padding_y=None):
+    """
+    Convolution Transpose (deconv) layer for image. Paddle only supports
+    square input currently, so the input image's width equals its height.
+
+    For details of the convolution transpose layer, please refer to the
+    following `explanation and the references therein
+    <http://datascience.stackexchange.com/questions/6107/
+    what-are-deconvolutional-layers/>`_ .
+
+    num_channels is the input image's channel number. It may be 1 or 3 when
+    the input is raw image pixels (mono or RGB), or it may be the previous
+    layer's num_filters * num_group.
+
+    There are several groups of filters in the PaddlePaddle implementation.
+    Each group processes some channels of the input. For example, if an input
+    has num_channel = 256, group = 4, num_filter=32, PaddlePaddle will create
+    32*4 = 128 filters to process the input. The channels are split into 4
+    pieces. The first 256/4 = 64 channels are processed by the first 32
+    filters. The remaining channels are processed by the remaining groups.
+
+    :param name: Layer name.
+    :type name: basestring
+    :param input: Layer Input.
+    :type input: LayerOutput
+    :param filter_size: The x dimension of a filter kernel, or a sequence of
+                        (filter_size, filter_size_y).
+    :type filter_size: int|tuple|list
+    :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle
+                        currently supports rectangular filters, the filter's
+                        shape will be (filter_size, filter_size_y).
+    :type filter_size_y: int|None
+    :param num_filters: Number of filters in each filter group.
+    :param act: Activation type. Defaults to ReluActivation.
+    :type act: BaseActivation
+    :param groups: Group size of filters.
+    :type groups: int
+    :param stride: The x dimension of the stride, or a sequence of
+                   (stride, stride_y).
+    :type stride: int|tuple|list
+    :param stride_y: The y dimension of the stride.
+    :type stride_y: int
+    :param padding: The x dimension of the padding, or a sequence of
+                    (padding, padding_y).
+    :type padding: int|tuple|list
+    :param padding_y: The y dimension of the padding.
+    :type padding_y: int
+    :param bias_attr: Convolution bias attribute. None means default bias.
+                      False means no bias.
+    :type bias_attr: ParameterAttribute|False
+    :param num_channels: Number of input channels. If None, it is taken from
+                        input.num_filters.
+    :type num_channels: int
+    :param param_attr: Convolution param attribute. None means default attribute
+    :type param_attr: ParameterAttribute
+    :param shared_biases: Whether biases are shared between filters.
+    :type shared_biases: bool
+    :param layer_attr: Layer Extra Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    if num_channels is None:
+        assert input.num_filters is not None
+        num_channels = input.num_filters
+
+    if filter_size_y is None:
+        if isinstance(filter_size, collections.Sequence):
+            assert len(filter_size) == 2
+            filter_size, filter_size_y = filter_size
+        else:
+            filter_size_y = filter_size
+
+    if stride_y is None:
+        if isinstance(stride, collections.Sequence):
+            assert len(stride) == 2
+            stride, stride_y = stride
+        else:
+            stride_y = stride
+
+    if padding_y is None:
+        if isinstance(padding, collections.Sequence):
+            assert len(padding) == 2
+            padding, padding_y = padding
+        else:
+            padding_y = padding
+
+    if param_attr.attr.get('initial_smart'):
+        # Smart init: mean 0, std = sqrt(2 / (filter_size^2 * num_channels)).
+        init_w = (2.0 / (filter_size ** 2 * num_channels)) ** 0.5
+        param_attr.attr["initial_mean"] = 0.0
+        param_attr.attr["initial_std"] = init_w
+        param_attr.attr["initial_strategy"] = 0
+        param_attr.attr["initial_smart"] = False
+    Layer(
+        name=name,
+        inputs=Input(input.name, conv=Conv(
+            filter_size=filter_size, padding=padding, stride=stride,
+            channels=num_channels, groups=groups,
+            filter_size_y=filter_size_y, padding_y=padding_y,
+            stride_y=stride_y),
+                     **param_attr.attr),
+        active_type=act.name,
+        num_filters=num_filters,
+        bias=ParamAttr.to_bias(bias_attr),
+        shared_biases=shared_biases,
+        type=LayerType.CONVTRANS_LAYER,
+        **ExtraLayerAttribute.to_kwargs(layer_attr)
+    )
+    return LayerOutput(name, LayerType.CONVTRANS_LAYER, parents=[input],
+                       activation=act, num_filters=num_filters)
+
+
 
 @wrap_name_default("pool")
 @layer_support()
-- 
GitLab