From e82f1008a82232936529ec4bba70a59880915912 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Wed, 17 Jan 2018 00:42:20 +0800
Subject: [PATCH] Finish block expand op

1. Add lod to output
2. Fix im2col arguments list
3. Refine code and doc
4. Fix output shape

---
 paddle/operators/block_expand_op.cc                | 119 +++++++----
 paddle/operators/block_expand_op.h                 | 140 ++++++------
 .../v2/fluid/tests/test_block_expand_op.py         | 202 ++++++++----------
 3 files changed, 239 insertions(+), 222 deletions(-)
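
Note on the new shape math: get_output_size() in block_expand_op.h computes,
per spatial dimension, how many block positions fit, written as an integer
ceiling division. A minimal Python sketch of the same arithmetic (names mirror
the C++ helper; the asserted values come from the DOC example and tests below):

    def get_output_size(img_size, block_size, stride, padding):
        # 1 + ceil((img_size + 2 * padding - block_size) / stride), in ints
        return 1 + (img_size + 2 * padding - block_size + stride - 1) // stride

    assert get_output_size(3, 2, 1, 0) == 2  # 3x3 image, 2x2 block -> 2x2 grid
    assert get_output_size(4, 2, 1, 1) == 5  # 4-wide image with padding 1
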
padding."); + AddAttr("padding_width", "(int)width of padding."); AddComment(R"DOC( Expand feature map to minibatch matrix. - matirx height is: output_height * output_width -- matrix width is: blockHeight * blockWidth * channels - -output_height = - 1 + (2 * paddingHeight + img_height - blockHeight + strideHeight - 1) / - strideHeight; -output_width = - 1 + (2 * paddingWidth + img_width - blockWidth + strideWidth - 1) / - strideWidth; - -The expand method is the same with ExpandConvLayer, but saved the transposed -value. After expanding, The number of time steps are output_height * output_width -and the dimension of each time step is blockHeight * blockWidth * channels. -This layer can be used after convolution neural network, and before recurrent neural network. +- matrix width is: block_height * block_width * channels + +output_height = + 1 + (2 * padding_height + img_height - block_height + stride_height - 1) / + stride_height; +output_width = + 1 + (2 * padding_width + img_width - block_width + stride_width - 1) / + stride_width; + +After expanding, The number of time steps are output_height * output_width +and the dimension of each time step is block_height * block_width * channels. +This op can be used after convolution neural network, and before recurrent neural network. + +Given: + +x = [[[[ 6. 2. 1.] + [ 8. 3. 5.] + [ 0. 2. 6.]] + + [[ 2. 4. 4.] + [ 6. 3. 0.] + [ 6. 4. 7.]]] + + [[[ 6. 7. 1.] + [ 5. 7. 9.] + [ 2. 4. 8.]] + + [[ 1. 2. 1.] + [ 1. 3. 5.] + [ 9. 0. 8.]]]] +x.dims = {2, 2, 3, 3} + +And: + +block_height = 2 +block_width = 2 +stride_height = 1 +stride_width = 1 +padding_height = 0 +padding_width = 0 + +Then: + +output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.] + [ 2. 1. 3. 5. 4. 4. 3. 0.] + [ 8. 3. 0. 2. 6. 3. 6. 4.] + [ 3. 5. 2. 6. 3. 0. 4. 7.] + [ 6. 7. 5. 7. 1. 2. 1. 3.] + [ 7. 1. 7. 9. 2. 1. 3. 5.] + [ 5. 7. 2. 4. 1. 3. 9. 0.] + [ 7. 9. 4. 8. 3. 5. 0. 
diff --git a/paddle/operators/block_expand_op.h b/paddle/operators/block_expand_op.h
index aa0db2705..022dc3a12 100644
--- a/paddle/operators/block_expand_op.h
+++ b/paddle/operators/block_expand_op.h
@@ -23,20 +23,9 @@ namespace paddle {
 namespace operators {
 
-inline void get_blockexpand_output_shape(int img_height, int img_width,
-                                         int block_height, int block_width,
-                                         int stride_height, int stride_width,
-                                         int padding_height, int padding_width,
-                                         int& outputHeight, int& outputWidth) {
-  outputHeight =
-      1 +
-      (img_height + 2 * padding_height - block_height + stride_height - 1) /
-          stride_height;
-
-  outputWidth =
-      1 +
-      (img_width + 2 * padding_width - block_width + stride_width - 1) /
-          stride_width;
+inline int get_output_size(int img_size, int block_size, int stride,
+                           int padding) {
+  return (1 + (img_size + 2 * padding - block_size + stride - 1) / stride);
 }
 
 template <typename Place, typename T>
@@ -45,40 +34,54 @@ class BlockExpandKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     using namespace framework;
     const Tensor* in = ctx.Input<Tensor>("X");
-    Tensor* out = ctx.Output<Tensor>("Out");
+    LoDTensor* out = ctx.Output<LoDTensor>("Out");
     out->mutable_data<T>(ctx.GetPlace());
 
     auto in_dim = in->dims();
-    int N = in_dim[0];
-    int C = in_dim[1];
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
     int img_height = in_dim[2];
     int img_width = in_dim[3];
-
-    int block_height = ctx.Attr<int>("blockHeight");
-    int block_width = ctx.Attr<int>("blockWidth");
-    int stride_height = ctx.Attr<int>("strideHeight");
-    int stride_width = ctx.Attr<int>("strideWidth");
-    int padding_height = ctx.Attr<int>("paddingHeight");
-    int padding_width = ctx.Attr<int>("paddingWidth");
-
-    int outputHeight = 0;
-    int outputWidth = 0;
-
-    get_blockexpand_output_shape(
-        img_height, img_width, block_height, block_width, stride_height,
-        stride_width, padding_height, padding_width, outputHeight, outputWidth);
-
-    std::vector<int> stride({stride_height, stride_width});
-    std::vector<int> padding({padding_height, padding_width});
-
-    for (int i = 0; i < N; i++) {
-      Tensor src = in->Slice(i, i + 1).Resize({C, img_height, img_width});
-      Tensor dst = out->Slice(i, i + 1).Resize(
-          {outputHeight, outputWidth, C, block_height, block_width});
+    int block_height = ctx.Attr<int>("block_height");
+    int block_width = ctx.Attr<int>("block_width");
+    int stride_height = ctx.Attr<int>("stride_height");
+    int stride_width = ctx.Attr<int>("stride_width");
+    int padding_height = ctx.Attr<int>("padding_height");
+    int padding_width = ctx.Attr<int>("padding_width");
+
+    int output_height = get_output_size(img_height, block_height,
+                                        stride_height, padding_height);
+    int output_width =
+        get_output_size(img_width, block_width, stride_width, padding_width);
+
+    const std::vector<int> dilations({1, 1});
+    const std::vector<int> strides(
+        {stride_height, stride_width, stride_height, stride_width});
+    const std::vector<int> paddings(
+        {padding_height, padding_width, padding_height, padding_width});
+
+    auto out_dims = out->dims();
+    out->Resize({batch_size, out->numel() / batch_size});
+    for (int i = 0; i < batch_size; i++) {
+      const Tensor src =
+          in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
+      Tensor dst = out->Slice(i, i + 1).Resize({output_height, output_width,
+                                                img_channels, block_height,
+                                                block_width});
       math::Im2ColFunctor<math::ColFormat::kOCF, Place, T> f;
-      f(ctx.device_context(), src, stride, padding, &dst);
+      f(ctx.device_context(), src, dilations, strides, paddings, &dst);
     }
+    out->Resize(out_dims);
+
+    // set lod information
+    // TODO(wanghaoshuang): Move this to InferShape
+    framework::LoD lod(1);
+    for (int i = 0, offset = 0; i < batch_size + 1; ++i) {
+      lod[0].push_back(offset);
+      offset += output_height * output_width;
+    }
+    out->set_lod(lod);
   }
 };
 
@@ -88,7 +91,8 @@ class BlockExpandGradKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     using namespace framework;
     auto* in = ctx.Input<Tensor>("X");
-    auto* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    Tensor* d_out =
+        const_cast<Tensor*>(ctx.Input<Tensor>(framework::GradVarName("Out")));
     auto* d_x = ctx.Output<Tensor>(GradVarName("X"));
     d_x->mutable_data<T>(ctx.GetPlace());
 
@@ -96,36 +100,40 @@ class BlockExpandGradKernel : public framework::OpKernel<T> {
     x_v.device(ctx.GetEigenDevice<Place>()) = x_v.constant(0.0);
 
     auto in_dim = in->dims();
-    int N = in_dim[0];
-    int C = in_dim[1];
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
     int img_height = in_dim[2];
     int img_width = in_dim[3];
 
-    int block_height = ctx.Attr<int>("blockHeight");
-    int block_width = ctx.Attr<int>("blockWidth");
-    int stride_height = ctx.Attr<int>("strideHeight");
-    int stride_width = ctx.Attr<int>("strideWidth");
-    int padding_height = ctx.Attr<int>("paddingHeight");
-    int padding_width = ctx.Attr<int>("paddingWidth");
-
-    int outputHeight = 0;
-    int outputWidth = 0;
-
-    get_blockexpand_output_shape(
-        img_height, img_width, block_height, block_width, stride_height,
-        stride_width, padding_height, padding_width, outputHeight, outputWidth);
-
-    std::vector<int> stride({stride_height, stride_width});
-    std::vector<int> padding({padding_height, padding_width});
-    // std::vector<int> stride({stride_height, stride_width});
-
-    for (int i = 0; i < N; i++) {
-      Tensor dst = d_x->Slice(i, i + 1).Resize({C, img_height, img_width});
-      Tensor src = d_out->Slice(i, i + 1).Resize(
-          {outputHeight, outputWidth, C, block_height, block_width});
+    int block_height = ctx.Attr<int>("block_height");
+    int block_width = ctx.Attr<int>("block_width");
+    int stride_height = ctx.Attr<int>("stride_height");
+    int stride_width = ctx.Attr<int>("stride_width");
+    int padding_height = ctx.Attr<int>("padding_height");
+    int padding_width = ctx.Attr<int>("padding_width");
+
+    int output_height = get_output_size(img_height, block_height,
+                                        stride_height, padding_height);
+    int output_width =
+        get_output_size(img_width, block_width, stride_width, padding_width);
+
+    const std::vector<int> dilations({1, 1});
+    const std::vector<int> strides(
+        {stride_height, stride_width, stride_height, stride_width});
+    const std::vector<int> paddings(
+        {padding_height, padding_width, padding_height, padding_width});
+
+    auto d_out_dims = d_out->dims();
+    d_out->Resize({batch_size, d_out->numel() / batch_size});
+    for (int i = 0; i < batch_size; i++) {
+      Tensor dst =
+          d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
+      const Tensor src = d_out->Slice(i, i + 1).Resize(
+          {output_height, output_width, img_channels, block_height,
+           block_width});
       math::Col2ImFunctor<math::ColFormat::kOCF, Place, T> f;
-      f(ctx.device_context(), dst, stride, padding, &src);
+      f(ctx.device_context(), src, dilations, strides, paddings, &dst);
     }
+    d_out->Resize(d_out_dims);
   }
 };
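
For the backward kernel above, Col2ImFunctor plays the adjoint of the forward
im2col gather. A NumPy sketch of that adjoint (col2im_ref is illustrative, not
the library API; it assumes overlapping blocks accumulate, which is consistent
with the kernel zero-filling d_x before the loop):

    import numpy as np

    def col2im_ref(d_mat, x_shape, bh, bw, sh, sw, ph, pw):
        # Scatter each gradient row back to its block position, summing where
        # blocks overlap, then strip the padding border.
        n, c, h, w = x_shape
        oh = 1 + (h + 2 * ph - bh + sh - 1) // sh
        ow = 1 + (w + 2 * pw - bw + sw - 1) // sw
        col = d_mat.reshape(n, oh, ow, c, bh, bw)
        d_pad = np.zeros((n, c, h + 2 * ph, w + 2 * pw), d_mat.dtype)
        for i in range(oh):
            for j in range(ow):
                d_pad[:, :, i * sh:i * sh + bh, j * sw:j * sw + bw] += col[:, i, j]
        return d_pad[:, :, ph:ph + h, pw:pw + w]
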
diff --git a/python/paddle/v2/fluid/tests/test_block_expand_op.py b/python/paddle/v2/fluid/tests/test_block_expand_op.py
index b31ed53f4..424bc7dc6 100644
--- a/python/paddle/v2/fluid/tests/test_block_expand_op.py
+++ b/python/paddle/v2/fluid/tests/test_block_expand_op.py
@@ -4,20 +4,20 @@ from op_test import OpTest
 
 
 def get_output_shape(attrs, x):
-    img_height = x.shape[1]
-    img_width = x.shape[2]
+    img_height = x.shape[2]
+    img_width = x.shape[3]
 
-    padding_height = attrs['paddingHeight']
-    padding_width = attrs['paddingWidth']
-    block_height = attrs['blockHeight']
-    block_width = attrs['blockWidth']
-    stride_height = attrs['strideHeight']
-    stride_width = attrs['strideWidth']
+    padding_height = attrs['padding_height']
+    padding_width = attrs['padding_width']
+    block_height = attrs['block_height']
+    block_width = attrs['block_width']
+    stride_height = attrs['stride_height']
+    stride_width = attrs['stride_width']
 
     output_height = \
       1 + \
       (img_height + 2 * padding_height - block_height + stride_height - 1) / \
-          strideHeight
+          stride_height
 
     output_width = \
       1 + \
@@ -42,10 +42,10 @@ def im2col(attrs, im, col):
     filter_height = col.shape[3]
     filter_width = col.shape[4]
 
-    stride_height = attrs['strideHeight']
-    stride_width = attrs['strideWidth']
-    padding_height = attrs['paddingHeight']
-    padding_width = attrs['paddingWidth']
+    stride_height = attrs['stride_height']
+    stride_width = attrs['stride_width']
+    padding_height = attrs['padding_height']
+    padding_width = attrs['padding_width']
 
     for col_row_idx in range(0, output_height):
         for col_col_idx in range(0, output_width):
@@ -73,83 +73,50 @@ def im2col(attrs, im, col):
                                     im_row_offset][im_col_offset]
 
 
-def col2img(attrs, col, img):
-    """
-    img: {CHW}
-    col:
-        {output_height, outputWidth, inputChannels, filterHeight, filterWidth}
-    """
-    input_channels = im.shape[0]
-    input_height = im.shape[1]
-    input_width = im.shape[2]
-
-    output_height = col.shape[0]
-    output_width = col.shape[1]
-    filter_height = col.shape[3]
-    filter_width = col.shape[4]
+def block_expand(inputs, attrs):
+    output_height, output_width = get_output_shape(attrs, inputs)
+    img_channels = inputs.shape[1]
+    batch_size = inputs.shape[0]
+    out = np.zeros([
+        batch_size, output_height, output_width, img_channels,
+        attrs['block_height'], attrs['block_width']
+    ]).astype("float32")
 
-    stride_height = attrs['strideHeight']
-    stride_width = attrs['strideWidth']
-    padding_height = attrs['paddingHeight']
-    padding_width = attrs['paddingWidth']
+    for i in range(len(inputs)):
+        im2col(attrs, inputs[i], out[i])
 
-    for col_row_idx in range(0, output_height):
-        for col_col_idx in range(0, output_width):
-            for channel in range(0, input_channels):
-                for filter_row_idx in range(0, filter_height):
-                    for filter_col_idx in range(0, filter_width):
-                        im_row_offset = \
-                            col_row_idx * stride_height + filter_row_idx - padding_height
-                        im_col_offset = \
-                            col_col_idx * stride_width + filter_col_idx - padding_width
-                        if (im_row_offset >= 0 and
-                                im_row_offset < input_height and
-                                im_col_offset >= 0 and
-                                im_col_offset < input_width):
-                            im[channel][im_row_offset][im_col_offset] = \
-                                col[col_row_idx][col_col_idx][channel][filter_row_idx][filter_col_idx]
-
-
-def get_input_data(C, H, W):
-    x = np.random.uniform(0.1, 1, [C, H, W]).astype("float32")
-    for c in range(0, C):
-        for h in range(0, H):
-            for w in range(0, W):
-                #x[c][h][w] = c * H * W + h * W + w
-                x[c][h][w] = 0.2 + 0.01 * (c * H * W + h * W + w)
-    return x
+    out = out.reshape([
+        batch_size * output_height * output_width,
+        img_channels * attrs['block_height'] * attrs['block_width']
+    ])
+    return out
 
 
 class TestBlockExpandOp(OpTest):
-    def setUp(self):
-        C = 3
-        H = 4
-        W = 4
-        x = get_input_data(C, H, W)
-
-        attrs = {
-            'blockHeight': 2,
-            'blockWidth': 2,
-            'strideHeight': 1,
-            'strideWidth': 1,
-            'paddingHeight': 1,
-            'paddingWidth': 1,
+    def config(self):
+        self.batch_size = 1
+        self.img_channels = 3
+        self.img_height = 4
+        self.img_width = 4
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 2,
+            'stride_height': 1,
+            'stride_width': 1,
+            'padding_height': 1,
+            'padding_width': 1,
         }
 
-        output_height, output_width = get_output_shape(attrs, x)
-        out = np.random.uniform(0.1, 1,\
-                [output_height, output_width, x.shape[0], \
-                attrs['blockHeight'], attrs['blockWidth']]).astype("float32")
-
+    def setUp(self):
+        self.config()
         self.op_type = "block_expand"
-        self.inputs = {'X': x.reshape(1, C, H, W)}
-        self.attrs = attrs
+        x = np.random.randint(0, 10, [
+            self.batch_size, self.img_channels, self.img_height, self.img_width
+        ]).astype("float32")
 
-        im2col(attrs, x, out)
-        self.outputs = {
-            'Out':out.reshape(1, output_height, output_width, x.shape[0], \
-                    attrs['blockHeight'], attrs['blockWidth'])
-        }
+        out = block_expand(x, self.attrs)
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}
 
     def test_check_output(self):
         self.check_output()
@@ -158,42 +126,52 @@ class TestBlockExpandOp(OpTest):
         self.check_grad(['X'], 'Out')
 
 
-class TestBlockExpandOp2(OpTest):
-    def setUp(self):
-        C = 3
-        H = 4
-        W = 5
-        x = get_input_data(C, H, W)
-
-        attrs = {
-            'blockHeight': 2,
-            'blockWidth': 1,
-            'strideHeight': 2,
-            'strideWidth': 1,
-            'paddingHeight': 2,
-            'paddingWidth': 1,
+class TestBlockExpandOpCase2(TestBlockExpandOp):
+    def config(self):
+        self.batch_size = 2
+        self.img_channels = 3
+        self.img_height = 4
+        self.img_width = 5
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 1,
+            'stride_height': 2,
+            'stride_width': 1,
+            'padding_height': 2,
+            'padding_width': 1,
         }
 
-        output_height, output_width = get_output_shape(attrs, x)
-        out = np.random.uniform(0.1, 1,\
-                [output_height, output_width, x.shape[0], \
-                attrs['blockHeight'], attrs['blockWidth']]).astype("float32")
-
-        self.op_type = "block_expand"
-        self.inputs = {'X': x.reshape(1, C, H, W)}
-        self.attrs = attrs
 
-        im2col(attrs, x, out)
-        self.outputs = {
-            'Out':out.reshape(1, output_height, output_width, x.shape[0], \
-                    attrs['blockHeight'], attrs['blockWidth'])
-        }
+class TestBlockExpandOpCase3(TestBlockExpandOp):
+    def config(self):
+        self.batch_size = 3
+        self.img_channels = 1
+        self.img_height = 4
+        self.img_width = 5
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 1,
+            'stride_height': 2,
+            'stride_width': 1,
+            'padding_height': 2,
+            'padding_width': 0,
+        }
 
-    def test_check_output(self):
-        self.check_output()
 
-    def test_check_grad_normal(self):
-        self.check_grad(['X'], 'Out')
+class TestBlockExpandOpCase4(TestBlockExpandOp):
+    def config(self):
+        self.batch_size = 2
+        self.img_channels = 2
+        self.img_height = 3
+        self.img_width = 3
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 2,
+            'stride_height': 1,
+            'stride_width': 1,
+            'padding_height': 0,
+            'padding_width': 0,
+        }
 
 
 if __name__ == '__main__':
-- 
GitLab
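
The reworked tests keep the OpTest pattern: config() defines shapes and
attributes, and each extra case only overrides config(). Assuming a built
Paddle workspace with the fluid test harness on PYTHONPATH, the file should
run standalone:

    python python/paddle/v2/fluid/tests/test_block_expand_op.py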