From 701827f59cb5727676818c2fffb2b07766528436 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 5 Jul 2017 00:53:32 +0800 Subject: [PATCH] Add grad test and python wrapper for crop layer --- paddle/function/CropOp.cpp | 2 +- paddle/function/CropOpTest.cpp | 2 +- paddle/gserver/layers/CropLayer.cpp | 23 ++++---- paddle/gserver/tests/CMakeLists.txt | 2 +- paddle/gserver/tests/test_LayerGrad.cpp | 28 ++++++++++ proto/ModelConfig.proto | 8 ++- python/paddle/trainer/config_parser.py | 45 ++++++++++++++++ .../paddle/trainer_config_helpers/layers.py | 54 +++++++++++++++++++ 8 files changed, 147 insertions(+), 17 deletions(-) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp index 0d511ceef5..1bb194a9bc 100644 --- a/paddle/function/CropOp.cpp +++ b/paddle/function/CropOp.cpp @@ -148,7 +148,7 @@ public: void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); TensorShape outShape = outputs[0].shape(); diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp index c331a70d1f..71d9b05812 100644 --- a/paddle/function/CropOpTest.cpp +++ b/paddle/function/CropOpTest.cpp @@ -25,7 +25,7 @@ TEST(Crop, real) { VLOG(3) << " numSamples=" << numSamples << " channels=" << channels << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; for (bool test_grad : {false, true}) { - FunctionCompare compare( + CpuGpuFuncCompare compare( test_grad ? "CropGrad" : "Crop", FuncConfig() .set>("crop_corner", {0, 1, 1, 1}) diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp index 198ceffb46..b2fa17b400 100644 --- a/paddle/gserver/layers/CropLayer.cpp +++ b/paddle/gserver/layers/CropLayer.cpp @@ -14,7 +14,6 @@ limitations under the License. 
*/ #include "CropLayer.h" #include "paddle/utils/Stat.h" - namespace paddle { REGISTER_LAYER(crop, CropLayer); @@ -24,10 +23,9 @@ bool CropLayer::init(const LayerMap& layerMap, /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - auto& crop_conf = config_.inputs(0).crop_conf(); - crop_axis_ = crop_conf.axis(); - for (int i = 0; i < crop_conf.offset_size(); i++) { - crop_offsets_[i] = crop_conf.offset(i); + crop_axis_ = config_.axis(); + for (int i = 0; i < config_.offset_size(); i++) { + crop_offsets_.push_back(config_.offset(i)); } // 1. get input_0 shape @@ -38,7 +36,6 @@ bool CropLayer::init(const LayerMap& layerMap, ? input0_img_conf.img_size_y() : input0_img_conf.img_size(), input0_img_conf.img_size()}); - // 2. get output shape from input_1 or crop shap conf if (config_.inputs_size() == 2) { auto& input1_img_conf = config_.inputs(1).image_conf(); @@ -49,19 +46,19 @@ bool CropLayer::init(const LayerMap& layerMap, : input1_img_conf.img_size(), input1_img_conf.img_size()}); } else { - targetDims_ = TensorShape({crop_conf.shape(0), - crop_conf.shape(1), - crop_conf.shape(2), - crop_conf.shape(3)}); + targetDims_ = TensorShape({config_.shape(0), + config_.shape(1), + config_.shape(2), + config_.shape(3)}); } // 3. 
get final crop shape int dimSize = 4; for (int i = 0; i < dimSize; i++) { if (i >= crop_axis_) { - crop_shape_[i] = targetDims_[i]; + crop_shape_.push_back(targetDims_[i]); } else { - crop_shape_[i] = inDims_[i]; + crop_shape_.push_back(inDims_[i]); } } @@ -99,7 +96,7 @@ void CropLayer::setOutDims(const size_t batchSize) { } void CropLayer::setTensorDim(const size_t batchSize) { - CHECK_EQ(static_cast(inputLayers_.size()), 1); + CHECK_EQ(static_cast(inputLayers_.size()), 2); inDims_.setDim(0, batchSize); int h = inputLayers_[0]->getOutput().getFrameHeight(); if (h != 0) inDims_.setDim(2, h); diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 92f6cbcfe5..a43adc7ce7 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -56,7 +56,7 @@ add_test(NAME test_DetectionOutput add_unittest_without_exec(test_ConvUnify test_ConvUnify.cpp LayerGradUtil.cpp) - + add_test(NAME test_ConvUnify COMMAND test_ConvUnify) ################# test_BatchNorm ####################### diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 59d1e9273d..20a83d7aa1 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1792,6 +1792,34 @@ TEST(Layer, RowConvLayer) { } } +TEST(Layer, CropLayer) { + TestConfig config; + // config input_0 + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ImageConfig* img = input->mutable_image_conf(); + img->set_channels(4); + img->set_img_size(16); + config.layerConfig.set_axis(2); + config.layerConfig.add_offset(0); + config.layerConfig.add_offset(0); + + // config input_1 + config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); + input = config.layerConfig.add_inputs(); + img = input->mutable_image_conf(); + img->set_channels(2); + img->set_img_size(8); + + // config crop layer + 
config.layerConfig.set_type("crop"); + config.layerConfig.set_name("cropLayer"); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "crop", 100, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 37cd16c798..83f72c137b 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -472,10 +472,16 @@ message LayerConfig { // blank label used in ctc loss optional uint32 blank = 52 [default = 0]; - // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which + // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which // controls the scope of pooling operation. can be set > 0. // leave empty or set to -1 to disable this stride pooling. optional int32 seq_pool_stride = 53 [default = -1]; + + // for crop layer + optional int32 axis = 54 [default = 2]; + repeated uint32 offset = 55; + repeated uint32 shape = 56; + } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 370529ed97..8c529fdfd3 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1986,6 +1986,51 @@ class PadLayer(LayerBase): self.config.size = out_ch * out_h * out_w +@config_layer('crop') +class CropLayer(LayerBase): + def __init__(self, inputs, axis, offset, shape, name, **xargs): + super(CropLayer, self).__init__(name, 'crop', 0, inputs=inputs, **xargs) + self.conf.axis = axis + self.conf.axis = offset + self.conf.axis = shape + + crop = self.inputs[0].crop + self.config.inputs[0].crop_conf.axis = crop.axis + self.config.inputs[0].crop_conf.offset.extend(crop.offset) + self.config.inputs[0].crop_conf.shape.extend(crop.shape) + + # get channel, width and height from input_0 layer + input_layer = self.get_input_layer(0) + image_conf = self.config.inputs[0].image_conf + image_conf.img_size = 
input_layer.width + image_conf.img_size_y = input_layer.height + image_conf.channels = input_layer.size / (input_layer.width * + input_layer.height) + out_ch = image_conf.channels + out_h = image_conf.img_size + out_w = image_conf.img_size_y + if len(self.inputs) == 2: + # get channels, width and height from input_1 layer + input_layer = self.get_input_layer(1) + image_conf = self.config.inputs[1].image_conf + image_conf.img_size = input_layer.width + image_conf.img_size_y = input_layer.height + image_conf.channels = input_layer.size / (input_layer.width * + input_layer.height) + out_ch = image_conf.channels + out_h = image_conf.img_size_y + out_w = image_conf.img_size + else: + # set channels, width and height of current layer + if len(shape) > 2: + out_ch = shape[-3] + if len(shape) > 1: + out_h = shape[-2] + if len(shape) > 0: + out_w = shape[-1] + self.set_cnn_layer(name, out_h, out_w, out_ch) + + +@config_layer('batch_norm') +class BatchNormLayer(LayerBase): + layer_type = 'batch_norm' diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 206de1f8e1..f9de086cba 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -217,6 +217,7 @@ class LayerType(object): SMOOTH_L1 = 'smooth_l1' PRELU = 'prelu' + CROP_LAYER = 'crop' @staticmethod def is_layer_type(type_name): @@ -5853,3 +5854,56 @@ def prelu_layer(input, layer_type=LayerType.PRELU, parents=input, size=l.config.size) + + +@wrap_name_default() +@layer_support() +def crop_layer(input, axis, offset, shape=None, name=None, layer_attr=None): + """ + The crop layer crops images by offset and shape. User can set crop shape by + args 'shape' explicitly or by reference input layer. + + + The example usage is: + + .. 
code-block:: python + + crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3]) + + :param input: The input layer. If two inputs are set, + the second input will be regarded as the reference input + :type input: LayerOutput or Sequence + :param axis: start axis to be cropped. To image input layer: + - 0: batch size + - 1: channels + - 2: height + - 3: width + :type axis: int + :param offset: The crop offset + :type offset: Sequence + :param shape: The shape to be cropped. Default is None. + :type shape: Sequence | None + :param name: Name of this layer. + :type name: basestring + :return: LayerOutput object. + :rtype: LayerOutput + """ + if isinstance(input, LayerOutput): + input = [input] + elif isinstance(input, Projection): + input = [input] + else: + assert isinstance(input, collections.Sequence) + l = Layer( + inputs=[x.name for x in input], + axis=axis, + offset=offset, + shape=shape, + name=name, + type=LayerType.CROP_LAYER, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name=name, + layer_type=LayerType.CROP_LAYER, + parents=input, + size=l.config.size) -- GitLab