1. Reading image shape from input data instead of image_config

2. Add crop layer unit test 3. Fix bugs

1. Reading image shape from input data instead of image_config
2. Add crop layer unit test 3. Fix bugs
3e7819c2 · wanghaoshuang · de5ded6b · 3e7819c2 · 3e7819c2 · 3e7819c2
9 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,7 @@
 # limitations under the License
 cmake_minimum_required(VERSION 3.0)
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl -lpthread")
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR})

--- a/paddle/function/CropOp.cpp
+++ b/paddle/function/CropOp.cpp
@@ -22,11 +22,10 @@ template <>
 void Crop<DEVICE_TYPE_CPU>(real* outputs,
                           const real* inputs,
                           const TensorShape inShape,
+                           const TensorShape outShape,
                           const FuncConfig& conf) {
  std::vector<uint32_t> crop_corner =
      conf.get<std::vector<uint32_t>>("crop_corner");
-  std::vector<uint32_t> crop_shape =
-      conf.get<std::vector<uint32_t>>("crop_shape");
  int cCrop = crop_corner[1];
  int hCrop = crop_corner[2];
  int wCrop = crop_corner[3];
@@ -36,9 +35,9 @@ void Crop<DEVICE_TYPE_CPU>(real* outputs,
  int inH = inShape[2];
  int inW = inShape[3];
-  int outC = crop_shape[1];
+  int outC = outShape[1];
-  int outH = crop_shape[2];
+  int outH = outShape[2];
-  int outW = crop_shape[3];
+  int outW = outShape[3];
  for (int n = 0; n < num; n++) {
    for (int c = 0; c < outC; c++) {
@@ -54,12 +53,11 @@ void Crop<DEVICE_TYPE_CPU>(real* outputs,
 template <>
 void CropGrad<DEVICE_TYPE_CPU>(const real* inGrad,
                               real* outGrad,
+                               const TensorShape inShape,
                               const TensorShape outShape,
                               const FuncConfig& conf) {
  std::vector<uint32_t> crop_corner =
      conf.get<std::vector<uint32_t>>("crop_corner");
-  std::vector<uint32_t> crop_shape =
-      conf.get<std::vector<uint32_t>>("crop_shape");
  int cCrop = crop_corner[1];
  int hCrop = crop_corner[2];
  int wCrop = crop_corner[3];
@@ -69,9 +67,9 @@ void CropGrad<DEVICE_TYPE_CPU>(const real* inGrad,
  int outH = outShape[2];
  int outW = outShape[3];
-  int inC = crop_shape[1];
+  int inC = inShape[1];
-  int inH = crop_shape[2];
+  int inH = inShape[2];
-  int inW = crop_shape[3];
+  int inW = inShape[3];
  for (int n = 0; n < num; n++) {
    for (int c = 0; c < inC; c++) {
@@ -123,9 +121,13 @@ public:
    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
    TensorShape inShape = inputs[0].shape();
+    TensorShape outShape = outputs[0].shape();
-    Crop<Device>(
+    Crop<Device>(outputs[0].data<real>(),
-        outputs[0].data<real>(), inputs[0].data<real>(), inShape, conf_);
+                 inputs[0].data<real>(),
+                 inShape,
+                 outShape,
+                 conf_);
  }
 private:
@@ -152,9 +154,13 @@ public:
    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
    TensorShape outShape = outputs[0].shape();
+    TensorShape inShape = inputs[0].shape();
-    CropGrad<Device>(
+    CropGrad<Device>(inputs[0].data<real>(),
-        inputs[0].data<real>(), outputs[0].data<real>(), outShape, conf_);
+                     outputs[0].data<real>(),
+                     inShape,
+                     outShape,
+                     conf_);
  }
 private:

--- a/paddle/function/CropOp.h
+++ b/paddle/function/CropOp.h
@@ -31,6 +31,7 @@ template <DeviceType Device>
 void Crop(real* outputs,
          const real* inputs,
          const TensorShape inShape,
+          const TensorShape outShape,
          const FuncConfig& conf);
 /**
@@ -45,5 +46,6 @@ template <DeviceType Device>
 void CropGrad(const real* inGrad,
              real* outGrad,
              const TensorShape inShape,
+              const TensorShape outShape,
              const FuncConfig& conf);
 }  // namespace paddle
--- a/paddle/function/CropOpGpu.cu
+++ b/paddle/function/CropOpGpu.cu
@@ -37,9 +37,9 @@ template <>
 void Crop<DEVICE_TYPE_GPU>(real* outputs,
                          const real* inputs,
 						  const TensorShape inShape,
+						  const TensorShape outShape,
                          const FuncConfig& conf) {
  std::vector<uint32_t> crop_corner = conf.get<std::vector<uint32_t>>("crop_corner");
-  std::vector<uint32_t> crop_shape = conf.get<std::vector<uint32_t>>("crop_shape");
  int cropC = crop_corner[1];
  int cropH = crop_corner[2];
  int cropW = crop_corner[3];
@@ -49,14 +49,14 @@ void Crop<DEVICE_TYPE_GPU>(real* outputs,
  int inH = inShape[2];
  int inW = inShape[3];
-  int outC = crop_shape[1];
+  int outC = outShape[1];
-  int outH = crop_shape[2];
+  int outH = outShape[2];
-  int outW = crop_shape[3];
+  int outW = outShape[3];
  size_t nth = num * outC * outH * outW;
  int blockSize = 1024;
  int gridSize = (nth + blockSize - 1) / blockSize;
  KeCrop<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
    (outputs, inputs, inC, inH, inW, cropC, cropH, cropW,
     outC, outH, outW, nth);
@@ -75,7 +75,7 @@ __global__ void KeCropDiff(const real* inGrad, real* outGrad,
    const int n = idx / inW / inH / inC;
    const int off = ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w;
    outGrad[off] += inGrad[idx];
  }
 }
@@ -83,10 +83,10 @@ __global__ void KeCropDiff(const real* inGrad, real* outGrad,
 template <>
 void CropGrad<DEVICE_TYPE_GPU>(const real* inGrad,
                              real* outGrad,
+                              const TensorShape inShape,
                              const TensorShape outShape,
                              const FuncConfig& conf) {
  std::vector<uint32_t> crop_corner = conf.get<std::vector<uint32_t>>("crop_corner");
-  std::vector<uint32_t> crop_shape = conf.get<std::vector<uint32_t>>("crop_shape");
  int cropC = crop_corner[1];
  int cropH = crop_corner[2];
  int cropW = crop_corner[3];
@@ -96,10 +96,10 @@ void CropGrad<DEVICE_TYPE_GPU>(const real* inGrad,
  int outH = outShape[2];
  int outW = outShape[3];
-  int inC = crop_shape[1];
+  int inC = inShape[1];
-  int inH = crop_shape[2];
+  int inH = inShape[2];
-  int inW = crop_shape[3];
+  int inW = inShape[3];
  size_t nth = num * inC * inH * inW;
  int blockSize = 1024;
  int gridSize = (nth + blockSize - 1) / blockSize;

--- a/paddle/gserver/layers/CropLayer.cpp
+++ b/paddle/gserver/layers/CropLayer.cpp
@@ -22,7 +22,8 @@ bool CropLayer::init(const LayerMap& layerMap,
                     const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  Layer::init(layerMap, parameterMap);
+  CHECK_LE(static_cast<int>(inputLayers_.size()), 2);
+  CHECK_GE(static_cast<int>(inputLayers_.size()), 1);
  crop_axis_ = config_.axis();
  for (int i = 0; i < config_.offset_size(); i++) {
    crop_offsets_.push_back(config_.offset(i));
@@ -36,8 +37,14 @@ bool CropLayer::init(const LayerMap& layerMap,
                             ? input0_img_conf.img_size_y()
                             : input0_img_conf.img_size(),
                         input0_img_conf.img_size()});
-  // 2. get output shape from input_1 or crop shap conf
+  // 2. get target dims from config
-  if (config_.inputs_size() == 2) {
+  if (config_.inputs_size() == 1) {
+    targetDims_ = TensorShape({config_.shape(0),
+                               config_.shape(1),
+                               config_.shape(2),
+                               config_.shape(3)});
+  } else {
+    // 2. get input_1 shape
    auto& input1_img_conf = config_.inputs(1).image_conf();
    targetDims_ = TensorShape({0,
                               input1_img_conf.channels(),
@@ -45,24 +52,10 @@ bool CropLayer::init(const LayerMap& layerMap,
                                   ? input1_img_conf.img_size_y()
                                   : input1_img_conf.img_size(),
                               input1_img_conf.img_size()});
-  } else {
-    targetDims_ = TensorShape({config_.shape(0),
-                               config_.shape(1),
-                               config_.shape(2),
-                               config_.shape(3)});
  }
-  // 3. get final crop shape
+  // 3. get final crop corner
  int dimSize = 4;
-  for (int i = 0; i < dimSize; i++) {
-    if (i >= crop_axis_) {
-      crop_shape_.push_back(targetDims_[i]);
-    } else {
-      crop_shape_.push_back(inDims_[i]);
-    }
-  }
-  // 4. get final crop corner
  crop_corner_ = {0, 0, 0, 0};
  for (int i = 0; i < dimSize; i++) {
    if (i >= crop_axis_) {
@@ -75,43 +68,61 @@ bool CropLayer::init(const LayerMap& layerMap,
  }
  outDims_ = TensorShape(4);
-  setOutDims(0);
+  createFunction(
-  createFunction(forward_,
+      forward_, "Crop", FuncConfig().set("crop_corner", crop_corner_));
-                 "Crop",
+  createFunction(
-                 FuncConfig()
+      backward_, "CropGrad", FuncConfig().set("crop_corner", crop_corner_));
-                     .set("crop_corner", crop_corner_)
-                     .set("crop_shape", crop_shape_));
-  createFunction(backward_,
-                 "CropGrad",
-                 FuncConfig()
-                     .set("crop_corner", crop_corner_)
-                     .set("crop_shape", crop_shape_));
  return true;
 }
-void CropLayer::setOutDims(const size_t batchSize) {
+/**
+ * Recompute outDims_ and the output frame size for the current batch.
+ *
+ * With two inputs, targetDims_ is first refreshed from the reference input
+ * (input 1). Axes at or after crop_axis_ then take their extent from
+ * targetDims_; axes before it keep the extent recorded in inDims_.
+ */
+void CropLayer::setOutDims() {
-  outDims_.reshape({batchSize, crop_shape_[1], crop_shape_[2], crop_shape_[3]});
+  // get target dims from input_1
+  // Only touch inputLayers_[1] when a reference input exists: the layer also
+  // accepts a single input, and indexing slot 1 then would be out of range.
+  if (config_.inputs_size() == 2) {
+    MatrixPtr input = inputLayers_[1]->getOutputValue();
+    size_t batchSize = input->getHeight();
+    targetDims_.setDim(0, batchSize);
+    // NOTE(review): channels come from input 0's image_conf, not input 1 --
+    // confirm this is intended.
+    int ch = config_.inputs(0).image_conf().channels();
+    if (ch != 0) targetDims_.setDim(1, ch);
+    int h = inputLayers_[1]->getOutput().getFrameHeight();
+    if (h != 0) targetDims_.setDim(2, h);
+    int w = inputLayers_[1]->getOutput().getFrameWidth();
+    if (w != 0) targetDims_.setDim(3, w);
+  }
+  // get final crop shape from target dims and crop axis
+  std::vector<uint32_t> crop_shape;
+  int dimSize = 4;
+  for (int i = 0; i < dimSize; i++) {
+    if (i >= crop_axis_) {
+      crop_shape.push_back(targetDims_[i]);
+    } else {
+      crop_shape.push_back(inDims_[i]);
+    }
+  }
+  outDims_.reshape(
+      {crop_shape[0], crop_shape[1], crop_shape[2], crop_shape[3]});
+  output_.setFrameHeight(crop_shape[2]);
+  output_.setFrameWidth(crop_shape[3]);
 }
-void CropLayer::setTensorDim(const size_t batchSize) {
+// Refresh inDims_ from input 0's current output: dim 0 is set to the height
+// of its output matrix, and dims 2 and 3 are overwritten with the output's
+// frame height and width whenever those are non-zero.
+void CropLayer::setInDims() {
-  CHECK_EQ(static_cast<int>(inputLayers_.size()), 2);
+  MatrixPtr input = inputLayers_[0]->getOutputValue();
+  size_t batchSize = input->getHeight();
   inDims_.setDim(0, batchSize);
   int h = inputLayers_[0]->getOutput().getFrameHeight();
   if (h != 0) inDims_.setDim(2, h);
   int w = inputLayers_[0]->getOutput().getFrameWidth();
   if (w != 0) inDims_.setDim(3, w);
-  setOutDims(batchSize);
 }
 void CropLayer::forward(PassType passType) {
  Layer::forward(passType);
-  MatrixPtr input = inputLayers_[0]->getOutputValue();
+  setInDims();
-  size_t batchSize = input->getHeight();
+  setOutDims();
-  setTensorDim(batchSize);
  int size = outDims_[1] * outDims_[2] * outDims_[3];
-  resetOutput(batchSize, size);
+  resetOutput(outDims_[0], size);
  MatrixPtr outV = getOutputValue();
  REGISTER_TIMER_INFO("CropForward", getName().c_str());

--- a/paddle/gserver/layers/CropLayer.h
+++ b/paddle/gserver/layers/CropLayer.h
@@ -39,13 +39,12 @@ public:
  void backward(const UpdateCallback& callback = nullptr) override;
 protected:
-  void setOutDims(const size_t batchSize);
+  void setOutDims();
-  void setTensorDim(const size_t batchSize);
+  void setInDims();
  int32_t crop_axis_;
  std::vector<uint32_t> crop_offsets_;
  std::vector<uint32_t> crop_corner_;
-  std::vector<uint32_t> crop_shape_;
  TensorShape inDims_;
  TensorShape targetDims_;
  TensorShape outDims_;

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2005,29 +2005,6 @@ class CropLayer(LayerBase):
        image_conf.img_size_y = input_layer.height
        image_conf.channels = input_layer.size / (input_layer.width *
                                                  input_layer.height)
-        out_ch = image_conf.channels
-        out_h = image_conf.img_size
-        out_w = image_conf.img_size_y
-        if len(self.inputs) == 2:
-            # get channels, width and height from input_1 layer
-            input_layer = self.get_input_layer(1)
-            image_conf = self.config.inputs[1].image_conf
-            image_conf.img_size = input_layer.width
-            image_conf.img_size_y = input_layer.height
-            image_conf.channels = input_layer.size / (input_layer.width *
-                                                      input_layer.height)
-            out_ch = image_conf.channels
-            out_h = image_conf.img_size_y
-            out_w = image_conf.img_size
-        else:
-            # set channels, width and heigth of current layer
-            if len(shape) > 2:
-                out_ch = shape[-3]
-            if len(shape) > 1:
-                out_h = shape[-2]
-            if len(shape) > 0:
-                out_w = shape[-1]
-        self.set_cnn_layer(name, out_h, out_w, out_ch)
 @config_layer('batch_norm')

--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -5881,9 +5881,9 @@ def prelu_layer(input,
 @wrap_name_default()
 @layer_support()
-def crop_layer(input, axis, offset, shape=None, name=None, layer_attr=None):
+def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
    """
-    The crop layer crop images by offset and shape. User can set crop shape by
+    The crop layer crops images by offset and shape. User can set crop shape by
    args 'shape' explicitly or by reference input layer.
@@ -5896,16 +5896,16 @@ def crop_layer(input, axis, offset, shape=None, name=None, layer_attr=None):
    :param input: The input layer. If two inputs are set,
                    the second input will be regarded as the reference input
    :type input: LayerOutput or Sequence
+    :param offset: The crop offset
+    :type offset: Sequence
    :param axis: start axis to be cropped. To image input layer:
        - 0: batch size
        - 1: channels
        - 2: height
        - 3: width
    :type axis: int
-    :param offset: The crop offset
-    :type offset: Sequence
    :param shape: The shape to be cropped. Default is None.
-    :type shape: Sqquence | None
+    :type shape: Sequence | None
    :param name: Name of this layer.
    :type name: basestring
    :return: LayerOutput object.
@@ -5913,8 +5913,6 @@ def crop_layer(input, axis, offset, shape=None, name=None, layer_attr=None):
    """
    if isinstance(input, LayerOutput):
        input = [input]
-    elif isinstance(input, Projection):
-        input = [input]
    else:
        assert isinstance(input, collections.Sequence)
    l = Layer(

--- a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
+# Trainer-config test for crop_layer: a pooled conv feature map is cropped
+# along height and width to the size of a reference data layer.
+from paddle.trainer_config_helpers import *
+settings(batch_size=1000, learning_rate=1e-5)
+data = data_layer(name='data', size=2016, height=48, width=42)
+# The reference layer gets its own name so the two data layers do not share
+# the name 'data'.
+reference_data = data_layer(
+    name='reference_data', size=768, height=16, width=16)
+conv = img_conv_layer(
+    input=data,
+    filter_size=3,
+    num_channels=1,
+    num_filters=16,
+    padding=1,
+    act=LinearActivation(),
+    bias_attr=True)
+pool = img_pool_layer(input=conv, pool_size=2, stride=2, pool_type=MaxPooling())
+# offset has no default in crop_layer's signature, so it must be passed.
+# NOTE(review): [0, 0] assumes one offset entry per cropped axis
+# (height, width) -- confirm against CropLayer::init.
+crop = crop_layer(input=[pool, reference_data], offset=[0, 0], axis=2)
+# Export the crop layer itself; the name 'pad' is never defined in this config.
+outputs(crop)