Commit 61aa1098 authored by hedaoyuan

BlockExpandLayer based on the ImageExpand Function.

Parent 48e0f432
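
As background for the CHECK_EQ fix in the first hunk below: the expanded sequence has seqLength = outputHeight * outputWidth steps, and each step carries stepSize = inputChannels * blockH * blockW values. The following is a minimal standalone sketch of that shape arithmetic; all sizes are made-up examples, not values from this commit.

#include <cassert>
#include <cstddef>

// Standalone sketch of the shape arithmetic checked in ImageExpandFunction;
// all sizes below are hypothetical examples, not values from the commit.
int main() {
  size_t inputChannels = 3, inputHeight = 8, inputWidth = 8;
  size_t blockH = 4, blockW = 4;
  size_t strideH = 2, strideW = 2;
  size_t paddingH = 1, paddingW = 1;

  // Same formula as in the diff: number of block positions along each axis.
  size_t outputHeight =
      1 + (inputHeight + 2 * paddingH - blockH + strideH - 1) / strideH;
  size_t outputWidth =
      1 + (inputWidth + 2 * paddingW - blockW + strideW - 1) / strideW;

  size_t seqLength = outputHeight * outputWidth;      // 4 * 4 = 16 steps
  size_t stepSize = inputChannels * blockH * blockW;  // 3 * 4 * 4 = 48 values
  assert(seqLength == 16 && stepSize == 48);
  return 0;
}
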
@@ -119,12 +119,17 @@ public:
         1 +
         (inputWidth + 2 * paddingW() - blockW() + strideW() - 1) / strideW();
     CHECK_EQ(seqLength, outputHeight * outputWidth);
-    CHECK_EQ(stepSize, inputChannels * blockH() * blockH());
+    CHECK_EQ(stepSize, inputChannels * blockH() * blockW());
 
     real* inputData = inputs[0].data<real>();
     real* outputData = outputs[0].data<real>();
     Im2ColFunctor<kOCF, Device, real> im2col;
     for (size_t i = 0; i < batchSize; i++) {
+      // The result of im2col is [output_height, output_width,
+      // input_channels, filter_height, filter_width], and it is easy to
+      // reshape it into [seqLength, stepSize], where seqLength is equal to
+      // output_height * output_width and stepSize is equal to
+      // input_channels * filter_height * filter_width.
       im2col(inputData,
              inputChannels,
              inputHeight,
@@ -161,4 +166,6 @@ protected:
   inline int blockW() const { return blocks_[1]; }
 };
 
+REGISTER_TYPED_FUNC(ImageExpand, CPU, ImageExpandFunction);
+
 } // namespace paddle
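
The comment added in the hunk above states that the im2col result [output_height, output_width, input_channels, filter_height, filter_width] can simply be viewed as a [seqLength, stepSize] matrix. Below is a small illustrative sketch of the index identity behind that claim; the sizes and the chosen index are arbitrary assumptions, not taken from the commit.

#include <cassert>
#include <cstddef>
#include <cstdio>

// Illustrates that the row-major 5-D im2col layout and the 2-D
// [seqLength, stepSize] view address the same contiguous buffer.
// All sizes and indices here are arbitrary example values.
int main() {
  size_t outputHeight = 2, outputWidth = 3;
  size_t inputChannels = 2, blockH = 2, blockW = 2;
  size_t seqLength = outputHeight * outputWidth;
  size_t stepSize = inputChannels * blockH * blockW;

  // Pick one element [oh, ow, ic, kh, kw] of the 5-D layout.
  size_t oh = 1, ow = 2, ic = 1, kh = 0, kw = 1;
  size_t offset5d =
      (((oh * outputWidth + ow) * inputChannels + ic) * blockH + kh) * blockW +
      kw;

  // The same element seen as [row, col] in the [seqLength, stepSize] view.
  size_t row = oh * outputWidth + ow;             // which sequence step
  size_t col = (ic * blockH + kh) * blockW + kw;  // offset inside the step
  size_t offset2d = row * stepSize + col;

  assert(row < seqLength && col < stepSize);
  std::printf("%zu == %zu\n", offset5d, offset2d);  // identical offsets
  return 0;
}
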
@@ -37,6 +37,18 @@ bool BlockExpandLayer::init(const LayerMap& layerMap,
   imgSizeH_ = blockConf.img_size_y();
   imgSizeW_ = blockConf.img_size_x();
 
+  if (!useGpu_) {
+    std::vector<size_t> strides = {(size_t)strideH_, (size_t)strideW_};
+    std::vector<size_t> paddings = {(size_t)paddingH_, (size_t)paddingW_};
+    std::vector<size_t> blocks = {(size_t)blockH_, (size_t)blockW_};
+    createFunction(forward_,
+                   "ImageExpand",
+                   FuncConfig()
+                       .set("strides", strides)
+                       .set("paddings", paddings)
+                       .set("blocks", blocks));
+  }
+
   return true;
 }
@@ -63,10 +75,11 @@ void BlockExpandLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
   size_t blockNum = getBlockNum();
   size_t blockSize = blockH_ * blockW_ * channels_;
   resetOutput(blockNum * batchSize, blockSize);
+  // TODO(hedaoyuan): After completing the GPU version of ImageExpand,
+  // refactor the following code.
   Argument& out = getOutput();
   MatrixPtr outV = getOutputValue();
@@ -78,6 +91,7 @@ void BlockExpandLayer::forward(PassType passType) {
   int* start = out.sequenceStartPositions->getMutableData(false);
   int* dims = out.cpuSequenceDims->getData();
   for (size_t i = 0; i < batchSize; i++) {
+    if (useGpu_) {
     outVTrans_->zeroMem();
     /* expand each block as one row */
     MatrixPtr inputTmp =
@@ -105,11 +119,21 @@ void BlockExpandLayer::forward(PassType passType) {
           false,
           useGpu_);
     outVTrans_->transpose(outVTmp, false);
+    }
     start[i] = i * blockNum;
     dims[2 * i] = outputH_;
     dims[2 * i + 1] = outputW_;
   }
   start[batchSize] = batchSize * blockNum;
+  if (!useGpu_) {
+    TensorShape inputShape({batchSize, channels_, imgSizeH_, imgSizeW_});
+    TensorShape outputShape({batchSize, blockNum, blockSize});
+    BufferArgs inputs;
+    BufferArgs outputs;
+    inputs.addArg(*getInputValue(0), inputShape);
+    outputs.addArg(*getOutputValue(), outputShape, ASSIGN_TO);
+    forward_[0]->calc(inputs, outputs);
+  }
 }
 
 void BlockExpandLayer::backward(const UpdateCallback& callback) {
......
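
For reference on the sequence metadata filled in by the forward() loop above: each image contributes blockNum steps, start[] records the row range each image owns in the expanded output matrix, and dims[] records the output grid per image. A minimal sketch with invented sizes (not taken from the commit):

#include <cstddef>
#include <cstdio>
#include <vector>

// Sketch of the sequence metadata layout produced by BlockExpandLayer's
// forward loop; batch size and grid sizes are invented example values.
int main() {
  size_t batchSize = 3;
  size_t outputH = 4, outputW = 4;
  size_t blockNum = outputH * outputW;  // block positions per image

  std::vector<int> start(batchSize + 1);
  std::vector<int> dims(2 * batchSize);
  for (size_t i = 0; i < batchSize; i++) {
    start[i] = static_cast<int>(i * blockNum);
    dims[2 * i] = static_cast<int>(outputH);
    dims[2 * i + 1] = static_cast<int>(outputW);
  }
  start[batchSize] = static_cast<int>(batchSize * blockNum);

  // Image i owns rows [start[i], start[i + 1]) of the expanded output.
  for (size_t i = 0; i < batchSize; i++) {
    std::printf("image %zu: rows [%d, %d), grid %d x %d\n",
                i, start[i], start[i + 1], dims[2 * i], dims[2 * i + 1]);
  }
  return 0;
}
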