From cb6436b50ce40985609cf18ae81ef308c32c8602 Mon Sep 17 00:00:00 2001
From: dangqingqing <dangqingqing@baidu.com>
Date: Thu, 1 Jun 2017 00:56:07 +0800
Subject: [PATCH] CPU implementation of row convolution

---
 paddle/function/RowConvOp.cpp           | 172 ++++++++++++++++++++++++
 paddle/function/RowConvOp.h             |  42 ++++++
 paddle/gserver/layers/RowConvLayer.cpp  | 105 +++++++++++++++
 paddle/gserver/layers/RowConvLayer.h    |  46 +++++++
 paddle/gserver/tests/test_LayerGrad.cpp |  20 +++
 proto/ModelConfig.proto                 |   5 +
 6 files changed, 390 insertions(+)
 create mode 100644 paddle/function/RowConvOp.cpp
 create mode 100644 paddle/function/RowConvOp.h
 create mode 100644 paddle/gserver/layers/RowConvLayer.cpp
 create mode 100644 paddle/gserver/layers/RowConvLayer.h

diff --git a/paddle/function/RowConvOp.cpp b/paddle/function/RowConvOp.cpp
new file mode 100644
index 00000000000..f92b286c697
--- /dev/null
+++ b/paddle/function/RowConvOp.cpp
@@ -0,0 +1,172 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "RowConvOp.h"
+#include "paddle/math/Vector.h"
+
+namespace paddle {
+
+// CPU forward pass of row convolution.
+// For every row j of a sequence, row j of `out` is updated by addDotMulVMM
+// with rows j .. j + contextLength - 1 of `in` (clipped at the sequence end)
+// and the matching leading rows of `filter`.
+// `seq` holds the sequence start offsets: numSeq + 1 entries.
+template <>
+void RowConv<DEVICE_TYPE_CPU>(CpuMatrix& out,
+                              const CpuMatrix& in,
+                              const CpuMatrix& filter,
+                              const CpuIVector& seq) {
+  const int* offsets = seq.getData();
+  const size_t seqCount = seq.getSize() - 1;
+  const size_t contextLength = filter.getHeight();
+  CpuMatrix& src = const_cast<CpuMatrix&>(in);  // for subMatrix()
+  CpuMatrix& kernel = const_cast<CpuMatrix&>(filter);
+  for (size_t s = 0; s < seqCount; ++s) {
+    const size_t last = offsets[s + 1];
+    for (size_t row = offsets[s]; row < last; ++row) {
+      // Clip the look-ahead window at the end of the current sequence.
+      size_t rows = (row + contextLength < last) ? contextLength : last - row;
+      MatrixPtr x = src.subMatrix(row, rows);
+      MatrixPtr w = kernel.subMatrix(0, rows);
+      out.subMatrix(row, 1)->addDotMulVMM(*x, *w);
+    }
+  }
+}
+
+// CPU backward pass of row convolution: adds into `filterG` and `inG`,
+// skipping whichever of the two evaluates to false.
+template <>
+void RowConvGrad<DEVICE_TYPE_CPU>(const CpuMatrix& outG,
+                                  const CpuMatrix& in,
+                                  const CpuMatrix& filter,
+                                  CpuMatrix& inG,
+                                  CpuMatrix& filterG,
+                                  const CpuIVector& seq) {
+  const int* starts = seq.getData();
+  const size_t numSeq = seq.getSize() - 1;
+  const size_t contextLength = filter.getHeight();
+  CpuMatrix& inRef = const_cast<CpuMatrix&>(in);  // for subMatrix()
+  CpuMatrix& outGRef = const_cast<CpuMatrix&>(outG);
+  CpuMatrix& filterRef = const_cast<CpuMatrix&>(filter);
+
+  // gradient w.r.t filter
+  if (filterG) {
+    for (size_t i = 0; i < numSeq; ++i) {
+      const size_t begin = starts[i];
+      const size_t steps = starts[i + 1] - begin;
+      // `j < steps` keeps `steps - j` from wrapping on short sequences.
+      for (size_t j = 0; j < contextLength && j < steps; ++j) {
+        MatrixPtr x = inRef.subMatrix(begin + j, steps - j);
+        MatrixPtr dy = outGRef.subMatrix(begin, steps - j);
+        filterG.subMatrix(j, 1)->addDotMulVMM(*dy, *x);
+      }
+    }
+  }
+
+  // gradient w.r.t input feature
+  if (inG) {
+    for (size_t i = 0; i < numSeq; ++i) {
+      const size_t begin = starts[i];
+      const size_t steps = starts[i + 1] - begin;
+      for (size_t j = 0; j < steps; ++j) {
+        MatrixPtr dx = inG.subMatrix(begin + j, 1);
+        // Only outputs j - t that lie inside the sequence (t <= j) count.
+        for (size_t t = 0; t < contextLength && t <= j; ++t) {
+          MatrixPtr dy = outGRef.subMatrix(begin + j - t, 1);
+          MatrixPtr w = filterRef.subMatrix(t, 1);
+          dx->addDotMul(*dy, *w, 1.0, 1.0);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * \brief Forward Function of row convolution: inputs are {sequence, filter},
+ *        the single output is a sequence argument that must be ADD_TO.
+ */
+
+template <DeviceType Device>
+class RowConvFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {}  // nothing to configure
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    // Validate argument count, kinds and shapes before computing.
+    CHECK_EQ(2UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+    CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
+        << "SequenceArg required here.";
+    const auto in = dynamic_cast<const SequenceArg&>(inputs[0]);
+    auto out = dynamic_cast<const SequenceArg&>(outputs[0]);
+    auto w = inputs[1];  // the filter
+    CHECK(in.data() && out.data() && in.getSequenceId().data());
+    CHECK_EQ(in.shape().ndims(), 2UL);
+    CHECK_EQ(out.shape().ndims(), 2UL);
+    CHECK_EQ(in.shape()[1], out.shape()[1]);
+    CHECK_EQ(in.shape()[0], out.shape()[0]);
+    CHECK_EQ(w.shape()[1], in.shape()[1]);  // filter width == input width
+
+    auto outMat = out.matrix<Device>();
+    const auto inMat = in.matrix<Device>();
+    const auto wMat = w.matrix<Device>();
+    const auto seqId = in.getSequenceId().vector<int, Device>();
+
+    RowConv<Device>(outMat, inMat, wMat, seqId);
+  }
+};
+
+/**
+ * \brief The backward propagation of the row convolution Function.
+ *
+ * Argument in this Function: inputs = {output grad, forward input, filter},
+ * outputs = {input grad, filter grad}; either output may have null data.
+ */
+
+template <DeviceType Device>
+class RowConvGradFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {}  // nothing to configure
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    const auto outGrad = dynamic_cast<const SequenceArg&>(inputs[0]);
+    const auto in = dynamic_cast<const SequenceArg&>(inputs[1]);
+    const auto w = inputs[2];  // the filter
+    auto inGrad = dynamic_cast<const SequenceArg&>(outputs[0]);
+    auto wGrad = outputs[1];
+    // An output without data is replaced by a (nullptr, 0, 0) matrix below.
+    const auto outGMat = outGrad.matrix<Device>();
+    const auto inMat = in.matrix<Device>();
+    const auto wMat = w.matrix<Device>();
+    auto inGMat = inGrad.data()
+                      ? inGrad.matrix<Device>()
+                      : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
+    auto wGMat = wGrad.data()
+                     ? wGrad.matrix<Device>()
+                     : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
+    const auto seqId = in.getSequenceId().vector<int, Device>();
+
+    RowConvGrad<Device>(outGMat, inMat, wMat, inGMat, wGMat, seqId);
+  }
+};
+
+REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
+REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
+#ifndef PADDLE_ONLY_CPU  // GPU builds register the same Function classes
+REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
+REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
+#endif  // PADDLE_ONLY_CPU
+
+}  // namespace paddle
diff --git a/paddle/function/RowConvOp.h b/paddle/function/RowConvOp.h
new file mode 100644
index 00000000000..cd78ec724ab
--- /dev/null
+++ b/paddle/function/RowConvOp.h
@@ -0,0 +1,42 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+
+namespace paddle {
+
+/**
+ * \brief Forward of row convolution: accumulates into `out` from `in` and
+ *        `filter` (one row per context step); `seq` = sequence start offsets.
+ */
+template <DeviceType DType>
+void RowConv(typename Tensor<real, DType>::Matrix& out,
+             const typename Tensor<real, DType>::Matrix& in,
+             const typename Tensor<real, DType>::Matrix& filter,
+             const typename Tensor<int, DType>::Vector& seq);
+
+/**
+ * \brief Backward of row convolution: from `outG`, `in` and `filter`,
+ *        adds into `inG` and `filterG`; `seq` = sequence start offsets.
+ */
+template <DeviceType DType>
+void RowConvGrad(const typename Tensor<real, DType>::Matrix& outG,
+                 const typename Tensor<real, DType>::Matrix& in,
+                 const typename Tensor<real, DType>::Matrix& filter,
+                 typename Tensor<real, DType>::Matrix& inG,
+                 typename Tensor<real, DType>::Matrix& filterG,
+                 const typename Tensor<int, DType>::Vector& seq);
+}  // namespace paddle
diff --git a/paddle/gserver/layers/RowConvLayer.cpp b/paddle/gserver/layers/RowConvLayer.cpp
new file mode 100644
index 00000000000..d4b14062977
--- /dev/null
+++ b/paddle/gserver/layers/RowConvLayer.cpp
@@ -0,0 +1,105 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "RowConvLayer.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+REGISTER_LAYER(row_conv, RowConvLayer);
+
+// Sets up the layer: reads context_length from the config of input 0,
+// creates the (context_length x size) filter weight and both Functions.
+bool RowConvLayer::init(const LayerMap& layerMap,
+                        const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+
+  // Check the input count before config_.inputs(0) is dereferenced below.
+  CHECK_EQ(inputLayers_.size(), 1UL);
+  contexLength_ = config_.inputs(0).row_conv_conf().context_length();
+  weight_.reset(new Weight(contexLength_, getSize(), parameters_[0]));
+  createFunction(forward_, "RowConv", FuncConfig());
+  createFunction(backward_, "RowConvGrad", FuncConfig());
+  return true;
+}
+
+// Forward: ADD_TO the freshly reset output via RowConv, then activation.
+void RowConvLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  MatrixPtr input = getInputValue(0);
+  size_t width = input->getWidth();
+  CHECK_EQ(width, getSize());
+  resetOutput(input->getHeight(), width);
+
+  CHECK(getInput(0).sequenceStartPositions) << "Input should be a sequence.";
+  const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
+  wDims_ = TensorShape({contexLength_, width});
+
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*input, *startPos);
+  inputs.addArg(*weight_->getW(), wDims_);
+  outputs.addArg(*getOutputValue(), *startPos, ADD_TO);
+
+  {
+    REGISTER_TIMER_INFO("RowConvForward", getName().c_str());
+    forward_[0]->calc(inputs, outputs);
+  }
+
+  /* activation */ {
+    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
+    forwardActivation();
+  }
+}
+
+// Backward pass: activation gradient, then RowConvGrad, then weight update.
+void RowConvLayer::backward(const UpdateCallback& callback) {
+  /* Do derivation */ {
+    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
+    backwardActivation();
+  }
+
+  const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*getOutputGrad(), *startPos);
+  inputs.addArg(*getInputValue(0), *startPos);
+  inputs.addArg(*weight_->getW(), wDims_);  // not a sequence: pass its shape
+
+  MatrixPtr inGrad = getInputGrad(0);
+  MatrixPtr wGrad = weight_->getWGrad();
+  size_t h = getInputValue(0)->getHeight();
+  size_t w = getInputValue(0)->getWidth();
+  outputs.addArg(
+      inGrad ? (*inGrad) : *(Matrix::create(nullptr, h, w, false, useGpu_)),
+      *startPos,
+      ADD_TO);
+  outputs.addArg(
+      wGrad ? (*wGrad)
+            : *(Matrix::create(nullptr, contexLength_, w, false, useGpu_)),
+      wDims_,
+      ADD_TO);
+
+  {
+    REGISTER_TIMER_INFO("RowConvBackward", getName().c_str());
+    backward_[0]->calc(inputs, outputs);
+  }
+  {
+    REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
+    weight_->getParameterPtr()->incUpdate(callback);
+  }
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/RowConvLayer.h b/paddle/gserver/layers/RowConvLayer.h
new file mode 100644
index 00000000000..05be6ca6a9b
--- /dev/null
+++ b/paddle/gserver/layers/RowConvLayer.h
@@ -0,0 +1,46 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Layer.h"
+
+namespace paddle {
+
+/**
+ * \brief Row convolution layer: one sequence input, look-ahead context.
+ */
+class RowConvLayer : public Layer {
+public:
+  explicit RowConvLayer(const LayerConfig& config) : Layer(config) {}
+
+  ~RowConvLayer() {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+
+protected:
+  // Row convolution weight, contexLength_ x fan_out.
+  // fan_out is the size of the output feature.
+  std::unique_ptr<Weight> weight_;
+
+  // std::unique_ptr<Weight> biases_;
+
+  // How many steps to look ahead (context_length from the layer config).
+  size_t contexLength_;
+  TensorShape wDims_;  // shape of weight_ as passed to the Functions
+};
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index e1e8e7fae7c..6adffcf53b7 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1705,6 +1705,26 @@ TEST(Layer, TransLayer) {
   }
 }
 
+TEST(Layer, RowConvLayer) {
+  const int context = 3;  // context_length given to the layer
+  const int size = 512;   // layer size, also used as the input size
+  // Layer under test: row_conv with a sigmoid activation.
+  TestConfig config;
+  config.layerConfig.set_type("row_conv");
+  config.layerConfig.set_size(size);
+  config.layerConfig.set_active_type("sigmoid");
+  // One sequence input; context * size presumably is its parameter size.
+  config.inputDefs.push_back(
+      {INPUT_SEQUENCE_DATA, "layer_0", size, context * size});
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  RowConvConfig* conv = input->mutable_row_conv_conf();
+  conv->set_context_length(context);
+  // testLayerGrad is run once with useGpu == false and once with true.
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "row_conv", 100, false, useGpu, false);
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 4f9b53d6f65..29270829bbc 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -194,6 +194,10 @@ message MaxOutConfig {
   required uint32 groups = 2;
 }
 
+message RowConvConfig {
+  required uint32 context_length = 1;  // row count of the row_conv filter
+}
+
 message ProjectionConfig {
   required string type = 1;
   required string name = 2;
@@ -279,6 +283,7 @@ message LayerInputConfig {
   optional SppConfig spp_conf = 12;
   optional PriorBoxConfig priorbox_conf = 13;
   optional PadConfig pad_conf = 14;
+  optional RowConvConfig row_conv_conf = 15;
 }
 
 message LayerConfig {
-- 
GitLab