Add Context Projection functions to Layer's forward and backward;

resolve merge conflicts.

Add Context Projection functions to Layer's forward and backward;
resolve merge conflicts.
f23a1170 · xutianbing · 590ecba3 · f23a1170 · f23a1170 · f23a1170
8 changed files
--- a/paddle/function/context_projection_op.cpp
+++ b/paddle/function/context_projection_op.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/Vector.h"


--- a/paddle/function/context_projection_op.h
+++ b/paddle/function/context_projection_op.h
--- a/paddle/function/context_projection_op_gpu.cu
+++ b/paddle/function/context_projection_op_gpu.cu
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "hl_base.h"
-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"

 namespace paddle {

@@ -327,7 +327,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(Tensor& out_grad,
                                                      int context_start,
                                                      size_t total_pad,
                                                      size_t begin_pad) {
-  CHECK(w_grad.getData() && out_grad.getData());
+  CHECK(w_grad.getData() && out_grad.getData() && sequence.getData());
  CHECK_EQ(out_grad.dims_.size(), 2);
  CHECK_EQ(w_grad.dims_.size(), 2);
  CHECK_EQ(sequence.dims_.size(), 1);

--- a/paddle/function/context_projection_op_test.cpp
+++ b/paddle/function/context_projection_op_test.cpp
--- a/paddle/function/Function.cpp
+++ b/paddle/function/Function.cpp
@@ -60,14 +60,14 @@ FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {

 template <>
 FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0U) << "Duplicated value: " << key;
   valueMap_[key].i = v;
   return *this;
 }

 template <>
 FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0U) << "Duplicated value: " << key;
   valueMap_[key].b = v;
   return *this;
 }

--- a/paddle/gserver/layers/ContextProjection.cpp
+++ b/paddle/gserver/layers/ContextProjection.cpp
@@ -38,6 +38,53 @@ ContextProjection::ContextProjection(const ProjectionConfig& config,
    CHECK_EQ(inputDim * totalPad, parameter->getSize());
    weight_.reset(new Weight(totalPad, inputDim, parameter));
  }
+  // init forward_ and backward_ functions
+  init();
+}
+
+bool ContextProjection::init() {
+  size_t context_length = config_.context_length();
+  int context_start = config_.context_start();
+  bool is_padding = config_.trainable_padding();
+  size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
+  if (!useGpu_) {  // CPU: one combined backward function, backward_[0]
+    createFunction(forward_,
+                   "ContextProjectionForward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+  } else {  // GPU: backward_[0] = data grad, backward_[1] = weight grad
+    createFunction(forward_,
+                   "ContextProjectionForward-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackwardData-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start));
+
+    createFunction(backward_,
+                   "ContextProjectionBackwardWeight-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("total_pad", total_pad));
+  }
+  return true;
 }

 void ContextProjection::resetState() {
@@ -78,25 +125,35 @@ LayerStatePtr ContextProjection::getState() {
 }

 void ContextProjection::forward() {
-  CHECK(in_->value);
+  CHECK(in_->value && out_->value);
  CHECK(in_->sequenceStartPositions);

-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
-
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());

  REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
-  out_->value->contextProjectionForward(
-      *(in_->value),
-      state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr,
-      *startPositions,
-      config_.context_length(),
-      config_.context_start(),
-      beginPad_,
-      state_ ? true : isPadding);
+  bool is_padding = config_.trainable_padding();
+  /// first use state_, otherwise use weight_(padding false === w nullptr)
+  auto w_ptr =
+      state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
+  auto start_pos = in_->sequenceStartPositions;
+  /// if use state_ as weight_, w_ptr already has mem, so padding true
+  forward_[0]->init(FuncConfig()
+                        .set("context_length", config_.context_length())
+                        .set("context_start", config_.context_start())
+                        .set("begin_pad", beginPad_)
+                        .set("is_padding", state_ ? true : is_padding));
+  forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}),
+                     Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                            Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                     Tensor(reinterpret_cast<real*>(
+                                const_cast<int*>(start_pos->getData(useGpu_))),
+                            Dims{start_pos->getSize()})},
+                    {Tensor(out_->value->getData(), Dims{batch_size, dim})},
+                    {});

  if (state_ && config_.context_start() < 0) {
    CHECK_EQ(1, in_->getNumSequences());
@@ -118,39 +175,46 @@ void ContextProjection::forward() {
 }

 void ContextProjection::backward(const UpdateCallback& callback) {
-  CHECK(in_->value);
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
+  CHECK(in_->value && out_->value && out_->grad);
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());

  REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
+  bool is_padding = config_.trainable_padding();
+  auto start_pos = in_->sequenceStartPositions;
  if (!out_->grad->useGpu()) {
-    out_->grad->contextProjectionBackward(
-        in_->grad.get(),
-        isPadding ? weight_->getWGrad().get() : nullptr,
-        *startPositions,
-        config_.context_length(),
-        config_.context_start(),
-        beginPad_,
-        isPadding);
+    auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
+    backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
+                               Dims{batch_size, input_dim}),
+                        Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                               Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                        Tensor(reinterpret_cast<real*>(const_cast<int*>(
+                                   start_pos->getData(useGpu_))),
+                               Dims{start_pos->getSize()})},
+                       {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+                       {});
  } else {
    if (in_->grad) {
-      out_->grad->contextProjectionBackwardData(*(in_->grad),
-                                                *startPositions,
-                                                config_.context_length(),
-                                                config_.context_start());
+      backward_[0]->calc(
+          {Tensor(in_->grad->getData(), Dims{batch_size, input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
    }
-
-    if (isPadding && weight_->getWGrad()) {
-      out_->grad->contextProjectionBackwardWeight(
-          *(weight_->getWGrad()),
-          *startPositions,
-          config_.context_length(),
-          config_.context_start(),
-          weight_->getWGrad()->getHeight(),
-          beginPad_);
+    if (is_padding && weight_->getWGrad()) {
+      backward_[1]->calc(
+          {Tensor(weight_->getWGrad()->getData(),
+                  Dims{weight_->getWGrad()->getHeight(), input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
    }
  }


--- a/paddle/gserver/layers/ContextProjection.h
+++ b/paddle/gserver/layers/ContextProjection.h
@@ -61,6 +61,8 @@ public:

  virtual LayerStatePtr getState();

+  virtual bool init();
+
 protected:
  std::unique_ptr<Weight> weight_;
  /// number of extra timesteps added at the beginning

--- a/paddle/gserver/layers/Projection.h
+++ b/paddle/gserver/layers/Projection.h
@@ -88,11 +88,31 @@ public:
   */
  virtual LayerStatePtr getState() { return nullptr; }

+  /**
+   * Initialize forward_ and backward_; the base implementation creates none.
+   */
+  virtual bool init() { return true; }
+
  /**
   * Get output size of projection.
   */
  size_t getOutputSize() const { return config_.output_size(); }

+protected:
+  /**
+   * Create a function by registered type name, init() it, and append it.
+   * \param function, list to append to, e.g. forward_ or backward_
+   * \param name, registered function type name, e.g. with -GPU or -CPU suffix
+   * \param config, configuration passed to the new function's init()
+   */
+  void createFunction(std::vector<std::shared_ptr<FunctionBase>>& function,
+                      const std::string& name,
+                      const FuncConfig& config) {
+    function.emplace_back(FunctionBase::funcRegistrar_.createByType(name));
+    auto& func = function.back();
+    func->init(config);
+  }
+
 protected:
  /// Config of projection
  ProjectionConfig config_;
@@ -106,5 +126,9 @@ protected:
  const Argument* out_;
  /// Store `passType` passed to forward()
  PassType passType_;
+  /// Functions used by forward(); appended to via createFunction()
+  std::vector<std::shared_ptr<FunctionBase>> forward_;
+  /// Functions used by backward(); appended to via createFunction()
+  std::vector<std::shared_ptr<FunctionBase>> backward_;
 };
 }  // namespace paddle