Fix CrossMapNormalFunc and ContextProjectionFunc (remove the inouts argument)

df9be2d4 · hedaoyuan · 57e25211 · df9be2d4 · df9be2d4 · df9be2d4
7 changed files
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -57,58 +57,67 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
 * output Buffer or added to the output Buffer is determined by the
 * argType_ property of the output BufferArg.
 */
+
+// ArgType is only used by output BufferArg.
+// For input argument, argType_ is ignored.
+// For output argument, need to set the argType_ of the BufferArg.
+enum ArgType {
+  UNSPECIFIED = 0,
+  ASSIGN_TO = 1,
+  ADD_TO = 2,
+};
 class BufferArg {
 public:
-  // ArgType is only used by output BufferArg.
-  // For input argument, argType_ is ignored.
-  // For output argument, need to set the argType_ of the BufferArg.
-  enum ArgType {
-    UNSPECIFIED = 0,
-    ASSIGN_TO = 1,
-    ADD_TO = 2,
-  };
-
  void setArgType(ArgType argType) { argType_ = argType; }

  ArgType getArgType() const { return argType_; }

 public:
-  BufferArg(void* buf, ValueType valueType, const TensorShape& shape)
-      : buf_(buf), valueType_(valueType), shape_(shape) {}
+  BufferArg(void* buf,
+            ValueType valueType,
+            const TensorShape& shape,
+            ArgType argType = UNSPECIFIED)
+      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {}

  BufferArg(void* buf, ValueType valueType)
      : buf_(buf), valueType_(valueType) {}

-  BufferArg(const Matrix& matrix)
+  BufferArg(const Matrix& matrix, ArgType argType = UNSPECIFIED)
      : buf_(
            const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
        valueType_(DataType<real>::value),
-        shape_(2) {
+        shape_(2),
+        argType_(argType) {
    shape_.setDim(0, matrix.getHeight());
    shape_.setDim(1, matrix.getWidth());
  }

-  BufferArg(const Matrix& matrix, const TensorShape& shape)
+  BufferArg(const Matrix& matrix,
+            const TensorShape& shape,
+            ArgType argType = UNSPECIFIED)
      : buf_(
            const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
        valueType_(DataType<real>::value),
-        shape_(shape) {
+        shape_(shape),
+        argType_(argType) {
    CHECK_EQ(matrix.getElementCnt(), shape.getElements());
  }

-  BufferArg(const Vector& vector)
+  BufferArg(const Vector& vector, ArgType argType = UNSPECIFIED)
      : buf_(
            const_cast<void*>(reinterpret_cast<const void*>(vector.getData()))),
        valueType_(DataType<real>::value),
-        shape_(1) {
+        shape_(1),
+        argType_(argType) {
    shape_.setDim(0, vector.getSize());
  }

-  BufferArg(const IVector& vector)
+  BufferArg(const IVector& vector, ArgType argType = UNSPECIFIED)
      : buf_(
            const_cast<void*>(reinterpret_cast<const void*>(vector.getData()))),
        valueType_(VALUE_TYPE_INT32),
-        shape_(1) {
+        shape_(1),
+        argType_(argType) {
    shape_.setDim(0, vector.getSize());
  }

@@ -163,8 +172,10 @@ protected:
 // if a < b then value_.buf_[a] < value_.buf_[b]
 class SequenceIdArg : public BufferArg {
 public:
-  SequenceIdArg(void* buf, const TensorShape& shape)
-      : BufferArg(buf, VALUE_TYPE_INT32, shape) {
+  SequenceIdArg(void* buf,
+                const TensorShape& shape,
+                ArgType argType = UNSPECIFIED)
+      : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
    CHECK_EQ(shape_.ndims(), 1);
    numSeqs_ = shape_[0] - 1;
  }
@@ -187,11 +198,15 @@ public:
  SequenceArg(void* buf,
              ValueType valueType,
              const TensorShape& shape,
-              const SequenceIdArg& startPositions)
-      : BufferArg(buf, valueType, shape), startPositions_(startPositions) {}
+              const SequenceIdArg& startPositions,
+              ArgType argType = UNSPECIFIED)
+      : BufferArg(buf, valueType, shape, argType),
+        startPositions_(startPositions) {}

-  SequenceArg(const Matrix& matrix, const IVector& vector)
-      : BufferArg(matrix), startPositions_(vector) {}
+  SequenceArg(const Matrix& matrix,
+              const IVector& vector,
+              ArgType argType = UNSPECIFIED)
+      : BufferArg(matrix, argType), startPositions_(vector) {}

  ~SequenceArg() {}

@@ -214,8 +229,9 @@ public:
                  const BufferArg& col,
                  size_t nnz,
                  SparseDataFormat format,
-                  SparseDataType type)
-      : BufferArg(buf, valueType, shape),
+                  SparseDataType type,
+                  ArgType argType = UNSPECIFIED)
+      : BufferArg(buf, valueType, shape, argType),
        row_(row),
        col_(col),
        nnz_(nnz),
@@ -232,13 +248,13 @@ public:
    }
  }

-  SparseMatrixArg(const CpuSparseMatrix& sparse)
-      : BufferArg(sparse),
+  SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED)
+      : BufferArg(sparse, argType),
        row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
        col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}

-  SparseMatrixArg(const GpuSparseMatrix& sparse)
-      : BufferArg(sparse),
+  SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED)
+      : BufferArg(sparse, argType),
        row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
        col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}


--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -84,12 +84,9 @@ public:
    begin_pad_ = config.get<size_t>("begin_pad");
  }

-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(3, inputs.size());
    CHECK_EQ(1, outputs.size());
-    CHECK_EQ(0, inouts.size());

    CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data());
    CHECK_EQ(outputs[0].shape().ndims(), 2);
@@ -103,6 +100,7 @@ public:
    /// input and output has the same batch_size
    CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);

+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
    auto out_mat = outputs[0].matrix<Device>();
    auto in_mat = inputs[0].matrix<Device>();
    auto w_mat = !inputs[1].data()
@@ -194,12 +192,9 @@ public:
    total_pad_ = config.get<size_t>("total_pad");
  }

-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(3, inputs.size());
    CHECK_EQ(1, outputs.size());
-    CHECK_EQ(0, inouts.size());

    CHECK(outputs[0].data() && inputs[2].data());
    CHECK_EQ(outputs[0].shape().ndims(), 2);
@@ -214,6 +209,8 @@ public:
    /// dim of output = dim of input * context_length
    CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);

+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+
    auto out_grad_mat = outputs[0].matrix<Device>();
    auto in_grad_mat =
        !inputs[0].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)

--- a/paddle/function/CrossMapNormalOp.cpp
+++ b/paddle/function/CrossMapNormalOp.cpp
@@ -112,6 +112,8 @@ void CrossMapNormalGrad<DEVICE_TYPE_CPU>(real* inputsGrad,
 }

 /**
+ * \brief {o_0, o_1} = calc(i_0)
+ *
 * \param inputs[0] input value.
 * \param outputs[0] output value.
 * \param outputs[1] denoms.
@@ -125,17 +127,16 @@ public:
    pow_ = config.get<real>("pow");
  }

-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(1, inputs.size());
    CHECK_EQ(2, outputs.size());
-    CHECK_EQ(0, inouts.size());

    CHECK_EQ(inputs[0].shape().ndims(), 4);
    CHECK(inputs[0].shape() == outputs[0].shape());
    CHECK(inputs[0].shape() == outputs[1].shape());

+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+    CHECK_EQ(outputs[1].getArgType(), ASSIGN_TO);
    size_t samples = inputs[0].shape()[0];
    size_t channels = inputs[0].shape()[1];
    size_t height = inputs[0].shape()[2];
@@ -160,6 +161,8 @@ private:
 };

 /**
+ * \brief {o_0} = calc(i_0, i_1, i_2, i_3)
+ *
 * \param inputs[0] input value.
 * \param inputs[1] output value.
 * \param inputs[2] output grad.
@@ -175,12 +178,9 @@ public:
    pow_ = config.get<real>("pow");
  }

-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(4, inputs.size());
    CHECK_EQ(1, outputs.size());
-    CHECK_EQ(0, inouts.size());

    CHECK_EQ(inputs[0].shape().ndims(), 4);
    CHECK(inputs[0].shape() == inputs[1].shape());
@@ -188,6 +188,9 @@ public:
    CHECK(inputs[0].shape() == inputs[3].shape());
    CHECK(inputs[0].shape() == outputs[0].shape());

+    // TODO(hedaoyuan): need to support ASSIGN_TO mode.
+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+
    size_t samples = inputs[0].shape()[0];
    size_t channels = inputs[0].shape()[1];
    size_t height = inputs[0].shape()[2];

--- a/paddle/function/Function.cpp
+++ b/paddle/function/Function.cpp
@@ -72,16 +72,18 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
  return *this;
 }

-void BufferArgs::addArg(const Matrix& arg, const TensorShape& shape) {
-  args_.push_back(std::make_shared<BufferArg>(arg, shape));
+void BufferArgs::addArg(const Matrix& arg,
+                        const TensorShape& shape,
+                        ArgType argType) {
+  args_.push_back(std::make_shared<BufferArg>(arg, shape, argType));
 }

-void BufferArgs::addArg(const CpuSparseMatrix& arg) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg));
+void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
+  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
 }

-void BufferArgs::addArg(const GpuSparseMatrix& arg) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg));
+void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
+  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
 }

 ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;

--- a/paddle/function/Function.h
+++ b/paddle/function/Function.h
@@ -49,7 +49,7 @@ protected:
 /**
 * Argument type for Function::calc().
 * A BufferArgs contains a set of BufferArg,
- * because Function can have multiple inputs, outputs and inouts.
+ * because Function can have multiple inputs and outputs.
 */
 class BufferArgs {
 public:
@@ -58,9 +58,11 @@ public:

  // add argument into BufferArgs
  // Tensor can be Matrix, Vector, IVector.
+  // For inputs, argType does not need to be specified.
+  // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
  template <typename Tensor>
-  void addArg(const Tensor& arg) {
-    args_.push_back(std::make_shared<BufferArg>(arg));
+  void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
+    args_.push_back(std::make_shared<BufferArg>(arg, argType));
  }

  // Add arg into BufferArgs and reshape the arg.
@@ -68,10 +70,12 @@ public:
  // For example, arg represents an image buffer,
  // but Matrix can only represent a two-dimensional Tensor.
  // So need an extra argument to describe the shape of the image buffer.
-  void addArg(const Matrix& arg, const TensorShape& shape);
+  void addArg(const Matrix& arg,
+              const TensorShape& shape,
+              ArgType argType = UNSPECIFIED);

-  void addArg(const CpuSparseMatrix& arg);
-  void addArg(const GpuSparseMatrix& arg);
+  void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
+  void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);

  // get argument
  const BufferArg& operator[](size_t num) const {

--- a/paddle/gserver/layers/ContextProjection.cpp
+++ b/paddle/gserver/layers/ContextProjection.cpp
@@ -122,14 +122,13 @@ void ContextProjection::forward() {

  BufferArgs inputs;
  BufferArgs outputs;
-  BufferArgs inouts;
  inputs.addArg(*in_->value);
  inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
                          w_ptr ? w_ptr->getHeight() : 0,
                          input_dim));
  inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
-  outputs.addArg(*out_->value);
-  forward_[0]->calc(inputs, outputs, inouts);
+  outputs.addArg(*out_->value, ADD_TO);
+  forward_[0]->calc(inputs, outputs);

  if (state_ && config_.context_start() < 0) {
    CHECK_EQ(1, in_->getNumSequences());
@@ -166,15 +165,14 @@ void ContextProjection::backward(const UpdateCallback& callback) {

  BufferArgs inputs;
  BufferArgs outputs;
-  BufferArgs inouts;
  inputs.addArg(CpuMatrix(
      in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim));
  inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
                          w_ptr ? w_ptr->getHeight() : 0,
                          input_dim));
  inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
-  outputs.addArg(*out_->grad);
-  backward_[0]->calc(inputs, outputs, inouts);
+  outputs.addArg(*out_->grad, ADD_TO);
+  backward_[0]->calc(inputs, outputs);

  if (config_.trainable_padding()) {
    weight_->getParameterPtr()->incUpdate(callback);

--- a/paddle/gserver/layers/NormProjectionLayer.cpp
+++ b/paddle/gserver/layers/NormProjectionLayer.cpp
@@ -59,7 +59,6 @@ bool CMRProjectionNormLayer::init(const LayerMap& layerMap,

 void CMRProjectionNormLayer::forward(PassType passType) {
  Layer::forward(passType);
-
  /* malloc memory for the output_ if necessary */
  /* note: one sample correspond to one row */
  MatrixPtr input = inputLayers_[0]->getOutputValue();
@@ -67,42 +66,36 @@ void CMRProjectionNormLayer::forward(PassType passType) {
  int size = getSize();
  resetOutput(batchSize, size);

-  MatrixPtr outV = getOutputValue();
-
  Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_);

  shape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_});

+  // prepare forward arguments
  BufferArgs inputs;
  BufferArgs outputs;
-  BufferArgs inouts;
-  inputs.addArg(*input, shape_);
-  outputs.addArg(*outV, shape_);
-  outputs.addArg(*denoms_, shape_);
+  inputs.addArg(*getInputValue(0), shape_);
+  outputs.addArg(*getOutputValue(), shape_, ASSIGN_TO);
+  outputs.addArg(*denoms_, shape_, ASSIGN_TO);

-  forward_[0]->calc(inputs, outputs, inouts);
+  forward_[0]->calc(inputs, outputs);
 }

 void CMRProjectionNormLayer::backward(const UpdateCallback& callback) {
  (void)callback;

-  if (NULL == inputLayers_[0]->getOutputGrad()) {
+  if (NULL == getInputGrad(0)) {
    return;
  }
-  /* Do derivation */
-  MatrixPtr preOutGrad = inputLayers_[0]->getOutputGrad();
-  MatrixPtr localGrad = getOutputGrad();
-  MatrixPtr localOutV = getOutputValue();
-  MatrixPtr preOutV = inputLayers_[0]->getOutputValue();

+  // prepare backward arguments
  BufferArgs inputs;
  BufferArgs outputs;
-  BufferArgs inouts;
-  inputs.addArg(*preOutV, shape_);
-  inputs.addArg(*localOutV, shape_);
-  inputs.addArg(*localGrad, shape_);
+  inputs.addArg(*getInputValue(0), shape_);
+  inputs.addArg(*getOutputValue(), shape_);
+  inputs.addArg(*getOutputGrad(), shape_);
  inputs.addArg(*denoms_, shape_);
-  outputs.addArg(*preOutGrad, shape_);
-  backward_[0]->calc(inputs, outputs, inouts);
+  outputs.addArg(*getInputGrad(0), shape_, ADD_TO);
+
+  backward_[0]->calc(inputs, outputs);
 }
 }  // namespace paddle