diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
index 3d28249f69c2bbc6efbd539bcc66dfa1282275bd..6576d18dae99e6f7c4abd8d388e420c22468e129 100644
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -57,58 +57,67 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
  * output Buffer or added to the output Buffer is determined by the
  * argType_ property of the output BufferArg.
  */
+
+// ArgType is only used by output BufferArg.
+// For input argument, argType_ is ignored.
+// For output argument, need to set the argType_ of the BufferArg.
+enum ArgType {
+  UNSPECIFIED = 0,
+  ASSIGN_TO = 1,
+  ADD_TO = 2,
+};
 class BufferArg {
 public:
-  // ArgType is only used by output BufferArg.
-  // For input argument, argType_ is ignored.
-  // For output argument, need to set the argType_ of the BufferArg.
-  enum ArgType {
-    UNSPECIFIED = 0,
-    ASSIGN_TO = 1,
-    ADD_TO = 2,
-  };
-
   void setArgType(ArgType argType) { argType_ = argType; }
 
   ArgType getArgType() const { return argType_; }
 
 public:
-  BufferArg(void* buf, ValueType valueType, const TensorShape& shape)
-      : buf_(buf), valueType_(valueType), shape_(shape) {}
+  BufferArg(void* buf,
+            ValueType valueType,
+            const TensorShape& shape,
+            ArgType argType = UNSPECIFIED)
+      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {}
 
   BufferArg(void* buf, ValueType valueType)
       : buf_(buf), valueType_(valueType) {}
 
-  BufferArg(const Matrix& matrix)
+  BufferArg(const Matrix& matrix, ArgType argType = UNSPECIFIED)
       : buf_(
             const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
         valueType_(DataType<real>::value),
-        shape_(2) {
+        shape_(2),
+        argType_(argType) {
     shape_.setDim(0, matrix.getHeight());
     shape_.setDim(1, matrix.getWidth());
   }
 
-  BufferArg(const Matrix& matrix, const TensorShape& shape)
+  BufferArg(const Matrix& matrix,
+            const TensorShape& shape,
+            ArgType argType = UNSPECIFIED)
       : buf_(
             const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
         valueType_(DataType<real>::value),
-        shape_(shape) {
+        shape_(shape),
+        argType_(argType) {
     CHECK_EQ(matrix.getElementCnt(), shape.getElements());
   }
 
-  BufferArg(const Vector& vector)
+  BufferArg(const Vector& vector, ArgType argType = UNSPECIFIED)
      : buf_(
            const_cast<void*>(reinterpret_cast<const void*>(vector.getData()))),
        valueType_(DataType<real>::value),
-        shape_(1) {
+        shape_(1),
+        argType_(argType) {
     shape_.setDim(0, vector.getSize());
   }
 
-  BufferArg(const IVector& vector)
+  BufferArg(const IVector& vector, ArgType argType = UNSPECIFIED)
      : buf_(
            const_cast<void*>(reinterpret_cast<const void*>(vector.getData()))),
        valueType_(VALUE_TYPE_INT32),
-        shape_(1) {
+        shape_(1),
+        argType_(argType) {
     shape_.setDim(0, vector.getSize());
   }
 
@@ -163,8 +172,10 @@ protected:
 // if a < b then value_.buf_[a] < value_.buf_[b]
 class SequenceIdArg : public BufferArg {
 public:
-  SequenceIdArg(void* buf, const TensorShape& shape)
-      : BufferArg(buf, VALUE_TYPE_INT32, shape) {
+  SequenceIdArg(void* buf,
+                const TensorShape& shape,
+                ArgType argType = UNSPECIFIED)
+      : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
     CHECK_EQ(shape_.ndims(), 1);
     numSeqs_ = shape_[0] - 1;
   }
@@ -187,11 +198,15 @@ public:
   SequenceArg(void* buf,
               ValueType valueType,
               const TensorShape& shape,
-              const SequenceIdArg& startPositions)
-      : BufferArg(buf, valueType, shape), startPositions_(startPositions) {}
+              const SequenceIdArg& startPositions,
+              ArgType argType = UNSPECIFIED)
+      : BufferArg(buf, valueType, shape, argType),
+        startPositions_(startPositions) {}
 
-  SequenceArg(const Matrix& matrix, const IVector& vector)
-      : BufferArg(matrix), startPositions_(vector) {}
+  SequenceArg(const Matrix& matrix,
+              const IVector& vector,
+              ArgType argType = UNSPECIFIED)
+      : BufferArg(matrix, argType), startPositions_(vector) {}
 
   ~SequenceArg() {}
 
@@ -214,8 +229,9 @@ public:
                   const BufferArg& col,
                   size_t nnz,
                   SparseDataFormat format,
-                  SparseDataType type)
-      : BufferArg(buf, valueType, shape),
+                  SparseDataType type,
+                  ArgType argType = UNSPECIFIED)
+      : BufferArg(buf, valueType, shape, argType),
         row_(row),
         col_(col),
         nnz_(nnz),
@@ -232,13 +248,13 @@ public:
     }
   }
 
-  SparseMatrixArg(const CpuSparseMatrix& sparse)
-      : BufferArg(sparse),
+  SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED)
+      : BufferArg(sparse, argType),
         row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
         col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
 
-  SparseMatrixArg(const GpuSparseMatrix& sparse)
-      : BufferArg(sparse),
+  SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED)
+      : BufferArg(sparse, argType),
         row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
         col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
 
diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp
index 1a483c47953b12b2b2621bb290236f93cbce6f94..b50098c52123a84830e14486cbc82ea3e4a7ba94 100644
--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -84,12 +84,9 @@ public:
     begin_pad_ = config.get<size_t>("begin_pad");
   }
 
-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ(3, inputs.size());
     CHECK_EQ(1, outputs.size());
-    CHECK_EQ(0, inouts.size());
 
     CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data());
     CHECK_EQ(outputs[0].shape().ndims(), 2);
@@ -103,6 +100,7 @@ public:
     /// input and output has the same batch_size
     CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
 
+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
     auto out_mat = outputs[0].matrix<Device>();
     auto in_mat = inputs[0].matrix<Device>();
     auto w_mat = !inputs[1].data()
@@ -194,12 +192,9 @@ public:
     total_pad_ = config.get<size_t>("total_pad");
   }
 
-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ(3, inputs.size());
     CHECK_EQ(1, outputs.size());
-    CHECK_EQ(0, inouts.size());
 
     CHECK(outputs[0].data() && inputs[2].data());
     CHECK_EQ(outputs[0].shape().ndims(), 2);
@@ -214,6 +209,8 @@ public:
     /// dim of output = dim of input * context_length
     CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
 
+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+
     auto out_grad_mat = outputs[0].matrix<Device>();
     auto in_grad_mat = !inputs[0].data()
                            ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
diff --git a/paddle/function/CrossMapNormalOp.cpp b/paddle/function/CrossMapNormalOp.cpp
index ec27db9c212965a7839d88691b935c29e3077a0b..23ee357a53d0d79f0ef17a08c65c939e9e369d33 100644
--- a/paddle/function/CrossMapNormalOp.cpp
+++ b/paddle/function/CrossMapNormalOp.cpp
@@ -112,6 +112,8 @@ void CrossMapNormalGrad(real* inputsGrad,
 }
 
 /**
+ * \brief {o_0, o_1} = calc(i_0)
+ *
  * \param inputs[0] input value.
  * \param outputs[0] output value.
  * \param outputs[1] denoms.
@@ -125,17 +127,16 @@ public:
     pow_ = config.get<real>("pow");
   }
 
-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ(1, inputs.size());
     CHECK_EQ(2, outputs.size());
-    CHECK_EQ(0, inouts.size());
 
     CHECK_EQ(inputs[0].shape().ndims(), 4);
     CHECK(inputs[0].shape() == outputs[0].shape());
     CHECK(inputs[0].shape() == outputs[1].shape());
 
+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+    CHECK_EQ(outputs[1].getArgType(), ASSIGN_TO);
     size_t samples = inputs[0].shape()[0];
     size_t channels = inputs[0].shape()[1];
     size_t height = inputs[0].shape()[2];
@@ -160,6 +161,8 @@ private:
 };
 
 /**
+ * \brief {o_0} = calc(i_0, i_1, i_2, i_3)
+ *
  * \param inputs[0] input value.
  * \param inputs[1] output value.
  * \param inputs[2] output grad.
@@ -175,12 +178,9 @@ public:
     pow_ = config.get<real>("pow");
   }
 
-  void calc(const BufferArgs& inputs,
-            const BufferArgs& outputs,
-            const BufferArgs& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ(4, inputs.size());
     CHECK_EQ(1, outputs.size());
-    CHECK_EQ(0, inouts.size());
 
     CHECK_EQ(inputs[0].shape().ndims(), 4);
     CHECK(inputs[0].shape() == inputs[1].shape());
@@ -188,6 +188,9 @@ public:
     CHECK(inputs[0].shape() == inputs[3].shape());
     CHECK(inputs[0].shape() == outputs[0].shape());
 
+    // TODO(hedaoyuan): need support ASSIGN_TO mode.
+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+
     size_t samples = inputs[0].shape()[0];
     size_t channels = inputs[0].shape()[1];
     size_t height = inputs[0].shape()[2];
diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp
index 2f56cfc1b5492c23d596f4bfb5a7ae9f066bd10b..46af4e946258a5a956e957f38bfe06e43e7464dc 100644
--- a/paddle/function/Function.cpp
+++ b/paddle/function/Function.cpp
@@ -72,16 +72,18 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
   return *this;
 }
 
-void BufferArgs::addArg(const Matrix& arg, const TensorShape& shape) {
-  args_.push_back(std::make_shared<BufferArg>(arg, shape));
+void BufferArgs::addArg(const Matrix& arg,
+                        const TensorShape& shape,
+                        ArgType argType) {
+  args_.push_back(std::make_shared<BufferArg>(arg, shape, argType));
 }
 
-void BufferArgs::addArg(const CpuSparseMatrix& arg) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg));
+void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
+  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
 }
 
-void BufferArgs::addArg(const GpuSparseMatrix& arg) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg));
+void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
+  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
 }
 
 ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
diff --git a/paddle/function/Function.h b/paddle/function/Function.h
index 88d6824aa393933f29eb62975627b80133a8783c..249f8f9cfad58bf596e8cdce9188409b5690f969 100644
--- a/paddle/function/Function.h
+++ b/paddle/function/Function.h
@@ -49,7 +49,7 @@ protected:
 /**
  * Argument type for Function::calc().
  * A BufferArgs contains a set of BufferArg,
- * because Function can have multiple inputs, outputs and inouts.
+ * because Function can have multiple inputs and outputs.
  */
 class BufferArgs {
 public:
@@ -58,9 +58,11 @@ public:
 
   // add argument into BufferArgs
   // Tensor can be Matrix, Vector, IVector.
+  // For inputs, do not need argType.
+  // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
   template <typename Tensor>
-  void addArg(const Tensor& arg) {
-    args_.push_back(std::make_shared<BufferArg>(arg));
+  void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
+    args_.push_back(std::make_shared<BufferArg>(arg, argType));
   }
 
   // Add arg into BufferArgs and reshape the arg.
@@ -68,10 +70,12 @@ public:
   // For example, arg represents an image buffer,
   // but Matrix can only represent a two-dimensional Tensor.
   // So need an extra argument to describe the shape of the image buffer.
-  void addArg(const Matrix& arg, const TensorShape& shape);
+  void addArg(const Matrix& arg,
+              const TensorShape& shape,
+              ArgType argType = UNSPECIFIED);
 
-  void addArg(const CpuSparseMatrix& arg);
-  void addArg(const GpuSparseMatrix& arg);
+  void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
+  void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
 
   // get argument
   const BufferArg& operator[](size_t num) const {
diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp
index 26783a42cac42d79d5280641c2512e505adb5239..04d06cf33fed105d87ba0a828f053e6c9f826689 100644
--- a/paddle/gserver/layers/ContextProjection.cpp
+++ b/paddle/gserver/layers/ContextProjection.cpp
@@ -122,14 +122,13 @@ void ContextProjection::forward() {
 
   BufferArgs inputs;
   BufferArgs outputs;
-  BufferArgs inouts;
   inputs.addArg(*in_->value);
   inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
                           w_ptr ? w_ptr->getHeight() : 0,
                           input_dim));
   inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
-  outputs.addArg(*out_->value);
-  forward_[0]->calc(inputs, outputs, inouts);
+  outputs.addArg(*out_->value, ADD_TO);
+  forward_[0]->calc(inputs, outputs);
 
   if (state_ && config_.context_start() < 0) {
     CHECK_EQ(1, in_->getNumSequences());
@@ -166,15 +165,14 @@ void ContextProjection::backward(const UpdateCallback& callback) {
 
   BufferArgs inputs;
   BufferArgs outputs;
-  BufferArgs inouts;
   inputs.addArg(CpuMatrix(
       in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim));
   inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
                          w_ptr ? w_ptr->getHeight() : 0,
                          input_dim));
   inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
-  outputs.addArg(*out_->grad);
-  backward_[0]->calc(inputs, outputs, inouts);
+  outputs.addArg(*out_->grad, ADD_TO);
+  backward_[0]->calc(inputs, outputs);
 
   if (config_.trainable_padding()) {
     weight_->getParameterPtr()->incUpdate(callback);
diff --git a/paddle/gserver/layers/NormProjectionLayer.cpp b/paddle/gserver/layers/NormProjectionLayer.cpp
index 573de152fd0d5eadb59450e34edfd066b85db600..4331009de7e98d2326049e563e46a55a20366507 100644
--- a/paddle/gserver/layers/NormProjectionLayer.cpp
+++ b/paddle/gserver/layers/NormProjectionLayer.cpp
@@ -59,7 +59,6 @@ bool CMRProjectionNormLayer::init(const LayerMap& layerMap,
 
 void CMRProjectionNormLayer::forward(PassType passType) {
   Layer::forward(passType);
-
   /* malloc memory for the output_ if necessary */
   /* note: one sample correspond to one row */
   MatrixPtr input = inputLayers_[0]->getOutputValue();
@@ -67,42 +66,36 @@ void CMRProjectionNormLayer::forward(PassType passType) {
   int size = getSize();
   resetOutput(batchSize, size);
 
-  MatrixPtr outV = getOutputValue();
-
   Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_);
 
   shape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_});
 
+  // prepare forward arguments
   BufferArgs inputs;
   BufferArgs outputs;
-  BufferArgs inouts;
-  inputs.addArg(*input, shape_);
-  outputs.addArg(*outV, shape_);
-  outputs.addArg(*denoms_, shape_);
+  inputs.addArg(*getInputValue(0), shape_);
+  outputs.addArg(*getOutputValue(), shape_, ASSIGN_TO);
+  outputs.addArg(*denoms_, shape_, ASSIGN_TO);
 
-  forward_[0]->calc(inputs, outputs, inouts);
+  forward_[0]->calc(inputs, outputs);
 }
 
 void CMRProjectionNormLayer::backward(const UpdateCallback& callback) {
   (void)callback;
 
-  if (NULL == inputLayers_[0]->getOutputGrad()) {
+  if (NULL == getInputGrad(0)) {
     return;
   }
-  /* Do derivation */
-  MatrixPtr preOutGrad = inputLayers_[0]->getOutputGrad();
-  MatrixPtr localGrad = getOutputGrad();
-  MatrixPtr localOutV = getOutputValue();
-  MatrixPtr preOutV = inputLayers_[0]->getOutputValue();
+  // prepare backward arguments
   BufferArgs inputs;
   BufferArgs outputs;
-  BufferArgs inouts;
-  inputs.addArg(*preOutV, shape_);
-  inputs.addArg(*localOutV, shape_);
-  inputs.addArg(*localGrad, shape_);
+  inputs.addArg(*getInputValue(0), shape_);
+  inputs.addArg(*getOutputValue(), shape_);
+  inputs.addArg(*getOutputGrad(), shape_);
   inputs.addArg(*denoms_, shape_);
-  outputs.addArg(*preOutGrad, shape_);
-  backward_[0]->calc(inputs, outputs, inouts);
+  outputs.addArg(*getInputGrad(0), shape_, ADD_TO);
+
+  backward_[0]->calc(inputs, outputs);
 }
 
 }  // namespace paddle
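
Note on usage (not part of the patch): the convention introduced above is that input BufferArgs carry no ArgType, while every output BufferArg states how the Function writes it. A minimal caller-side sketch, reusing identifiers that already appear in the layer code of this diff (shape_, denoms_, forward_[0], getInputValue, getOutputValue):

    // Inputs: argType is ignored, so only the buffer (and optionally a shape) is given.
    BufferArgs inputs;
    inputs.addArg(*getInputValue(0), shape_);

    // Outputs: the caller decides whether the Function overwrites (ASSIGN_TO)
    // or accumulates into (ADD_TO) the destination buffer.
    BufferArgs outputs;
    outputs.addArg(*getOutputValue(), shape_, ASSIGN_TO);
    outputs.addArg(*denoms_, shape_, ASSIGN_TO);

    // The former `inouts` argument is gone; calc takes inputs and outputs only.
    forward_[0]->calc(inputs, outputs);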
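
On the Function side, this patch only asserts the one mode each kernel currently supports (e.g. CHECK_EQ(outputs[0].getArgType(), ADD_TO)), and the TODO added in CrossMapNormalOp.cpp notes that ASSIGN_TO is not handled yet there. A hypothetical sketch of what dispatching on the mode inside a calc implementation could eventually look like (the branch bodies are placeholders, not existing code):

    void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
      CHECK_EQ(1, inputs.size());
      CHECK_EQ(1, outputs.size());
      if (outputs[0].getArgType() == ASSIGN_TO) {
        // overwrite outputs[0] with the result
      } else if (outputs[0].getArgType() == ADD_TO) {
        // accumulate the result into outputs[0]
      } else {
        LOG(FATAL) << "the argType of the output must be specified";
      }
    }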