From 9edfd2003153dd122b01300c3973249be1abd8c1 Mon Sep 17 00:00:00 2001
From: xutianbing
Date: Tue, 10 Jan 2017 20:10:16 -0800
Subject: [PATCH] use Daoyuan's SequenceArg.

---
 paddle/function/BufferArg.h                 |  3 ++-
 paddle/function/ContextProjectionOp.cpp     | 26 ++++++++++-----------
 paddle/function/ContextProjectionOpTest.cpp |  6 ++---
 paddle/function/Function.cpp                |  6 +++++
 paddle/function/Function.h                  |  4 ++++
 paddle/gserver/layers/ContextProjection.cpp |  3 +--
 6 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
index 12352ba29e3..f3a4350e12d 100644
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -190,7 +190,7 @@ private:
   size_t numSeqs_;
 };
 
-// sequence data
+// sequence data {seqId(vec), buf(matrix)}
 class SequenceArg : public BufferArg {
 public:
   SequenceArg(void* buf,
@@ -210,6 +210,7 @@ public:
   void* getIdBuf() const { return startPositions_.data(); }
   size_t numSeqs() const { return startPositions_.numSeqs(); }
+  const SequenceIdArg& getSequenceIds() const { return startPositions_; }
 
 private:
   SequenceIdArg startPositions_;
 };
diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp
index 42b78eacfd5..177708d00f8 100644
--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -17,7 +17,6 @@ limitations under the License. */
 #include "paddle/math/Vector.h"
 
 namespace paddle {
-
 /**
  * Context Projection Forward with CPU Matrix Device.
  *
@@ -208,10 +207,10 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat,
  * Context Projection Backward Function.
  * Update the weight gradient and input layer gradient with backprop
  *
- * \param inputs[0]        input sequence.
- * \param inputs[1]        output layer grad.
- * \param outputs[0]       input layer grad.
- * \param outputs[1]       weight grad.
+ * \param inputs[0].seq    input sequence.
+ * \param inputs[0].matrix output layer grad.
+ * \param outputs[0]       input layer grad.
+ * \param outputs[1]       weight grad.
  */
 template <DeviceType Device>
 class ContextProjectionBackwardFunc : public FunctionBase {
@@ -225,27 +224,28 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ((size_t)2, inputs.size());
+    CHECK_EQ((size_t)1, inputs.size());
     CHECK_EQ((size_t)2, outputs.size());
 
-    CHECK(inputs[0].data() && inputs[1].data());
-    CHECK_EQ(inputs[0].shape().ndims(), (size_t)1);
-    CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
+    const auto seqArg = dynamic_cast<const SequenceArg&>(inputs[0]);
+    CHECK(seqArg.data() && inputs[0].data());
+    CHECK_EQ(seqArg.shape().ndims(), (size_t)2);
+    CHECK_EQ(seqArg.getSequenceIds().shape().ndims(), (size_t)1);
     CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
     CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
 
     /// dim of input grad == dim of weight
     CHECK_EQ(outputs[0].shape()[1], outputs[1].shape()[1]);
     /// input and output grad has the same batch_size
-    CHECK_EQ(outputs[0].shape()[0], inputs[1].shape()[0]);
+    CHECK_EQ(outputs[0].shape()[0], seqArg.shape()[0]);
     /// dim of output val = dim of input grad * context_length
-    CHECK_EQ(inputs[1].shape()[1], outputs[0].shape()[1] * context_length_);
+    CHECK_EQ(seqArg.shape()[1], outputs[0].shape()[1] * context_length_);
 
     CHECK_EQ(outputs[0].getArgType(), ADD_TO);
     CHECK_EQ(outputs[1].getArgType(), ADD_TO);
 
-    const auto seq_vec = inputs[0].vector<int, Device>();
-    const auto out_grad_mat = inputs[1].matrix<Device>();
+    const auto seq_vec = seqArg.getSequenceIds().vector<int, Device>();
+    const auto out_grad_mat = seqArg.matrix<Device>();
     auto in_grad_mat = !outputs[0].data()
             ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
             : outputs[0].matrix<Device>();
diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp
index c8d5b4f2786..50ca2040050 100644
--- a/paddle/function/ContextProjectionOpTest.cpp
+++ b/paddle/function/ContextProjectionOpTest.cpp
@@ -122,8 +122,7 @@ void testMatrixProjectionBackward(int context_start,
 
   BufferArgs cpu_inputs;
   BufferArgs cpu_outputs;
-  cpu_inputs.addArg(*cpu_seq);
-  cpu_inputs.addArg(cpu_out_grad);
+  cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
   cpu_outputs.addArg(cpu_in_grad, ADD_TO);
   cpu_outputs.addArg(
       cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
@@ -132,8 +131,7 @@ void testMatrixProjectionBackward(int context_start,
 
   BufferArgs gpu_inputs;
   BufferArgs gpu_outputs;
-  gpu_inputs.addArg(*gpu_seq);
-  gpu_inputs.addArg(gpu_out_grad);
+  gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
   gpu_outputs.addArg(gpu_in_grad, ADD_TO);
   gpu_outputs.addArg(
       gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp
index dbe3a4e9f60..3b659084653 100644
--- a/paddle/function/Function.cpp
+++ b/paddle/function/Function.cpp
@@ -90,6 +90,12 @@ void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
   args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
 }
 
+void BufferArgs::addArg(const Matrix& matrix,
+                        const IVector& vector,
+                        ArgType argType) {
+  args_.push_back(std::make_shared<SequenceArg>(matrix, vector, argType));
+}
+
 ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
 
 }  // namespace paddle
diff --git a/paddle/function/Function.h b/paddle/function/Function.h
index 249f8f9cfad..c15045143bb 100644
--- a/paddle/function/Function.h
+++ b/paddle/function/Function.h
@@ -77,6 +77,10 @@ public:
   void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
   void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
 
+  void addArg(const Matrix& matrix,
+              const IVector& vector,
+              ArgType argType = UNSPECIFIED);
+
   // get argument
   const BufferArg& operator[](size_t num) const {
     CHECK_LT(num, args_.size());
diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp
index def7c15ca82..17fd36ef563 100644
--- a/paddle/gserver/layers/ContextProjection.cpp
+++ b/paddle/gserver/layers/ContextProjection.cpp
@@ -166,8 +166,7 @@ void ContextProjection::backward(const UpdateCallback& callback) {
 
   BufferArgs inputs;
   BufferArgs outputs;
-  inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
-  inputs.addArg(*out_->grad);
+  inputs.addArg(*out_->grad, *in_->sequenceStartPositions->getVector(useGpu_));
   outputs.addArg(
       CpuMatrix(
           in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim),
-- 
GitLab
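
Reviewer note (not part of the patch): a minimal sketch of how the repacked
arguments are meant to be driven after this change, mirroring the
ContextProjection::backward hunk above. The buffers out_grad, seq_starts,
in_grad, w_grad and the func pointer are hypothetical stand-ins; only the
BufferArgs/SequenceArg calls are the ones introduced by the patch.

    // Sketch under the assumptions above -- not code from the patch.
    BufferArgs inputs;
    BufferArgs outputs;

    // The new addArg(Matrix, IVector) overload wraps the output grad and its
    // sequence start positions into a single SequenceArg (inputs[0]),
    // replacing the former pair of separate addArg calls.
    inputs.addArg(out_grad, seq_starts);

    // Both gradients are accumulated in place, hence ADD_TO.
    outputs.addArg(in_grad, ADD_TO);
    outputs.addArg(w_grad, ADD_TO);

    func->calc(inputs, outputs);

    // Inside calc(), the function recovers the two views from inputs[0]:
    //   const auto seqArg = dynamic_cast<const SequenceArg&>(inputs[0]);
    //   const auto seq_vec = seqArg.getSequenceIds().vector<int, Device>();
    //   const auto out_grad_mat = seqArg.matrix<Device>();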