diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp
index f1e42cad7259e3fb9c862bb53a6da254dabc5c1e..75c09108b1bb8d8f3948d093dfc89a1cd8758125 100644
--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -18,6 +18,10 @@ limitations under the License. */
 
 namespace paddle {
 
+/**
+ * Context Projection Forward with CPU Matrix Device.
+ *
+ */
 template <>
 void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
                                                const CpuMatrix& input_mat,
@@ -70,11 +74,29 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
 }
 
 /**
- * \param outputs[0] output value.
+ * Paddle Function for Context Projection Forward.
+ * Calculate the value for the output layer with context projection.
+ *
+ * What is Context Projection?
+ * For example, assume the input (x) has 4 words and the dimension of each
+ * word representation is 2. If we use zeros to pad instead of learned
+ * weights to pad, and the context_length is 3, the output (y) is:
  *
- * \param inputs[0] input value.
- * \param inputs[1] input weight.
- * \param inputs[2] input sequence.
+ * @code
+ *  x = [a1, a2;
+ *       b1, b2;
+ *       c1, c2;
+ *       d1, d2]
+ *  y = [0, 0, a1, a2, b1, b2;
+ *       a1, a2, b1, b2, c1, c2;
+ *       b1, b2, c1, c2, d1, d2;
+ *       c1, c2, d1, d2, 0, 0]
+ * @endcode
+ *
+ * \param outputs[0] output value.
+ * \param inputs[0] input value.
+ * \param inputs[1] input weight.
+ * \param inputs[2] input sequence.
  */
 template <DeviceType Device>
 class ContextProjectionForwardFunc : public FunctionBase {
@@ -123,6 +145,10 @@ private:
   size_t begin_pad_;
 };
 
+/**
+ * Context Projection Backward with CPU Matrix Device.
+ *
+ */
 template <>
 <<<<<<< HEAD
 void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
@@ -178,10 +204,13 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
 }
 
 /**
- * \param inputs[0] input sequence.
- * \param inputs[1] output grad.
- * \param inouts[0] input grad.
- * \param inouts[1] weight grad.
+ * Context Projection Backward Function.
+ * Update the weight gradient and input layer gradient with backprop.
+ *
+ * \param inputs[0] input sequence.
+ * \param inputs[1] output grad.
+ * \param inouts[0] input grad.
+ * \param inouts[1] weight grad.
  */
 template <DeviceType Device>
 class ContextProjectionBackwardFunc : public FunctionBase {
@@ -194,7 +223,6 @@ public:
     total_pad_ = config.get<size_t>("total_pad");
   }
 
-<<<<<<< HEAD
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ((size_t)3, inputs.size());
     CHECK_EQ((size_t)1, outputs.size());
@@ -213,42 +241,6 @@ public:
     CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
     CHECK_EQ(outputs[0].getArgType(), ADD_TO);
 
-=======
-  void calc(const Arguments& inputs,
-            const Arguments& outputs,
-            const Arguments& inouts) override {
-    CHECK_EQ(2, inputs.size());
-    CHECK_EQ(0, outputs.size());
-    CHECK_EQ(2, inouts.size());
-
-    CHECK(inputs[0].getData() && inputs[1].getData());
-    CHECK_EQ(inputs[0].dims_.size(), 1);
-    CHECK_EQ(inputs[1].dims_.size(), 2);
-    CHECK_EQ(inouts[0].dims_.size(), 2);
-    CHECK_EQ(inouts[1].dims_.size(), 2);
-
-    /// dim of input grad == dim of weight grad
-    CHECK_EQ(inouts[0].dims_[1], inouts[1].dims_[1]);
-    /// input grad and output grad have the same batch_size
-    CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
-    /// dim of output = dim of input * context_length
-    CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
-
-    typename SequenceT<Device>::type seq_vec(
-        inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
-    const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
-    auto in_grad_mat =
-        !inouts[0].getData()
-            ? nullptr
-            : std::make_shared<typename MatrixT<Device>::type>(
-                  inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
-    auto w_grad_mat =
-        !inouts[1].getData()
-            ? nullptr
-            : std::make_shared<typename MatrixT<Device>::type>(
-                  inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]);
->>>>>>> Wei Xu's comments, set up right inouts.
 
     auto out_grad_mat = outputs[0].matrix<Device>();
     auto in_grad_mat =
@@ -279,6 +271,9 @@ private:
 
 #if 0
 /**
+ * Context Projection Backward Data Function.
+ * Update gradient of the input layer with backprop.
+ *
  * \param inouts[0] input grad.
  * \param inputs[0] input sequence.
  * \param inputs[1] output grad.
@@ -326,6 +321,9 @@ private:
 };
 
 /**
+ * Context Projection Backward Weight Function.
+ * Update weight gradient with backprop.
+ *
  * \param inouts[0] weight grad.
  * \param inputs[0] input sequence.
  * \param inputs[1] output grad.
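
To make the zero-padding example in the forward Doxygen comment concrete, here is a minimal standalone sketch of the same computation. It is not Paddle's implementation: the function name, the flat row-major layout, and the context_start parameter are assumptions for illustration (the real kernel additionally handles per-sequence offsets and learned padding weights).

#include <cstddef>
#include <vector>

// Hypothetical sketch (not Paddle's API): zero-padded context projection for
// a single sequence. For word i, context slot c reads source row
// i + c + context_start; rows outside [0, num_words) contribute zeros.
std::vector<float> context_projection_forward(const std::vector<float>& in,
                                              size_t num_words,
                                              size_t dim,
                                              size_t context_length,
                                              int context_start) {
  std::vector<float> out(num_words * context_length * dim, 0.0f);
  for (size_t i = 0; i < num_words; ++i) {
    for (size_t c = 0; c < context_length; ++c) {
      int src = static_cast<int>(i + c) + context_start;
      if (src < 0 || src >= static_cast<int>(num_words)) continue;  // zero pad
      for (size_t d = 0; d < dim; ++d) {
        out[(i * context_length + c) * dim + d] =
            in[static_cast<size_t>(src) * dim + d];
      }
    }
  }
  return out;
}

With num_words = 4, dim = 2, context_length = 3, and context_start = -1, this reproduces the y matrix in the comment: row 0 is [0, 0, a1, a2, b1, b2] because source row -1 falls outside the sequence.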
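
The backward function's shape checks (output dim = input dim * context_length, and the ADD_TO argument type) correspond to scattering each output-gradient slice back onto the input row it was gathered from. Below is a sketch of that accumulation under the same assumed layout; again a hypothetical illustration, not Paddle's kernel, which also accumulates a weight gradient when learned padding is used.

#include <cstddef>
#include <vector>

// Hypothetical backward-data sketch: accumulate (ADD_TO semantics) each slice
// of out_grad into the input-gradient row it was copied from in the forward
// pass; padded slots have no source row and are skipped.
void context_projection_backward_data(const std::vector<float>& out_grad,
                                      std::vector<float>* in_grad,
                                      size_t num_words,
                                      size_t dim,
                                      size_t context_length,
                                      int context_start) {
  for (size_t i = 0; i < num_words; ++i) {
    for (size_t c = 0; c < context_length; ++c) {
      int src = static_cast<int>(i + c) + context_start;
      if (src < 0 || src >= static_cast<int>(num_words)) continue;
      for (size_t d = 0; d < dim; ++d) {
        (*in_grad)[static_cast<size_t>(src) * dim + d] +=
            out_grad[(i * context_length + c) * dim + d];
      }
    }
  }
}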