From 86fa8c05280e18c6fc4a569931d9f50fd9467546 Mon Sep 17 00:00:00 2001
From: xutianbing
Date: Thu, 5 Jan 2017 11:05:18 -0800
Subject: [PATCH] Wei Xu's comments, set up right inouts.

---
 paddle/function/ContextProjectionOp.cpp     | 124 +++++++++++++-------
 paddle/function/ContextProjectionOp.h       |  20 ++--
 paddle/function/ContextProjectionOpGpu.cu   |  25 ++--
 paddle/function/ContextProjectionOpTest.cpp |  34 +++---
 4 files changed, 124 insertions(+), 79 deletions(-)

diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp
index cb448562ebb..8803ea78963 100644
--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -70,10 +70,11 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
 }
 
 /**
+ * \param outputs[0] output value.
+ *
  * \param inputs[0] input value.
  * \param inputs[1] input weight.
  * \param inputs[2] input sequence.
- * \param outputs[0] output value.
  */
 template <DeviceType Device>
 class ContextProjectionForwardFunc : public FunctionBase {
@@ -123,7 +124,8 @@ private:
 };
 
 template <>
-void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
+<<<<<<< HEAD
+void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
                                                 CpuMatrix& in_grad_mat,
                                                 CpuMatrix& w_grad_mat,
                                                 const CpuIVector& seq_vec,
@@ -176,10 +178,10 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
 }
 
 /**
- * \param inputs[0] input grad.
- * \param inputs[1] weight grad.
- * \param inputs[2] input sequence.
- * \param outputs[0] output value.
+ * \param inputs[0] input sequence.
+ * \param inputs[1] output grad.
+ * \param inouts[0] input grad.
+ * \param inouts[1] weight grad.
  */
 template <DeviceType Device>
 class ContextProjectionBackwardFunc : public FunctionBase {
@@ -192,6 +194,7 @@ public:
     total_pad_ = config.get<size_t>("total_pad");
   }
 
+<<<<<<< HEAD
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ((size_t)3, inputs.size());
     CHECK_EQ((size_t)1, outputs.size());
@@ -210,6 +213,42 @@
     CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
     CHECK_EQ(outputs[0].getArgType(), ADD_TO);
 
+=======
+  void calc(const Arguments& inputs,
+            const Arguments& outputs,
+            const Arguments& inouts) override {
+    CHECK_EQ(2, inputs.size());
+    CHECK_EQ(0, outputs.size());
+    CHECK_EQ(2, inouts.size());
+
+    CHECK(inputs[0].getData() && inputs[1].getData());
+    CHECK_EQ(inputs[0].dims_.size(), 1);
+    CHECK_EQ(inputs[1].dims_.size(), 2);
+    CHECK_EQ(inouts[0].dims_.size(), 2);
+    CHECK_EQ(inouts[1].dims_.size(), 2);
+
+    /// dim of input grad == dim of weight grad
+    CHECK_EQ(inouts[0].dims_[1], inouts[1].dims_[1]);
+    /// input grad and output grad have the same batch_size
+    CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
+    /// dim of output = dim of input * context_length
+    CHECK_EQ(inputs[1].dims_[1], inputs[0].dims_[1] * context_length_);
+
+    typename SequenceT<Device>::type seq_vec(
+        inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
+    const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
+        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
+    auto in_grad_mat =
+        !inouts[0].getData()
+            ? nullptr
+            : std::make_shared<typename MatrixT<Device>::type>(
+                  inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
+    auto w_grad_mat =
+        !inouts[1].getData()
+            ? nullptr
+            : std::make_shared<typename MatrixT<Device>::type>(
+                  inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]);
+>>>>>>> Wei Xu's comments, set up right inouts.
 
     auto out_grad_mat = outputs[0].matrix<Device>();
     auto in_grad_mat =
@@ -240,9 +279,9 @@ private:
 
 #if 0
 /**
- * \param inputs[0] input grad.
- * \param inputs[1] input sequence.
- * \param outputs[0] output grad.
+ * \param inouts[0] input grad.
+ * \param inputs[0] input sequence.
+ * \param inputs[1] output grad.
  */
 template <DeviceType Device>
 class ContextProjectionBackwardDataFunc : public FunctionBase {
@@ -255,23 +294,24 @@ public:
   void calc(const Arguments& inputs,
             const Arguments& outputs,
             const Arguments& inouts) override {
-    CHECK_EQ(2, static_cast<int>(inputs.size()));
-    CHECK_EQ(1, static_cast<int>(outputs.size()));
-    CHECK_EQ(0, static_cast<int>(inouts.size()));
-    CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
-    CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
-    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
-    /// input and output has the same batch_size
-    CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
+    CHECK_EQ(2, inputs.size());
+    CHECK_EQ(0, outputs.size());
+    CHECK_EQ(1, inouts.size());
+
+    CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
+    CHECK_EQ(inputs[0].dims_.size(), 1);
+    CHECK_EQ(inputs[1].dims_.size(), 2);
+    CHECK_EQ(inouts[0].dims_.size(), 2);
+    CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
+    /// input and output grad have the same batch_size
+    CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
 
-    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
-    const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
     typename SequenceT<Device>::type seq_vec(
-        inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
+        inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
+    const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
+        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
+    auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
+        inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
 
     ContextProjectionBackwardData<Device>(out_grad_mat.get(),
                                           in_grad_mat.get(),
@@ -286,9 +326,9 @@ private:
 };
 
 /**
- * \param inputs[0] weight grad.
- * \param inputs[1] input sequence.
- * \param outputs[0] output grad.
+ * \param inouts[0] weight grad.
+ * \param inputs[0] input sequence.
+ * \param inputs[1] output grad.
  */
 template <DeviceType Device>
 class ContextProjectionBackwardWeightFunc : public FunctionBase {
@@ -303,22 +343,22 @@ public:
   void calc(const Arguments& inputs,
             const Arguments& outputs,
             const Arguments& inouts) override {
-    CHECK_EQ(2, static_cast<int>(inputs.size()));
-    CHECK_EQ(1, static_cast<int>(outputs.size()));
-    CHECK_EQ(0, static_cast<int>(inouts.size()));
-
-    CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
-    CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
-    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
-
-    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
-    auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
+    CHECK_EQ(2, inputs.size());
+    CHECK_EQ(0, outputs.size());
+    CHECK_EQ(1, inouts.size());
+
+    CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
+    CHECK_EQ(inputs[0].dims_.size(), 1);
+    CHECK_EQ(inputs[1].dims_.size(), 2);
+    CHECK_EQ(inouts[0].dims_.size(), 2);
+    CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
+
     typename SequenceT<Device>::type seq_vec(
-        inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
+        inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
+    const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
+        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
+    auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
+        inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
 
     ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
                                             w_grad_mat.get(),
diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h
index a558df5e072..8e956c6c6fd 100644
--- a/paddle/function/ContextProjectionOp.h
+++ b/paddle/function/ContextProjectionOp.h
@@ -21,14 +21,14 @@ namespace paddle {
 /**
  * \brief Context Projection Forward.
  *
- * \param[out]  outputs           output data.
- * \param[in]   input             input data.
- * \param[in]   weight            input weight.
- * \param[in]   sequence          input data.
- * \param[in]   context_length    consecutive rows for concatenation.
- * \param[in]   context_start     context start position.
- * \param[in]   begin_pad         begining pad position.
- * \param[in]   is_padding        whether padding 0 or not.
+ * \param[in/out] outputs           output data.
+ * \param[in]     input             input data.
+ * \param[in]     weight            input weight.
+ * \param[in]     sequence          input data.
+ * \param[in]     context_length    consecutive rows for concatenation.
+ * \param[in]     context_start     context start position.
+ * \param[in]     begin_pad         begining pad position.
+ * \param[in]     is_padding        whether padding 0 or not.
  *
  */
 template <DeviceType DType>
@@ -68,7 +68,7 @@ void ContextProjectionBackward(
 
 template <DeviceType DType>
 void ContextProjectionBackwardData(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     const typename Tensor<real, DType>::Vector& sequence,
     size_t context_length,
@@ -76,7 +76,7 @@ void ContextProjectionBackward(
 
 template <DeviceType DType>
 void ContextProjectionBackwardWeight(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<real, DType>::Vector& seq_vec,
     size_t context_length,
diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu
index 6a4a01a6510..6194ad8e74a 100644
--- a/paddle/function/ContextProjectionOpGpu.cu
+++ b/paddle/function/ContextProjectionOpGpu.cu
@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
                               begin_pad);
 }
 
-__global__ void KeContextProjectionBackwardData(real* out_grad,
+__global__ void KeContextProjectionBackwardData(const real* out_grad,
                                                 const int* sequence,
                                                 real* in_grad,
-                                                int input_dim,
+                                                size_t input_dim,
                                                 int context_length,
                                                 int context_start) {
   int idx = threadIdx.x;
@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
   real value = 0;
 
   int instances = seq_end - seq_start + context_length - 1;
-  out_grad += seq_start * input_dim * context_length;
+  auto out = const_cast<real*>(out_grad);
+  out += seq_start * input_dim * context_length;
   in_grad += seq_start * input_dim;
   for (int k = 0; k <= input_dim / block_size; k++) {
     if (idx < input_dim) {
@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
       int outx = (i - context_length) < 0 ? i : (context_length - 1);
       int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
       real* output_r =
-          out_grad + outy * input_dim * context_length + outx * input_dim;
+          out + outy * input_dim * context_length + outx * input_dim;
       for (int j = outy; j < seq_end - seq_start; j++) {
         value += output_r[idx];
         if (j - outy == outx) break;
@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
  * @param[in]  context_start    context start.
  *
  */
-void hl_context_projection_backward_data(real* out_grad,
+void hl_context_projection_backward_data(const real* out_grad,
                                          const int* sequence,
                                          real* input_grad,
                                          size_t num_sequences,
@@ -216,7 +217,8 @@ void hl_context_projection_backward_data(real* out_grad,
 }
 
 template <>
-void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+<<<<<<< HEAD
+void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                     GpuMatrix& in_grad,
                                                     const GpuIVector& sequence,
                                                     size_t context_length,
@@ -231,7 +233,7 @@
 }
 
 template <int THREADS_X, int THREADS_Y>
-__global__ void KeContextProjectionBackwardWeight(real* out_grad,
+__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
                                                   const int* sequence,
                                                   real* w_grad,
                                                   int num_sequences,
@@ -254,7 +256,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
   for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
     int seq_start = sequence[seqId];
     int seq_end = sequence[seqId+1];
-    output_r = out_grad + seq_start * w_dim * context_length;
+    output_r = const_cast<real*>(out_grad)
+        + seq_start * w_dim * context_length;
 
     if (context_start < 0) {
       if (padId + context_start < 0) {
@@ -318,7 +321,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
  *                                beginning.
  *
  */
-void hl_context_projection_backward_weight(real* out_grad,
+void hl_context_projection_backward_weight(const real* out_grad,
                                            const int* sequence,
                                            real* w_grad,
                                            size_t num_sequences,
@@ -346,7 +349,7 @@
 
 template <>
 void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
-    GpuMatrix& out_grad,
+    const GpuMatrix& out_grad,
     GpuMatrix& w_grad,
     const GpuIVector& seq_vec,
     size_t context_length,
@@ -365,7 +368,7 @@
 }
 
 template <>
-void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                 GpuMatrix& in_grad,
                                                 GpuMatrix& w_grad,
                                                 const GpuIVector& sequence,
diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp
index 6223d2fd23a..169c1dd505a 100644
--- a/paddle/function/ContextProjectionOpTest.cpp
+++ b/paddle/function/ContextProjectionOpTest.cpp
@@ -62,16 +62,18 @@ void testMatrixProjectionForward(int context_start,
                 Dims{pad, input_dim}),
          Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
                 Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
+      {},
+      {Tensor(cpu_out.getData(),
+              Dims{batch_size, input_dim * context_length})});
   compare.getGpuFunction()->calc(
       {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
        Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
              Dims{pad, input_dim}),
        Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
              Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
+      {},
+      {Tensor(gpu_out.getData(),
+              Dims{batch_size, input_dim * context_length})});
 
   autotest::TensorCheckEqual(cpu_out, gpu_out);
 }
@@ -118,24 +120,24 @@ void testMatrixProjectionBackward(int context_start,
   }
 
   compare.getCpuFunction()->calc(
+      {Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
+              Dims{cpu_seq->getSize()}),
+       Tensor(cpu_out_grad.getData(),
+              Dims{batch_size, input_dim * context_length})},
+      {},
       {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
        Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-              Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {});
+              Dims{pad, input_dim})});
 
   compare.getGpuFunction()->calc(
+      {Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
+              Dims{gpu_seq->getSize()}),
+       Tensor(gpu_out_grad.getData(),
+              Dims{batch_size, input_dim * context_length})},
+      {},
       {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
        Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-              Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {});
+              Dims{pad, input_dim})});
 
   autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
   if (is_padding) {
-- 
GitLab
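
Under the inouts-based interface exercised in this patch, a backward call passes the read-only sequence and output-gradient tensors through the inputs list, leaves outputs empty, and hands the gradients that are accumulated in place through the inouts list. The fragment below is a minimal sketch of that calling convention, modeled on the test code above and assuming the same Tensor/Dims helpers and a FunctionBase instance (here called "backward") already configured for the context-projection backward pass; the buffer names are illustrative only, not part of the patch.

    // Sketch only: mirrors the calc(inputs, outputs, inouts) ordering used in
    // ContextProjectionOpTest.cpp. backward, seq, out_grad, in_grad and w_grad
    // are assumed to be set up by the caller exactly as in that test.
    backward->calc(
        // inputs (read-only): sequence positions and output gradient
        {Tensor(reinterpret_cast<real*>(seq->getData()), Dims{seq->getSize()}),
         Tensor(out_grad.getData(),
                Dims{batch_size, input_dim * context_length})},
        // outputs: none for the backward pass
        {},
        // inouts (read-write): input gradient and weight gradient
        {Tensor(in_grad.getData(), Dims{batch_size, input_dim}),
         Tensor(w_grad.getData(), Dims{pad, input_dim})});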