diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp
index 3ada0b727bc6de2762586c0de8857d39ea28b004..40852e1ab447b7bee3b4939f27b063ba8b751661 100644
--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -127,13 +127,14 @@ private:
 
 template <>
 void ContextProjectionBackward<DEVICE_TYPE_CPU>(Tensor& out_grad,
-                                                const Tensor& in_grad,
-                                                const Tensor& w_grad,
+                                                Tensor& in_grad,
+                                                Tensor& w_grad,
                                                 const Tensor& sequence,
                                                 size_t context_length,
                                                 int context_start,
                                                 size_t begin_pad,
-                                                bool is_padding) {
+                                                bool is_padding,
+                                                size_t total_pad) {
   CHECK(out_grad.getData() && sequence.getData());
   CHECK_EQ(out_grad.dims_.size(), 2);
   CHECK_EQ(in_grad.dims_.size(), 2);
@@ -202,8 +203,8 @@ void ContextProjectionBackward(Tensor& out_grad,
 }
 
 /**
- * \param inputs[0] input value.
- * \param inputs[1] input weight.
+ * \param inputs[0] input grad.
+ * \param inputs[1] weight grad.
  * \param inputs[2] input sequence.
  * \param outputs[0] output value.
  */
@@ -215,6 +216,7 @@ public:
    context_start_ = config.get<int>("context_start");
    begin_pad_ = config.get<size_t>("begin_pad");
    is_padding_ = config.get<bool>("is_padding");
+    total_pad_ = config.get<size_t>("total_pad");
  }
 
  void calc(const Arguments& inputs,
@@ -225,13 +227,14 @@
    CHECK_EQ(0, inouts.size());
 
    ContextProjectionBackward<Device>((Tensor&)outputs[0],
-                                      inputs[0],
-                                      inputs[1],
+                                      (Tensor&)inputs[0],
+                                      (Tensor&)inputs[1],
                                      inputs[2],
                                      context_length_,
                                      context_start_,
                                      begin_pad_,
-                                      is_padding_);
+                                      is_padding_,
+                                      total_pad_);
  }
 
 private:
@@ -239,6 +242,7 @@ private:
  int context_start_;
  size_t begin_pad_;
  bool is_padding_;
+  size_t total_pad_;
 };
 
 /**
@@ -321,6 +325,9 @@ REGISTER_TYPED_FUNC(ContextProjectionBackward,
 REGISTER_TYPED_FUNC(ContextProjectionForward,
                     GPU,
                     ContextProjectionForwardFunc);
+REGISTER_TYPED_FUNC(ContextProjectionBackward,
+                    GPU,
+                    ContextProjectionBackwardFunc);
 REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
                     GPU,
                     ContextProjectionBackwardDataFunc);
diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h
index 5f4e0761db39f97912210efd88770083b180d274..e0f1beb496f2d3fb11aaf6d0af4097a6b91e0a1c 100644
--- a/paddle/function/ContextProjectionOp.h
+++ b/paddle/function/ContextProjectionOp.h
@@ -56,13 +56,14 @@ void ContextProjectionForward(Tensor& output,
  */
 template <DeviceType Device>
 void ContextProjectionBackward(Tensor& out_grad,
-                               const Tensor& in_grad,
-                               const Tensor& w_grad,
+                               Tensor& in_grad,
+                               Tensor& w_grad,
                                const Tensor& sequence,
                                size_t context_length,
                                int context_start,
                                size_t begin_pad,
-                               bool is_padding);
+                               bool is_padding,
+                               size_t total_pad);
 
 template <DeviceType Device>
 void ContextProjectionBackwardData(Tensor& out_grad,
diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu
index 438826299891ce0e8257d3b49e30d8697e7b095a..1e5916002c2fb87807b6a8267d5644b17f39453f 100644
--- a/paddle/function/ContextProjectionOpGpu.cu
+++ b/paddle/function/ContextProjectionOpGpu.cu
@@ -344,4 +344,32 @@ void ContextProjectionBackwardWeight(Tensor& out_grad,
                                      begin_pad);
 }
 
+template <>
+void ContextProjectionBackward<DEVICE_TYPE_GPU>(Tensor& out_grad,
+                                                Tensor& in_grad,
+                                                Tensor& w_grad,
+                                                const Tensor& sequence,
+                                                size_t context_length,
+                                                int context_start,
+                                                size_t begin_pad,
+                                                bool is_padding,
+                                                size_t total_pad) {
+  if (in_grad.getData()) {
+    ContextProjectionBackwardData<DEVICE_TYPE_GPU>(out_grad,
+                                                   in_grad,
+                                                   sequence,
+                                                   context_length,
+                                                   context_start);
+  }
+  if (is_padding && w_grad.getData()) {
+    ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(out_grad,
+                                                     w_grad,
+                                                     sequence,
+                                                     context_length,
+                                                     context_start,
+                                                     total_pad,
+                                                     begin_pad);
+  }
+}
+
 }  // namespace paddle
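
Note (illustration only, not part of the patch): with the specialization above, the GPU side now exposes the same merged entry point as the CPU side, and a Tensor whose data pointer is null simply skips the corresponding kernel. A minimal caller-side sketch, using only the registrar/FuncConfig/Tensor API that appears elsewhere in this diff; the grad matrices, sequence vector, and size variables are hypothetical placeholders:

    // Look up the merged backward; the "-GPU" suffix matches the new
    // REGISTER_TYPED_FUNC(ContextProjectionBackward, GPU, ...) above.
    std::shared_ptr<FunctionBase> backward(
        FunctionBase::funcRegistrar_.createByType(
            "ContextProjectionBackward-GPU"));
    backward->init(FuncConfig()
                       .set("context_length", context_length)
                       .set("context_start", context_start)
                       .set("begin_pad", begin_pad)
                       .set("is_padding", is_padding)
                       .set("total_pad", total_pad));
    // inputs = {in_grad, w_grad, sequence}, outputs = {out_grad}; passing a
    // nullptr data pointer for w_grad makes the weight branch a no-op.
    backward->calc({Tensor(in_grad->getData(), Dims{batch_size, input_dim}),
                    Tensor(nullptr, Dims{total_pad, input_dim}),
                    Tensor(reinterpret_cast<real*>(seq->getData()),
                           Dims{seq->getSize()})},
                   {Tensor(out_grad->getData(),
                           Dims{batch_size, input_dim * context_length})},
                   {});
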
diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp
index 997bcb1bd2d58ea420b15f2f0ddf2262cfb9e757..372fc21cf1f534793fb8be5d51cb91e1ff715780 100644
--- a/paddle/function/ContextProjectionOpTest.cpp
+++ b/paddle/function/ContextProjectionOpTest.cpp
@@ -86,33 +86,13 @@ void testMatrixProjectionBackward(int context_start,
      std::max(0, (int)(context_start + context_length - 1));
  if (pad == 0) is_padding = false;
 
-  std::shared_ptr<FunctionBase> cpu_func(
-      FunctionBase::funcRegistrar_.createByType(
-          "ContextProjectionBackward-CPU"));
-  FuncConfig cpu_config;
-  cpu_config.set("context_length", context_length)
-      .set("context_start", context_start)
-      .set("begin_pad", std::max(0, -context_start))
-      .set("is_padding", is_padding);
-  cpu_func->init(cpu_config);
-
-  std::shared_ptr<FunctionBase> gpu_data_func(
-      FunctionBase::funcRegistrar_.createByType(
-          "ContextProjectionBackwardData-GPU"));
-  FuncConfig gpu_data_config;
-  gpu_data_config.set("context_length", context_length)
-      .set("context_start", context_start);
-  gpu_data_func->init(gpu_data_config);
-
-  std::shared_ptr<FunctionBase> gpu_w_func(
-      FunctionBase::funcRegistrar_.createByType(
-          "ContextProjectionBackwardWeight-GPU"));
-  FuncConfig gpu_w_config;
-  gpu_w_config.set("context_length", context_length)
-      .set("context_start", context_start)
-      .set("begin_pad", std::max(0, -context_start))
-      .set("total_pad", pad);
-  gpu_w_func->init(gpu_w_config);
+  FunctionCompare compare("ContextProjectionBackward",
+                          FuncConfig()
+                              .set("context_length", context_length)
+                              .set("context_start", context_start)
+                              .set("begin_pad", std::max(0, -context_start))
+                              .set("is_padding", is_padding)
+                              .set("total_pad", pad));
 
  CpuMatrix cpu_in_grad(batch_size, input_dim);
  cpu_in_grad.randomizeUniform();
@@ -138,32 +118,26 @@
    gpu_w_grad->copyFrom(*cpu_w_grad);
  }
 
-  cpu_func->calc({Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
-                  Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
-                         Dims{pad, input_dim}),
-                  Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-                         Dims{cpu_seq->getSize()})},
-                 {Tensor(cpu_out_grad.getData(),
-                         Dims{batch_size, input_dim * context_length})},
-                 {});
+  compare.getCpuFunction()->calc(
+      {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
+       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
+              Dims{pad, input_dim}),
+       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
+              Dims{cpu_seq->getSize()})},
+      {Tensor(cpu_out_grad.getData(),
+              Dims{batch_size, input_dim * context_length})},
+      {});
 
-  gpu_data_func->calc(
+  compare.getGpuFunction()->calc(
      {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
+       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
+              Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
              Dims{gpu_seq->getSize()})},
      {Tensor(gpu_out_grad.getData(),
              Dims{batch_size, input_dim * context_length})},
      {});
 
-  if (is_padding && gpu_w_grad) {
-    gpu_w_func->calc({Tensor(gpu_w_grad->getData(), Dims{pad, input_dim}),
-                      Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-                             Dims{gpu_seq->getSize()})},
-                     {Tensor(gpu_out_grad.getData(),
-                             Dims{batch_size, input_dim * context_length})},
-                     {});
-  }
-
  autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
  if (is_padding) {
    autotest::TensorCheckErr(*cpu_w_grad, *gpu_w_grad);
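
Note: FunctionCompare collapses the three hand-built registrar lookups into one object constructed from a device-neutral name. A sketch of what that constructor is assumed to do internally, inferred from the old code it replaces (not the real class body; `config` stands for the FuncConfig argument):

    // Assumed internals of FunctionCompare("ContextProjectionBackward", config):
    // resolve both device registrations of the same function name and give
    // them identical configs, so the test only has to build argument lists.
    std::shared_ptr<FunctionBase> cpu_func(
        FunctionBase::funcRegistrar_.createByType(
            "ContextProjectionBackward-CPU"));
    std::shared_ptr<FunctionBase> gpu_func(
        FunctionBase::funcRegistrar_.createByType(
            "ContextProjectionBackward-GPU"));
    cpu_func->init(config);
    gpu_func->init(config);
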
diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp
index d6af71824cfe5975449a55aa94a2b7e4db405e0c..37e951a1e38a00c6769926d46ab539dca105268a 100644
--- a/paddle/gserver/layers/ContextProjection.cpp
+++ b/paddle/gserver/layers/ContextProjection.cpp
@@ -47,43 +47,23 @@ bool ContextProjection::init() {
  int context_start = config_.context_start();
  bool is_padding = config_.trainable_padding();
  size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
-  if (!useGpu_) {  // CPU functions
-    createFunction(forward_,
-                   "ContextProjectionForward-CPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("is_padding", is_padding));
-
-    createFunction(backward_,
-                   "ContextProjectionBackward-CPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("is_padding", is_padding));
-  } else {  // GPU functions
-    createFunction(forward_,
-                   "ContextProjectionForward-GPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("is_padding", is_padding));
-    createFunction(backward_,
-                   "ContextProjectionBackwardData-GPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start));
-
-    createFunction(backward_,
-                   "ContextProjectionBackwardWeight-GPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("total_pad", total_pad));
-  }
+
+  createFunction(forward_,
+                 "ContextProjectionForward",
+                 FuncConfig()
+                     .set("context_length", context_length)
+                     .set("context_start", context_start)
+                     .set("begin_pad", beginPad_)
+                     .set("is_padding", is_padding));
+
+  createFunction(backward_,
+                 "ContextProjectionBackward",
+                 FuncConfig()
+                     .set("context_length", context_length)
+                     .set("context_start", context_start)
+                     .set("begin_pad", beginPad_)
+                     .set("is_padding", is_padding)
+                     .set("total_pad", total_pad));
+
  return true;
 }
@@ -185,38 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
  REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
  bool is_padding = config_.trainable_padding();
  auto start_pos = in_->sequenceStartPositions;
-  if (!out_->grad->useGpu()) {
-    auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
-    backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
-                               Dims{batch_size, input_dim}),
-                        Tensor(w_ptr ? w_ptr->getData() : nullptr,
-                               Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
-                        Tensor(reinterpret_cast<real*>(const_cast<int*>(
-                                   start_pos->getData(useGpu_))),
-                               Dims{start_pos->getSize()})},
-                       {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
-                       {});
-  } else {
-    if (in_->grad) {
-      backward_[0]->calc(
-          {Tensor(in_->grad->getData(), Dims{batch_size, input_dim}),
-           Tensor(reinterpret_cast<real*>(
-                      const_cast<int*>(start_pos->getData(useGpu_))),
-                  Dims{start_pos->getSize()})},
-          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
-          {});
-    }
-    if (is_padding && weight_->getWGrad()) {
-      backward_[1]->calc(
-          {Tensor(weight_->getWGrad()->getData(),
-                  Dims{weight_->getWGrad()->getHeight(), input_dim}),
-           Tensor(reinterpret_cast<real*>(
-                      const_cast<int*>(start_pos->getData(useGpu_))),
-                  Dims{start_pos->getSize()})},
-          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
-          {});
-    }
-  }
+  auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
+  backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
+                             Dims{batch_size, input_dim}),
+                      Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                             Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                      Tensor(reinterpret_cast<real*>(
+                                 const_cast<int*>(start_pos->getData(useGpu_))),
+                             Dims{start_pos->getSize()})},
+                     {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+                     {});
 
  if (config_.trainable_padding()) {
    weight_->getParameterPtr()->incUpdate(callback);
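
Note: total_pad is now always carried in the backward config, even though before this change only "ContextProjectionBackwardWeight-GPU" consumed it. A worked example of the padding arithmetic, using the same formulas as init() above and the test (the concrete values are chosen for illustration):

    // Worked example of the padding sizes.
    int context_start = -1;     // one step of left context
    size_t context_length = 3;  // window covers steps -1, 0, +1
    size_t begin_pad = std::max(0, -context_start);                        // = 1
    size_t end_pad =
        std::max(0, context_start + (int)context_length - 1);             // = 1
    size_t total_pad = begin_pad + end_pad;                               // = 2
    // The trainable padding weight therefore has total_pad rows; w_grad is
    // only allocated with that height when trainable_padding is enabled.
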
diff --git a/paddle/gserver/layers/Projection.h b/paddle/gserver/layers/Projection.h
index 1e9f60706f89cba01ad0d1bb557cecbefabe43b9..778a7fe13d8a2b669831396e69546446b4745e61 100644
--- a/paddle/gserver/layers/Projection.h
+++ b/paddle/gserver/layers/Projection.h
@@ -102,13 +102,19 @@ protected:
  /**
   * Create layer function. Function is called in forward or backward.
   * \param function, Layer::forward_ or Layer::backward_
-   * \param name, function name, include -GPU or -CPU
+   * \param name, function name
   * \param config, initialization configuration for the function
   */
  void createFunction(std::vector<std::shared_ptr<FunctionBase>>& function,
                      const std::string& name,
                      const FuncConfig& config) {
-    function.emplace_back(FunctionBase::funcRegistrar_.createByType(name));
+    if (useGpu_) {
+      function.emplace_back(
+          FunctionBase::funcRegistrar_.createByType(name + "-GPU"));
+    } else {
+      function.emplace_back(
+          FunctionBase::funcRegistrar_.createByType(name + "-CPU"));
+    }
    auto& func = function.back();
    func->init(config);
  }
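
Note: the device split now lives in exactly one place. The branch above condenses to the sketch below, and it is also why this patch must register "ContextProjectionBackward" for GPU: a device-neutral name only works if both the "-CPU" and "-GPU" variants exist in the registrar.

    // Equivalent one-branch sketch of createFunction's body: the suffix is
    // the only per-device difference, so lookup reduces to string selection.
    std::shared_ptr<FunctionBase> func(
        FunctionBase::funcRegistrar_.createByType(
            name + (useGpu_ ? "-GPU" : "-CPU")));
    func->init(config);
    function.emplace_back(func);
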