diff --git a/paddle/function/context_projection_op.cpp b/paddle/function/ContextProjectionOp.cpp
similarity index 99%
rename from paddle/function/context_projection_op.cpp
rename to paddle/function/ContextProjectionOp.cpp
index a6a85fb6a46f765fcacb9406010ada14f9f6335a..3ada0b727bc6de2762586c0de8857d39ea28b004 100644
--- a/paddle/function/context_projection_op.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -12,7 +12,7 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/Vector.h"
 
diff --git a/paddle/function/context_projection_op.h b/paddle/function/ContextProjectionOp.h
similarity index 100%
rename from paddle/function/context_projection_op.h
rename to paddle/function/ContextProjectionOp.h
diff --git a/paddle/function/context_projection_op_gpu.cu b/paddle/function/ContextProjectionOpGpu.cu
similarity index 99%
rename from paddle/function/context_projection_op_gpu.cu
rename to paddle/function/ContextProjectionOpGpu.cu
index fdea433d07e9c44361a632dc189f89f245498f9c..438826299891ce0e8257d3b49e30d8697e7b095a 100644
--- a/paddle/function/context_projection_op_gpu.cu
+++ b/paddle/function/ContextProjectionOpGpu.cu
@@ -13,7 +13,7 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "hl_base.h"
-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"
 
 namespace paddle {
 
@@ -327,7 +327,7 @@ void ContextProjectionBackwardWeight(Tensor& out_grad,
                                      int context_start,
                                      size_t total_pad,
                                      size_t begin_pad) {
-  CHECK(w_grad.getData() && out_grad.getData());
+  CHECK(w_grad.getData() && out_grad.getData() && sequence.getData());
   CHECK_EQ(out_grad.dims_.size(), 2);
   CHECK_EQ(w_grad.dims_.size(), 2);
   CHECK_EQ(sequence.dims_.size(), 1);
diff --git a/paddle/function/context_projection_op_test.cpp b/paddle/function/ContextProjectionOpTest.cpp
similarity index 100%
rename from paddle/function/context_projection_op_test.cpp
rename to paddle/function/ContextProjectionOpTest.cpp
diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp
index 215b3dbd837687eefea631cc79b107a65d5b33ed..6f82a8d053bc203eed44bd0d8d4c47d23a15268d 100644
--- a/paddle/function/Function.cpp
+++ b/paddle/function/Function.cpp
@@ -60,14 +60,14 @@ FuncConfig& FuncConfig::set(const std::string& key, real v) {
 
 template <>
 FuncConfig& FuncConfig::set(const std::string& key, int v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
   valueMap_[key].i = v;
   return *this;
 }
 
 template <>
 FuncConfig& FuncConfig::set(const std::string& key, bool v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
   valueMap_[key].b = v;
   return *this;
 }
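The `CHECK` to `CHECK_EQ` change above follows the glog convention: on failure, `CHECK_EQ` logs both operands (e.g. "1 vs. 0"), so a duplicated key reports the actual count rather than just the failed expression text. Below is a minimal, self-contained sketch of the duplicate-key-checked, chainable setter pattern; `MiniFuncConfig` is a hypothetical stand-in for illustration, not Paddle's `FuncConfig`:

```cpp
#include <glog/logging.h>
#include <map>
#include <string>

// Hypothetical stand-in for paddle::FuncConfig, illustrating the pattern:
// each set() rejects duplicate keys and returns *this so calls can chain.
class MiniFuncConfig {
public:
  MiniFuncConfig& set(const std::string& key, int v) {
    // CHECK_EQ logs both sides on failure, e.g. "(1 vs. 0)",
    // which is why the diff prefers it over CHECK(count == 0).
    CHECK_EQ(values_.count(key), 0U) << "Duplicated value: " << key;
    values_[key] = v;
    return *this;
  }
  int get(const std::string& key) const {
    auto it = values_.find(key);
    CHECK(it != values_.end()) << "Unknown key: " << key;
    return it->second;
  }

private:
  std::map<std::string, int> values_;
};

// Usage mirrors the diff:
//   MiniFuncConfig().set("context_length", 5).set("context_start", -2);
```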
diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp
index 51c0ae5cc9523debffa4bdfe44fe0df0c56839c2..d6af71824cfe5975449a55aa94a2b7e4db405e0c 100644
--- a/paddle/gserver/layers/ContextProjection.cpp
+++ b/paddle/gserver/layers/ContextProjection.cpp
@@ -38,6 +38,53 @@ ContextProjection::ContextProjection(const ProjectionConfig& config,
     CHECK_EQ(inputDim * totalPad, parameter->getSize());
     weight_.reset(new Weight(totalPad, inputDim, parameter));
   }
+  // init forward_ and backward_ functions
+  init();
+}
+
+bool ContextProjection::init() {
+  size_t context_length = config_.context_length();
+  int context_start = config_.context_start();
+  bool is_padding = config_.trainable_padding();
+  size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
+  if (!useGpu_) {  // CPU functions
+    createFunction(forward_,
+                   "ContextProjectionForward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+  } else {  // GPU functions
+    createFunction(forward_,
+                   "ContextProjectionForward-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackwardData-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start));
+
+    createFunction(backward_,
+                   "ContextProjectionBackwardWeight-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("total_pad", total_pad));
+  }
+  return true;
 }
 
 void ContextProjection::resetState() {
@@ -78,25 +125,35 @@ LayerStatePtr ContextProjection::getState() {
 }
 
 void ContextProjection::forward() {
-  CHECK(in_->value);
+  CHECK(in_->value && out_->value);
   CHECK(in_->sequenceStartPositions);
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
-
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());
 
   REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
-  out_->value->contextProjectionForward(
-      *(in_->value),
-      state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr,
-      *startPositions,
-      config_.context_length(),
-      config_.context_start(),
-      beginPad_,
-      state_ ? true : isPadding);
+  bool is_padding = config_.trainable_padding();
+  /// prefer state_; otherwise use weight_ (w is nullptr when padding is off)
+  auto w_ptr =
+      state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
+  auto start_pos = in_->sequenceStartPositions;
+  /// when state_ serves as the weight, w_ptr already owns memory, so padding is true
+  forward_[0]->init(FuncConfig()
+                        .set("context_length", config_.context_length())
+                        .set("context_start", config_.context_start())
+                        .set("begin_pad", beginPad_)
+                        .set("is_padding", state_ ? true : is_padding));
+  forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}),
+                     Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                            Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                     Tensor(reinterpret_cast<real*>(
+                                const_cast<int*>(start_pos->getData(useGpu_))),
+                            Dims{start_pos->getSize()})},
+                    {Tensor(out_->value->getData(), Dims{batch_size, dim})},
+                    {});
 
   if (state_ && config_.context_start() < 0) {
     CHECK_EQ(1, in_->getNumSequences());
@@ -118,39 +175,46 @@ void ContextProjection::forward() {
 }
 
 void ContextProjection::backward(const UpdateCallback& callback) {
-  CHECK(in_->value);
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
+  CHECK(in_->value && out_->value && out_->grad);
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());
 
   REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
+  bool is_padding = config_.trainable_padding();
+  auto start_pos = in_->sequenceStartPositions;
 
   if (!out_->grad->useGpu()) {
-    out_->grad->contextProjectionBackward(
-        in_->grad.get(),
-        isPadding ? weight_->getWGrad().get() : nullptr,
-        *startPositions,
-        config_.context_length(),
-        config_.context_start(),
-        beginPad_,
-        isPadding);
+    auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
+    backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
+                               Dims{batch_size, input_dim}),
+                        Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                               Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                        Tensor(reinterpret_cast<real*>(const_cast<int*>(
+                                   start_pos->getData(useGpu_))),
+                               Dims{start_pos->getSize()})},
+                       {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+                       {});
  } else {
    if (in_->grad) {
-      out_->grad->contextProjectionBackwardData(*(in_->grad),
-                                                *startPositions,
-                                                config_.context_length(),
-                                                config_.context_start());
+      backward_[0]->calc(
+          {Tensor(in_->grad->getData(), Dims{batch_size, input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
    }
-
-    if (isPadding && weight_->getWGrad()) {
-      out_->grad->contextProjectionBackwardWeight(
-          *(weight_->getWGrad()),
-          *startPositions,
-          config_.context_length(),
-          config_.context_start(),
-          weight_->getWGrad()->getHeight(),
-          beginPad_);
+    if (is_padding && weight_->getWGrad()) {
+      backward_[1]->calc(
+          {Tensor(weight_->getWGrad()->getData(),
+                  Dims{weight_->getWGrad()->getHeight(), input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
    }
  }
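The rewritten forward()/backward() above pass every argument through the Tensor view type: real-valued matrices hand over their buffers directly, while the int32 sequence-start vector is squeezed through the same real*-typed constructor with reinterpret_cast and recovered inside the function. Below is a self-contained sketch of that round trip; MiniTensor, wrapSequence, and numSequences are hypothetical illustrations, not Paddle APIs:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

using real = float;  // Paddle's `real` is float or double depending on build

// Hypothetical minimal stand-in for the Tensor view used in the diff:
// an untyped buffer pointer plus dimensions, no ownership.
struct MiniTensor {
  real* data;
  std::vector<size_t> dims;
};

// Caller side (cf. ContextProjection::forward): the int sequence-start
// array is passed through the real*-typed view via reinterpret_cast.
MiniTensor wrapSequence(const int* seq_starts, size_t n) {
  return MiniTensor{reinterpret_cast<real*>(const_cast<int*>(seq_starts)),
                    {n}};
}

// Callee side (cf. ContextProjectionForward): the function knows this
// argument really holds ints and casts back before indexing.
size_t numSequences(const MiniTensor& seq) {
  const int* starts = reinterpret_cast<const int*>(seq.data);
  (void)starts;             // a real kernel would index into this
  return seq.dims[0] - 1;   // start positions hold numSequences + 1 entries
}

int main() {
  std::vector<int> starts = {0, 4, 9};  // two sequences: [0,4) and [4,9)
  MiniTensor t = wrapSequence(starts.data(), starts.size());
  assert(numSequences(t) == 2);
  return 0;
}
```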
diff --git a/paddle/gserver/layers/ContextProjection.h b/paddle/gserver/layers/ContextProjection.h
index 2df43bd04fec868924b5d45f9def231a48ee7f04..c87d6ed1d6d46b391ccf8722f6d110614be1fe78 100644
--- a/paddle/gserver/layers/ContextProjection.h
+++ b/paddle/gserver/layers/ContextProjection.h
@@ -61,6 +61,8 @@ public:
 
   virtual LayerStatePtr getState();
 
+  virtual bool init();
+
 protected:
   std::unique_ptr<Weight> weight_;
   /// number of extra timesteps added at the beginning
diff --git a/paddle/gserver/layers/Projection.h b/paddle/gserver/layers/Projection.h
index 8cd8042479eafdbd6b8dac03b63b344fcf9526b1..1e9f60706f89cba01ad0d1bb557cecbefabe43b9 100644
--- a/paddle/gserver/layers/Projection.h
+++ b/paddle/gserver/layers/Projection.h
@@ -88,11 +88,31 @@ public:
    */
   virtual LayerStatePtr getState() { return nullptr; }
 
+  /**
+   * init forward_ and backward_ functions
+   */
+  virtual bool init() { return true; }
+
   /**
    * Get output size of projection.
    */
   size_t getOutputSize() const { return config_.output_size(); }
 
+protected:
+  /**
+   * Create layer function. The function is called in forward or backward.
+   * \param function, Layer::forward_ or Layer::backward_
+   * \param name, function name, including the -GPU or -CPU suffix
+   * \param config, initialization configuration for the function
+   */
+  void createFunction(std::vector<std::shared_ptr<FunctionBase>>& function,
+                      const std::string& name,
+                      const FuncConfig& config) {
+    function.emplace_back(FunctionBase::funcRegistrar_.createByType(name));
+    auto& func = function.back();
+    func->init(config);
+  }
+
 protected:
   /// Config of projection
   ProjectionConfig config_;
@@ -106,5 +126,9 @@ protected:
   const Argument* out_;
   /// Store `passType` passed to forward()
   PassType passType_;
+  /// Layer forward function
+  std::vector<std::shared_ptr<FunctionBase>> forward_;
+  /// Layer backward function
+  std::vector<std::shared_ptr<FunctionBase>> backward_;
 };
 
 }  // namespace paddle
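createFunction above resolves a concrete implementation by its registered name (the -CPU or -GPU suffix selects the device variant) and appends it, already initialized, to forward_ or backward_. Below is a minimal sketch of such a name-to-factory registry; MiniRegistrar and MiniFunction are hypothetical stand-ins for illustration, not Paddle's ClassRegistrar and FunctionBase:

```cpp
#include <functional>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical minimal function interface; init(int) stands in for
// FunctionBase::init(const FuncConfig&).
struct MiniFunction {
  virtual ~MiniFunction() = default;
  virtual void init(int config) {}
};

// Name-to-factory registry, mirroring funcRegistrar_.createByType(name).
class MiniRegistrar {
public:
  using Creator = std::function<std::shared_ptr<MiniFunction>()>;

  void registerClass(const std::string& name, Creator c) {
    creators_[name] = std::move(c);
  }
  std::shared_ptr<MiniFunction> createByType(const std::string& name) const {
    auto it = creators_.find(name);
    if (it == creators_.end()) throw std::runtime_error("unknown: " + name);
    return it->second();
  }

private:
  std::map<std::string, Creator> creators_;
};

// Mirrors Projection::createFunction: look up by name, init, append.
void createFunction(std::vector<std::shared_ptr<MiniFunction>>& funcs,
                    const std::string& name,
                    int config,
                    const MiniRegistrar& registrar) {
  funcs.emplace_back(registrar.createByType(name));
  funcs.back()->init(config);
}

// Usage, mirroring ContextProjection::init():
//   registrar.registerClass("ContextProjectionForward-CPU",
//                           [] { return std::make_shared<MiniFunction>(); });
//   createFunction(forward_, "ContextProjectionForward-CPU", cfg, registrar);
```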