Commit f23a1170 authored by xutianbing

add Context Projection functions to Layer's forward and backward,

resolve merge conflicts
Parent 590ecba3
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/Vector.h"
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "hl_base.h"
-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"
 
 namespace paddle {
 
@@ -327,7 +327,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(Tensor& out_grad,
                                                       int context_start,
                                                       size_t total_pad,
                                                       size_t begin_pad) {
-  CHECK(w_grad.getData() && out_grad.getData());
+  CHECK(w_grad.getData() && out_grad.getData() && sequence.getData());
   CHECK_EQ(out_grad.dims_.size(), 2);
   CHECK_EQ(w_grad.dims_.size(), 2);
   CHECK_EQ(sequence.dims_.size(), 1);
...
@@ -60,14 +60,14 @@ FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
 
 template <>
 FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
   valueMap_[key].i = v;
   return *this;
 }
 
 template <>
 FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
   valueMap_[key].b = v;
   return *this;
 }
...
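
The two hunks above only tighten the duplicate-key check (CHECK_EQ instead of CHECK), but they belong to the FuncConfig interface that the new ContextProjection code depends on: a small typed key/value map whose set<T>() specializations return *this so calls can be chained. A minimal usage sketch under that assumption (keys and values here are illustrative, and the get<T>() accessor is assumed to mirror the setters shown above):

    // Hypothetical sketch: build a config for a context-projection function.
    // Each set<T>() stores the value under the key and returns *this.
    FuncConfig config = FuncConfig()
                            .set("context_length", static_cast<size_t>(3))
                            .set("context_start", -1)  // int overload
                            .set("is_padding", true);  // bool overload
    // A registered FunctionBase subclass would read these back in its init(),
    // e.g. config.get<int>("context_start");  // accessor name assumed
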
@@ -38,6 +38,53 @@ ContextProjection::ContextProjection(const ProjectionConfig& config,
     CHECK_EQ(inputDim * totalPad, parameter->getSize());
     weight_.reset(new Weight(totalPad, inputDim, parameter));
   }
+  // init forward_ and backward_ functions
+  init();
+}
+
+bool ContextProjection::init() {
+  size_t context_length = config_.context_length();
+  int context_start = config_.context_start();
+  bool is_padding = config_.trainable_padding();
+  size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
+
+  if (!useGpu_) {  // CPU functions
+    createFunction(forward_,
+                   "ContextProjectionForward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+  } else {  // GPU functions
+    createFunction(forward_,
+                   "ContextProjectionForward-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackwardData-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start));
+    createFunction(backward_,
+                   "ContextProjectionBackwardWeight-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("total_pad", total_pad));
+  }
+  return true;
 }
 
 void ContextProjection::resetState() {
@@ -78,25 +125,35 @@ LayerStatePtr ContextProjection::getState() {
 }
 
 void ContextProjection::forward() {
-  CHECK(in_->value);
+  CHECK(in_->value && out_->value);
   CHECK(in_->sequenceStartPositions);
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
 
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());
 
   REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
-  out_->value->contextProjectionForward(
-      *(in_->value),
-      state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr,
-      *startPositions,
-      config_.context_length(),
-      config_.context_start(),
-      beginPad_,
-      state_ ? true : isPadding);
+  bool is_padding = config_.trainable_padding();
+  /// use state_ first, otherwise use weight_ (padding false === w nullptr)
+  auto w_ptr =
+      state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
+  auto start_pos = in_->sequenceStartPositions;
+  /// if state_ is used as weight_, w_ptr already has memory, so padding is true
+  forward_[0]->init(FuncConfig()
+                        .set("context_length", config_.context_length())
+                        .set("context_start", config_.context_start())
+                        .set("begin_pad", beginPad_)
+                        .set("is_padding", state_ ? true : is_padding));
+  forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}),
+                     Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                            Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                     Tensor(reinterpret_cast<real*>(
+                                const_cast<int*>(start_pos->getData(useGpu_))),
+                            Dims{start_pos->getSize()})},
+                    {Tensor(out_->value->getData(), Dims{batch_size, dim})},
+                    {});
 
   if (state_ && config_.context_start() < 0) {
     CHECK_EQ(1, in_->getNumSequences());
@@ -118,39 +175,46 @@ void ContextProjection::forward() {
 }
 
 void ContextProjection::backward(const UpdateCallback& callback) {
-  CHECK(in_->value);
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
+  CHECK(in_->value && out_->value && out_->grad);
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());
 
   REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
+  bool is_padding = config_.trainable_padding();
+  auto start_pos = in_->sequenceStartPositions;
+
   if (!out_->grad->useGpu()) {
-    out_->grad->contextProjectionBackward(
-        in_->grad.get(),
-        isPadding ? weight_->getWGrad().get() : nullptr,
-        *startPositions,
-        config_.context_length(),
-        config_.context_start(),
-        beginPad_,
-        isPadding);
+    auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
+    backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
+                               Dims{batch_size, input_dim}),
+                        Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                               Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                        Tensor(reinterpret_cast<real*>(const_cast<int*>(
+                                   start_pos->getData(useGpu_))),
+                               Dims{start_pos->getSize()})},
+                       {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+                       {});
   } else {
     if (in_->grad) {
-      out_->grad->contextProjectionBackwardData(*(in_->grad),
-                                                *startPositions,
-                                                config_.context_length(),
-                                                config_.context_start());
+      backward_[0]->calc(
+          {Tensor(in_->grad->getData(), Dims{batch_size, input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
     }
-
-    if (isPadding && weight_->getWGrad()) {
-      out_->grad->contextProjectionBackwardWeight(
-          *(weight_->getWGrad()),
-          *startPositions,
-          config_.context_length(),
-          config_.context_start(),
-          weight_->getWGrad()->getHeight(),
-          beginPad_);
+    if (is_padding && weight_->getWGrad()) {
+      backward_[1]->calc(
+          {Tensor(weight_->getWGrad()->getData(),
+                  Dims{weight_->getWGrad()->getHeight(), input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
     }
   }
...
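
Every calc() call above follows the same convention: existing Matrix/IVector buffers are wrapped in Tensor views (a raw pointer plus Dims, no copy) and passed as an input list, an output list, and an empty in/out list. Because this Tensor interface only carries real* buffers, the int32 sequence-start positions are passed through a reinterpret_cast. A condensed sketch of that pattern, with the weight argument dropped for brevity (not a literal call from this commit):

    // Sketch: wrap existing buffers as Tensor views and invoke a layer function.
    Tensor input(in_->value->getData(), Dims{batch_size, input_dim});
    Tensor output(out_->value->getData(), Dims{batch_size, dim});
    // Sequence start positions are int32; cast the buffer so it fits the
    // real*-only Tensor interface, and let the kernel recover the ints.
    Tensor seq(reinterpret_cast<real*>(
                   const_cast<int*>(start_pos->getData(useGpu_))),
               Dims{start_pos->getSize()});
    forward_[0]->calc({input, seq}, {output}, {});  // inputs, outputs, in/outs
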
@@ -61,6 +61,8 @@ public:
 
   virtual LayerStatePtr getState();
 
+  virtual bool init();
+
 protected:
   std::unique_ptr<Weight> weight_;
   /// number of extra timesteps added at the beginning
...
@@ -88,11 +88,31 @@ public:
    */
   virtual LayerStatePtr getState() { return nullptr; }
 
+  /**
+   * init forward_ and backward_ functions
+   */
+  virtual bool init() { return true; }
+
   /**
    * Get output size of projection.
   */
   size_t getOutputSize() const { return config_.output_size(); }
 
+protected:
+  /**
+   * Create layer function. Function is called in forward or backward.
+   * \param function, Layer::forward_ or Layer::backward_
+   * \param name, function name, include -GPU or -CPU
+   * \param config, initialization configuration for the function
+   */
+  void createFunction(std::vector<std::shared_ptr<FunctionBase>>& function,
+                      const std::string& name,
+                      const FuncConfig& config) {
+    function.emplace_back(FunctionBase::funcRegistrar_.createByType(name));
+    auto& func = function.back();
+    func->init(config);
+  }
+
 protected:
   /// Config of projection
   ProjectionConfig config_;
 
@@ -106,5 +126,9 @@ protected:
   const Argument* out_;
   /// Store `passType` passed to forward()
   PassType passType_;
+
+  /// Layer forward function
+  std::vector<std::shared_ptr<FunctionBase>> forward_;
+  /// Layer backward function
+  std::vector<std::shared_ptr<FunctionBase>> backward_;
 };
 
 }  // namespace paddle
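
Taken together, the Projection changes define the pattern other projections can follow: register device-specific functions once in init(), then drive them through the forward_/backward_ vectors with Tensor views over existing buffers. A hypothetical sketch of a derived projection using this plumbing (MyProjection, its function names, and some_param are illustrative, not part of this commit):

    bool MyProjection::init() {
      // Functions are looked up by name in FunctionBase::funcRegistrar_;
      // the -CPU/-GPU suffix selects the device-specific kernel.
      createFunction(forward_,
                     useGpu_ ? "MyProjectionForward-GPU"
                             : "MyProjectionForward-CPU",
                     FuncConfig().set("some_param", someParam_));
      return true;
    }

    void MyProjection::forward() {
      size_t batch_size = in_->value->getHeight();
      size_t input_dim = in_->value->getWidth();
      // Tensors are thin views over the Matrix buffers; no data is copied.
      forward_[0]->calc(
          {Tensor(in_->value->getData(), Dims{batch_size, input_dim})},
          {Tensor(out_->value->getData(), Dims{batch_size, getOutputSize()})},
          {});
    }
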