Commit f23a1170 authored by xutianbing

Add Context Projection functions to Layer's forward and backward.

Resolve merge conflicts.
Parent 590ecba3
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "context_projection_op.h"
#include "ContextProjectionOp.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "context_projection_op.h"
#include "ContextProjectionOp.h"
namespace paddle {
......@@ -327,7 +327,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(Tensor& out_grad,
int context_start,
size_t total_pad,
size_t begin_pad) {
CHECK(w_grad.getData() && out_grad.getData());
CHECK(w_grad.getData() && out_grad.getData() && sequence.getData());
CHECK_EQ(out_grad.dims_.size(), 2);
CHECK_EQ(w_grad.dims_.size(), 2);
CHECK_EQ(sequence.dims_.size(), 1);
......
......@@ -60,14 +60,14 @@ FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
template <>
FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
valueMap_[key].i = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
valueMap_[key].b = v;
return *this;
}
......
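The `CHECK_EQ` form above gives a clearer failure message than a bare `CHECK` when a key is set twice. For context, here is a minimal stand-alone sketch of the chained-setter pattern that `FuncConfig` uses; the `MiniConfig` type below is invented for illustration and is not Paddle code:

```cpp
#include <cassert>
#include <map>
#include <string>

// Minimal stand-in for the FuncConfig pattern: chained setters that reject
// duplicate keys, mirroring CHECK_EQ(valueMap_.count(key), 0) above.
struct MiniConfig {
  std::map<std::string, double> values_;
  MiniConfig& set(const std::string& key, double v) {
    assert(values_.count(key) == 0 && "Duplicated value");  // each key may be set only once
    values_[key] = v;
    return *this;  // returning *this enables .set(...).set(...) chaining
  }
};

int main() {
  MiniConfig cfg;
  cfg.set("context_length", 3).set("context_start", -1).set("begin_pad", 1);
  return 0;
}
```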
......@@ -38,6 +38,53 @@ ContextProjection::ContextProjection(const ProjectionConfig& config,
CHECK_EQ(inputDim * totalPad, parameter->getSize());
weight_.reset(new Weight(totalPad, inputDim, parameter));
}
// init forward_ and backward_ functions
init();
}
bool ContextProjection::init() {
size_t context_length = config_.context_length();
int context_start = config_.context_start();
bool is_padding = config_.trainable_padding();
size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
if (!useGpu_) { // CPU functions
createFunction(forward_,
"ContextProjectionForward-CPU",
FuncConfig()
.set("context_length", context_length)
.set("context_start", context_start)
.set("begin_pad", beginPad_)
.set("is_padding", is_padding));
createFunction(backward_,
"ContextProjectionBackward-CPU",
FuncConfig()
.set("context_length", context_length)
.set("context_start", context_start)
.set("begin_pad", beginPad_)
.set("is_padding", is_padding));
} else { // GPU functions
createFunction(forward_,
"ContextProjectionForward-GPU",
FuncConfig()
.set("context_length", context_length)
.set("context_start", context_start)
.set("begin_pad", beginPad_)
.set("is_padding", is_padding));
createFunction(backward_,
"ContextProjectionBackwardData-GPU",
FuncConfig()
.set("context_length", context_length)
.set("context_start", context_start));
createFunction(backward_,
"ContextProjectionBackwardWeight-GPU",
FuncConfig()
.set("context_length", context_length)
.set("context_start", context_start)
.set("begin_pad", beginPad_)
.set("total_pad", total_pad));
}
return true;
}
void ContextProjection::resetState() {
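For reference, a stand-alone sketch of the registration pattern used in `init()` above (illustration only; the toy `createFunction` below mimics, but is not, Paddle's helper): the CPU path registers one combined backward function, while the GPU path registers separate data- and weight-gradient functions that `backward()` later invokes as `backward_[0]` and `backward_[1]`.

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Toy registry: each named function is appended to a per-direction vector, so
// the backward pass holds one kernel (CPU) or two (GPU: data, then weight).
using NamedFunc = std::pair<std::string, std::function<void()>>;

void createFunction(std::vector<NamedFunc>& fns, const std::string& name) {
  fns.emplace_back(name, [name] { std::cout << "run " << name << "\n"; });
}

int main() {
  std::vector<NamedFunc> backward;
  const bool use_gpu = true;  // assumed flag, standing in for useGpu_
  if (!use_gpu) {
    createFunction(backward, "ContextProjectionBackward-CPU");        // combined kernel
  } else {
    createFunction(backward, "ContextProjectionBackwardData-GPU");    // input gradient
    createFunction(backward, "ContextProjectionBackwardWeight-GPU");  // padding-weight gradient
  }
  for (auto& f : backward) f.second();  // GPU: backward[0] then backward[1]
  return 0;
}
```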
......@@ -78,25 +125,35 @@ LayerStatePtr ContextProjection::getState() {
}
void ContextProjection::forward() {
CHECK(in_->value);
CHECK(in_->value && out_->value);
CHECK(in_->sequenceStartPositions);
auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
int64_t inputDim = in_->value->getWidth();
int64_t dim = out_->value->getWidth();
CHECK_EQ(dim, inputDim * config_.context_length());
size_t input_dim = in_->value->getWidth();
size_t dim = out_->value->getWidth();
CHECK_EQ(dim, input_dim * config_.context_length());
size_t batch_size = in_->value->getHeight();
CHECK_EQ(batch_size, out_->value->getHeight());
REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
bool isPadding = config_.trainable_padding();
out_->value->contextProjectionForward(
*(in_->value),
state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr,
*startPositions,
config_.context_length(),
config_.context_start(),
beginPad_,
state_ ? true : isPadding);
bool is_padding = config_.trainable_padding();
/// prefer state_ when it is set; otherwise use weight_ (w is nullptr when padding is disabled)
auto w_ptr =
state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
auto start_pos = in_->sequenceStartPositions;
/// when state_ serves as the weight, w_ptr already holds memory, so force the padding flag to true
forward_[0]->init(FuncConfig()
.set("context_length", config_.context_length())
.set("context_start", config_.context_start())
.set("begin_pad", beginPad_)
.set("is_padding", state_ ? true : is_padding));
forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}),
Tensor(w_ptr ? w_ptr->getData() : nullptr,
Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
Tensor(reinterpret_cast<real*>(
const_cast<int*>(start_pos->getData(useGpu_))),
Dims{start_pos->getSize()})},
{Tensor(out_->value->getData(), Dims{batch_size, dim})},
{});
if (state_ && config_.context_start() < 0) {
CHECK_EQ(1, in_->getNumSequences());
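The `forward_[0]->calc(...)` call above passes non-owning views: each `Tensor` wraps an existing buffer together with its `Dims`, and the int32 sequence-start vector is reinterpreted as `real*` only so it fits the common argument type. A stand-alone sketch of the non-owning-view idea (the `View2D` type below is invented for illustration and is not Paddle's `Tensor`):

```cpp
#include <cstddef>
#include <vector>

// Toy non-owning 2-D view: like the Tensor arguments above, it stores a borrowed
// pointer plus its dimensions and never copies or frees the underlying memory.
struct View2D {
  float* data;  // owned by the caller (here: the std::vector in main)
  std::size_t rows;
  std::size_t cols;
};

int main() {
  const std::size_t batch_size = 8, input_dim = 4;
  std::vector<float> input(batch_size * input_dim, 0.0f);  // stands in for in_->value
  View2D in_view{input.data(), batch_size, input_dim};     // shape metadata only, no copy
  return in_view.rows == batch_size ? 0 : 1;
}
```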
......@@ -118,39 +175,46 @@ void ContextProjection::forward() {
}
void ContextProjection::backward(const UpdateCallback& callback) {
CHECK(in_->value);
int64_t inputDim = in_->value->getWidth();
int64_t dim = out_->value->getWidth();
CHECK_EQ(dim, inputDim * config_.context_length());
auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
CHECK(in_->value && out_->value && out_->grad);
size_t input_dim = in_->value->getWidth();
size_t dim = out_->value->getWidth();
CHECK_EQ(dim, input_dim * config_.context_length());
size_t batch_size = in_->value->getHeight();
CHECK_EQ(batch_size, out_->value->getHeight());
REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
bool isPadding = config_.trainable_padding();
bool is_padding = config_.trainable_padding();
auto start_pos = in_->sequenceStartPositions;
if (!out_->grad->useGpu()) {
out_->grad->contextProjectionBackward(
in_->grad.get(),
isPadding ? weight_->getWGrad().get() : nullptr,
*startPositions,
config_.context_length(),
config_.context_start(),
beginPad_,
isPadding);
auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
Dims{batch_size, input_dim}),
Tensor(w_ptr ? w_ptr->getData() : nullptr,
Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
Tensor(reinterpret_cast<real*>(const_cast<int*>(
start_pos->getData(useGpu_))),
Dims{start_pos->getSize()})},
{Tensor(out_->grad->getData(), Dims{batch_size, dim})},
{});
} else {
if (in_->grad) {
out_->grad->contextProjectionBackwardData(*(in_->grad),
*startPositions,
config_.context_length(),
config_.context_start());
backward_[0]->calc(
{Tensor(in_->grad->getData(), Dims{batch_size, input_dim}),
Tensor(reinterpret_cast<real*>(
const_cast<int*>(start_pos->getData(useGpu_))),
Dims{start_pos->getSize()})},
{Tensor(out_->grad->getData(), Dims{batch_size, dim})},
{});
}
if (isPadding && weight_->getWGrad()) {
out_->grad->contextProjectionBackwardWeight(
*(weight_->getWGrad()),
*startPositions,
config_.context_length(),
config_.context_start(),
weight_->getWGrad()->getHeight(),
beginPad_);
if (is_padding && weight_->getWGrad()) {
backward_[1]->calc(
{Tensor(weight_->getWGrad()->getData(),
Dims{weight_->getWGrad()->getHeight(), input_dim}),
Tensor(reinterpret_cast<real*>(
const_cast<int*>(start_pos->getData(useGpu_))),
Dims{start_pos->getSize()})},
{Tensor(out_->grad->getData(), Dims{batch_size, dim})},
{});
}
}
......
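As a quick worked example of the shape checks in `forward()`/`backward()` above: the output width is `input_dim * context_length`, and with trainable padding the weight matrix has `total_pad = begin_pad + end_pad` rows. The concrete values below are assumed for illustration only:

```cpp
#include <cassert>
#include <cstddef>

int main() {
  // Values assumed for illustration; they mirror the checks above.
  const std::size_t input_dim = 4;       // width of in_->value
  const std::size_t context_length = 3;  // e.g. context_start = -1 covers offsets {-1, 0, 1}
  const std::size_t dim = input_dim * context_length;  // width of out_->value
  assert(dim == 12);  // CHECK_EQ(dim, input_dim * config_.context_length())

  // With trainable padding, one weight row exists per padded timestep:
  const std::size_t begin_pad = 1;  // rows padded at the front when context_start < 0
  const std::size_t end_pad = 1;    // rows padded at the end when the context runs past the last step
  const std::size_t total_pad = begin_pad + end_pad;  // weight_ is total_pad x input_dim
  assert(total_pad == 2);
  return 0;
}
```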
......@@ -61,6 +61,8 @@ public:
virtual LayerStatePtr getState();
virtual bool init();
protected:
std::unique_ptr<Weight> weight_;
/// number of extra timesteps added at the beginning
......
......@@ -88,11 +88,31 @@ public:
*/
virtual LayerStatePtr getState() { return nullptr; }
/**
* Initialize the forward_ and backward_ functions.
*/
virtual bool init() { return true; }
/**
* Get output size of projection.
*/
size_t getOutputSize() const { return config_.output_size(); }
protected:
/**
* Create a layer function. The function is called in forward() or backward().
* \param function, Layer::forward_ or Layer::backward_
* \param name, function name, including the -GPU or -CPU suffix
* \param config, initialization configuration for the function
*/
void createFunction(std::vector<std::shared_ptr<FunctionBase>>& function,
const std::string& name,
const FuncConfig& config) {
function.emplace_back(FunctionBase::funcRegistrar_.createByType(name));
auto& func = function.back();
func->init(config);
}
protected:
/// Config of projection
ProjectionConfig config_;
......@@ -106,5 +126,9 @@ protected:
const Argument* out_;
/// Store `passType` passed to forward()
PassType passType_;
/// Layer forward function
std::vector<std::shared_ptr<FunctionBase>> forward_;
/// Layer backward function
std::vector<std::shared_ptr<FunctionBase>> backward_;
};
} // namespace paddle