提交 ea4d08da 编写于 作者: X xutianbing

update interface of context projection functions, Tensor -> Matrix/Vector

上级 2c37ad7e
......@@ -19,35 +19,17 @@ limitations under the License. */
namespace paddle {
template <>
void ContextProjectionForward<DEVICE_TYPE_CPU>(Tensor& output,
const Tensor& input,
const Tensor& weight,
const Tensor& sequence,
void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix* out_mat,
const CpuMatrix* input_mat,
const CpuMatrix* weight_mat,
const CpuIVector& seq_vec,
size_t context_length,
int context_start,
size_t begin_pad,
bool is_padding) {
CHECK(output.getData() && input.getData() && sequence.getData());
CHECK_EQ(output.dims_.size(), 2);
CHECK_EQ(input.dims_.size(), 2);
CHECK_EQ(weight.dims_.size(), 2);
CHECK_EQ(sequence.dims_.size(), 1);
auto out_mat = std::make_shared<CpuMatrix>(
output.getData(), output.dims_[0], output.dims_[1]);
const auto in_mat = std::make_shared<CpuMatrix>(
input.getData(), input.dims_[0], input.dims_[1]);
const auto weight_mat =
!weight.getData()
? nullptr
: std::make_shared<CpuMatrix>(
weight.getData(), weight.dims_[0], weight.dims_[1]);
CpuIVector seq_vec(sequence.dims_[0],
reinterpret_cast<int*>(sequence.getData()));
CHECK_EQ(out_mat->getWidth(), in_mat->getWidth() * context_length);
size_t begin_pad) {
const int* starts = seq_vec.getData();
const size_t num_sequences = seq_vec.getSize() - 1;
auto w_mat = const_cast<CpuMatrix*>(weight_mat);
auto in_mat = const_cast<CpuMatrix*>(input_mat);
for (size_t i = 0; i < num_sequences; ++i) {
for (size_t j = 0; j < context_length; ++j) {
int begin = starts[i] + context_start + j;
......@@ -58,8 +40,8 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(Tensor& output,
int64_t pad_size =
std::min(starts[i] - begin, starts[i + 1] - starts[i]);
MatrixPtr mat = out_mat->subMatrix(starts[i], pad_size);
if (is_padding && weight_mat) {
MatrixPtr sub = weight_mat->subMatrix(j, pad_size);
if (w_mat) {
MatrixPtr sub = w_mat->subMatrix(j, pad_size);
mat->addAtOffset(*sub, j * in_mat->getWidth());
}
dst_begin = starts[i] + pad_size;
......@@ -69,8 +51,8 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(Tensor& output,
int64_t pad_size =
std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
MatrixPtr mat = out_mat->subMatrix(starts[i + 1] - pad_size, pad_size);
if (is_padding && weight_mat) {
MatrixPtr sub = weight_mat->subMatrix(
if (w_mat) {
MatrixPtr sub = w_mat->subMatrix(
begin_pad + context_start + j - pad_size, pad_size);
mat->addAtOffset(*sub, j * in_mat->getWidth());
}
......@@ -98,7 +80,6 @@ public:
context_length_ = config.get<size_t>("context_length");
context_start_ = config.get<int>("context_start");
begin_pad_ = config.get<size_t>("begin_pad");
is_padding_ = config.get<bool>("is_padding");
}
void calc(const Arguments& inputs,
......@@ -108,59 +89,58 @@ public:
CHECK_EQ(1, outputs.size());
CHECK_EQ(0, inouts.size());
ContextProjectionForward<Device>((Tensor&)outputs[0],
inputs[0],
inputs[1],
inputs[2],
CHECK(outputs[0].getData() && inputs[0].getData() && inputs[2].getData());
CHECK_EQ(outputs[0].dims_.size(), 2);
CHECK_EQ(inputs[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inputs[2].dims_.size(), 1);
/// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
/// dim of input == dim of weight
CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
auto out_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
const auto in_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
const auto w_mat =
!inputs[1].getData()
? nullptr
: std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[2].dims_[0], reinterpret_cast<int*>(inputs[2].getData()));
ContextProjectionForward<Device>(out_mat.get(),
in_mat.get(),
w_mat.get(),
seq_vec,
context_length_,
context_start_,
begin_pad_,
is_padding_);
begin_pad_);
}
private:
size_t context_length_;
int context_start_;
size_t begin_pad_;
bool is_padding_;
};
template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(Tensor& out_grad,
Tensor& in_grad,
Tensor& w_grad,
const Tensor& sequence,
void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix* out_grad_mat,
CpuMatrix* in_grad_mat,
CpuMatrix* w_grad_mat,
const CpuIVector& seq_vec,
size_t context_length,
int context_start,
size_t begin_pad,
bool is_padding,
size_t total_pad) {
CHECK(out_grad.getData() && sequence.getData());
CHECK_EQ(out_grad.dims_.size(), 2);
CHECK_EQ(in_grad.dims_.size(), 2);
CHECK_EQ(w_grad.dims_.size(), 2);
CHECK_EQ(sequence.dims_.size(), 1);
auto out_grad_mat = std::make_shared<CpuMatrix>(
out_grad.getData(), out_grad.dims_[0], out_grad.dims_[1]);
const auto in_grad_mat =
!in_grad.getData()
? nullptr
: std::make_shared<CpuMatrix>(
in_grad.getData(), in_grad.dims_[0], in_grad.dims_[1]);
const auto w_grad_mat =
!w_grad.getData()
? nullptr
: std::make_shared<CpuMatrix>(
w_grad.getData(), w_grad.dims_[0], w_grad.dims_[1]);
CpuIVector seq_vec(sequence.dims_[0],
reinterpret_cast<int*>(sequence.getData()));
CHECK_EQ(out_grad_mat->getWidth(), in_grad_mat->getWidth() * context_length);
CHECK(out_grad_mat);
size_t input_dim = in_grad_mat ? in_grad_mat->getWidth()
: w_grad_mat ? w_grad_mat->getWidth() : 0;
CHECK_EQ(out_grad_mat->getWidth(), input_dim * context_length);
const int* starts = seq_vec.getData();
size_t num_sequences = seq_vec.getSize() - 1;
for (size_t i = 0; i < num_sequences; ++i) {
......@@ -226,10 +206,38 @@ public:
CHECK_EQ(1, outputs.size());
CHECK_EQ(0, inouts.size());
ContextProjectionBackward<Device>((Tensor&)outputs[0],
(Tensor&)inputs[0],
(Tensor&)inputs[1],
inputs[2],
CHECK(outputs[0].getData() && inputs[2].getData());
CHECK_EQ(outputs[0].dims_.size(), 2);
CHECK_EQ(inputs[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inputs[2].dims_.size(), 1);
/// dim of input == dim of weight
CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
/// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
auto in_grad_mat =
!inputs[0].getData()
? nullptr
: std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
auto w_grad_mat =
!inputs[1].getData()
? nullptr
: std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[2].dims_[0], reinterpret_cast<int*>(inputs[2].getData()));
ContextProjectionBackward<Device>(out_grad_mat.get(),
in_grad_mat ? in_grad_mat.get() : nullptr,
w_grad_mat ? w_grad_mat.get() : nullptr,
seq_vec,
context_length_,
context_start_,
begin_pad_,
......@@ -264,10 +272,24 @@ public:
CHECK_EQ(2, inputs.size());
CHECK_EQ(1, outputs.size());
CHECK_EQ(0, inouts.size());
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(outputs[0].dims_.size(), 2);
CHECK_EQ(inputs[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_.size(), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
ContextProjectionBackwardData<Device>((Tensor&)outputs[0],
(Tensor&)inputs[0],
inputs[1],
ContextProjectionBackwardData<Device>(out_grad_mat.get(),
in_grad_mat.get(),
seq_vec,
context_length_,
context_start_);
}
......@@ -299,9 +321,22 @@ public:
CHECK_EQ(1, outputs.size());
CHECK_EQ(0, inouts.size());
ContextProjectionBackwardWeight<Device>((Tensor&)outputs[0],
(Tensor&)inputs[0],
inputs[1],
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(outputs[0].dims_.size(), 2);
CHECK_EQ(inputs[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_.size(), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
w_grad_mat.get(),
seq_vec,
context_length_,
context_start_,
total_pad_,
......
......@@ -32,14 +32,13 @@ namespace paddle {
*
*/
template <DeviceType Device>
void ContextProjectionForward(Tensor& output,
const Tensor& input,
const Tensor& weight,
const Tensor& sequence,
void ContextProjectionForward(typename MatrixT<Device>::type* output,
const typename MatrixT<Device>::type* input,
const typename MatrixT<Device>::type* weight,
const typename SequenceT<Device>::type& sequence,
size_t context_length,
int context_start,
size_t begin_pad,
bool is_padding);
size_t begin_pad);
/**
* \brief Context Projection Backward.
......@@ -55,10 +54,10 @@ void ContextProjectionForward(Tensor& output,
*
*/
template <DeviceType Device>
void ContextProjectionBackward(Tensor& out_grad,
Tensor& in_grad,
Tensor& w_grad,
const Tensor& sequence,
void ContextProjectionBackward(typename MatrixT<Device>::type* out_grad,
typename MatrixT<Device>::type* in_grad,
typename MatrixT<Device>::type* w_grad,
const typename SequenceT<Device>::type& seq_vec,
size_t context_length,
int context_start,
size_t begin_pad,
......@@ -66,16 +65,18 @@ void ContextProjectionBackward(Tensor& out_grad,
size_t total_pad);
template <DeviceType Device>
void ContextProjectionBackwardData(Tensor& out_grad,
Tensor& in_grad,
const Tensor& sequence,
void ContextProjectionBackwardData(
typename MatrixT<Device>::type* out_grad,
typename MatrixT<Device>::type* in_grad,
const typename SequenceT<Device>::type& sequence,
size_t context_length,
int context_start);
template <DeviceType Device>
void ContextProjectionBackwardWeight(Tensor& out_grad,
Tensor& w_grad,
const Tensor& sequence,
void ContextProjectionBackwardWeight(
typename MatrixT<Device>::type* out_grad,
typename MatrixT<Device>::type* w_grad,
const typename SequenceT<Device>::type& seq_vec,
size_t context_length,
int context_start,
size_t total_pad,
......
......@@ -75,18 +75,16 @@ __global__ void KeContextProjectionForward(const real* input,
void hl_context_projection_forward(const real* input,
const int* sequence,
real* weight,
const real* weight,
real* output,
int num_sequences,
int input_dim,
int context_length,
int context_start,
int begin_pad,
bool is_padding) {
int begin_pad) {
CHECK_NOTNULL(input);
CHECK_NOTNULL(sequence);
CHECK_NOTNULL(output);
CHECK(!is_padding || weight);
int block_size = 128;
int blocks_x = num_sequences;
......@@ -94,7 +92,7 @@ void hl_context_projection_forward(const real* input,
dim3 threads(block_size, 1);
dim3 grid(blocks_x, blocks_y);
if (is_padding) {
if (weight) {
KeContextProjectionForward<true><<< grid, threads, 0, STREAM_DEFAULT >>>
(input, sequence, weight, output, input_dim,
context_length, context_start, begin_pad);
......@@ -107,31 +105,23 @@ void hl_context_projection_forward(const real* input,
}
template <>
void ContextProjectionForward<DEVICE_TYPE_GPU>(Tensor& output,
const Tensor& input,
const Tensor& weight,
const Tensor& sequence,
void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix* output,
const GpuMatrix* input,
const GpuMatrix* weight,
const GpuIVector& sequence,
size_t context_length,
int context_start,
size_t begin_pad,
bool is_padding) {
CHECK(output.getData() && input.getData() && sequence.getData());
CHECK_EQ(output.dims_.size(), 2);
CHECK_EQ(input.dims_.size(), 2);
CHECK_EQ(weight.dims_.size(), 2);
CHECK_EQ(sequence.dims_.size(), 1);
CHECK_EQ(output.dims_[1], input.dims_[1] * context_length);
hl_context_projection_forward(input.getData(),
reinterpret_cast<int*>(sequence.getData()),
weight.getData(),
output.getData(),
sequence.dims_[0] - 1,
input.dims_[1],
size_t begin_pad) {
CHECK(input && output);
hl_context_projection_forward(input->getData(),
sequence.getData(),
weight ? weight->getData() : nullptr,
output->getData(),
sequence.getSize() - 1,
input->getWidth(),
context_length,
context_start,
begin_pad,
is_padding);
begin_pad);
}
__global__ void KeContextProjectionBackwardData(real* out_grad,
......@@ -200,22 +190,17 @@ void hl_context_projection_backward_data(real* out_grad,
}
template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(Tensor& out_grad,
Tensor& in_grad,
const Tensor& sequence,
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix* out_grad,
GpuMatrix* in_grad,
const GpuIVector& sequence,
size_t context_length,
int context_start) {
CHECK(in_grad.getData() && out_grad.getData() && sequence.getData());
CHECK_EQ(out_grad.dims_.size(), 2);
CHECK_EQ(in_grad.dims_.size(), 2);
CHECK_EQ(sequence.dims_.size(), 1);
CHECK_EQ(out_grad.dims_[1], in_grad.dims_[1] * context_length);
hl_context_projection_backward_data(out_grad.getData(),
reinterpret_cast<int*>(sequence.getData()),
in_grad.getData(),
sequence.dims_[0] - 1,
in_grad.dims_[1],
CHECK(in_grad && out_grad);
hl_context_projection_backward_data(out_grad->getData(),
sequence.getData(),
in_grad->getData(),
sequence.getSize() - 1,
in_grad->getWidth(),
context_length,
context_start);
}
......@@ -320,24 +305,20 @@ void hl_context_projection_backward_weight(real* out_grad,
}
template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(Tensor& out_grad,
Tensor& w_grad,
const Tensor& sequence,
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
GpuMatrix* out_grad,
GpuMatrix* w_grad,
const GpuIVector& seq_vec,
size_t context_length,
int context_start,
size_t total_pad,
size_t begin_pad) {
CHECK(w_grad.getData() && out_grad.getData() && sequence.getData());
CHECK_EQ(out_grad.dims_.size(), 2);
CHECK_EQ(w_grad.dims_.size(), 2);
CHECK_EQ(sequence.dims_.size(), 1);
CHECK_EQ(out_grad.dims_[1], w_grad.dims_[1] * context_length);
hl_context_projection_backward_weight(out_grad.getData(),
reinterpret_cast<int*>(sequence.getData()),
w_grad.getData(),
sequence.dims_[0] - 1,
w_grad.dims_[1],
CHECK(out_grad && w_grad);
hl_context_projection_backward_weight(out_grad->getData(),
seq_vec.getData(),
w_grad->getData(),
seq_vec.getSize() - 1,
w_grad->getWidth(),
total_pad,
context_length,
context_start,
......@@ -345,24 +326,27 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(Tensor& out_grad,
}
template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(Tensor& out_grad,
Tensor& in_grad,
Tensor& w_grad,
const Tensor& sequence,
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix* out_grad,
GpuMatrix* in_grad,
GpuMatrix* w_grad,
const GpuIVector& sequence,
size_t context_length,
int context_start,
size_t begin_pad,
bool is_padding,
size_t total_pad) {
if (in_grad.getData()) {
ContextProjectionBackwardData<DEVICE_TYPE_GPU>(out_grad,
CHECK(out_grad);
if (in_grad) {
ContextProjectionBackwardData<DEVICE_TYPE_GPU>(
out_grad,
in_grad,
sequence,
context_length,
context_start);
}
if (is_padding && w_grad.getData()) {
ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(out_grad,
if (is_padding && w_grad) {
ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
out_grad,
w_grad,
sequence,
context_length,
......
......@@ -32,8 +32,7 @@ void testMatrixProjectionForward(int context_start,
FuncConfig()
.set("context_length", context_length)
.set("context_start", context_start)
.set("begin_pad", std::max(0, -context_start))
.set("is_padding", is_padding));
.set("begin_pad", std::max(0, -context_start)));
CpuMatrix cpu_in(batch_size, input_dim);
cpu_in.randomizeUniform();
......
......@@ -40,6 +40,19 @@ struct MatrixT<DEVICE_TYPE_GPU> {
using type = GpuMatrix;
};
template <DeviceType Device>
struct SequenceT;
template <>
struct SequenceT<DEVICE_TYPE_CPU> {
using type = CpuIVector;
};
template <>
struct SequenceT<DEVICE_TYPE_GPU> {
using type = GpuIVector;
};
typedef std::vector<size_t> Dims;
class Tensor {
......
......@@ -53,8 +53,7 @@ bool ContextProjection::init() {
FuncConfig()
.set("context_length", context_length)
.set("context_start", context_start)
.set("begin_pad", beginPad_)
.set("is_padding", is_padding));
.set("begin_pad", beginPad_));
createFunction(backward_,
"ContextProjectionBackward",
FuncConfig()
......@@ -112,7 +111,7 @@ void ContextProjection::forward() {
size_t dim = out_->value->getWidth();
CHECK_EQ(dim, input_dim * config_.context_length());
size_t batch_size = in_->value->getHeight();
CHECK_EQ(batch_size, out_->value->getHeight());
CHECK_EQ(forward_.size(), 1) << "Only one forward function here";
REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
bool is_padding = config_.trainable_padding();
......@@ -120,12 +119,6 @@ void ContextProjection::forward() {
auto w_ptr =
state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
auto start_pos = in_->sequenceStartPositions;
/// if use state_ as weight_, w_ptr already has mem, so padding true
forward_[0]->init(FuncConfig()
.set("context_length", config_.context_length())
.set("context_start", config_.context_start())
.set("begin_pad", beginPad_)
.set("is_padding", state_ ? true : is_padding));
forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}),
Tensor(w_ptr ? w_ptr->getData() : nullptr,
Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
......@@ -161,6 +154,7 @@ void ContextProjection::backward(const UpdateCallback& callback) {
CHECK_EQ(dim, input_dim * config_.context_length());
size_t batch_size = in_->value->getHeight();
CHECK_EQ(batch_size, out_->value->getHeight());
CHECK_EQ(backward_.size(), 1) << "Only one backward function here";
REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
bool is_padding = config_.trainable_padding();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册