提交 86fa8c05 编写于 作者: X xutianbing

Wei Xu's comments, set up right inouts.

上级 1dd972f9
......@@ -70,10 +70,11 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
}
/**
* \param outputs[0] output value.
*
* \param inputs[0] input value.
* \param inputs[1] input weight.
* \param inputs[2] input sequence.
* \param outputs[0] output value.
*/
template <DeviceType Device>
class ContextProjectionForwardFunc : public FunctionBase {
......@@ -123,7 +124,8 @@ private:
};
template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
<<<<<<< HEAD
void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
CpuMatrix& in_grad_mat,
CpuMatrix& w_grad_mat,
const CpuIVector& seq_vec,
......@@ -176,10 +178,10 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
}
/**
* \param inputs[0] input grad.
* \param inputs[1] weight grad.
* \param inputs[2] input sequence.
* \param outputs[0] output value.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
* \param inouts[0] input grad.
* \param inouts[1] weight grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardFunc : public FunctionBase {
......@@ -192,6 +194,7 @@ public:
total_pad_ = config.get<size_t>("total_pad");
}
<<<<<<< HEAD
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size());
CHECK_EQ((size_t)1, outputs.size());
......@@ -210,6 +213,42 @@ public:
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
=======
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(2, inouts.size());
CHECK(inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inouts[1].dims_.size(), 2);
/// dim of input grad == dim of weight grad
CHECK_EQ(inouts[0].dims_[1], inouts[1].dims_[1]);
/// input grad and output grad have the same batch_size
CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
/// dim of output = dim of input * context_length
CHECK_EQ(inputs[1].dims_[1], inputs[0].dims_[1] * context_length_);
typename SequenceT<Device>::type seq_vec(
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto in_grad_mat =
!inouts[0].getData()
? nullptr
: std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
auto w_grad_mat =
!inouts[1].getData()
? nullptr
: std::make_shared<typename MatrixT<Device>::type>(
inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]);
>>>>>>> Wei Xu's comments, set up right inouts.
auto out_grad_mat = outputs[0].matrix<Device>();
auto in_grad_mat =
......@@ -240,9 +279,9 @@ private:
#if 0
/**
* \param inputs[0] input grad.
* \param inputs[1] input sequence.
* \param outputs[0] output grad.
* \param inouts[0] input grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase {
......@@ -255,23 +294,24 @@ public:
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size()));
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
/// input and output grad have the same batch_size
CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardData<Device>(out_grad_mat.get(),
in_grad_mat.get(),
......@@ -286,9 +326,9 @@ private:
};
/**
* \param inputs[0] weight grad.
* \param inputs[1] input sequence.
* \param outputs[0] output grad.
* \param inouts[0] weight grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase {
......@@ -303,22 +343,22 @@ public:
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size()));
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
w_grad_mat.get(),
......
......@@ -21,7 +21,7 @@ namespace paddle {
/**
* \brief Context Projection Forward.
*
* \param[out] outputs output data.
* \param[in,out] outputs output data.
* \param[in] input input data.
* \param[in] weight input weight.
* \param[in] sequence input sequence.
......@@ -68,7 +68,7 @@ void ContextProjectionBackward(
template <DeviceType DType>
void ContextProjectionBackwardData(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
const typename Tensor<int, DType>::Vector& sequence,
size_t context_length,
......@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
template <DeviceType DType>
void ContextProjectionBackwardWeight(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
size_t context_length,
......
......@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
begin_pad);
}
__global__ void KeContextProjectionBackwardData(real* out_grad,
__global__ void KeContextProjectionBackwardData(const real* out_grad,
const int* sequence,
real* in_grad,
int input_dim,
size_t input_dim,
int context_length,
int context_start) {
int idx = threadIdx.x;
......@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
real value = 0;
int instances = seq_end - seq_start + context_length - 1;
out_grad += seq_start * input_dim * context_length;
auto out = const_cast<real*>(out_grad);
out += seq_start * input_dim * context_length;
in_grad += seq_start * input_dim;
for (int k = 0; k <= input_dim / block_size; k++) {
if (idx < input_dim) {
......@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
int outx = (i - context_length) < 0 ? i : (context_length - 1);
int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
real* output_r =
out_grad + outy * input_dim * context_length + outx * input_dim;
out + outy * input_dim * context_length + outx * input_dim;
for (int j = outy; j < seq_end - seq_start; j++) {
value += output_r[idx];
if (j - outy == outx) break;
......@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
* @param[in] context_start context start.
*
*/
void hl_context_projection_backward_data(real* out_grad,
void hl_context_projection_backward_data(const real* out_grad,
const int* sequence,
real* input_grad,
size_t num_sequences,
......@@ -216,7 +217,8 @@ void hl_context_projection_backward_data(real* out_grad,
}
template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
<<<<<<< HEAD
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
const GpuIVector& sequence,
size_t context_length,
......@@ -231,7 +233,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
}
template<int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad,
__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
const int* sequence,
real* w_grad,
int num_sequences,
......@@ -254,7 +256,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
int seq_start = sequence[seqId];
int seq_end = sequence[seqId+1];
output_r = out_grad + seq_start * w_dim * context_length;
output_r = const_cast<real*>(out_grad)
+ seq_start * w_dim * context_length;
if (context_start < 0) {
if (padId + context_start < 0) {
......@@ -318,7 +321,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
* beginning.
*
*/
void hl_context_projection_backward_weight(real* out_grad,
void hl_context_projection_backward_weight(const real* out_grad,
const int* sequence,
real* w_grad,
size_t num_sequences,
......@@ -346,7 +349,7 @@ void hl_context_projection_backward_weight(real* out_grad,
template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
GpuMatrix& out_grad,
const GpuMatrix& out_grad,
GpuMatrix& w_grad,
const GpuIVector& seq_vec,
size_t context_length,
......@@ -365,7 +368,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
}
template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
GpuMatrix& w_grad,
const GpuIVector& sequence,
......
......@@ -62,16 +62,18 @@ void testMatrixProjectionForward(int context_start,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
{},
{Tensor(cpu_out.getData(),
Dims{batch_size, input_dim * context_length})});
compare.getGpuFunction()->calc(
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
{},
{Tensor(gpu_out.getData(),
Dims{batch_size, input_dim * context_length})});
autotest::TensorCheckEqual(cpu_out, gpu_out);
}
......@@ -118,24 +120,24 @@ void testMatrixProjectionBackward(int context_start,
}
compare.getCpuFunction()->calc(
{Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()}),
Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{},
{Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
Dims{pad, input_dim})});
compare.getGpuFunction()->calc(
{Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()}),
Tensor(gpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{},
{Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
Dims{pad, input_dim})});
autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
if (is_padding) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册