提交 23ac0b78 编写于 作者: X xutianbing

merge Daoyuan's FuncArgs, pass the ContextProjection test.

上级 1482ec43
......@@ -24,7 +24,7 @@ if(WITH_TESTING)
add_simple_unittest(TensorTypeTest)
add_simple_unittest(BufferArgTest)
add_simple_unittest(FunctionTest)
# add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(ContextProjectionOpTest)
endif()
endif()
......
......@@ -125,11 +125,11 @@ public:
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto out_mat = outputs[0].matrix<Device>();
auto in_mat = inputs[0].matrix<Device>();
auto w_mat = !inputs[1].data()
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[1].matrix<Device>();
auto seq_vec = inputs[2].vector<int, Device>();
const auto in_mat = inputs[0].matrix<Device>();
const auto w_mat =
!inputs[1].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[1].matrix<Device>();
const auto seq_vec = inputs[2].vector<int, Device>();
ContextProjectionForward<Device>(out_mat,
in_mat,
w_mat,
......@@ -150,7 +150,6 @@ private:
*
*/
template <>
<<<<<<< HEAD
void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
CpuMatrix& in_grad_mat,
CpuMatrix& w_grad_mat,
......@@ -174,7 +173,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
int64_t pad_size =
std::min(starts[i] - begin, starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) {
MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size);
MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(starts[i], pad_size);
MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size);
sub->addAtOffset(*mat, j * input_dim);
}
......@@ -185,8 +185,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
int64_t pad_size =
std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) {
MatrixPtr mat =
out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size);
MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(starts[i + 1] - pad_size, pad_size);
MatrixPtr sub = w_grad_mat.subMatrix(
begin_pad + context_start + j - pad_size, pad_size);
sub->addAtOffset(*mat, j * input_dim);
......@@ -197,7 +197,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
if (end <= begin) continue;
if (!in_grad_mat) continue;
MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin);
MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin);
MatrixPtr dst = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(dst_begin, dst_end - dst_begin);
src->addAtOffset(*dst, j * input_dim);
}
}
......@@ -207,10 +208,10 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
* Context Projection Backward Function.
* Update the weight gradient and input layer gradient with backprop
*
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
* \param inouts[0] input grad.
* \param inouts[1] weight grad.
* \param inputs[0] input sequence.
* \param inputs[1] output layer grad.
* \param outputs[0] input layer grad.
* \param outputs[1] weight grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardFunc : public FunctionBase {
......@@ -224,32 +225,34 @@ public:
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size());
CHECK_EQ((size_t)1, outputs.size());
CHECK_EQ((size_t)2, inputs.size());
CHECK_EQ((size_t)2, outputs.size());
CHECK(outputs[0].data() && inputs[2].data());
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
CHECK(inputs[0].data() && inputs[1].data());
CHECK_EQ(inputs[0].shape().ndims(), (size_t)1);
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[2].shape().ndims(), (size_t)1);
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
/// dim of input == dim of weight
CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
/// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
/// dim of input grad == dim of weight
CHECK_EQ(outputs[0].shape()[1], outputs[1].shape()[1]);
/// input and output grad has the same batch_size
CHECK_EQ(outputs[0].shape()[0], inputs[1].shape()[0]);
/// dim of output val = dim of input grad * context_length
CHECK_EQ(inputs[1].shape()[1], outputs[0].shape()[1] * context_length_);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
CHECK_EQ(outputs[1].getArgType(), ADD_TO);
auto out_grad_mat = outputs[0].matrix<Device>();
const auto seq_vec = inputs[0].vector<int, Device>();
const auto out_grad_mat = inputs[1].matrix<Device>();
auto in_grad_mat =
!inputs[0].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[0].matrix<Device>();
auto w_grad_mat = !inputs[1].data()
!outputs[0].data()
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: outputs[0].matrix<Device>();
auto w_grad_mat = !outputs[1].data()
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[1].matrix<Device>();
auto seq_vec = inputs[2].vector<int, Device>();
: outputs[1].matrix<Device>();
ContextProjectionBackward<Device>(out_grad_mat,
in_grad_mat,
w_grad_mat,
......@@ -269,112 +272,6 @@ private:
size_t total_pad_;
};
#if 0
/**
* Context Projection Backward Data Function.
* Update gradient of the input layer with backprop.
*
* \param inouts[0] input grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
context_length_ = config.get<size_t>("context_length");
context_start_ = config.get<int>("context_start");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
/// input and output grad have the same batch_size
CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
typename SequenceT<Device>::type seq_vec(
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardData<Device>(out_grad_mat.get(),
in_grad_mat.get(),
seq_vec,
context_length_,
context_start_);
}
private:
size_t context_length_;
int context_start_;
};
/**
* Context Projection Backward Weight Function.
* Update weight gradient with backprop.
*
* \param inouts[0] weight grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
context_length_ = config.get<size_t>("context_length");
context_start_ = config.get<int>("context_start");
begin_pad_ = config.get<size_t>("begin_pad");
total_pad_ = config.get<size_t>("total_pad");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
typename SequenceT<Device>::type seq_vec(
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
w_grad_mat.get(),
seq_vec,
context_length_,
context_start_,
total_pad_,
begin_pad_);
}
private:
size_t context_length_;
int context_start_;
size_t begin_pad_;
size_t total_pad_;
};
#endif
REGISTER_TYPED_FUNC(ContextProjectionForward,
CPU,
ContextProjectionForwardFunc);
......@@ -388,13 +285,5 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC(ContextProjectionBackward,
GPU,
ContextProjectionBackwardFunc);
#if 0
REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
GPU,
ContextProjectionBackwardDataFunc);
REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
GPU,
ContextProjectionBackwardWeightFunc);
#endif
#endif
} // namespace paddle
......@@ -56,7 +56,7 @@ void ContextProjectionForward(
*/
template <DeviceType DType>
void ContextProjectionBackward(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
......
......@@ -217,7 +217,6 @@ void hl_context_projection_backward_data(const real* out_grad,
}
template <>
<<<<<<< HEAD
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
const GpuIVector& sequence,
......
......@@ -56,24 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out.randomizeUniform();
gpu_out.copyFrom(cpu_out);
compare.getCpuFunction()->calc(
{Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{},
{Tensor(cpu_out.getData(),
Dims{batch_size, input_dim * context_length})});
compare.getGpuFunction()->calc(
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{},
{Tensor(gpu_out.getData(),
Dims{batch_size, input_dim * context_length})});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(cpu_in);
cpu_inputs.addArg(cpu_weight ? *cpu_weight
: CpuMatrix(nullptr, 0, input_dim));
cpu_inputs.addArg(*cpu_seq);
cpu_outputs.addArg(cpu_out, ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(gpu_in);
gpu_inputs.addArg(gpu_weight ? *gpu_weight
: GpuMatrix(nullptr, 0, input_dim));
gpu_inputs.addArg(*gpu_seq);
gpu_outputs.addArg(gpu_out, ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckEqual(cpu_out, gpu_out);
}
......@@ -119,25 +120,25 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad->copyFrom(*cpu_w_grad);
}
compare.getCpuFunction()->calc(
{Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()}),
Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{},
{Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
Dims{pad, input_dim})});
compare.getGpuFunction()->calc(
{Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()}),
Tensor(gpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{},
{Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
Dims{pad, input_dim})});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(*cpu_seq);
cpu_inputs.addArg(cpu_out_grad);
cpu_outputs.addArg(cpu_in_grad, ADD_TO);
cpu_outputs.addArg(
cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(*gpu_seq);
gpu_inputs.addArg(gpu_out_grad);
gpu_outputs.addArg(gpu_in_grad, ADD_TO);
gpu_outputs.addArg(
gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
if (is_padding) {
......
......@@ -27,66 +27,28 @@ public:
gpu->init(config);
}
void cmpWithArg(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) {
void cmpWithArg(const BufferArgs& inputs,
const BufferArgs& outputs,
const BufferArgs& inouts) {
// init cpu and gpu arguments
auto initArgs = [=](
Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
for (const auto arg : inArgs) {
size_t size = sizeof(real);
for (const auto dim : arg.dims_) {
size *= dim;
}
if (arg.getData()) {
// todo(tianbing), waste unnecessary mem here
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
// already init outside
} else {
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuArgs.emplace_back(
Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
gpuArgs.emplace_back(
Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
// will use an api to refactor this code.
CpuVector cpuVector(size / sizeof(real),
(real*)cpuArgs.back().getData());
GpuVector gpuVector(size / sizeof(real),
(real*)gpuArgs.back().getData());
cpuVector.uniform(0.001, 1);
gpuVector.copyFrom(cpuVector);
}
}
BufferArgs& cpuArgs, BufferArgs& gpuArgs, const BufferArgs& inArgs) {
/// leave it empty to pass the compile of ContextProjectionTest
/// Daoyuan is working on FunctionTest
/// and I will further merge with it
};
initArgs(cpuInputs, gpuInputs, inputs);
initArgs(cpuOutputs, gpuOutputs, outputs);
initArgs(cpuInouts, gpuInouts, inouts);
// function calculate
cpu->calc(cpuInputs, cpuOutputs, cpuInouts);
gpu->calc(gpuInputs, gpuOutputs, gpuInouts);
cpu->calc(cpuInputs, cpuOutputs);
gpu->calc(gpuInputs, gpuOutputs);
// check outputs and inouts
auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) {
for (size_t i = 0; i < cpuArgs.size(); i++) {
auto cpu = cpuArgs[i];
auto gpu = gpuArgs[i];
size_t size = 1;
for (auto dim : cpu.dims_) {
size *= dim;
}
CpuVector cpuVector(size, (real*)cpu.getData());
GpuVector gpuVector(size, (real*)gpu.getData());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
auto checkArgs = [=](const BufferArgs& cpuArgs, const BufferArgs& gpuArgs) {
/// leave it open
};
checkArgs(cpuOutputs, gpuOutputs);
checkArgs(cpuInouts, gpuInouts);
}
std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; }
......@@ -98,12 +60,12 @@ protected:
std::shared_ptr<FunctionBase> gpu;
std::vector<CpuMemHandlePtr> cpuMemory;
std::vector<GpuMemHandlePtr> gpuMemory;
Arguments cpuInputs;
Arguments cpuOutputs;
Arguments cpuInouts;
Arguments gpuInputs;
Arguments gpuOutputs;
Arguments gpuInouts;
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
BufferArgs cpuInouts;
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
BufferArgs gpuInouts;
};
} // namespace paddle
......@@ -166,13 +166,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(CpuMatrix(
in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim));
inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
w_ptr ? w_ptr->getHeight() : 0,
input_dim));
inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
outputs.addArg(*out_->grad, ADD_TO);
inputs.addArg(*out_->grad);
outputs.addArg(
CpuMatrix(
in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim),
ADD_TO);
outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
w_ptr ? w_ptr->getHeight() : 0,
input_dim),
ADD_TO);
backward_[0]->calc(inputs, outputs);
if (config_.trainable_padding()) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册