diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
index 349b21e7e64064804c5d0ee26e82698925832c35..0dc7792f646457c22ee4791f18814afaa3809f7b 100644
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -190,7 +190,7 @@ public:
       : BufferArg(VALUE_TYPE_INT32, shape, argType) {
     bufferType_ = TENSOR_SEQUENCE_ID;
     CHECK_EQ(shape_.ndims(), 1UL);
-    CHECK_GT(shape_[0], 1UL);
+    CHECK_GE(shape_[0], 1UL);
     numSeqs_ = shape_[0] - 1;
   }
 
@@ -226,7 +226,8 @@ public:
   SequenceArg(ValueType valueType,
               const TensorShape& shape,
               ArgType argType = UNSPECIFIED)
-      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {
+      : BufferArg(valueType, shape, argType),
+        startPositions_(TensorShape({shape[0]})) {
     bufferType_ = TENSOR_SEQUENCE_DATA;
   }
 
diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp
index 6cd4e4abee8fccf3a4745b0bfc6701df4ddfa5c0..b87750b74247bd0eb822340bc5a85d41b86ecec2 100644
--- a/paddle/function/ContextProjectionOp.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -108,26 +108,23 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK(1 == inputs.size() || 2 == inputs.size());
-    CHECK_EQ((size_t)1, outputs.size());
+    CHECK(1UL == inputs.size() || 2UL == inputs.size());
+    CHECK_EQ(1UL, outputs.size());
     CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
         << "SequenceArg required here";
     const auto val_seqs = dynamic_cast<const SequenceArg&>(inputs[0]);
     auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
 
     CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data());
-    CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
-    CHECK_EQ(val_seqs.shape().ndims(), (size_t)2);
-    CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1);
-    if (2 == inputs.size()) {
-      CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
-    }
+    CHECK_EQ(out_seq.shape().ndims(), 2UL);
+    CHECK_EQ(val_seqs.shape().ndims(), 2UL);
     /// dim of output = dim of input * context_length
     CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_);
     /// input and output has the same batch_size
     CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]);
-    /// dim of input == dim of weight
-    if (2 == inputs.size()) {
+    if (2UL == inputs.size()) {
+      CHECK_EQ(inputs[1].shape().ndims(), 2UL);
+      /// dim of input == dim of weight
      CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]);
     }
 
@@ -135,10 +132,11 @@ public:
     auto out_mat = out_seq.matrix<Device>();
     const auto in_mat = val_seqs.matrix<Device>();
     const auto w_mat =
-        (2 == inputs.size())
+        (2UL == inputs.size() && inputs[1].data())
             ? inputs[1].matrix<Device>()
             : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
     const auto seq_vec = val_seqs.getSequenceId().vector<int, Device>();
+
     ContextProjectionForward<Device>(out_mat,
                                      in_mat,
                                      w_mat,
@@ -235,36 +233,40 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ((size_t)1, inputs.size());
-    CHECK_EQ((size_t)2, outputs.size());
+    CHECK_EQ(1UL, inputs.size());
+    CHECK(1UL == outputs.size() || 2UL == outputs.size());
     CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
         << "SequenceArg required here";
     const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
     auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
     CHECK(in_seq.data() && in_seq.getSequenceId().data());
-    CHECK_EQ(in_seq.shape().ndims(), (size_t)2);
-    CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1);
-    CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
-    CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1);
-    CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
+    CHECK_EQ(in_seq.shape().ndims(), 2UL);
+    CHECK_EQ(out_seq.shape().ndims(), 2UL);
+    CHECK_EQ(out_seq.getSequenceId().shape().ndims(), 1UL);
 
-    /// dim of input grad == dim of weight
-    CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
     /// input and output grad has the same batch_size
     CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]);
     /// dim of output grad = dim of input grad * context_length
     CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_);
     CHECK_EQ(out_seq.getArgType(), ADD_TO);
-    CHECK_EQ(outputs[1].getArgType(), ADD_TO);
+
+    if (2UL == outputs.size()) {
+      CHECK_EQ(outputs[1].shape().ndims(), 2UL);
+      /// dim of input grad == dim of weight
+      CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
+      CHECK_EQ(outputs[1].getArgType(), ADD_TO);
+    }
 
     const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
     const auto out_grad_mat = in_seq.matrix<Device>();
     auto in_grad_mat =
         !out_seq.data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
                         : out_seq.matrix<Device>();
-    auto w_grad_mat = !outputs[1].data()
-                          ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                          : outputs[1].matrix<Device>();
+    auto w_grad_mat =
+        (2UL == outputs.size() && outputs[1].data())
+            ? outputs[1].matrix<Device>()
+            : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
+
     ContextProjectionBackward<Device>(out_grad_mat,
                                       in_grad_mat,
                                       w_grad_mat,
@@ -304,17 +306,17 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ(1, static_cast<int>(inputs.size()));
-    CHECK_EQ(1, static_cast<int>(outputs.size()));
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
     CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
         << "SequenceArg required here";
     const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
     const auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
 
     CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data());
-    CHECK_EQ(static_cast<int>(out_seq.shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
+    CHECK_EQ(out_seq.shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL);
     /// output layer grad dim == input layer grad dim * context_length_
     CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_);
     /// input and output has the same batch_size
@@ -355,14 +357,14 @@ public:
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ(1, static_cast<int>(inputs.size()));
-    CHECK_EQ(1, static_cast<int>(outputs.size()));
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
     CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here";
     const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
     CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data());
-    CHECK_EQ(static_cast<int>(outputs[0].shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
-    CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
+    CHECK_EQ(outputs[0].shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.shape().ndims(), 2UL);
+    CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL);
     CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]);
     /// output layer grad dim == weight dim * context_length_
     CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_);
diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h
index 2bdd47e4e9b02483c2c5af82bf00c4e55d68f93e..6f7d936379a5378e6fd85dd86618d1b6094bd14f 100644
--- a/paddle/function/ContextProjectionOp.h
+++ b/paddle/function/ContextProjectionOp.h
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-
 #include "Function.h"
 
 namespace paddle {
diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp
index c9db2ff8008e0bb0fa04370fb7b3ecd7641d2062..0f5d6a848d406d14984a0b6edad8192dab42e88b 100644
--- a/paddle/function/ContextProjectionOpTest.cpp
+++ b/paddle/function/ContextProjectionOpTest.cpp
@@ -28,55 +28,26 @@ void testMatrixProjectionForward(int context_start,
       std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare compare("ContextProjectionForward",
-                          FuncConfig()
-                              .set("context_length", context_length)
-                              .set("context_start", context_start)
-                              .set("begin_pad", std::max(0, -context_start)));
-
-  CpuMatrix cpu_in(batch_size, input_dim);
-  cpu_in.randomizeUniform();
-  GpuMatrix gpu_in(batch_size, input_dim);
-  gpu_in.copyFrom(cpu_in);
-  auto cpu_weight =
-      is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
-  auto gpu_weight =
-      is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
-  if (is_padding) {
-    cpu_weight->randomizeUniform();
-    gpu_weight->copyFrom(*cpu_weight);
+  FunctionCompare test("ContextProjectionForward",
+                       FuncConfig()
+                           .set("context_length", context_length)
+                           .set("context_start", context_start)
+                           .set("begin_pad", std::max(0, -context_start)));
+
+  // prepare input arguments
+  test.addSequence(SequenceIdArg(TensorShape{batch_size}));
+  test.addInputs(
+      SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim}));
+  if (is_padding) {  // weight
+    test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim}));
   }
-  IVectorPtr cpu_seq;
-  generateSequenceStartPositions(batch_size, cpu_seq);
-  IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
-  gpu_seq->copyFrom(*cpu_seq);
-
-  CpuMatrix cpu_out(batch_size, input_dim * context_length);
-  GpuMatrix gpu_out(batch_size, input_dim * context_length);
-  cpu_out.randomizeUniform();
-  gpu_out.copyFrom(cpu_out);
-
-  BufferArgs cpu_inputs;
-  BufferArgs cpu_outputs;
-  cpu_inputs.addArg(cpu_in, *cpu_seq);
-  if (cpu_weight) {
-    cpu_inputs.addArg(*cpu_weight, *cpu_seq);
-  }
-  cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
-
-  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+  test.addOutputs(
+      SequenceArg(VALUE_TYPE_FLOAT,
+                  TensorShape{batch_size, input_dim * context_length}),
+      ADD_TO);
 
-  BufferArgs gpu_inputs;
-  BufferArgs gpu_outputs;
-  gpu_inputs.addArg(gpu_in, *gpu_seq);
-  if (gpu_weight) {
-    gpu_inputs.addArg(*gpu_weight, *gpu_seq);
-  }
-  gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
-
-  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
-
-  autotest::TensorCheckEqual(cpu_out, gpu_out);
+  // run Function
+  test.run();
 }
 
 void testMatrixProjectionBackward(int context_start,
@@ -88,63 +59,31 @@ void testMatrixProjectionBackward(int context_start,
       std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare compare("ContextProjectionBackward",
-                          FuncConfig()
-                              .set("context_length", context_length)
-                              .set("context_start", context_start)
-                              .set("begin_pad", std::max(0, -context_start))
-                              .set("is_padding", is_padding)
-                              .set("total_pad", pad));
-
-  CpuMatrix cpu_in_grad(batch_size, input_dim);
-  cpu_in_grad.randomizeUniform();
-  GpuMatrix gpu_in_grad(batch_size, input_dim);
-  gpu_in_grad.copyFrom(cpu_in_grad);
-
-  CpuMatrix cpu_out_grad(batch_size, input_dim * context_length);
-  cpu_out_grad.randomizeUniform();
-  GpuMatrix gpu_out_grad(batch_size, input_dim * context_length);
-  gpu_out_grad.copyFrom(cpu_out_grad);
-
-  IVectorPtr cpu_seq;
-  generateSequenceStartPositions(batch_size, cpu_seq);
-  IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
-  gpu_seq->copyFrom(*cpu_seq);
-
-  auto cpu_w_grad =
-      is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
-  auto gpu_w_grad =
-      is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
-  if (is_padding) {
-    cpu_w_grad->randomizeUniform();
-    gpu_w_grad->copyFrom(*cpu_w_grad);
+  FunctionCompare test("ContextProjectionBackward",
+                       FuncConfig()
+                           .set("context_length", context_length)
+                           .set("context_start", context_start)
+                           .set("begin_pad", std::max(0, -context_start))
+                           .set("is_padding", is_padding)
+                           .set("total_pad", pad));
+
+  // prepare input arguments
+  test.addSequence(SequenceIdArg(TensorShape{batch_size}));
+  test.addInputs(SequenceArg(
+      VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim * context_length}));
+  test.addOutputs(
+      SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim}),
+      ADD_TO);
+  if (is_padding) {  // weight
+    test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim}),
+                    ADD_TO);
   }
 
-  BufferArgs cpu_inputs;
-  BufferArgs cpu_outputs;
-  cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
-  cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
-  cpu_outputs.addArg(
-      cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
-
-  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
-
-  BufferArgs gpu_inputs;
-  BufferArgs gpu_outputs;
-  gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
-  gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
-  gpu_outputs.addArg(
-      gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
-
-  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
-
-  autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
-  if (is_padding) {
-    autotest::TensorCheckErr(*cpu_w_grad, *gpu_w_grad);
-  }
+  // run Function
+  test.run();
 }
 
-TEST(ContextProjection, projection) {
+TEST(ContextProjection, Projection) {
   for (auto context_start : {-5, -3, -1, 0, 3}) {
     for (auto context_length : {1, 2, 5, 7}) {
       for (auto trainable_padding : {false, true}) {
diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h
index 00f59f97d4c8c1076abe00866b786615a9801a5d..0cfafdb27f55a3e6617d31a968d2a05fc77f5b46 100644
--- a/paddle/function/FunctionTest.h
+++ b/paddle/function/FunctionTest.h
@@ -69,6 +69,54 @@ public:
         gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
   }
 
+  // assume one copy of sequence is shared by different SequenceArgs
+  void addSequence(const SequenceIdArg& input) {
+    CHECK_EQ(input.shape().ndims(), 1UL);
+    size_t batchSize = input.shape()[0];
+    size_t numSeqs = batchSize / 10 + 1;
+    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
+    cpuSeq_ = std::make_shared<SequenceIdArg>(cpuMemory_.back()->getBuf(),
+                                              TensorShape{numSeqs + 1});
+    gpuSeq_ = std::make_shared<SequenceIdArg>(gpuMemory_.back()->getBuf(),
+                                              TensorShape{numSeqs + 1});
+    /// init sequence Id
+    initArg(*cpuSeq_, batchSize);
+
+    // todo(tianbing), delete it
+    CHECK_EQ(cpuSeq_->shape().getElements(), cpuSeq_->numSeqs() + 1);
+
+    CpuIVector cpuSeq(cpuSeq_->shape().getElements(), (int*)cpuSeq_->data());
+    GpuIVector gpuSeq(gpuSeq_->shape().getElements(), (int*)gpuSeq_->data());
+    gpuSeq.copyFrom(cpuSeq);
+  }
+
+  void addInputs(const SequenceArg& input) {
+    CHECK_EQ(input.shape().ndims(), 2UL);
+    size_t batchSize = input.shape()[0];
+    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+      addSequence(SequenceIdArg(TensorShape{batchSize}));
+    }
+
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    /// SequenceArg
+    cpuInputs_.emplace_back(
+        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
+                                      input.valueType(),
+                                      input.shape(),
+                                      *cpuSeq_));
+    gpuInputs_.emplace_back(
+        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
+                                      input.valueType(),
+                                      input.shape(),
+                                      *gpuSeq_));
+  }
+
   // output need only contains shape, do not contains data.
   void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
     size_t size =
@@ -116,24 +164,31 @@ public:
         std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
   }
 
-  void addInputs(const SequenceArg& input) {
-    size_t batchSize = input.shape()[0];
-    size_t numSeqs = batchSize / 10 + 1;
-
-    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
-
-    TensorShape seqsId({numSeqs + 1});
-    // void* cpuBuffer = cpuMemory_.back()->getBuf();
-    // void* gpuBuffer = gpuMemory_.back()->getBuf();
+  void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) {
+    CHECK_EQ(output.shape().ndims(), 2UL);
+    size_t batchSize = output.shape()[0];
+    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+      addSequence(SequenceIdArg(TensorShape{batchSize}));
+    }
 
     size_t size =
-        input.shape().getElements() * sizeOfValuType(input.valueType());
+        output.shape().getElements() * sizeOfValuType(output.valueType());
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
     gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
 
-    // TODO: need be implemented.
+    /// SequenceArg
+    cpuOutputs_.emplace_back(
+        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
+                                      output.valueType(),
+                                      output.shape(),
+                                      *cpuSeq_,
+                                      argType));
+    gpuOutputs_.emplace_back(
+        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
+                                      output.valueType(),
+                                      output.shape(),
+                                      *gpuSeq_,
+                                      argType));
   }
 
   void addInputs(const SparseMatrixArg& input) {
@@ -193,14 +248,44 @@ public:
   std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }
 
 protected:
+  // only init cpu argument, gpu argument copy from cpu argument.
+  void initArg(BufferArg& arg) {
+    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    vector.uniform(0.001, 1);
+  }
+
+  void initArg(SequenceArg& arg) {
+    /// init only matrix
+    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    vector.uniform(0.001, 1);
+  }
+
+  void initArg(SequenceIdArg& arg, size_t batchSize) {
+    size_t numSeqs = arg.numSeqs();
+    int* buf = reinterpret_cast<int*>(arg.data());
+    int pos = 0;
+    size_t maxLen = 2 * batchSize / numSeqs;
+    for (int i = 0; i < (int)numSeqs; ++i) {
+      int len = 1 + uniformRandom(std::min<int64_t>(
+                        maxLen, batchSize - pos - numSeqs + i));
+      buf[i] = pos;
+      pos += len;
+      VLOG(1) << " len=" << len;
+    }
+    buf[numSeqs] = batchSize;
+  }
+
   void initInputs() {
     for (size_t i = 0; i < cpuInputs_.size(); i++) {
       if (cpuInputs_[i]->isSparseArg()) {
         continue;  /// sparse matrix already init
       }
 
-      initArg(*cpuInputs_[i]);
-
+      if (cpuInputs_[i]->isSequenceArg()) {
+        initArg(dynamic_cast<SequenceArg&>(*cpuInputs_[i]));
+      } else {
+        initArg(*cpuInputs_[i]);
+      }
       // TODO: Need a BufferCopy used to copy from one BufferArg to another.
       CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
                           (real*)cpuInputs_[i]->data());
@@ -217,7 +302,11 @@ protected:
         continue;  /// sparse matrix already init
       }
 
-      initArg(*cpuOutputs_[i]);
+      if (cpuOutputs_[i]->isSequenceArg()) {
+        initArg(dynamic_cast<SequenceArg&>(*cpuOutputs_[i]));
+      } else {
+        initArg(*cpuOutputs_[i]);
+      }
 
       // TODO: Need a BufferCopy used to copy from one BufferArg to another.
       CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
@@ -241,28 +330,6 @@ protected:
     }
   }
 
-  // only init cpu argument, gpu argument copy from cpu argument.
-  void initArg(BufferArg& arg) {
-    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
-    vector.uniform(0.001, 1);
-  }
-
-  void initArg(SequenceIdArg& arg, size_t batchSize) {
-    size_t numSeqs = arg.numSeqs();
-    int* buf = reinterpret_cast<int*>(arg.data());
-    int pos = 0;
-    size_t maxLen = 2 * batchSize / numSeqs;
-    for (int i = 0; i < (int)numSeqs; ++i) {
-      int len = uniformRandom(
-                    std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
-                1;
-      buf[i] = pos;
-      pos += len;
-      VLOG(1) << " len=" << len;
-    }
-    buf[numSeqs] = batchSize;
-  }
-
 protected:
   std::shared_ptr<FunctionBase> cpuFunc_;
   std::shared_ptr<FunctionBase> gpuFunc_;
@@ -274,6 +341,8 @@ protected:
   std::vector<BufferArgPtr> gpuOutputs_;
   std::shared_ptr<CpuSparseMatrix> cpuSparse_;
   std::shared_ptr<GpuSparseMatrix> gpuSparse_;
+  std::shared_ptr<SequenceIdArg> cpuSeq_;
+  std::shared_ptr<SequenceIdArg> gpuSeq_;
 };
 
 }  // namespace paddle
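
The net effect of the ContextProjectionOp.cpp changes is that the padding weight becomes an optional trailing argument: `ContextProjectionForward` now accepts one or two inputs, and `ContextProjectionBackward` one or two outputs, falling back to an empty `Matrix(nullptr, 0, 0)` whenever the weight is absent or carries no data. A minimal caller sketch, reusing only the `BufferArgs::addArg` overloads and helpers visible in the removed test code above (`forward` stands for an already-initialized "ContextProjectionForward" function object; the buffer names and sizes are illustrative, not part of the patch):

```cpp
CpuMatrix in(batch_size, input_dim);                    // value sequence
CpuMatrix out(batch_size, input_dim * context_length);  // projection result
IVectorPtr seq;
generateSequenceStartPositions(batch_size, seq);        // sequence offsets

BufferArgs inputs, outputs;
inputs.addArg(in, *seq);           // inputs[0]: always required
// inputs.addArg(weight, *seq);    // inputs[1]: only with trainable padding
outputs.addArg(out, *seq, ADD_TO);
forward->calc(inputs, outputs);    // with one input, w_mat falls back to an
                                   // empty Tensor<real, Device>::Matrix
```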
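The `BufferArg.h` relaxation from `CHECK_GT(shape_[0], 1UL)` to `CHECK_GE` fits the same refactoring: a `SequenceIdArg` stores sequence start offsets, so N sequences over a batch are encoded as N + 1 increasing positions whose last entry equals the batch size, which is exactly the invariant the new `initArg(SequenceIdArg&, size_t)` establishes (`buf[numSeqs] = batchSize`, with every drawn length at least 1), and `numSeqs_ = shape_[0] - 1` is the matching bookkeeping. A self-contained illustration of the encoding (the offset values are made up):

```cpp
#include <cassert>
#include <cstddef>

int main() {
  // Three sequences over a batch of 10 rows, encoded as 4 start offsets;
  // buf[i]..buf[i + 1] delimits sequence i, so the lengths are 3, 4 and 3.
  int buf[] = {0, 3, 7, 10};
  size_t numSeqs = sizeof(buf) / sizeof(buf[0]) - 1;  // == shape[0] - 1 == 3
  for (size_t i = 0; i < numSeqs; ++i) {
    int len = buf[i + 1] - buf[i];  // per-sequence row count
    assert(len >= 1);               // initArg always draws len >= 1
  }
  assert(buf[numSeqs] == 10);  // final offset == batch size
  return 0;
}
```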