From 838ef366dc6349ff25f88b6f69d2815c21ea8261 Mon Sep 17 00:00:00 2001
From: xutianbing
Date: Sat, 24 Dec 2016 19:21:09 -0800
Subject: [PATCH] add first paddle function example for ContextProjectionForward operator, by going through Daoyuan's excellent paddle function design.

---
 paddle/function/CMakeLists.txt                |   4 +
 paddle/function/Function.cpp                  |  32 ++++
 paddle/function/Function.h                    |   2 +
 paddle/function/FunctionTest.h                |  46 ++++--
 paddle/function/context_projection_op.cpp     | 152 ++++++++++++++++
 paddle/function/context_projection_op.h       |  47 ++++++
 paddle/function/context_projection_op_gpu.cu  | 158 ++++++++++++++++++
 .../function/context_projection_op_test.cpp   | 110 +++++++++++++
 8 files changed, 535 insertions(+), 16 deletions(-)
 create mode 100644 paddle/function/context_projection_op.cpp
 create mode 100644 paddle/function/context_projection_op.h
 create mode 100644 paddle/function/context_projection_op_gpu.cu
 create mode 100644 paddle/function/context_projection_op_test.cpp

diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt
index b0e6f92f1..f70ae9959 100644
--- a/paddle/function/CMakeLists.txt
+++ b/paddle/function/CMakeLists.txt
@@ -17,6 +17,10 @@ if(WITH_TESTING)
     # file(GLOB test_files . *OpTest.cpp)
     # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
     add_simple_unittest(CrossMapNormalOpTest)
+    add_unittest(ContextProjectionOpTest
+        context_projection_op_test.cpp
+        context_projection_op_gpu.cu
+        ../gserver/tests/TestUtil.cpp)
   endif()
 endif()
 
diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp
index eb005e674..215b3dbd8 100644
--- a/paddle/function/Function.cpp
+++ b/paddle/function/Function.cpp
@@ -30,6 +30,22 @@ real FuncConfig::get<real>(const std::string& key) const {
   return it->second.r;
 }
 
+// Return the int stored under `key`; CHECK-fails if the key is missing.
+template <>
+int FuncConfig::get<int>(const std::string& key) const {
+  auto it = valueMap_.find(key);
+  CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
+  return it->second.i;
+}
+
+// Return the bool stored under `key`; CHECK-fails if the key is missing.
+template <>
+bool FuncConfig::get<bool>(const std::string& key) const {
+  auto it = valueMap_.find(key);
+  CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
+  return it->second.b;
+}
+
 template <>
 FuncConfig& FuncConfig::set<size_t>(const std::string& key, size_t v) {
   CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
@@ -44,6 +60,22 @@ FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
   return *this;
 }
 
+// Store an int under `key`; CHECK-fails if the key is already present.
+template <>
+FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
+  valueMap_[key].i = v;
+  return *this;
+}
+
+// Store a bool under `key`; CHECK-fails if the key is already present.
+template <>
+FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
+  valueMap_[key].b = v;
+  return *this;
+}
+
 ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
 
 }  // namespace paddle
diff --git a/paddle/function/Function.h b/paddle/function/Function.h
index 095584c0b..210eba130 100644
--- a/paddle/function/Function.h
+++ b/paddle/function/Function.h
@@ -59,6 +59,8 @@ public:
   union value {
     size_t s;
     real r;
+    int i;
+    bool b;
   };
 
   template <typename T>
diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h
index a602bde57..32131037f 100644
--- a/paddle/function/FunctionTest.h
+++ b/paddle/function/FunctionTest.h
@@ -33,25 +33,33 @@ public:
     // init cpu and gpu arguments
     auto initArgs = [=](
         Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
-      for (auto arg : inArgs) {
+      for (const auto& arg : inArgs) {
         size_t size = sizeof(real);
-        for (auto dim : arg.dims_) {
+        for (const auto dim : arg.dims_) {
           size *= dim;
         }
-        cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-        gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-        cpuArgs.emplace_back(
-            Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
-        gpuArgs.emplace_back(
-            Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
-
-        // will use an api to refactor this code.
-        CpuVector cpuVector(size / sizeof(real),
-                            (real*)cpuArgs.back().getData());
-        GpuVector gpuVector(size / sizeof(real),
-                            (real*)gpuArgs.back().getData());
-        cpuVector.uniform(0.001, 1);
-        gpuVector.copyFrom(cpuVector);
+        if (arg.getData()) {
+          // todo(tianbing), waste unnecessary mem here
+          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+          cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
+          gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
+          // already init outside
+        } else {
+          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+          cpuArgs.emplace_back(
+              Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
+          gpuArgs.emplace_back(
+              Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
+          // will use an api to refactor this code.
+          CpuVector cpuVector(size / sizeof(real),
+                              (real*)cpuArgs.back().getData());
+          GpuVector gpuVector(size / sizeof(real),
+                              (real*)gpuArgs.back().getData());
+          cpuVector.uniform(0.001, 1);
+          gpuVector.copyFrom(cpuVector);
+        }
       }
     };
     initArgs(cpuInputs, gpuInputs, inputs);
@@ -81,6 +89,12 @@ public:
     checkArgs(cpuInouts, gpuInouts);
   }
 
+  // Expose the CPU function so a test can call calc() with its own tensors.
+  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; }
+
+  // Expose the GPU function so a test can call calc() with its own tensors.
+  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpu; }
+
 protected:
   std::shared_ptr<FunctionBase> cpu;
   std::shared_ptr<FunctionBase> gpu;
diff --git a/paddle/function/context_projection_op.cpp b/paddle/function/context_projection_op.cpp
new file mode 100644
index 000000000..75c41eed1
--- /dev/null
+++ b/paddle/function/context_projection_op.cpp
@@ -0,0 +1,152 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "context_projection_op.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/Vector.h"
+
+namespace paddle {
+
+/**
+ * \brief CPU implementation of the context projection forward pass.
+ *
+ * For each sequence delimited by consecutive entries of sequence, and for
+ * each context offset j in [0, context_length), the input rows shifted by
+ * context_start + j are added into column block j of output. Shifted rows
+ * that fall outside the sequence are clipped; when is_padding is true and
+ * weight holds data, the matching rows of weight are added in their place,
+ * otherwise nothing is added for them.
+ */
+template <>
+void ContextProjectionForward<DEVICE_TYPE_CPU>(Tensor& output,
+                                               const Tensor& input,
+                                               const Tensor& weight,
+                                               const Tensor& sequence,
+                                               size_t context_length,
+                                               int context_start,
+                                               size_t begin_pad,
+                                               bool is_padding) {
+  CHECK(output.getData() && input.getData() && sequence.getData());
+  CHECK_EQ(output.dims_.size(), 2);
+  CHECK_EQ(input.dims_.size(), 2);
+  CHECK_EQ(weight.dims_.size(), 2);
+  CHECK_EQ(sequence.dims_.size(), 1);
+
+  auto out_mat = std::make_shared<CpuMatrix>(
+      output.getData(), output.dims_[0], output.dims_[1]);
+  const auto in_mat = std::make_shared<CpuMatrix>(
+      input.getData(), input.dims_[0], input.dims_[1]);
+  // weight may carry no data; padding rows are then skipped below.
+  const auto weight_mat =
+      !weight.getData()
+          ? nullptr
+          : std::make_shared<CpuMatrix>(
+                weight.getData(), weight.dims_[0], input.dims_[1]);
+  CpuIVector seq_vec(sequence.dims_[0],
+                     reinterpret_cast<int*>(sequence.getData()));
+  CHECK_EQ(out_mat->getWidth(), in_mat->getWidth() * context_length);
+
+  const int* starts = seq_vec.getData();
+  const size_t num_sequences = seq_vec.getSize() - 1;
+  for (size_t i = 0; i < num_sequences; ++i) {
+    for (size_t j = 0; j < context_length; ++j) {
+      int begin = starts[i] + context_start + j;
+      int end = starts[i + 1] + context_start + j;
+      int dst_begin = starts[i];
+      int dst_end = starts[i + 1];
+      if (begin < starts[i]) {
+        // The shifted window starts before the sequence: pad the head rows.
+        int64_t pad_size =
+            std::min(starts[i] - begin, starts[i + 1] - starts[i]);
+        MatrixPtr mat = out_mat->subMatrix(starts[i], pad_size);
+        if (is_padding && weight_mat) {
+          MatrixPtr sub = weight_mat->subMatrix(j, pad_size);
+          mat->addAtOffset(*sub, j * in_mat->getWidth());
+        }
+        dst_begin = starts[i] + pad_size;
+        begin = starts[i];
+      }
+      if (end > starts[i + 1]) {
+        // The shifted window runs past the sequence: pad the tail rows.
+        int64_t pad_size =
+            std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
+        MatrixPtr mat = out_mat->subMatrix(starts[i + 1] - pad_size, pad_size);
+        if (is_padding && weight_mat) {
+          MatrixPtr sub = weight_mat->subMatrix(
+              begin_pad + context_start + j - pad_size, pad_size);
+          mat->addAtOffset(*sub, j * in_mat->getWidth());
+        }
+        dst_end = starts[i + 1] - pad_size;
+        end = starts[i + 1];
+      }
+      if (end <= begin) continue;
+      MatrixPtr src = in_mat->subMatrix(begin, end - begin);
+      MatrixPtr dst = out_mat->subMatrix(dst_begin, dst_end - dst_begin);
+      dst->addAtOffset(*src, j * in_mat->getWidth());
+    }
+  }
+}
+
+/**
+ * \brief FunctionBase wrapper around ContextProjectionForward.
+ *
+ * \param inputs[0] input value.
+ * \param inputs[1] input weight.
+ * \param inputs[2] input sequence.
+ * \param outputs[0] output value.
+ */
+template <DeviceType Device>
+class ContextProjectionForwardFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {
+    context_length_ = config.get<size_t>("context_length");
+    context_start_ = config.get<int>("context_start");
+    begin_pad_ = config.get<size_t>("begin_pad");
+    is_padding_ = config.get<bool>("is_padding");
+  }
+
+  void calc(const Arguments& inputs,
+            const Arguments& outputs,
+            const Arguments& inouts) override {
+    CHECK_EQ(3, inputs.size());
+    CHECK_EQ(1, outputs.size());
+    CHECK_EQ(0, inouts.size());
+
+    // outputs is passed as const, but the projection writes into it.
+    ContextProjectionForward<Device>(const_cast<Tensor&>(outputs[0]),
+                                     inputs[0],
+                                     inputs[1],
+                                     inputs[2],
+                                     context_length_,
+                                     context_start_,
+                                     begin_pad_,
+                                     is_padding_);
+  }
+
+private:
+  size_t context_length_;
+  int context_start_;
+  size_t begin_pad_;
+  bool is_padding_;
+};
+
+REGISTER_TYPED_FUNC(ContextProjectionForward,
+                    CPU,
+                    ContextProjectionForwardFunc);
+#ifndef PADDLE_ONLY_CPU
+REGISTER_TYPED_FUNC(ContextProjectionForward,
+                    GPU,
+                    ContextProjectionForwardFunc);
+#endif
+}  // namespace paddle
diff --git a/paddle/function/context_projection_op.h b/paddle/function/context_projection_op.h
new file mode 100644
index 000000000..bdc5071bc
--- /dev/null
+++ b/paddle/function/context_projection_op.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+
+namespace paddle {
+
+/**
+ * \brief Context Projection Forward.
+ *
+ * \param[out] output          output data.
+ * \param[in]  input           input data.
+ * \param[in]  weight          padding weight; its data may be null when
+ *                             is_padding is false.
+ * \param[in]  sequence        sequence start positions.
+ * \param[in]  context_length  consecutive rows for concatenation.
+ * \param[in]  context_start   context start position.
+ * \param[in]  begin_pad       row offset into weight used to locate the
+ *                             padding rows for positions past a sequence end.
+ * \param[in]  is_padding      whether rows of weight are added at context
+ *                             positions that fall outside a sequence.
+ *
+ */
+template <DeviceType Device>
+void ContextProjectionForward(Tensor& output,
+                              const Tensor& input,
+                              const Tensor& weight,
+                              const Tensor& sequence,
+                              size_t context_length,
+                              int context_start,
+                              size_t begin_pad,
+                              bool is_padding);
+
+}  // namespace paddle
diff --git a/paddle/function/context_projection_op_gpu.cu b/paddle/function/context_projection_op_gpu.cu
new file mode 100644
index 000000000..4e7958164
--- /dev/null
+++ b/paddle/function/context_projection_op_gpu.cu
@@ -0,0 +1,158 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "hl_base.h"
+#include "context_projection_op.h"
+
+namespace paddle {
+
+/**
+ * \brief CUDA kernel for the context projection forward pass.
+ *
+ * blockIdx.x selects the sequence, and each thread walks the columns
+ * threadIdx.x, threadIdx.x + blockDim.x, ... below input_dim. For every
+ * shifted row i the thread picks one value, from input or (when the row lies
+ * outside the sequence and padding is enabled) from weight, and accumulates
+ * it into output.
+ *
+ * \tparam padding if false, rows that lie outside the sequence are skipped.
+ */
+template <bool padding>
+__global__ void KeContextProjectionForward(const real* input,
+                                           const int* sequence,
+                                           const real* weight,
+                                           real* output,
+                                           int input_dim,
+                                           int context_length,
+                                           int context_start,
+                                           int begin_pad) {
+  int idx = threadIdx.x;
+  int block_size = blockDim.x;
+  int sequenceId = blockIdx.x;
+  int seq_start = sequence[sequenceId];
+  int seq_end = sequence[sequenceId+1];
+  real value = 0;
+
+  int instances = seq_end - seq_start + context_length - 1;
+  output += seq_start * input_dim * context_length;
+  input += seq_start * input_dim;
+  for (int k = 0; k <= input_dim / block_size; k++) {
+    if (idx < input_dim) {
+      for (int i = 0; i < instances; i++) {
+        // i + context_start is the row offset inside the sequence.
+        if ((i + context_start) < 0) {
+          if (padding) {
+            value = weight[i * input_dim + idx];
+          } else {
+            continue;
+          }
+        } else if ((i + context_start) >= (seq_end - seq_start)) {
+          if (padding) {
+            value =
+              weight[(begin_pad + i + context_start - (seq_end - seq_start)) *
+                         input_dim + idx];
+          } else {
+            continue;
+          }
+        } else {
+          value = input[(i + context_start) * input_dim + idx];
+        }
+
+        int outx = (i - context_length) < 0 ? i : (context_length - 1);
+        int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
+        real* output_r =
+          output + outy * input_dim * context_length + outx * input_dim;
+        for (int j = outy; j < seq_end - seq_start; j++) {
+          output_r[idx] += value;
+          if (j - outy == outx) break;
+          output_r += (context_length - 1) * input_dim;
+        }
+      }
+    }
+    idx += block_size;
+  }
+}
+
+/**
+ * \brief Launch KeContextProjectionForward on STREAM_DEFAULT with one block
+ *        of 128 threads per sequence.
+ *
+ * weight may be null only when is_padding is false.
+ */
+void hl_context_projection_forward(const real* input,
+                                   const int* sequence,
+                                   real* weight,
+                                   real* output,
+                                   int num_sequences,
+                                   int input_dim,
+                                   int context_length,
+                                   int context_start,
+                                   int begin_pad,
+                                   bool is_padding) {
+  CHECK_NOTNULL(input);
+  CHECK_NOTNULL(sequence);
+  CHECK_NOTNULL(output);
+  CHECK(!is_padding || weight);
+
+  int block_size = 128;
+  int blocks_x = num_sequences;
+  int blocks_y = 1;
+  dim3 threads(block_size, 1);
+  dim3 grid(blocks_x, blocks_y);
+
+  if (is_padding) {
+    KeContextProjectionForward<true><<< grid, threads, 0, STREAM_DEFAULT >>>
+      (input, sequence, weight, output, input_dim,
+       context_length, context_start, begin_pad);
+  } else {
+    KeContextProjectionForward<false><<< grid, threads, 0, STREAM_DEFAULT >>>
+      (input, sequence, weight, output, input_dim,
+       context_length, context_start, begin_pad);
+  }
+  CHECK_SYNC("hl_context_projection_forward failed");
+}
+
+/**
+ * \brief GPU implementation of the context projection forward pass: checks
+ *        the tensor ranks and forwards to hl_context_projection_forward.
+ */
+template <>
+void ContextProjectionForward<DEVICE_TYPE_GPU>(Tensor& output,
+                                               const Tensor& input,
+                                               const Tensor& weight,
+                                               const Tensor& sequence,
+                                               size_t context_length,
+                                               int context_start,
+                                               size_t begin_pad,
+                                               bool is_padding) {
+  CHECK(output.getData() && input.getData() && sequence.getData());
+  CHECK_EQ(output.dims_.size(), 2);
+  CHECK_EQ(input.dims_.size(), 2);
+  CHECK_EQ(weight.dims_.size(), 2);
+  CHECK_EQ(sequence.dims_.size(), 1);
+  CHECK_EQ(output.dims_[1], input.dims_[1] * context_length);
+
+  hl_context_projection_forward(input.getData(),
+                                reinterpret_cast<int*>(sequence.getData()),
+                                weight.getData(),
+                                output.getData(),
+                                sequence.dims_[0] - 1,
+                                input.dims_[1],
+                                context_length,
+                                context_start,
+                                begin_pad,
+                                is_padding);
+}
+
+}  // namespace paddle
diff --git a/paddle/function/context_projection_op_test.cpp b/paddle/function/context_projection_op_test.cpp
new file mode 100644
index 000000000..98784471a
--- /dev/null
+++ b/paddle/function/context_projection_op_test.cpp
@@ -0,0 +1,110 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <algorithm>
+#include "FunctionTest.h"
+#include "paddle/gserver/tests/TestUtil.h"
+#include "paddle/math/Matrix.h"
+
+using namespace paddle;  // NOLINT
+
+/**
+ * Run ContextProjectionForward on CPU and GPU with the same inputs and check
+ * that both write the same output.
+ */
+void testMatrixProjectionForward(int context_start,
+                                 size_t context_length,
+                                 bool is_padding,
+                                 size_t batch_size,
+                                 size_t input_dim) {
+  // Rows of padding needed before the sequence start and after its end.
+  const int begin_pad = std::max(0, -context_start);
+  const int end_pad =
+      std::max(0, context_start + static_cast<int>(context_length) - 1);
+  const size_t pad = static_cast<size_t>(begin_pad + end_pad);
+  if (pad == 0) is_padding = false;
+
+  FunctionCompare compare("ContextProjectionForward",
+                          FuncConfig()
+                              .set("context_length", context_length)
+                              .set("context_start", context_start)
+                              // read back with get<size_t>(), so store size_t
+                              .set("begin_pad", static_cast<size_t>(begin_pad))
+                              .set("is_padding", is_padding));
+
+  CpuMatrix cpu_in(batch_size, input_dim);
+  cpu_in.randomizeUniform();
+  GpuMatrix gpu_in(batch_size, input_dim);
+  gpu_in.copyFrom(cpu_in);
+  auto cpu_weight =
+      is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
+  auto gpu_weight =
+      is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
+  if (is_padding) {
+    cpu_weight->randomizeUniform();
+    gpu_weight->copyFrom(*cpu_weight);
+  }
+  IVectorPtr cpu_seq;
+  generateSequenceStartPositions(batch_size, cpu_seq);
+  IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
+  gpu_seq->copyFrom(*cpu_seq);
+
+  CpuMatrix cpu_out(batch_size, input_dim * context_length);
+  GpuMatrix gpu_out(batch_size, input_dim * context_length);
+  cpu_out.randomizeUniform();
+  gpu_out.copyFrom(cpu_out);
+
+  compare.getCpuFunction()->calc(
+      {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
+       Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
+              Dims{pad, input_dim}),
+       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
+              Dims{cpu_seq->getSize()})},
+      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
+      {});
+  compare.getGpuFunction()->calc(
+      {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
+       Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
+              Dims{pad, input_dim}),
+       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
+              Dims{gpu_seq->getSize()})},
+      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
+      {});
+
+  autotest::TensorCheckEqual(cpu_out, gpu_out);
+}
+
+TEST(ContextProjectionForward, projection) {
+  for (auto context_start : {-5, -3, -1, 0, 3}) {
+    for (auto context_length : {1, 2, 5, 7}) {
+      for (auto trainable_padding : {false, true}) {
+        for (auto batch_size : {1, 2, 5, 20, 100}) {
+          for (auto input_dim : {15, 32, 63, 128, 200}) {
+            VLOG(3) << " context_start=" << context_start
+                    << " context_length=" << context_length
+                    << " trainable_padding=" << trainable_padding
+                    << " batch_size=" << batch_size
+                    << " input_dim=" << input_dim;
+            testMatrixProjectionForward(context_start,
+                                        context_length,
+                                        trainable_padding,
+                                        batch_size,
+                                        input_dim);
+          }
+        }
+      }
+    }
+  }
+}
-- 
GitLab