diff --git a/paddle/cuda/include/hl_sequence.h b/paddle/cuda/include/hl_sequence.h index 9bcd25b0623e569052e08c0befc8e09f937fa4bd..9f9d8f972e3a4c62e5caedcf85054be5681b96c1 100644 --- a/paddle/cuda/include/hl_sequence.h +++ b/paddle/cuda/include/hl_sequence.h @@ -48,78 +48,6 @@ extern void hl_max_sequence_forward(real* input, extern void hl_max_sequence_backward( real* outputGrad, int* index, real* inputGrad, int numSequences, int dim); -/** - * @brief Context projection forward. - * - * @param[in] input input sequence. - * @param[in] sequence sequence index. - * @param[in] weightData padding data. - * @param[out] output output sequence. - * @param[in] numSequences number of sequences. - * @param[in] inputDim input sequence dimension. - * @param[in] contextLength context length. - * @param[in] contextStart context start. - * @param[in] beginPad number of extra timesteps added at the - * beginning. - * @param[in] isPadding trainable padding. - * - */ -extern void hl_context_projection_forward(real* input, - const int* sequence, - real* weightData, - real* output, - int numSequences, - int inputDim, - int contextLength, - int contextStart, - int beginPad, - bool isPadding); - -/** - * @brief Context projection backward data. - * - * @param[in] outputGrad output gradient. - * @param[in] sequence sequence index. - * @param[out] inputGrad input gradient. - * @param[in] numSequences number of sequences. - * @param[in] inputDim input sequence dimension. - * @param[in] contextLength context length. - * @param[in] contextStart context start. - * - */ -extern void hl_context_projection_backward_data(real* outputGrad, - const int* sequence, - real* inputGrad, - int numSequences, - int inputDim, - int contextLength, - int contextStart); - -/** - * @brief Context projection backward weight. - * - * @param[in] outputGrad output gradient. - * @param[in] sequence sequence index. - * @param[out] weightGrad weight gradient. - * @param[in] numSequences number of sequences. - * @param[in] weightDim input sequence dimension. - * @param[in] totalPad number of extra timesteps. - * @param[in] contextLength context length. - * @param[in] contextStart context start. - * @param[in] beginPad number of extra timesteps added at the - * beginning. - * - */ -extern void hl_context_projection_backward_weight(real* outputGrad, - const int* sequence, - real* weightGrad, - int numSequences, - int weightDim, - int totalPad, - int contextLength, - int contextStart, - int beginPad); - /** * @brief Memory copy from sequence to batch. 
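Editor's note: the three hl_context_projection_* entry points deleted above are superseded by device-typed Functions registered in paddle/function (ContextProjectionOp.cpp below). A minimal sketch of how a caller now reaches the same GPU computation through the registry; the tensor ordering follows the doc comments in ContextProjectionOp.cpp, and in_data, w_data, seq_starts and out_data are hypothetical, already-allocated device buffers:

    // Build and configure the registered Function once, then call it.
    // context_length/begin_pad are size_t and context_start is int, matching
    // the FuncConfig::set specializations in paddle/function/Function.cpp.
    auto forward = FunctionBase::funcRegistrar_.createByType(
        "ContextProjectionForward-GPU");
    forward->init(FuncConfig()
                      .set("context_length", context_length)
                      .set("context_start", context_start)
                      .set("begin_pad", begin_pad));
    forward->calc(
        {Tensor(in_data, Dims{batch_size, input_dim}),
         Tensor(w_data, Dims{total_pad, input_dim}),  // null data: no padding
         Tensor(reinterpret_cast<real*>(seq_starts), Dims{num_sequences + 1})},
        {Tensor(out_data, Dims{batch_size, input_dim * context_length})},
        {});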
* diff --git a/paddle/cuda/include/stub/hl_sequence_stub.h b/paddle/cuda/include/stub/hl_sequence_stub.h index d6b07556f8958a62bd47f0b47b75bbebafeb58d3..05e51bce9e1df6fc6ef1cad891b44a9172da185d 100644 --- a/paddle/cuda/include/stub/hl_sequence_stub.h +++ b/paddle/cuda/include/stub/hl_sequence_stub.h @@ -27,35 +27,6 @@ inline void hl_max_sequence_forward(real* input, inline void hl_max_sequence_backward( real* outputGrad, int* index, real* inputGrad, int numSequences, int dim) {} -inline void hl_context_projection_forward(real* input, - const int* sequence, - real* weightData, - real* output, - int numSequences, - int inputDim, - int contextLength, - int contextStart, - int beginPad, - bool isPadding) {} - -inline void hl_context_projection_backward_data(real* outputGrad, - const int* sequence, - real* inputGrad, - int numSequences, - int inputDim, - int contextLength, - int contextStart) {} - -inline void hl_context_projection_backward_weight(real* outputGrad, - const int* sequence, - real* weightGrad, - int numSequences, - int weightDim, - int totalPad, - int contextLength, - int contextStart, - int beginPad) {} - inline void hl_sequence2batch_copy(real* batch, real* sequence, const int* batchIndex, diff --git a/paddle/cuda/src/hl_cuda_sequence.cu b/paddle/cuda/src/hl_cuda_sequence.cu index 4e33ac443c1f78b7fa50a15784875cbadfcf7497..ba823de2720336851bf9c49d8162360af93e8601 100644 --- a/paddle/cuda/src/hl_cuda_sequence.cu +++ b/paddle/cuda/src/hl_cuda_sequence.cu @@ -90,258 +90,6 @@ void hl_max_sequence_backward(real* outputGrad, CHECK_SYNC("hl_max_sequence_backward failed"); } -template -__global__ void KeContextProjectionForward(real* input, - const int* sequence, - real* weightData, - real* output, - int inputDim, - int contextLength, - int contextStart, - int beginPad) { - int idx = threadIdx.x; - int blockSize = blockDim.x; - int sequenceId = blockIdx.x; - int seqStart = sequence[sequenceId]; - int seqEnd = sequence[sequenceId+1]; - real value = 0; - - int instances = seqEnd - seqStart + contextLength - 1; - output += seqStart * inputDim * contextLength; - input += seqStart * inputDim; - for (int k = 0; k <= inputDim / blockSize; k++) { - if (idx < inputDim) { - for (int i = 0; i < instances; i++) { - // i + contextStart; - if ((i + contextStart) < 0) { - if (padding) { - value = weightData[i * inputDim + idx]; - } else { - continue; - } - } else if ((i + contextStart) >= (seqEnd - seqStart)) { - if (padding) { - value = - weightData[(beginPad + i + contextStart - (seqEnd - seqStart)) * - inputDim + idx]; - } else { - continue; - } - } else { - value = input[(i + contextStart) * inputDim + idx]; - } - - int outx = (i - contextLength) < 0 ? i : (contextLength - 1); - int outy = (i - contextLength) < 0 ? 
0 : (i - (contextLength - 1)); - real* output_r = - output + outy * inputDim * contextLength + outx * inputDim; - for (int j = outy; j < seqEnd - seqStart; j++) { - output_r[idx] += value; - if (j - outy == outx) break; - output_r += (contextLength - 1) * inputDim; - } - } - } - idx += blockSize; - } -} - -void hl_context_projection_forward(real* input, - const int* sequence, - real* weightData, - real* output, - int numSequences, - int inputDim, - int contextLength, - int contextStart, - int beginPad, - bool isPadding) { - CHECK_NOTNULL(input); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(output); - CHECK(!isPadding || weightData); - - int blockSize = 128; - int blocksX = numSequences; - int blocksY = 1; - dim3 threads(blockSize, 1); - dim3 grid(blocksX, blocksY); - - if (isPadding) { - KeContextProjectionForward<<< grid, threads, 0, STREAM_DEFAULT >>> - (input, sequence, weightData, output, inputDim, - contextLength, contextStart, beginPad); - } else { - KeContextProjectionForward<<< grid, threads, 0, STREAM_DEFAULT >>> - (input, sequence, weightData, output, inputDim, - contextLength, contextStart, beginPad); - } - CHECK_SYNC("hl_context_projection_forward failed"); -} - -__global__ void KeContextProjectionBackwardData(real* outputGrad, - const int* sequence, - real* inputGrad, - int inputDim, - int contextLength, - int contextStart) { - int idx = threadIdx.x; - int blockSize = blockDim.x; - int sequenceId = blockIdx.x; - int seqStart = sequence[sequenceId]; - int seqEnd = sequence[sequenceId+1]; - real value = 0; - - int instances = seqEnd - seqStart + contextLength - 1; - outputGrad += seqStart * inputDim * contextLength; - inputGrad += seqStart * inputDim; - for (int k = 0; k <= inputDim / blockSize; k++) { - if (idx < inputDim) { - for (int i = 0; i < instances; i++) { - if ((i + contextStart) < 0) { - continue; - } else if ((i + contextStart) >= (seqEnd - seqStart)) { - continue; - } else { - // value = 0; - value = inputGrad[(i + contextStart) * inputDim + idx]; - } - - int outx = (i - contextLength) < 0 ? i : (contextLength - 1); - int outy = (i - contextLength) < 0 ? 
0 : (i - (contextLength - 1)); - real* output_r = - outputGrad + outy * inputDim * contextLength + outx * inputDim; - for (int j = outy; j < seqEnd - seqStart; j++) { - value += output_r[idx]; - if (j - outy == outx) break; - output_r += (contextLength - 1) * inputDim; - } - inputGrad[(i + contextStart) * inputDim + idx] = value; - } - } - idx += blockSize; - } -} - -void hl_context_projection_backward_data(real* outputGrad, - const int* sequence, - real* inputGrad, - int numSequences, - int inputDim, - int contextLength, - int contextStart) { - CHECK_NOTNULL(outputGrad); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(inputGrad); - - int blockSize = 128; - int blocksX = numSequences; - int blocksY = 1; - dim3 threads(blockSize, 1); - dim3 grid(blocksX, blocksY); - KeContextProjectionBackwardData<<< grid, threads, 0, STREAM_DEFAULT >>> - (outputGrad, sequence, inputGrad, inputDim, contextLength, contextStart); - CHECK_SYNC("hl_context_projection_backward_data failed"); -} - -template -__global__ void KeContextProjectionBackwardWeight(real* outputGrad, - const int* sequence, - real* weightGrad, - int numSequences, - int weightDim, - int contextLength, - int contextStart, - int beginPad) { - __shared__ real sum_s[THREADS_Y][THREADS_X]; - int padOfBlock = (weightDim + THREADS_X - 1) / THREADS_X; - const int idx = threadIdx.x; - const int idy = threadIdx.y; - int padId = blockIdx.x / padOfBlock; - int weightIdx = idx + THREADS_X * (blockIdx.x % padOfBlock); - int instanceId; - real value = 0; - real* output_r; - - sum_s[idy][idx] = 0.0f; - if (weightIdx < weightDim) { - for (int seqId = idy; seqId < numSequences; seqId += THREADS_Y) { - int seqStart = sequence[seqId]; - int seqEnd = sequence[seqId+1]; - output_r = outputGrad + seqStart * weightDim * contextLength; - - if (contextStart < 0) { - if (padId + contextStart < 0) { - instanceId = padId; - } else { - // beginPad > 0; - instanceId = (padId - beginPad) + (seqEnd - seqStart) - contextStart; - } - } else { - if (padId + (seqEnd - seqStart) < contextStart) { - continue; - } else { - // beginPad == 0; - instanceId = padId + (seqEnd - seqStart) - contextStart; - } - } - - int outx = (instanceId - contextLength) < 0 ? - instanceId : (contextLength - 1); - int outy = (instanceId - contextLength) < 0 ? 
- 0 : (instanceId - (contextLength - 1)); - output_r += outy * weightDim * contextLength + outx * weightDim; - for (int j = outy; j < seqEnd - seqStart; j++) { - value += output_r[weightIdx]; - if (j - outy == outx) break; - output_r += (contextLength - 1) * weightDim; - } - } - sum_s[idy][idx] = value; - } - __syncthreads(); - - for (int stride = THREADS_Y/2; stride > 0; stride = stride/2) { - if (idy < stride) { - sum_s[idy][idx] += sum_s[idy + stride][idx]; - } - __syncthreads(); - } - __syncthreads(); - - if (weightIdx < weightDim) { - if (idy == 0) { - weightGrad[padId * weightDim + weightIdx] += sum_s[0][idx]; - } - } -} - -void hl_context_projection_backward_weight(real* outputGrad, - const int* sequence, - real* weightGrad, - int numSequences, - int weightDim, - int totalPad, - int contextLength, - int contextStart, - int beginPad) { - CHECK_NOTNULL(outputGrad); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(weightGrad); - - int threadsX = 32; - int threadsY = 32; - int blocksX = totalPad * ((weightDim + threadsX - 1) / threadsX); - dim3 threads(threadsX, threadsY); - dim3 grid(blocksX, 1); - - KeContextProjectionBackwardWeight<32, 32> - <<< grid, threads, 0, STREAM_DEFAULT >>> - (outputGrad, sequence, weightGrad, numSequences, weightDim, - contextLength, contextStart, beginPad); - CHECK_SYNC("hl_context_projection_backward_weight failed"); -} - template __global__ void KeMatrixAddRows(real* output, real* table, diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index b0e6f92f1e7d65571d22b116240a7a2415d86718..0b3126155d0c0872a70fc83260d4ea34161cb717 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -17,6 +17,9 @@ if(WITH_TESTING) # file(GLOB test_files . *OpTest.cpp) # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files}) add_simple_unittest(CrossMapNormalOpTest) + add_unittest(ContextProjectionOpTest + ContextProjectionOpTest.cpp + ../gserver/tests/TestUtil.cpp) endif() endif() diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bd367a859e10c0522206cd0215970922905905ed --- /dev/null +++ b/paddle/function/ContextProjectionOp.cpp @@ -0,0 +1,373 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "ContextProjectionOp.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" + +namespace paddle { + +template <> +void ContextProjectionForward(CpuMatrix* out_mat, + const CpuMatrix* input_mat, + const CpuMatrix* weight_mat, + const CpuIVector& seq_vec, + size_t context_length, + int context_start, + size_t begin_pad) { + const int* starts = seq_vec.getData(); + const size_t num_sequences = seq_vec.getSize() - 1; + auto w_mat = const_cast(weight_mat); + auto in_mat = const_cast(input_mat); + for (size_t i = 0; i < num_sequences; ++i) { + for (size_t j = 0; j < context_length; ++j) { + int begin = starts[i] + context_start + j; + int end = starts[i + 1] + context_start + j; + int dst_begin = starts[i]; + int dst_end = starts[i + 1]; + if (begin < starts[i]) { + int64_t pad_size = + std::min(starts[i] - begin, starts[i + 1] - starts[i]); + MatrixPtr mat = out_mat->subMatrix(starts[i], pad_size); + if (w_mat) { + MatrixPtr sub = w_mat->subMatrix(j, pad_size); + mat->addAtOffset(*sub, j * in_mat->getWidth()); + } + dst_begin = starts[i] + pad_size; + begin = starts[i]; + } + if (end > starts[i + 1]) { + int64_t pad_size = + std::min(end - starts[i + 1], starts[i + 1] - starts[i]); + MatrixPtr mat = out_mat->subMatrix(starts[i + 1] - pad_size, pad_size); + if (w_mat) { + MatrixPtr sub = w_mat->subMatrix( + begin_pad + context_start + j - pad_size, pad_size); + mat->addAtOffset(*sub, j * in_mat->getWidth()); + } + dst_end = starts[i + 1] - pad_size; + end = starts[i + 1]; + } + if (end <= begin) continue; + MatrixPtr src = in_mat->subMatrix(begin, end - begin); + MatrixPtr dst = out_mat->subMatrix(dst_begin, dst_end - dst_begin); + dst->addAtOffset(*src, j * in_mat->getWidth()); + } + } +} + +/** + * \param inputs[0] input value. + * \param inputs[1] input weight. + * \param inputs[2] input sequence. + * \param outputs[0] output value. + */ +template +class ContextProjectionForwardFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + context_length_ = config.get("context_length"); + context_start_ = config.get("context_start"); + begin_pad_ = config.get("begin_pad"); + } + + void calc(const Arguments& inputs, + const Arguments& outputs, + const Arguments& inouts) override { + CHECK_EQ(3, inputs.size()); + CHECK_EQ(1, outputs.size()); + CHECK_EQ(0, inouts.size()); + + CHECK(outputs[0].getData() && inputs[0].getData() && inputs[2].getData()); + CHECK_EQ(outputs[0].dims_.size(), 2); + CHECK_EQ(inputs[0].dims_.size(), 2); + CHECK_EQ(inputs[1].dims_.size(), 2); + CHECK_EQ(inputs[2].dims_.size(), 1); + /// dim of output = dim of input * context_length + CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); + /// dim of input == dim of weight + CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]); + /// input and output has the same batch_size + CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); + + auto out_mat = std::make_shared::type>( + outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); + const auto in_mat = std::make_shared::type>( + inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); + const auto w_mat = + !inputs[1].getData() + ? 
nullptr + : std::make_shared::type>( + inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); + typename SequenceT::type seq_vec( + inputs[2].dims_[0], reinterpret_cast(inputs[2].getData())); + + ContextProjectionForward(out_mat.get(), + in_mat.get(), + w_mat.get(), + seq_vec, + context_length_, + context_start_, + begin_pad_); + } + +private: + size_t context_length_; + int context_start_; + size_t begin_pad_; +}; + +template <> +void ContextProjectionBackward(CpuMatrix* out_grad_mat, + CpuMatrix* in_grad_mat, + CpuMatrix* w_grad_mat, + const CpuIVector& seq_vec, + size_t context_length, + int context_start, + size_t begin_pad, + bool is_padding, + size_t total_pad) { + CHECK(out_grad_mat); + size_t input_dim = in_grad_mat ? in_grad_mat->getWidth() + : w_grad_mat ? w_grad_mat->getWidth() : 0; + const int* starts = seq_vec.getData(); + size_t num_sequences = seq_vec.getSize() - 1; + for (size_t i = 0; i < num_sequences; ++i) { + for (size_t j = 0; j < context_length; ++j) { + int begin = starts[i] + context_start + j; + int end = starts[i + 1] + context_start + j; + int dst_begin = starts[i]; + int dst_end = starts[i + 1]; + if (begin < starts[i]) { + int64_t pad_size = + std::min(starts[i] - begin, starts[i + 1] - starts[i]); + if (is_padding && w_grad_mat) { + MatrixPtr mat = out_grad_mat->subMatrix(starts[i], pad_size); + MatrixPtr sub = w_grad_mat->subMatrix(j, pad_size); + sub->addAtOffset(*mat, j * input_dim); + } + dst_begin = starts[i] + pad_size; + begin = starts[i]; + } + if (end > starts[i + 1]) { + int64_t pad_size = + std::min(end - starts[i + 1], starts[i + 1] - starts[i]); + if (is_padding && w_grad_mat) { + MatrixPtr mat = + out_grad_mat->subMatrix(starts[i + 1] - pad_size, pad_size); + MatrixPtr sub = w_grad_mat->subMatrix( + begin_pad + context_start + j - pad_size, pad_size); + sub->addAtOffset(*mat, j * input_dim); + } + dst_end = starts[i + 1] - pad_size; + end = starts[i + 1]; + } + if (end <= begin) continue; + if (!in_grad_mat) continue; + MatrixPtr src = in_grad_mat->subMatrix(begin, end - begin); + MatrixPtr dst = out_grad_mat->subMatrix(dst_begin, dst_end - dst_begin); + src->addAtOffset(*dst, j * input_dim); + } + } +} + +/** + * \param inputs[0] input grad. + * \param inputs[1] weight grad. + * \param inputs[2] input sequence. + * \param outputs[0] output value. 
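Editor's note: the forward mapping implemented by the CpuMatrix code above (and transposed by the backward code that follows) can be stated as one small reference loop. A standalone sketch, not the production code: output row r, context slot j reads input row r + context_start + j, and rows that fall outside the sequence come from the padding weight when one is supplied.

    // Reference semantics of ContextProjectionForward for a single sequence.
    // `real` is Paddle's float/double typedef; raw pointers stand in for the
    // Matrix types. Like addAtOffset above this accumulates, so the caller
    // zero-fills `out` first.
    void contextProjectionRef(const real* in, int len, int dim,
                              const real* pad,  // [total_pad x dim] or null
                              int context_start, int context_length,
                              int begin_pad,  // == max(0, -context_start)
                              real* out) {    // [len x dim * context_length]
      for (int r = 0; r < len; ++r) {
        for (int j = 0; j < context_length; ++j) {
          int src = r + context_start + j;
          const real* row = nullptr;
          if (src < 0) {
            if (pad) row = pad + (src + begin_pad) * dim;        // head pad
          } else if (src >= len) {
            if (pad) row = pad + (begin_pad + src - len) * dim;  // tail pad
          } else {
            row = in + src * dim;
          }
          if (!row) continue;  // no trainable padding: slot stays zero
          real* dst = out + r * dim * context_length + j * dim;
          for (int d = 0; d < dim; ++d) dst[d] += row[d];
        }
      }
    }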
+ */ +template +class ContextProjectionBackwardFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + context_length_ = config.get("context_length"); + context_start_ = config.get("context_start"); + begin_pad_ = config.get("begin_pad"); + is_padding_ = config.get("is_padding"); + total_pad_ = config.get("total_pad"); + } + + void calc(const Arguments& inputs, + const Arguments& outputs, + const Arguments& inouts) override { + CHECK_EQ(3, inputs.size()); + CHECK_EQ(1, outputs.size()); + CHECK_EQ(0, inouts.size()); + + CHECK(outputs[0].getData() && inputs[2].getData()); + CHECK_EQ(outputs[0].dims_.size(), 2); + CHECK_EQ(inputs[0].dims_.size(), 2); + CHECK_EQ(inputs[1].dims_.size(), 2); + CHECK_EQ(inputs[2].dims_.size(), 1); + + /// dim of input == dim of weight + CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]); + /// input and output has the same batch_size + CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); + /// dim of output = dim of input * context_length + CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); + + auto out_grad_mat = std::make_shared::type>( + outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); + auto in_grad_mat = + !inputs[0].getData() + ? nullptr + : std::make_shared::type>( + inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); + auto w_grad_mat = + !inputs[1].getData() + ? nullptr + : std::make_shared::type>( + inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); + typename SequenceT::type seq_vec( + inputs[2].dims_[0], reinterpret_cast(inputs[2].getData())); + + ContextProjectionBackward(out_grad_mat.get(), + in_grad_mat ? in_grad_mat.get() : nullptr, + w_grad_mat ? w_grad_mat.get() : nullptr, + seq_vec, + context_length_, + context_start_, + begin_pad_, + is_padding_, + total_pad_); + } + +private: + size_t context_length_; + int context_start_; + size_t begin_pad_; + bool is_padding_; + size_t total_pad_; +}; + +/** + * \param inputs[0] input grad. + * \param inputs[1] input sequence. + * \param outputs[0] output grad. + */ +template +class ContextProjectionBackwardDataFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + context_length_ = config.get("context_length"); + context_start_ = config.get("context_start"); + } + + void calc(const Arguments& inputs, + const Arguments& outputs, + const Arguments& inouts) override { + CHECK_EQ(2, inputs.size()); + CHECK_EQ(1, outputs.size()); + CHECK_EQ(0, inouts.size()); + CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); + CHECK_EQ(outputs[0].dims_.size(), 2); + CHECK_EQ(inputs[0].dims_.size(), 2); + CHECK_EQ(inputs[1].dims_.size(), 1); + CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); + /// input and output has the same batch_size + CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); + + auto out_grad_mat = std::make_shared::type>( + outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); + const auto in_grad_mat = std::make_shared::type>( + inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); + typename SequenceT::type seq_vec( + inputs[1].dims_[0], reinterpret_cast(inputs[1].getData())); + + ContextProjectionBackwardData(out_grad_mat.get(), + in_grad_mat.get(), + seq_vec, + context_length_, + context_start_); + } + +private: + size_t context_length_; + int context_start_; +}; + +/** + * \param inputs[0] weight grad. + * \param inputs[1] input sequence. + * \param outputs[0] output grad. 
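Editor's note: a null Tensor data pointer acts as "gradient not wanted" in the combined backward Function defined above; the CPU loop skips input rows when in_grad_mat is null, and the weight branch is gated on is_padding && w_grad_mat. A hedged sketch of a caller that needs only the padding-weight gradient (buffer names hypothetical; is_padding must be true in the FuncConfig):

    backward->calc(
        {Tensor(nullptr, Dims{batch_size, input_dim}),  // skip in_grad
         Tensor(w_grad_data, Dims{total_pad, input_dim}),
         Tensor(reinterpret_cast<real*>(seq_starts), Dims{num_sequences + 1})},
        {Tensor(out_grad_data, Dims{batch_size, input_dim * context_length})},
        {});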
+ */
+template <DeviceType Device>
+class ContextProjectionBackwardWeightFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {
+    context_length_ = config.get<size_t>("context_length");
+    context_start_ = config.get<int>("context_start");
+    begin_pad_ = config.get<size_t>("begin_pad");
+    total_pad_ = config.get<size_t>("total_pad");
+  }
+
+  void calc(const Arguments& inputs,
+            const Arguments& outputs,
+            const Arguments& inouts) override {
+    CHECK_EQ(2, inputs.size());
+    CHECK_EQ(1, outputs.size());
+    CHECK_EQ(0, inouts.size());
+
+    CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
+    CHECK_EQ(outputs[0].dims_.size(), 2);
+    CHECK_EQ(inputs[0].dims_.size(), 2);
+    CHECK_EQ(inputs[1].dims_.size(), 1);
+    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
+
+    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
+        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
+    auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
+        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
+    typename SequenceT<Device>::type seq_vec(
+        inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
+
+    ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
+                                            w_grad_mat.get(),
+                                            seq_vec,
+                                            context_length_,
+                                            context_start_,
+                                            total_pad_,
+                                            begin_pad_);
+  }
+
+private:
+  size_t context_length_;
+  int context_start_;
+  size_t begin_pad_;
+  size_t total_pad_;
+};
+
+REGISTER_TYPED_FUNC(ContextProjectionForward,
+                    CPU,
+                    ContextProjectionForwardFunc);
+REGISTER_TYPED_FUNC(ContextProjectionBackward,
+                    CPU,
+                    ContextProjectionBackwardFunc);
+#ifndef PADDLE_ONLY_CPU
+REGISTER_TYPED_FUNC(ContextProjectionForward,
+                    GPU,
+                    ContextProjectionForwardFunc);
+REGISTER_TYPED_FUNC(ContextProjectionBackward,
+                    GPU,
+                    ContextProjectionBackwardFunc);
+REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
+                    GPU,
+                    ContextProjectionBackwardDataFunc);
+REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
+                    GPU,
+                    ContextProjectionBackwardWeightFunc);
+#endif
+}  // namespace paddle
diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h
new file mode 100644
index 0000000000000000000000000000000000000000..93eb050fde35f474750f3c2efa72b7471f654b75
--- /dev/null
+++ b/paddle/function/ContextProjectionOp.h
@@ -0,0 +1,85 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+
+namespace paddle {
+
+/**
+ * \brief Context Projection Forward.
+ *
+ * \param[out] output          output data.
+ * \param[in]  input           input data.
+ * \param[in]  weight          padding weight (may be null when the
+ *                             projection has no trainable padding).
+ * \param[in]  sequence        sequence start positions.
+ * \param[in]  context_length  consecutive rows for concatenation.
+ * \param[in]  context_start   context start position.
+ * \param[in]  begin_pad       beginning pad position.
+ *
+ */
+template <DeviceType Device>
+void ContextProjectionForward(typename MatrixT<Device>::type* output,
+                              const typename MatrixT<Device>::type* input,
+                              const typename MatrixT<Device>::type* weight,
+                              const typename SequenceT<Device>::type& sequence,
+                              size_t context_length,
+                              int context_start,
+                              size_t begin_pad);
+
+/**
+ * \brief Context Projection Backward.
+ *
+ * \param[in]  out_grad        output gradient.
+ * \param[out] in_grad         input gradient (may be null).
+ * \param[out] w_grad          weight gradient (may be null).
+ * \param[in]  seq_vec         sequence start positions.
+ * \param[in]  context_length  consecutive rows for concatenation.
+ * \param[in]  context_start   context start position.
+ * \param[in]  begin_pad       beginning pad position.
+ * \param[in]  is_padding      whether the padding is trainable.
+ * \param[in]  total_pad       number of extra (padding) timesteps.
+ *
+ */
+template <DeviceType Device>
+void ContextProjectionBackward(typename MatrixT<Device>::type* out_grad,
+                               typename MatrixT<Device>::type* in_grad,
+                               typename MatrixT<Device>::type* w_grad,
+                               const typename SequenceT<Device>::type& seq_vec,
+                               size_t context_length,
+                               int context_start,
+                               size_t begin_pad,
+                               bool is_padding,
+                               size_t total_pad);
+
+template <DeviceType Device>
+void ContextProjectionBackwardData(
+    typename MatrixT<Device>::type* out_grad,
+    typename MatrixT<Device>::type* in_grad,
+    const typename SequenceT<Device>::type& sequence,
+    size_t context_length,
+    int context_start);
+
+template <DeviceType Device>
+void ContextProjectionBackwardWeight(
+    typename MatrixT<Device>::type* out_grad,
+    typename MatrixT<Device>::type* w_grad,
+    const typename SequenceT<Device>::type& seq_vec,
+    size_t context_length,
+    int context_start,
+    size_t total_pad,
+    size_t begin_pad);
+
+}  // namespace paddle
diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..1ec7058f96c8200728e5add051d5fa6a77a97e36
--- /dev/null
+++ b/paddle/function/ContextProjectionOpGpu.cu
@@ -0,0 +1,401 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
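Editor's note: the Device parameter in these declarations resolves to concrete matrix and vector types through traits declared in paddle/function/Function.h; reproduced here in simplified form (the forward declarations stand in for paddle/math's real classes):

    class CpuMatrix;
    class GpuMatrix;

    enum DeviceType {
      DEVICE_TYPE_CPU,
      DEVICE_TYPE_GPU,
    };

    template <DeviceType Device>
    struct MatrixT;

    template <>
    struct MatrixT<DEVICE_TYPE_CPU> {
      using type = CpuMatrix;
    };

    template <>
    struct MatrixT<DEVICE_TYPE_GPU> {
      using type = GpuMatrix;
    };

SequenceT maps the same way onto CpuIVector and GpuIVector, so ContextProjectionForward on DEVICE_TYPE_CPU is the CpuMatrix implementation in ContextProjectionOp.cpp, while the DEVICE_TYPE_GPU specializations live in ContextProjectionOpGpu.cu.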
*/ + +#include "hl_base.h" +#include "ContextProjectionOp.h" + +namespace paddle { + +template +__global__ void KeContextProjectionForward(const real* input, + const int* sequence, + const real* weight, + real* output, + int input_dim, + int context_length, + int context_start, + int begin_pad) { + int idx = threadIdx.x; + int block_size = blockDim.x; + int sequenceId = blockIdx.x; + int seq_start = sequence[sequenceId]; + int seq_end = sequence[sequenceId+1]; + real value = 0; + + int instances = seq_end - seq_start + context_length - 1; + output += seq_start * input_dim * context_length; + input += seq_start * input_dim; + for (int k = 0; k <= input_dim / block_size; k++) { + if (idx < input_dim) { + for (int i = 0; i < instances; i++) { + // i + context_start; + if ((i + context_start) < 0) { + if (padding) { + value = weight[i * input_dim + idx]; + } else { + continue; + } + } else if ((i + context_start) >= (seq_end - seq_start)) { + if (padding) { + value = + weight[(begin_pad + i + context_start - (seq_end - seq_start)) * + input_dim + idx]; + } else { + continue; + } + } else { + value = input[(i + context_start) * input_dim + idx]; + } + + int outx = (i - context_length) < 0 ? i : (context_length - 1); + int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1)); + real* output_r = + output + outy * input_dim * context_length + outx * input_dim; + for (int j = outy; j < seq_end - seq_start; j++) { + output_r[idx] += value; + if (j - outy == outx) break; + output_r += (context_length - 1) * input_dim; + } + } + } + idx += block_size; + } +} + +/** + * @brief Context projection forward. + * + * @param[in] input input sequence. + * @param[in] sequence sequence index. + * @param[in] weight padding data. + * @param[out] output output sequence. + * @param[in] num_sequences number of sequences. + * @param[in] input_dim input sequence dimension. + * @param[in] context_length context length. + * @param[in] context_start context start. + * @param[in] begin_pad number of extra timesteps added at the + * beginning. + * + */ +void hl_context_projection_forward(const real* input, + const int* sequence, + const real* weight, + real* output, + size_t num_sequences, + size_t input_dim, + size_t context_length, + int context_start, + size_t begin_pad) { + CHECK_NOTNULL(input); + CHECK_NOTNULL(sequence); + CHECK_NOTNULL(output); + + int block_size = 128; + int blocks_x = num_sequences; + int blocks_y = 1; + dim3 threads(block_size, 1); + dim3 grid(blocks_x, blocks_y); + + if (weight) { + KeContextProjectionForward<<< grid, threads, 0, STREAM_DEFAULT >>> + (input, sequence, weight, output, input_dim, + context_length, context_start, begin_pad); + } else { + KeContextProjectionForward<<< grid, threads, 0, STREAM_DEFAULT >>> + (input, sequence, weight, output, input_dim, + context_length, context_start, begin_pad); + } + CHECK_SYNC("hl_context_projection_forward failed"); +} + +template <> +void ContextProjectionForward(GpuMatrix* output, + const GpuMatrix* input, + const GpuMatrix* weight, + const GpuIVector& sequence, + size_t context_length, + int context_start, + size_t begin_pad) { + CHECK(input && output); + hl_context_projection_forward(input->getData(), + sequence.getData(), + weight ? 
weight->getData() : nullptr, + output->getData(), + sequence.getSize() - 1, + input->getWidth(), + context_length, + context_start, + begin_pad); +} + +__global__ void KeContextProjectionBackwardData(real* out_grad, + const int* sequence, + real* in_grad, + int input_dim, + int context_length, + int context_start) { + int idx = threadIdx.x; + int block_size = blockDim.x; + int sequenceId = blockIdx.x; + int seq_start = sequence[sequenceId]; + int seq_end = sequence[sequenceId+1]; + real value = 0; + + int instances = seq_end - seq_start + context_length - 1; + out_grad += seq_start * input_dim * context_length; + in_grad += seq_start * input_dim; + for (int k = 0; k <= input_dim / block_size; k++) { + if (idx < input_dim) { + for (int i = 0; i < instances; i++) { + if ((i + context_start) < 0) { + continue; + } else if ((i + context_start) >= (seq_end - seq_start)) { + continue; + } else { + // value = 0; + value = in_grad[(i + context_start) * input_dim + idx]; + } + + int outx = (i - context_length) < 0 ? i : (context_length - 1); + int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1)); + real* output_r = + out_grad + outy * input_dim * context_length + outx * input_dim; + for (int j = outy; j < seq_end - seq_start; j++) { + value += output_r[idx]; + if (j - outy == outx) break; + output_r += (context_length - 1) * input_dim; + } + in_grad[(i + context_start) * input_dim + idx] = value; + } + } + idx += block_size; + } +} + +/** + * @brief Context projection backward data. + * + * @param[in] out_grad output gradient. + * @param[in] sequence sequence index. + * @param[out] input_grad input gradient. + * @param[in] num_sequences number of sequences. + * @param[in] input_dim input sequence dimension. + * @param[in] context_length context length. + * @param[in] context_start context start. 
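+ * The addressing mirrors the forward kernel: each input row's gradient
+ * accumulates the gradient of every output slot that read that row. One
+ * block owns one sequence and sequences do not overlap, so the writes to
+ * input_grad need no atomics.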
+ * + */ +void hl_context_projection_backward_data(real* out_grad, + const int* sequence, + real* input_grad, + size_t num_sequences, + size_t input_dim, + size_t context_length, + int context_start) { + CHECK_NOTNULL(out_grad); + CHECK_NOTNULL(sequence); + CHECK_NOTNULL(input_grad); + + int block_size = 128; + int blocks_x = num_sequences; + int blocks_y = 1; + dim3 threads(block_size, 1); + dim3 grid(blocks_x, blocks_y); + KeContextProjectionBackwardData<<< grid, threads, 0, STREAM_DEFAULT >>> + (out_grad, sequence, input_grad, input_dim, context_length, context_start); + CHECK_SYNC("hl_context_projection_backward_data failed"); +} + +template <> +void ContextProjectionBackwardData(GpuMatrix* out_grad, + GpuMatrix* in_grad, + const GpuIVector& sequence, + size_t context_length, + int context_start) { + CHECK(in_grad && out_grad); + hl_context_projection_backward_data(out_grad->getData(), + sequence.getData(), + in_grad->getData(), + sequence.getSize() - 1, + in_grad->getWidth(), + context_length, + context_start); +} + +template +__global__ void KeContextProjectionBackwardWeight(real* out_grad, + const int* sequence, + real* w_grad, + int num_sequences, + int w_dim, + int context_length, + int context_start, + int begin_pad) { + __shared__ real sum_s[THREADS_Y][THREADS_X]; + int pad_of_block = (w_dim + THREADS_X - 1) / THREADS_X; + const int idx = threadIdx.x; + const int idy = threadIdx.y; + int padId = blockIdx.x / pad_of_block; + int weight_idx = idx + THREADS_X * (blockIdx.x % pad_of_block); + int instanceId; + real value = 0; + real* output_r; + + sum_s[idy][idx] = 0.0f; + if (weight_idx < w_dim) { + for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) { + int seq_start = sequence[seqId]; + int seq_end = sequence[seqId+1]; + output_r = out_grad + seq_start * w_dim * context_length; + + if (context_start < 0) { + if (padId + context_start < 0) { + instanceId = padId; + } else { + // begin_pad > 0; + instanceId = (padId - begin_pad) + + (seq_end - seq_start) - context_start; + } + } else { + if (padId + (seq_end - seq_start) < context_start) { + continue; + } else { + // begin_pad == 0; + instanceId = padId + (seq_end - seq_start) - context_start; + } + } + + int outx = (instanceId - context_length) < 0 ? + instanceId : (context_length - 1); + int outy = (instanceId - context_length) < 0 ? + 0 : (instanceId - (context_length - 1)); + output_r += outy * w_dim * context_length + outx * w_dim; + for (int j = outy; j < seq_end - seq_start; j++) { + value += output_r[weight_idx]; + if (j - outy == outx) break; + output_r += (context_length - 1) * w_dim; + } + } + sum_s[idy][idx] = value; + } + __syncthreads(); + + for (int stride = THREADS_Y/2; stride > 0; stride = stride/2) { + if (idy < stride) { + sum_s[idy][idx] += sum_s[idy + stride][idx]; + } + __syncthreads(); + } + __syncthreads(); + + if (weight_idx < w_dim) { + if (idy == 0) { + w_grad[padId * w_dim + weight_idx] += sum_s[0][idx]; + } + } +} + +/** + * @brief Context projection backward weight. + * + * @param[in] out_grad output gradient. + * @param[in] sequence sequence index. + * @param[out] w_grad weight gradient. + * @param[in] num_sequences number of sequences. + * @param[in] w_dim input sequence dimension. + * @param[in] total_pad number of extra timesteps. + * @param[in] context_length context length. + * @param[in] context_start context start. + * @param[in] begin_pad number of extra timesteps added at the + * beginning. 
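Editor's note: the weight-gradient kernel above finishes with a shared-memory tree reduction over THREADS_Y partial sums per weight column. The same pattern in isolation, which may make the stride loop easier to follow (a standalone sketch; KeColumnSum and its [THREADS_Y x width] partial layout are illustrative, not part of this patch):

    // Sum THREADS_Y partial rows into one row, one column per thread x.
    template <int THREADS_X, int THREADS_Y>
    __global__ void KeColumnSum(const real* partial, real* total, int width) {
      __shared__ real sum_s[THREADS_Y][THREADS_X];
      int x = blockIdx.x * THREADS_X + threadIdx.x;
      sum_s[threadIdx.y][threadIdx.x] =
          (x < width) ? partial[threadIdx.y * width + x] : 0.0f;
      __syncthreads();
      // Halve the active rows each step; row 0 ends up holding the sum.
      for (int stride = THREADS_Y / 2; stride > 0; stride /= 2) {
        if (threadIdx.y < stride) {
          sum_s[threadIdx.y][threadIdx.x] +=
              sum_s[threadIdx.y + stride][threadIdx.x];
        }
        __syncthreads();
      }
      if (threadIdx.y == 0 && x < width) total[x] = sum_s[0][threadIdx.x];
    }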
+ * + */ +void hl_context_projection_backward_weight(real* out_grad, + const int* sequence, + real* w_grad, + size_t num_sequences, + size_t w_dim, + size_t total_pad, + size_t context_length, + int context_start, + size_t begin_pad) { + CHECK_NOTNULL(out_grad); + CHECK_NOTNULL(sequence); + CHECK_NOTNULL(w_grad); + + int threads_x = 32; + int threads_y = 32; + int blocks_x = total_pad * ((w_dim + threads_x - 1) / threads_x); + dim3 threads(threads_x, threads_y); + dim3 grid(blocks_x, 1); + + KeContextProjectionBackwardWeight<32, 32> + <<< grid, threads, 0, STREAM_DEFAULT >>> + (out_grad, sequence, w_grad, num_sequences, w_dim, + context_length, context_start, begin_pad); + CHECK_SYNC("hl_context_projection_backward_weight failed"); +} + +template <> +void ContextProjectionBackwardWeight( + GpuMatrix* out_grad, + GpuMatrix* w_grad, + const GpuIVector& seq_vec, + size_t context_length, + int context_start, + size_t total_pad, + size_t begin_pad) { + CHECK(out_grad && w_grad); + hl_context_projection_backward_weight(out_grad->getData(), + seq_vec.getData(), + w_grad->getData(), + seq_vec.getSize() - 1, + w_grad->getWidth(), + total_pad, + context_length, + context_start, + begin_pad); +} + +template <> +void ContextProjectionBackward(GpuMatrix* out_grad, + GpuMatrix* in_grad, + GpuMatrix* w_grad, + const GpuIVector& sequence, + size_t context_length, + int context_start, + size_t begin_pad, + bool is_padding, + size_t total_pad) { + CHECK(out_grad); + if (in_grad) { + ContextProjectionBackwardData( + out_grad, + in_grad, + sequence, + context_length, + context_start); + } + if (is_padding && w_grad) { + ContextProjectionBackwardWeight( + out_grad, + w_grad, + sequence, + context_length, + context_start, + total_pad, + begin_pad); + } +} + +} // namespace paddle diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..359428fc03d698145cb880bd735c908838f96f56 --- /dev/null +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -0,0 +1,172 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "FunctionTest.h" +#include "paddle/gserver/tests/TestUtil.h" +#include "paddle/math/Matrix.h" + +using namespace paddle; // NOLINT + +void testMatrixProjectionForward(int context_start, + size_t context_length, + bool is_padding, + size_t batch_size, + size_t input_dim) { + size_t pad = std::max(0, -context_start) + + std::max(0, (int)(context_start + context_length - 1)); + if (pad == 0) is_padding = false; + + FunctionCompare compare("ContextProjectionForward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", std::max(0, -context_start))); + + CpuMatrix cpu_in(batch_size, input_dim); + cpu_in.randomizeUniform(); + GpuMatrix gpu_in(batch_size, input_dim); + gpu_in.copyFrom(cpu_in); + auto cpu_weight = + is_padding ? 
std::make_shared(pad, input_dim) : nullptr; + auto gpu_weight = + is_padding ? std::make_shared(pad, input_dim) : nullptr; + if (is_padding) { + cpu_weight->randomizeUniform(); + gpu_weight->copyFrom(*cpu_weight); + } + IVectorPtr cpu_seq; + generateSequenceStartPositions(batch_size, cpu_seq); + IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true); + gpu_seq->copyFrom(*cpu_seq); + + CpuMatrix cpu_out(batch_size, input_dim * context_length); + GpuMatrix gpu_out(batch_size, input_dim * context_length); + cpu_out.randomizeUniform(); + gpu_out.copyFrom(cpu_out); + + compare.getCpuFunction()->calc( + {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}), + Tensor(cpu_weight ? cpu_weight->getData() : nullptr, + Dims{pad, input_dim}), + Tensor(reinterpret_cast(cpu_seq->getData()), + Dims{cpu_seq->getSize()})}, + {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})}, + {}); + compare.getGpuFunction()->calc( + {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}), + Tensor(gpu_weight ? gpu_weight->getData() : nullptr, + Dims{pad, input_dim}), + Tensor(reinterpret_cast(gpu_seq->getData()), + Dims{gpu_seq->getSize()})}, + {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})}, + {}); + + autotest::TensorCheckEqual(cpu_out, gpu_out); +} + +void testMatrixProjectionBackward(int context_start, + int context_length, + bool is_padding, + size_t batch_size, + size_t input_dim) { + size_t pad = std::max(0, -context_start) + + std::max(0, (int)(context_start + context_length - 1)); + if (pad == 0) is_padding = false; + + FunctionCompare compare("ContextProjectionBackward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", std::max(0, -context_start)) + .set("is_padding", is_padding) + .set("total_pad", pad)); + + CpuMatrix cpu_in_grad(batch_size, input_dim); + cpu_in_grad.randomizeUniform(); + GpuMatrix gpu_in_grad(batch_size, input_dim); + gpu_in_grad.copyFrom(cpu_in_grad); + + CpuMatrix cpu_out_grad(batch_size, input_dim * context_length); + cpu_out_grad.randomizeUniform(); + GpuMatrix gpu_out_grad(batch_size, input_dim * context_length); + gpu_out_grad.copyFrom(cpu_out_grad); + + IVectorPtr cpu_seq; + generateSequenceStartPositions(batch_size, cpu_seq); + IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true); + gpu_seq->copyFrom(*cpu_seq); + + auto cpu_w_grad = + is_padding ? std::make_shared(pad, input_dim) : nullptr; + auto gpu_w_grad = + is_padding ? std::make_shared(pad, input_dim) : nullptr; + if (is_padding) { + cpu_w_grad->randomizeUniform(); + gpu_w_grad->copyFrom(*cpu_w_grad); + } + + compare.getCpuFunction()->calc( + {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}), + Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, + Dims{pad, input_dim}), + Tensor(reinterpret_cast(cpu_seq->getData()), + Dims{cpu_seq->getSize()})}, + {Tensor(cpu_out_grad.getData(), + Dims{batch_size, input_dim * context_length})}, + {}); + + compare.getGpuFunction()->calc( + {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}), + Tensor(gpu_w_grad ? 
gpu_w_grad->getData() : nullptr, + Dims{pad, input_dim}), + Tensor(reinterpret_cast(gpu_seq->getData()), + Dims{gpu_seq->getSize()})}, + {Tensor(gpu_out_grad.getData(), + Dims{batch_size, input_dim * context_length})}, + {}); + + autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad); + if (is_padding) { + autotest::TensorCheckErr(*cpu_w_grad, *gpu_w_grad); + } +} + +TEST(ContextProjection, projection) { + for (auto context_start : {-5, -3, -1, 0, 3}) { + for (auto context_length : {1, 2, 5, 7}) { + for (auto trainable_padding : {false, true}) { + for (auto batch_size : {1, 2, 5, 20, 100}) { + for (auto input_dim : {15, 32, 63, 128, 200}) { + VLOG(3) << " context_start=" << context_start + << " context_length=" << context_length + << " trainable_padding=" << trainable_padding + << " batch_size=" << batch_size + << " input_dim=" << input_dim; + testMatrixProjectionForward(context_start, + context_length, + trainable_padding, + batch_size, + input_dim); + testMatrixProjectionBackward(context_start, + context_length, + trainable_padding, + batch_size, + input_dim); + } + } + } + } + } +} diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp index eb005e6744f2f343ad6feab84d5851b7760a1e58..6f82a8d053bc203eed44bd0d8d4c47d23a15268d 100644 --- a/paddle/function/Function.cpp +++ b/paddle/function/Function.cpp @@ -30,6 +30,20 @@ real FuncConfig::get(const std::string& key) const { return it->second.r; } +template <> +int FuncConfig::get(const std::string& key) const { + auto it = valueMap_.find(key); + CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'"; + return it->second.i; +} + +template <> +bool FuncConfig::get(const std::string& key) const { + auto it = valueMap_.find(key); + CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'"; + return it->second.b; +} + template <> FuncConfig& FuncConfig::set(const std::string& key, size_t v) { CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key; @@ -44,6 +58,20 @@ FuncConfig& FuncConfig::set(const std::string& key, real v) { return *this; } +template <> +FuncConfig& FuncConfig::set(const std::string& key, int v) { + CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key; + valueMap_[key].i = v; + return *this; +} + +template <> +FuncConfig& FuncConfig::set(const std::string& key, bool v) { + CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key; + valueMap_[key].b = v; + return *this; +} + ClassRegistrar FunctionBase::funcRegistrar_; } // namespace paddle diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 095584c0b19f7a0b7d8787a0bc6bbdd78d785eed..9e8cbb8e48c30e80c5057fc53c050b67d3957188 100644 --- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -40,6 +40,19 @@ struct MatrixT { using type = GpuMatrix; }; +template +struct SequenceT; + +template <> +struct SequenceT { + using type = CpuIVector; +}; + +template <> +struct SequenceT { + using type = GpuIVector; +}; + typedef std::vector Dims; class Tensor { @@ -59,6 +72,8 @@ public: union value { size_t s; real r; + int i; + bool b; }; template diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h index a602bde57e5aed7452d5d1a8860b277203a682e1..32131037f6de4a9f7a3ebf8f5773eccd65dc2cdb 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/function/FunctionTest.h @@ -33,25 +33,33 @@ public: // init cpu and gpu arguments auto initArgs = [=]( Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) { - for (auto arg : inArgs) { + for (const auto arg : inArgs) 
{ size_t size = sizeof(real); - for (auto dim : arg.dims_) { + for (const auto dim : arg.dims_) { size *= dim; } - cpuMemory.emplace_back(std::make_shared(size)); - gpuMemory.emplace_back(std::make_shared(size)); - cpuArgs.emplace_back( - Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_)); - gpuArgs.emplace_back( - Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_)); - - // will use an api to refactor this code. - CpuVector cpuVector(size / sizeof(real), - (real*)cpuArgs.back().getData()); - GpuVector gpuVector(size / sizeof(real), - (real*)gpuArgs.back().getData()); - cpuVector.uniform(0.001, 1); - gpuVector.copyFrom(cpuVector); + if (arg.getData()) { + // todo(tianbing), waste unnecessary mem here + cpuMemory.emplace_back(std::make_shared(size)); + gpuMemory.emplace_back(std::make_shared(size)); + cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_)); + gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_)); + // already init outside + } else { + cpuMemory.emplace_back(std::make_shared(size)); + gpuMemory.emplace_back(std::make_shared(size)); + cpuArgs.emplace_back( + Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_)); + gpuArgs.emplace_back( + Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_)); + // will use an api to refactor this code. + CpuVector cpuVector(size / sizeof(real), + (real*)cpuArgs.back().getData()); + GpuVector gpuVector(size / sizeof(real), + (real*)gpuArgs.back().getData()); + cpuVector.uniform(0.001, 1); + gpuVector.copyFrom(cpuVector); + } } }; initArgs(cpuInputs, gpuInputs, inputs); @@ -81,6 +89,10 @@ public: checkArgs(cpuInouts, gpuInouts); } + std::shared_ptr getCpuFunction() const { return cpu; } + + std::shared_ptr getGpuFunction() const { return gpu; } + protected: std::shared_ptr cpu; std::shared_ptr gpu; diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp index 51c0ae5cc9523debffa4bdfe44fe0df0c56839c2..e947b2b9ecbebda11db5c049e1606a2d5926c28c 100644 --- a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/gserver/layers/ContextProjection.cpp @@ -38,6 +38,32 @@ ContextProjection::ContextProjection(const ProjectionConfig& config, CHECK_EQ(inputDim * totalPad, parameter->getSize()); weight_.reset(new Weight(totalPad, inputDim, parameter)); } + // init forward_ and backward_ functions + init(); +} + +bool ContextProjection::init() { + size_t context_length = config_.context_length(); + int context_start = config_.context_start(); + bool is_padding = config_.trainable_padding(); + size_t total_pad = is_padding ? 
beginPad_ + endPad_ : 0; + + createFunction(forward_, + "ContextProjectionForward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", beginPad_)); + createFunction(backward_, + "ContextProjectionBackward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", beginPad_) + .set("is_padding", is_padding) + .set("total_pad", total_pad)); + + return true; } void ContextProjection::resetState() { @@ -78,25 +104,29 @@ LayerStatePtr ContextProjection::getState() { } void ContextProjection::forward() { - CHECK(in_->value); + CHECK(in_->value && out_->value); CHECK(in_->sequenceStartPositions); - auto startPositions = in_->sequenceStartPositions->getVector(useGpu_); - - int64_t inputDim = in_->value->getWidth(); - int64_t dim = out_->value->getWidth(); - CHECK_EQ(dim, inputDim * config_.context_length()); + size_t input_dim = in_->value->getWidth(); + size_t dim = out_->value->getWidth(); + CHECK_EQ(dim, input_dim * config_.context_length()); + size_t batch_size = in_->value->getHeight(); + CHECK_EQ(forward_.size(), 1) << "Only one forward function here"; REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str()); - bool isPadding = config_.trainable_padding(); - out_->value->contextProjectionForward( - *(in_->value), - state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr, - *startPositions, - config_.context_length(), - config_.context_start(), - beginPad_, - state_ ? true : isPadding); + bool is_padding = config_.trainable_padding(); + /// first use state_, otherwise use weight_(padding false === w nullptr) + auto w_ptr = + state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr; + auto start_pos = in_->sequenceStartPositions; + forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}), + Tensor(w_ptr ? w_ptr->getData() : nullptr, + Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}), + Tensor(reinterpret_cast( + const_cast(start_pos->getData(useGpu_))), + Dims{start_pos->getSize()})}, + {Tensor(out_->value->getData(), Dims{batch_size, dim})}, + {}); if (state_ && config_.context_start() < 0) { CHECK_EQ(1, in_->getNumSequences()); @@ -118,41 +148,27 @@ void ContextProjection::forward() { } void ContextProjection::backward(const UpdateCallback& callback) { - CHECK(in_->value); - int64_t inputDim = in_->value->getWidth(); - int64_t dim = out_->value->getWidth(); - CHECK_EQ(dim, inputDim * config_.context_length()); - auto startPositions = in_->sequenceStartPositions->getVector(useGpu_); + CHECK(in_->value && out_->value && out_->grad); + size_t input_dim = in_->value->getWidth(); + size_t dim = out_->value->getWidth(); + CHECK_EQ(dim, input_dim * config_.context_length()); + size_t batch_size = in_->value->getHeight(); + CHECK_EQ(batch_size, out_->value->getHeight()); + CHECK_EQ(backward_.size(), 1) << "Only one backward function here"; REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str()); - bool isPadding = config_.trainable_padding(); - if (!out_->grad->useGpu()) { - out_->grad->contextProjectionBackward( - in_->grad.get(), - isPadding ? 
weight_->getWGrad().get() : nullptr, - *startPositions, - config_.context_length(), - config_.context_start(), - beginPad_, - isPadding); - } else { - if (in_->grad) { - out_->grad->contextProjectionBackwardData(*(in_->grad), - *startPositions, - config_.context_length(), - config_.context_start()); - } - - if (isPadding && weight_->getWGrad()) { - out_->grad->contextProjectionBackwardWeight( - *(weight_->getWGrad()), - *startPositions, - config_.context_length(), - config_.context_start(), - weight_->getWGrad()->getHeight(), - beginPad_); - } - } + bool is_padding = config_.trainable_padding(); + auto start_pos = in_->sequenceStartPositions; + auto w_ptr = is_padding ? weight_->getWGrad() : nullptr; + backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr, + Dims{batch_size, input_dim}), + Tensor(w_ptr ? w_ptr->getData() : nullptr, + Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}), + Tensor(reinterpret_cast( + const_cast(start_pos->getData(useGpu_))), + Dims{start_pos->getSize()})}, + {Tensor(out_->grad->getData(), Dims{batch_size, dim})}, + {}); if (config_.trainable_padding()) { weight_->getParameterPtr()->incUpdate(callback); diff --git a/paddle/gserver/layers/ContextProjection.h b/paddle/gserver/layers/ContextProjection.h index 2df43bd04fec868924b5d45f9def231a48ee7f04..c87d6ed1d6d46b391ccf8722f6d110614be1fe78 100644 --- a/paddle/gserver/layers/ContextProjection.h +++ b/paddle/gserver/layers/ContextProjection.h @@ -61,6 +61,8 @@ public: virtual LayerStatePtr getState(); + virtual bool init(); + protected: std::unique_ptr weight_; /// number of extra timesteps added at the beginning diff --git a/paddle/gserver/layers/Projection.h b/paddle/gserver/layers/Projection.h index 8cd8042479eafdbd6b8dac03b63b344fcf9526b1..778a7fe13d8a2b669831396e69546446b4745e61 100644 --- a/paddle/gserver/layers/Projection.h +++ b/paddle/gserver/layers/Projection.h @@ -88,11 +88,37 @@ public: */ virtual LayerStatePtr getState() { return nullptr; } + /** + * init forward_ and backward_ functions + */ + virtual bool init() { return true; } + /** * Get output size of projection. */ size_t getOutputSize() const { return config_.output_size(); } +protected: + /** + * Create layer function. Function is called in forward or backward. 
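+ * For example, name = "ContextProjectionForward" resolves to the Function
+ * registered under "ContextProjectionForward-CPU" or
+ * "ContextProjectionForward-GPU" by REGISTER_TYPED_FUNC in
+ * ContextProjectionOp.cpp, chosen by this projection's useGpu_ flag.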
+ * \param function, Layer::forward_ or Layer::backward_ + * \param name, function name + * \param config, initialization configuration for the function + */ + void createFunction(std::vector>& function, + const std::string& name, + const FuncConfig& config) { + if (useGpu_) { + function.emplace_back( + FunctionBase::funcRegistrar_.createByType(name + "-GPU")); + } else { + function.emplace_back( + FunctionBase::funcRegistrar_.createByType(name + "-CPU")); + } + auto& func = function.back(); + func->init(config); + } + protected: /// Config of projection ProjectionConfig config_; @@ -106,5 +132,9 @@ protected: const Argument* out_; /// Store `passType` passed to forward() PassType passType_; + /// Layer forward function + std::vector> forward_; + /// Layer backward function + std::vector> backward_; }; } // namespace paddle diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 50d2e3eb671028c8169321fcd85fe25735c11a14..90813a89969c2525f7029f1c2609bed116c910c4 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1304,68 +1304,6 @@ void GpuMatrix::maxSequenceBackward(Matrix& outputGrad, hl_max_sequence_backward(outGrad, maxIndex, inputGrad, numSequences, dim); } -void GpuMatrix::contextProjectionForward(Matrix& input, - Matrix* weight, - const IVector& sequence, - int contextLength, - int contextStart, - size_t beginPad, - bool isPadding) { - CHECK(dynamic_cast(&input)); - CHECK(dynamic_cast(&sequence)); - if (weight) CHECK(dynamic_cast(weight)); - CHECK_EQ(getWidth(), input.getWidth() * contextLength); - - hl_context_projection_forward(input.getData(), - sequence.getData(), - isPadding ? weight->getData() : NULL, - getData(), - sequence.getSize() - 1, - input.getWidth(), - contextLength, - contextStart, - beginPad, - isPadding); -} - -void GpuMatrix::contextProjectionBackwardData(Matrix& inputGrad, - const IVector& sequence, - int contextLength, - int contextStart) { - CHECK(dynamic_cast(&inputGrad)); - CHECK(dynamic_cast(&sequence)); - CHECK_EQ(getWidth(), inputGrad.getWidth() * contextLength); - - hl_context_projection_backward_data(getData(), - sequence.getData(), - inputGrad.getData(), - sequence.getSize() - 1, - inputGrad.getWidth(), - contextLength, - contextStart); -} - -void GpuMatrix::contextProjectionBackwardWeight(Matrix& weightGrad, - const IVector& sequence, - int contextLength, - int contextStart, - int totalPad, - size_t beginPad) { - CHECK(dynamic_cast(&weightGrad)); - CHECK(dynamic_cast(&sequence)); - CHECK_EQ(getWidth(), weightGrad.getWidth() * contextLength); - - hl_context_projection_backward_weight(getData(), - sequence.getData(), - weightGrad.getData(), - sequence.getSize() - 1, - weightGrad.getWidth(), - totalPad, - contextLength, - contextStart, - beginPad); -} - void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) { CHECK(data.useGpu_ == true && W.useGpu_ == true) << "Matrix type are not equal"; @@ -2203,113 +2141,6 @@ void CpuMatrix::maxSequenceBackward(Matrix& outputGrad, } } -void CpuMatrix::contextProjectionForward(Matrix& input, - Matrix* weight, - const IVector& sequence, - int contextLength, - int contextStart, - size_t beginPad, - bool isPadding) { - auto input_ptr = dynamic_cast(&input); - auto seq_ptr = dynamic_cast(&sequence); - CHECK(input_ptr && seq_ptr); - if (weight) CHECK(dynamic_cast(weight)); - CHECK_EQ(getWidth(), input_ptr->getWidth() * contextLength); - - const int* starts = seq_ptr->getData(); - size_t numSequences = seq_ptr->getSize() - 1; - for (size_t i = 0; i < numSequences; ++i) { - for (int j = 0; j < 
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index bda863de38675fe481544a7e82b69f445df361bd..4865a081a5aaa010d5b3ce0127ffc6f8330d4a68 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -972,42 +972,6 @@ public:
     LOG(FATAL) << "Not implemeted";
   }
 
-  virtual void contextProjectionForward(Matrix& input,
-                                        Matrix* weight,
-                                        const IVector& sequence,
-                                        int contextLength,
-                                        int contextStart,
-                                        size_t beginPad,
-                                        bool isPadding) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
-  virtual void contextProjectionBackward(Matrix* inputGrad,
-                                         Matrix* weightGrad,
-                                         const IVector& sequence,
-                                         int contextLength,
-                                         int contextStart,
-                                         size_t beginPad,
-                                         bool isPadding) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
-  virtual void contextProjectionBackwardData(Matrix& inputGrad,
-                                             const IVector& sequence,
-                                             int contextLength,
-                                             int contextStart) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
-  virtual void contextProjectionBackwardWeight(Matrix& weightGrad,
-                                               const IVector& sequence,
-                                               int contextLength,
-                                               int contextStart,
-                                               int totalPad,
-                                               size_t beginPad) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
   /**
    * @code
    * this.row[i] += table.row[ids[i]]
@@ -1442,26 +1406,6 @@ public:
                           const IVector& sequence,
                           IVector& index);
 
-  void contextProjectionForward(Matrix& input,
-                                Matrix* weight,
-                                const IVector& sequence,
-                                int contextLength,
-                                int contextStart,
-                                size_t beginPad,
-                                bool isPadding);
-
-  void contextProjectionBackwardData(Matrix& inputGrad,
-                                     const IVector& sequence,
-                                     int contextLength,
-                                     int contextStart);
-
-  void contextProjectionBackwardWeight(Matrix& weightGrad,
-                                       const IVector& sequence,
-                                       int contextLength,
-                                       int contextStart,
-                                       int totalPad,
-                                       size_t beginPad);
-
   void bilinearForward(const Matrix& in,
                        const size_t inImgH,
                        const size_t inImgW,
@@ -1648,22 +1592,6 @@ public:
                           const IVector& sequence,
                           IVector& index);
 
-  void contextProjectionForward(Matrix& input,
-                                Matrix* weight,
-                                const IVector& sequence,
-                                int contextLength,
-                                int contextStart,
-                                size_t beginPad,
-                                bool isPadding);
-
-  void contextProjectionBackward(Matrix* inputGrad,
-                                 Matrix* weightGrad,
-                                 const IVector& sequence,
-                                 int contextLength,
-                                 int contextStart,
-                                 size_t beginPad,
-                                 bool isPadding);
-
   real* getRow(size_t row) { return BaseMatrix::rowBuf(row); }
   virtual real* getRowBuf(size_t row) { return getRow(row); }
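With the Matrix-level virtuals gone, callers reach these kernels through the function registry rather than the matrix class hierarchy. A sketch of that dispatch path, mirroring the backward_[0]->calc() call added to ContextProjection.cpp; the tensor shapes, variable names, and config keys here are assumptions for illustration:

    // Sketch only: manual lookup and invocation of a registered function.
    auto func = FunctionBase::funcRegistrar_.createByType(
        useGpu ? "ContextProjectionForward-GPU" : "ContextProjectionForward-CPU");
    func->init(FuncConfig()
                   .set("context_length", context_length)
                   .set("context_start", context_start));
    // calc() takes three tensor lists: inputs, outputs, and in/out buffers.
    func->calc({Tensor(input_data, Dims{batch_size, input_dim}),
                Tensor(reinterpret_cast<real*>(starts_data),  // sequence starts
                       Dims{num_sequences + 1})},
               {Tensor(output_data, Dims{batch_size, input_dim * context_length})},
               {});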
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index efda4ff27be92916228b4bcc9170107ff707d6cf..98d63438a57b48340bc3b05ac7ac3d6c5cd90fb0 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -29,148 +29,6 @@ using namespace std;  // NOLINT
 using autotest::TensorCheckEqual;
 using autotest::TensorCheckErr;
 
-void testMatrixProjectionForward(int contextStart,
-                                 int contextLength,
-                                 bool padding,
-                                 int batchSize,
-                                 int inputDim) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
-  cpuInput->randomizeUniform();
-  gpuInput->copyFrom(*cpuInput);
-
-  int pad = std::max(0, -contextStart) +
-            std::max(0, contextStart + contextLength - 1);
-  if (pad == 0) padding = false;
-  MatrixPtr cpuWeight = nullptr;
-  MatrixPtr gpuWeight = nullptr;
-  if (padding) {
-    cpuWeight = std::make_shared<CpuMatrix>(pad, inputDim);
-    gpuWeight = std::make_shared<GpuMatrix>(pad, inputDim);
-    cpuWeight->randomizeUniform();
-    gpuWeight->copyFrom(*cpuWeight);
-  }
-
-  IVectorPtr cpuSequence;
-  generateSequenceStartPositions(batchSize, cpuSequence);
-  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
-  gpuSequence->copyFrom(*cpuSequence);
-
-  MatrixPtr cpuOutput =
-      std::make_shared<CpuMatrix>(batchSize, inputDim * contextLength);
-  MatrixPtr gpuOutput =
-      std::make_shared<GpuMatrix>(batchSize, inputDim * contextLength);
-  cpuOutput->randomizeUniform();
-  gpuOutput->copyFrom(*cpuOutput);
-
-  // calculate
-  int beginPad = std::max(0, -contextStart);
-  cpuOutput->contextProjectionForward(*cpuInput,
-                                      cpuWeight.get(),
-                                      *cpuSequence,
-                                      contextLength,
-                                      contextStart,
-                                      beginPad,
-                                      padding);
-
-  gpuOutput->contextProjectionForward(*gpuInput,
-                                      gpuWeight.get(),
-                                      *gpuSequence,
-                                      contextLength,
-                                      contextStart,
-                                      beginPad,
-                                      padding);
-
-  TensorCheckEqual(*cpuOutput, *gpuOutput);
-}
-
-void testMatrixProjectionBackward(int contextStart,
-                                  int contextLength,
-                                  bool padding,
-                                  int batchSize,
-                                  int inputDim) {
-  MatrixPtr cpuOutputGrad =
-      std::make_shared<CpuMatrix>(batchSize, inputDim * contextLength);
-  MatrixPtr gpuOutputGrad =
-      std::make_shared<GpuMatrix>(batchSize, inputDim * contextLength);
-  cpuOutputGrad->randomizeUniform();
-  gpuOutputGrad->copyFrom(*cpuOutputGrad);
-
-  IVectorPtr cpuSequence;
-  generateSequenceStartPositions(batchSize, cpuSequence);
-  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
-  gpuSequence->copyFrom(*cpuSequence);
-
-  MatrixPtr cpuInputGrad = std::make_shared<CpuMatrix>(batchSize, inputDim);
-  MatrixPtr gpuInputGrad = std::make_shared<GpuMatrix>(batchSize, inputDim);
-  cpuInputGrad->randomizeUniform();
-  gpuInputGrad->copyFrom(*cpuInputGrad);
-
-  int pad = std::max(0, -contextStart) +
-            std::max(0, contextStart + contextLength - 1);
-  if (pad == 0) padding = false;
-  MatrixPtr cpuWeightGrad = nullptr;
-  MatrixPtr gpuWeightGrad = nullptr;
-  if (padding) {
-    cpuWeightGrad = std::make_shared<CpuMatrix>(pad, inputDim);
-    gpuWeightGrad = std::make_shared<GpuMatrix>(pad, inputDim);
-    cpuWeightGrad->randomizeUniform();
-    gpuWeightGrad->copyFrom(*cpuWeightGrad);
-  }
-
-  // calculate
-  int beginPad = std::max(0, -contextStart);
-  cpuOutputGrad->contextProjectionBackward(cpuInputGrad.get(),
-                                           cpuWeightGrad.get(),
-                                           *cpuSequence,
-                                           contextLength,
-                                           contextStart,
-                                           beginPad,
-                                           padding);
-  gpuOutputGrad->contextProjectionBackwardData(
-      *gpuInputGrad, *gpuSequence, contextLength, contextStart);
-  if (padding) {
-    gpuOutputGrad->contextProjectionBackwardWeight(*gpuWeightGrad,
-                                                   *gpuSequence,
-                                                   contextLength,
-                                                   contextStart,
-                                                   pad,
-                                                   beginPad);
-  }
-
-  TensorCheckErr(*cpuInputGrad, *gpuInputGrad);
-  if (padding) {
-    TensorCheckErr(*cpuWeightGrad, *gpuWeightGrad);
-  }
-}
-
-TEST(Matrix, projection) {
-  for (auto contextStart : {-5, -3, -1, 0, 3}) {
-    for (auto contextLength : {1, 2, 5, 7}) {
-      for (auto trainablePadding : {false, true}) {
-        for (auto batchSize : {1, 2, 5, 20, 100}) {
-          for (auto inputDim : {15, 32, 63, 128, 200}) {
-            VLOG(3) << " contextStart=" << contextStart
-                    << " contextLength=" << contextLength
-                    << " trainablePadding=" << trainablePadding
-                    << " batchSize=" << batchSize << " inputDim=" << inputDim;
-            testMatrixProjectionForward(contextStart,
-                                        contextLength,
-                                        trainablePadding,
-                                        batchSize,
-                                        inputDim);
-            testMatrixProjectionBackward(contextStart,
-                                         contextLength,
-                                         trainablePadding,
-                                         batchSize,
-                                         inputDim);
-          }
-        }
-      }
-    }
-  }
-}
-
 void testMatrixMaxSequence(int batchSize, int inputDim) {
   // forward
   MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);