diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 75fcc1cda165197fc4413efc6bbbc440088cb4cd..1919d86c33b6e89d5edf78a1f0caa6403550601a 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -115,7 +115,8 @@ set(DEPS_OPS softmax_with_cross_entropy_op sum_op pool_op - pool_with_index_op) + pool_with_index_op + sequence_conv_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc @@ -126,6 +127,8 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(sum_op DEPS net_op) op_library(pool_op DEPS pooling) op_library(pool_with_index_op DEPS pooling) +op_library(sequence_conv_op DEPS sequence_project) + list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) diff --git a/paddle/operators/sequence_project_op.cc b/paddle/operators/sequence_conv_op.cc similarity index 64% rename from paddle/operators/sequence_project_op.cc rename to paddle/operators/sequence_conv_op.cc index 6b5c3c676b9fba133e5b509dbd874dbf81e6f9eb..1fc23302dcb329ea94aeec4da00f5752ac43a712 100644 --- a/paddle/operators/sequence_project_op.cc +++ b/paddle/operators/sequence_conv_op.cc @@ -12,34 +12,41 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/sequence_project_op.h" +#include "paddle/operators/sequence_conv_op.h" namespace paddle { namespace operators { -class SequenceProjectOp : public framework::OperatorWithKernel { +class SequenceConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; protected: void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of SequenceProjectOp should not be null."); + "Input(X) of SequenceConvOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Filter"), + "Input(Filter) of SequenceConvOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of SequenceProjectOp should not be null."); + "Output(Out) of SequenceConvOp should not be null."); // PaddingData mast be not empty. Otherwise(EnforceNotMet: enforce numel() > // 0 failed, 0 <= 0) - PADDLE_ENFORCE( - ctx->HasInput("PaddingData"), - "Input(PaddingData) of SequenceProjectOp should not be null."); - - auto in_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE(in_dims.size() == 2, "Input(X) should be 2-D tensor."); + PADDLE_ENFORCE(ctx->HasInput("PaddingData"), + "Input(PaddingData) of SequenceConvOp should not be null."); int context_length = ctx->Attrs().Get("context_length"); bool padding_trainable = ctx->Attrs().Get("padding_trainable"); int context_start = ctx->Attrs().Get("context_start"); + auto in_dims = ctx->GetInputDim("X"); + auto filter_dims = ctx->GetInputDim("Filter"); + PADDLE_ENFORCE(in_dims.size() == 2 && filter_dims.size() == 2, + "Input(X, Filter) should be 2-D tensor."); + PADDLE_ENFORCE( + filter_dims[0] == context_length && filter_dims[1] == in_dims[1], + "Filter's shape should be (context_length x " + "number_of_input_features)."); + if (padding_trainable) { framework::DDim padding_dim = ctx->GetInputDim("PaddingData"); int up_pad = std::max(0, -context_start); @@ -60,12 +67,12 @@ class SequenceProjectOp : public framework::OperatorWithKernel { "and 'context_length'."); } - in_dims[1] = in_dims[1] * context_length; + in_dims[1] = 1; ctx->SetOutputDim("Out", in_dims); } }; -class SequenceProjectGradOp : public framework::OperatorWithKernel { +class SequenceConvGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -77,60 +84,66 @@ class SequenceProjectGradOp : public framework::OperatorWithKernel { if (ctx->Attrs().Get("padding_trainable") && ctx->HasOutput(framework::GradVarName("PaddingData"))) { - auto padding_dims = ctx->GetInputDim("PaddingData"); - ctx->SetOutputDim(framework::GradVarName("PaddingData"), padding_dims); + ctx->SetOutputDim(framework::GradVarName("PaddingData"), + ctx->GetInputDim("PaddingData")); } if (ctx->HasOutput(framework::GradVarName("X"))) { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } + if (ctx->HasOutput(framework::GradVarName("Filter"))) { + ctx->SetOutputDim(framework::GradVarName("Filter"), + ctx->GetInputDim("Filter")); + } } }; -class SequenceProjectOpMaker : public framework::OpProtoAndCheckerMaker { +class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker { public: - SequenceProjectOpMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker) + SequenceConvOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", - "(A float LoDTensor) the input of SequenceProjectOp, a vector of " + "(A float LoDTensor) the input of SequenceConvOp, a vector of " "2-D matrix of size (minibatch, number_of_input_features)."); - AddOutput("Out", - "(A float LoDTensor) the output of SequenceProjectOp, a vector " - "of 2-D matrix of size (minibatch, number_of_input_features x " - "context_length)."); AddInput("PaddingData", - "(A float LoDTensor) the input of SequenceProjectOp, a vector of " + "(A float LoDTensor) the input of SequenceConvOp, a vector of " "2-D matrix of size (up_pad + down_pad, " "number_of_input_features). "); + AddInput("Filter", + "(A float LoDTensor) the input of SequenceConvOp, a vector of " + "2-D matrix of size (context_length x number_of_input_features)."); + AddOutput("Out", + "(A float LoDTensor) the output of SequenceConvOp, a vector " + "of 2-D matrix of size (minibatch, 1)."); AddAttr("padding_trainable", - "(bool, default false) the padding data of SequenceProjectOp " + "(bool, default false) the padding data of SequenceConvOp " "is trainable or not.") .SetDefault(false); AddAttr("context_length", - "(int, default 3) the context_length of SequenceProjectOp.") + "(int, default 3) the context_length of SequenceConvOp.") .SetDefault(3) .GreaterThan(0); AddAttr("context_start", - "(int, default 0) the context_start of SequenceProjectOp.") + "(int, default 0) the context_start of SequenceConvOp.") .SetDefault(0); AddAttr("context_stride", - "(int, default 1) the context_stride of SequenceProjectOp. " + "(int, default 1) the context_stride of SequenceConvOp. " "Currently, sequence_project_op only support " "context_stride=1.") .SetDefault(1) .GreaterThan(0); AddComment(R"DOC( - SequenceProjectOp projects features of context_length time-steps of each instance. + SequenceConvOp projects features of context_length time-steps of each instance. For a mini-batch of 2 variable lengths sentences, containing 3, and 1 time-steps: Assumed input (X) is a [4, M, N] float LoDTensor, and X->lod()[0] = [0, 3, 4]. Besides, for the sake of simplicity, we assume M=1 and N=2. - X = [[a1, a2, - b1, b2. + X = [[a1, a2; + b1, b2; c1, c2] [d1, d2]] @@ -141,19 +154,19 @@ class SequenceProjectOpMaker : public framework::OpProtoAndCheckerMaker { If context_start is -1 and padding_trainable is false, we use zero to pad instead of learned weight to pad, and the context_lenth is 3, the output (Out) is: - Out = [0, 0, a1, a2, b1, b2; + Out =[[0, 0, a1, a2, b1, b2; a1, a2, b1, b2, c1, c2; - b1, b2, c1, c2, 0, 0; - 0, 0, d1, d2, 0, 0] + b1, b2, c1, c2, 0, 0 ] + [0, 0, d1, d2, 0, 0 ]] - Case2: If context_start is -1 and padding_trainable is true, we use learned weight to pad, and the context_lenth is 3, the output (Out) is: - Out = [w1, w2, a1, a2, b1, b2; + Out = [[w1, w2, a1, a2, b1, b2; a1, a2, b1, b2, c1, c2; - b1, b2, c1, c2, w3, w4; - w1, w2, d1, d2, w3, w4] + b1, b2, c1, c2, w3, w4] + [w1, w2, d1, d2, w3, w4]] )DOC"); } @@ -163,13 +176,11 @@ class SequenceProjectOpMaker : public framework::OpProtoAndCheckerMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sequence_project, ops::SequenceProjectOp, - ops::SequenceProjectOpMaker, sequence_project_grad, - ops::SequenceProjectGradOp); +REGISTER_OP(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker, + sequence_conv_grad, ops::SequenceConvGradOp); REGISTER_OP_CPU_KERNEL( - sequence_project, - ops::SequenceProjectKernel); + sequence_conv, ops::SequenceConvKernel); REGISTER_OP_CPU_KERNEL( - sequence_project_grad, - ops::SequenceProjectGradKernel); + sequence_conv_grad, + ops::SequenceConvGradKernel); diff --git a/paddle/operators/sequence_project_op.cu b/paddle/operators/sequence_conv_op.cu similarity index 75% rename from paddle/operators/sequence_project_op.cu rename to paddle/operators/sequence_conv_op.cu index 7d3479d6f955bf1cc5926055086ed2afcc9a4168..4c0c673a517c4b05c3abd8bf6b5cf5bbb19cfae0 100644 --- a/paddle/operators/sequence_project_op.cu +++ b/paddle/operators/sequence_conv_op.cu @@ -14,12 +14,11 @@ #define EIGEN_USE_GPU -#include "paddle/operators/sequence_project_op.h" +#include "paddle/operators/sequence_conv_op.h" namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( - sequence_project, - ops::SequenceProjectKernel); + sequence_conv, ops::SequenceConvKernel); REGISTER_OP_GPU_KERNEL( - sequence_project_grad, - ops::SequenceProjectGradKernel); + sequence_conv_grad, + ops::SequenceConvGradKernel); diff --git a/paddle/operators/sequence_project_op.h b/paddle/operators/sequence_conv_op.h similarity index 57% rename from paddle/operators/sequence_project_op.h rename to paddle/operators/sequence_conv_op.h index c1f7f97f09b1479a577853dd9b267c0683fc8408..d049e83ff352e09d1d98cf6a8b93d1331f75f2bf 100644 --- a/paddle/operators/sequence_project_op.h +++ b/paddle/operators/sequence_conv_op.h @@ -15,46 +15,39 @@ limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/math/im2col.h" #include "paddle/operators/math/math_function.h" -#include "paddle/operators/strided_memcpy.h" +#include "paddle/operators/math/sequence_project.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; -template -using EigenVector = framework::EigenVector; +// template +// using EigenVector = framework::EigenVector; template using EigenMatrix = framework::EigenMatrix; template -class SequenceProjectKernel : public framework::OpKernel { +class SequenceConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); auto* out = context.Output("Out"); - out->mutable_data(context.GetPlace()); - - // Because if padding_trainable is false, padding data should be zeros. - auto temp = framework::EigenVector::Flatten(*out); - temp.device(context.GetEigenDevice()) = - temp.constant(static_cast(0)); + auto filter = *context.Input("Filter"); - auto place = context.GetEigenDevice(); + out->mutable_data(context.GetPlace()); int context_start = context.Attr("context_start"); int context_length = context.Attr("context_length"); - bool padding_trainable = context.Attr("padding_trainable"); int context_stride = context.Attr("context_stride"); + bool padding_trainable = context.Attr("padding_trainable"); // InferShape by in_lod PADDLE_ENFORCE_EQ(in->lod().size(), 1UL, "Only support one level sequence now."); - auto lod_level_0 = in->lod()[0]; const LoDTensor* padding_data = nullptr; if (padding_trainable) { @@ -63,117 +56,51 @@ class SequenceProjectKernel : public framework::OpKernel { int up_pad = std::max(0, -context_start); int down_pad = std::max(0, context_start + context_length - 1); - int sequence_height, sequence_width; - int input_row_begin, input_row_end; + int sequence_width; sequence_width = static_cast(in->dims()[1]); - paddle::operators::math::Im2ColFunctor< - paddle::operators::math::ColFormat::kOCF, Place, float> - im2col_ocf; - - for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { - input_row_begin = (context_start > 0) - ? static_cast(lod_level_0[i]) + context_start - : static_cast(lod_level_0[i]); - input_row_end = static_cast(lod_level_0[i + 1]); - - Tensor out_t = out->Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); - - sequence_height = static_cast(out_t.dims()[0]); - - std::vector output_shape( - {sequence_height, 1, 1, context_length, - sequence_width}); // output_height, output_width, - // input_channels, filter_height, filter_width - out_t.Resize(framework::make_ddim(output_shape)); - - if (input_row_begin < input_row_end) { - Tensor in_t = in->Slice(input_row_begin, input_row_end); - std::vector input_shape( - {1, input_row_end - input_row_begin, - sequence_width}); // input_channels, input_height, input_width - in_t.Resize(framework::make_ddim(input_shape)); - - im2col_ocf(context.device_context(), in_t, out_t, - /*stride_height*/ context_stride, /*stride_width*/ 0, up_pad, - down_pad); - } + // use col_shape in the im2col calculation + framework::DDim col_shape = {in->dims()[0], + sequence_width * context_length}; + LoDTensor col; + col.mutable_data(col_shape, context.GetPlace()); + // Because if padding_trainable is false, padding data should be zeros. + auto temp = framework::EigenVector::Flatten(col); + temp.device(context.GetEigenDevice()) = + temp.constant(static_cast(0)); - if (padding_trainable) { - // add up trainable data - out_t.Resize(framework::make_ddim( - {sequence_height * context_length, sequence_width})); + paddle::operators::math::SequenceProjectFunctor + seq_project_functor; - if (up_pad > 0) { // add up pad - int padding_rows = std::min( - up_pad, static_cast(lod_level_0[i + 1] - lod_level_0[i])); + seq_project_functor(context.device_context(), in, padding_data, &col, + padding_trainable, context_start, context_length, + context_stride, up_pad, down_pad); - for (int k = 0; k < padding_rows; ++k) { - int padding_size = - k + context_length < up_pad ? context_length : up_pad - k; - Tensor out_t_sub = out_t.Slice(k * context_length, - k * context_length + padding_size); - Tensor w_sub = padding_data->Slice(k, k + padding_size); - // in this block, using EigenVector::Flatten is ok too. - auto out_t_sub_e = EigenMatrix::From(out_t_sub); - auto w_sub_e = EigenMatrix::From(w_sub); - out_t_sub_e.device(place) = w_sub_e; - } - } - if (down_pad > 0) { // add down pad - int down_pad_begin_row = - std::max(0, - (sequence_height - context_start - context_length) + 1) + - 1; - int padding_begin = std::max(0, context_start - sequence_height); - int padding_size = - sequence_height - context_start >= context_length - ? 1 - : context_length - (sequence_height - context_start); - if (context_start >= sequence_height) padding_size = context_length; - int padding_idx = padding_begin; - for (int t = 0; t + down_pad_begin_row <= sequence_height; - ++t, ++padding_size) { - if (context_start >= sequence_height) padding_size = context_length; - if (padding_size > context_length) { - padding_size = context_length; - padding_idx++; - } - if (padding_begin > 0 || sequence_height == context_start) - padding_idx = padding_begin + t; - Tensor out_t_sub = out_t.Slice( - (down_pad_begin_row + t) * context_length - padding_size, - (down_pad_begin_row + t) * context_length); - Tensor w_sub = padding_data->Slice( - up_pad + padding_idx, up_pad + padding_idx + padding_size); - auto out_t_sub_e = EigenMatrix::From(out_t_sub); - auto w_sub_e = EigenMatrix::From(w_sub); - out_t_sub_e.device(place) = w_sub_e; - } - } - } - out_t.Resize(framework::make_ddim( - {sequence_height, context_length * sequence_width})); - } + filter.Resize(framework::make_ddim({context_length * sequence_width, 1})); + math::matmul(context.device_context(), col, false, filter, false, + T(1.0), out, T(0.0)); } }; template -class SequenceProjectGradKernel : public framework::OpKernel { +class SequenceConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* out_g = context.Input(framework::GradVarName("Out")); auto* in_g = context.Output(framework::GradVarName("X")); + auto* filter_g = + context.Output(framework::GradVarName("Filter")); auto* padding_data_g = context.Output(framework::GradVarName("PaddingData")); auto* in = context.Input("X"); + auto* filter = context.Input("Filter"); + auto place = context.GetEigenDevice(); int context_start = context.Attr("context_start"); int context_length = context.Attr("context_length"); - bool padding_trainable = context.Attr("padding_trainable"); int context_stride = context.Attr("context_stride"); + bool padding_trainable = context.Attr("padding_trainable"); // InferShape by in_lod PADDLE_ENFORCE_EQ(in->lod().size(), 1UL, @@ -187,15 +114,31 @@ class SequenceProjectGradKernel : public framework::OpKernel { sequence_width = static_cast(in->dims()[1]); - paddle::operators::math::Col2ImFunctor< - paddle::operators::math::ColFormat::kOCF, Place, float> - col2im_ocf; + // use col_shape in the im2col calculation + framework::DDim col_shape = {in->dims()[0], + sequence_width * context_length}; + LoDTensor col; + + if (in_g || filter_g || (padding_trainable && padding_data_g)) { + col.mutable_data(col_shape, context.GetPlace()); + // Because if padding_trainable is false, padding data should be zeros. + auto temp = framework::EigenVector::Flatten(col); + temp.device(context.GetEigenDevice()) = + temp.constant(static_cast(0)); + math::matmul(context.device_context(), *out_g, false, *filter, + true, T(1.0), &col, T(1.0)); + } if (in_g) { in_g->mutable_data(context.GetPlace()); + math::SetConstant functor; functor(context.device_context(), in_g, 0); + paddle::operators::math::Col2ImFunctor< + paddle::operators::math::ColFormat::kOCF, Place, float> + col2im_ocf; + for (int i = 0; i < static_cast(lod_g_level_0.size()) - 1; ++i) { input_row_begin = (context_start > 0) @@ -203,10 +146,10 @@ class SequenceProjectGradKernel : public framework::OpKernel { : static_cast(lod_g_level_0[i]); input_row_end = static_cast(lod_g_level_0[i + 1]); - Tensor out_g_t = out_g->Slice(static_cast(lod_g_level_0[i]), - static_cast(lod_g_level_0[i + 1])); + Tensor col_t = col.Slice(static_cast(lod_g_level_0[i]), + static_cast(lod_g_level_0[i + 1])); - sequence_height = static_cast(out_g_t.dims()[0]); + sequence_height = static_cast(col_t.dims()[0]); if (input_row_begin < input_row_end) { Tensor in_t = in_g->Slice(input_row_begin, input_row_end); @@ -214,19 +157,19 @@ class SequenceProjectGradKernel : public framework::OpKernel { std::vector output_shape( {sequence_height, 1, 1, context_length, sequence_width}); // output_height, output_width, - // input_channels, filter_height, filter_width - out_g_t.Resize(framework::make_ddim(output_shape)); + // input_channels, filter_height, filter_width + col_t.Resize(framework::make_ddim(output_shape)); std::vector input_shape( {1, input_row_end - input_row_begin, sequence_width}); // input_channels, input_height, input_width in_t.Resize(framework::make_ddim(input_shape)); - col2im_ocf(context.device_context(), in_t, out_g_t, + col2im_ocf(context.device_context(), in_t, col_t, /*stride_height*/ context_stride, /*stride_width*/ 0, up_pad, down_pad); } - out_g_t.Resize(framework::make_ddim( + col_t.Resize(framework::make_ddim( {sequence_height, context_length * sequence_width})); } } @@ -244,12 +187,12 @@ class SequenceProjectGradKernel : public framework::OpKernel { : static_cast(lod_g_level_0[i]); input_row_end = static_cast(lod_g_level_0[i + 1]); - Tensor out_g_t = out_g->Slice(static_cast(lod_g_level_0[i]), - static_cast(lod_g_level_0[i + 1])); + Tensor col_t = col.Slice(static_cast(lod_g_level_0[i]), + static_cast(lod_g_level_0[i + 1])); - sequence_height = static_cast(out_g_t.dims()[0]); + sequence_height = static_cast(col_t.dims()[0]); - out_g_t.Resize(framework::make_ddim( + col_t.Resize(framework::make_ddim( {sequence_height * context_length, sequence_width})); if (up_pad > 0) { // add up pad @@ -260,8 +203,8 @@ class SequenceProjectGradKernel : public framework::OpKernel { for (int k = 0; k < padding_rows; ++k) { int padding_size = k + context_length < up_pad ? context_length : up_pad - k; - Tensor out_t_sub = out_g_t.Slice(k * context_length, - k * context_length + padding_size); + Tensor out_t_sub = col_t.Slice(k * context_length, + k * context_length + padding_size); Tensor w_sub = padding_data_g->Slice(k, k + padding_size); // in this block, using EigenVector::Flatten is ok too. auto out_t_sub_e = EigenMatrix::From(out_t_sub); @@ -290,7 +233,7 @@ class SequenceProjectGradKernel : public framework::OpKernel { } if (padding_begin > 0 || sequence_height == context_start) padding_idx = padding_begin + t; - Tensor out_t_sub = out_g_t.Slice( + Tensor out_t_sub = col_t.Slice( (down_pad_begin_row + t) * context_length - padding_size, (down_pad_begin_row + t) * context_length); Tensor w_sub = padding_data_g->Slice( @@ -300,10 +243,40 @@ class SequenceProjectGradKernel : public framework::OpKernel { w_sub_e.device(place) = w_sub_e + out_t_sub_e; } } - out_g_t.Resize(framework::make_ddim( + col_t.Resize(framework::make_ddim( {sequence_height, context_length * sequence_width})); } } + + if (filter_g) { + filter_g->mutable_data(context.GetPlace()); + + math::SetConstant functor; + functor(context.device_context(), filter_g, 0); + + Tensor filter_grad_ = *filter_g; + Tensor out_grad_ = *out_g; + + const LoDTensor* padding_data = nullptr; + if (padding_trainable) { + padding_data = context.Input("PaddingData"); + } + + sequence_width = static_cast(in->dims()[1]); + + paddle::operators::math::SequenceProjectFunctor + seq_project_functor; + + seq_project_functor(context.device_context(), in, padding_data, &col, + padding_trainable, context_start, context_length, + context_stride, up_pad, down_pad); + + filter_grad_.Resize( + framework::make_ddim({context_length * sequence_width, 1})); + + math::matmul(context.device_context(), col, true, out_grad_, + false, T(1.0), &filter_grad_, T(1.0)); + } } }; diff --git a/python/paddle/v2/framework/tests/test_seq_project.py b/python/paddle/v2/framework/tests/test_seq_project.py deleted file mode 100644 index 60bf2a7fdfeb13f149117e9eedd05ba13699dd13..0000000000000000000000000000000000000000 --- a/python/paddle/v2/framework/tests/test_seq_project.py +++ /dev/null @@ -1,212 +0,0 @@ -import unittest -import numpy as np -import random -from op_test import OpTest - - -class TestSeqProject(OpTest): - def setUp(self): - self.init_test_case() - self.op_type = 'sequence_project' - if self.context_length == 1 and self.context_start == 0 and self.padding_trainable: - print "If context_start is 0 and context_length is 1, padding_trainable should be false." - return - - # one level, batch size - x = np.random.uniform( - 0.1, 1, [self.input_size[0], self.input_size[1]]).astype('float32') - - self.begin_pad = np.max([0, -self.context_start]) - self.end_pad = np.max([0, self.context_start + self.context_length - 1]) - self.total_pad = self.begin_pad + self.end_pad - if self.total_pad == 0: - self.total_pad = 1 - # PaddingData mast be not empty. Otherwise(EnforceNotMet: enforce numel() > 0 failed, 0 <= 0) - padding_data = np.random.uniform( - 0.1, 1, [self.total_pad, self.input_size[1]]).astype('float32') - - self.inputs = { - 'X': (x, self.lod), - 'PaddingData': (padding_data, [[0, self.total_pad]]) - } - self.attrs = { - 'context_start': self.context_start, - 'context_length': self.context_length, - 'padding_trainable': self.padding_trainable, - 'context_stride': self.context_stride - } - out = np.zeros((self.input_size[0], self.input_size[1] * - self.context_length)).astype('float32') - self.outputs = {'Out': out} - self.compute() - - def compute(self): - x, lod = self.inputs['X'] - pading_data, _ = self.inputs['PaddingData'] - out = self.outputs['Out'] - lod = lod[0] - begin_pad = np.max([0, -self.context_start]) - - for i in range(len(lod) - 1): - for j in range(self.context_length): - in_begin = lod[i] + self.context_start + j - in_end = lod[i + 1] + self.context_start + j - out_begin = lod[i] - out_end = lod[i + 1] - if in_begin < lod[i]: - pad_size = np.min([lod[i] - in_begin, lod[i + 1] - lod[i]]) - if self.padding_trainable: - sub_w = pading_data[j:j + pad_size, :] - out[lod[i]:lod[i] + pad_size, j * self.input_size[1]:( - j + 1) * self.input_size[1]] = sub_w - out_begin = lod[i] + pad_size - in_begin = lod[i] - - if in_end > lod[i + 1]: - pad_size = np.min( - [in_end - lod[i + 1], lod[i + 1] - lod[i]]) - if self.padding_trainable: - sub_w = pading_data[begin_pad + self.context_start + j - - pad_size:begin_pad + - self.context_start + j, :] - out[lod[i + 1] - pad_size:lod[i + 1], j * self. - input_size[1]:(j + 1) * self.input_size[1]] = sub_w - in_end = lod[i + 1] - out_end = lod[i + 1] - pad_size - if in_end <= in_begin: - continue - - in_sub = x[in_begin:in_end, :] - out[out_begin:out_end, j * self.input_size[1]:(j + 1) * - self.input_size[1]] += in_sub - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - if self.padding_trainable: - self.check_grad( - set(['X', 'PaddingData']), 'Out', max_relative_error=0.05) - - def test_check_grad_no_filter(self): - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.05, - no_grad_set=set(['PaddingData'])) - - def test_check_grad_no_input(self): - if self.padding_trainable: - self.check_grad( - ['PaddingData'], - 'Out', - max_relative_error=0.05, - no_grad_set=set(['X'])) - - def init_test_case(self): - self.op_type = "sequence_project" - self.input_row = 11 - self.context_start = 0 - self.context_length = 1 - self.padding_trainable = False - self.context_stride = 1 - - self.input_size = [self.input_row, 23] - self.lod = [[0, 4, 5, 8, self.input_row]] - - -class TestSeqProjectCase1(TestSeqProject): - def init_test_case(self): - self.op_type = "sequence_project" - self.input_row = 11 - self.context_start = -1 - self.context_length = 3 - self.padding_trainable = True - self.context_stride = 1 - - self.input_size = [self.input_row, 23] - self.lod = [[0, 4, 5, 8, self.input_row]] - - -class TestSeqProjectCase2(TestSeqProject): - def init_test_case(self): - self.op_type = "sequence_project" - self.input_row = 25 - self.context_start = 2 - self.context_length = 3 - self.padding_trainable = True - self.context_stride = 1 - - self.input_size = [self.input_row, 23] - idx = range(self.input_size[0]) - del idx[0] - self.lod = [[0] + np.sort(random.sample(idx, 8)).tolist() + - [self.input_size[0]]] - - -''' -class TestSeqProjectCases(TestSeqProject): - def setUp(self): - self.init_test_case() - self.op_type = 'sequence_project' - - num = 0 - for context_start in [-5, -3, -1, 0, 3]: - for context_length in [1, 2, 5, 7]: - for batch_size in [1, 2, 5, 7]: - for padding_trainable in [False, True]: - - if context_length == 1 and context_start == 0 and padding_trainable: - continue - - self.context_start = context_start - self.context_length = context_length - self.padding_trainable = padding_trainable - self.input_size = [batch_size, 23] - x = np.random.uniform(0.1, 1, - self.input_size).astype('float32') - self.lod = [[0, self.input_size[0]]] - if self.input_size[0] > 2: - idx = range(self.input_size[0]) - del idx[0] - self.lod = [ - [0] + np.sort(random.sample(idx, 2)).tolist() + - [self.input_size[0]] - ] - - self.begin_pad = np.max([0, -self.context_start]) - self.end_pad = np.max([0, self.context_start + self.context_length - 1]) - self.total_pad = self.begin_pad + self.end_pad - if self.total_pad == 0: - self.total_pad = 1 - # PaddingData mast be not empty. Otherwise(EnforceNotMet: enforce numel() > 0 failed, 0 <= 0) - padding_data = np.random.uniform( - 0.1, 1, [self.total_pad, self.input_size[1]]).astype('float32') - - self.inputs = { - 'X': (x, self.lod), - 'PaddingData': (padding_data, [[0, self.total_pad]]) - } - self.attrs = { - 'context_start': self.context_start, - 'context_length': self.context_length, - 'padding_trainable': self.padding_trainable, - 'context_stride': self.context_stride - } - out = np.zeros((self.input_size[0], self.input_size[1] * - self.context_length)).astype('float32') - self.outputs = {'Out': out} - print num - print self.attrs - print batch_size - print padding_trainable - print "$$$$$$$$$$$$$" - - self.compute() - self.test_check_output() - - num += 1 -''' - -if __name__ == '__main__': - unittest.main()