diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc index 35d251f71a0cb631d5900498ea3188b5ddeae334..17e576a9d5c8f50fbe84b066a93460f03ae6bb08 100644 --- a/paddle/fluid/operators/math/math_function.cc +++ b/paddle/fluid/operators/math/math_function.cc @@ -371,6 +371,8 @@ template struct RowwiseAdd; template struct ColwiseSum; template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; template struct RowwiseSum; template struct RowwiseSum; diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu index 3abbcdb71d03eaf6f8eba3d97150d27ac5a5405e..c6ca2693a053360ce5dc44765acf1520a11cce2c 100644 --- a/paddle/fluid/operators/math/math_function.cu +++ b/paddle/fluid/operators/math/math_function.cu @@ -422,6 +422,8 @@ struct RowwiseAdd { template struct RowwiseAdd; template struct RowwiseAdd; template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; // template struct ColwiseSum; // The ColwiseSum failed in debug mode, // and only failed for this case. So reimplemented it. diff --git a/paddle/fluid/operators/sequence_expand_op.cc b/paddle/fluid/operators/sequence_expand_op.cc index acb6eb82a23a1cd122c4159022985eaf0d3fde7c..25a8283858bf5f867a083dd4f581386bfcf17076 100644 --- a/paddle/fluid/operators/sequence_expand_op.cc +++ b/paddle/fluid/operators/sequence_expand_op.cc @@ -33,9 +33,10 @@ class SequenceExpandOp : public framework::OperatorWithKernel { "Output(Out) of SequenceExpandOp should not be null."); auto x_dims = ctx->GetInputDim("X"); + int ref_level = ctx->Attrs().Get("ref_level"); + PADDLE_ENFORCE_EQ(x_dims.size(), 2U, "Dimension number of Input(X) should be 2."); - int ref_level = ctx->Attrs().Get("ref_level"); if (ctx->IsRuntime()) { framework::Variable* x_var = @@ -51,39 +52,37 @@ class SequenceExpandOp : public framework::OperatorWithKernel { "greater than 1."); PADDLE_ENFORCE(x_lod.size() == y_lod.size() || x_lod.size() == 0, - "Number of lod level of Input(X) either equal to 0 " - "or equal to that of Input(Y)."); + "Level number of Input(X)'s lod should be either equal " + "to 0 or equal to that of Input(Y)."); + + PADDLE_ENFORCE_GT(y_lod.size(), 0, + "Level number of Input(Y)'s lod should be " + "greater than 0."); + + PADDLE_ENFORCE( + ref_level == -1 || + (ref_level >= 0 && ref_level < static_cast(y_lod.size())), + "Invlid `ref_level`, which should be either equal to -1 " + "or in [0, %d)", + y_lod.size()); + + if (ref_level == -1) ref_level = y_lod.size() - 1; int64_t out_first_dim = 0; - if (y_lod[ref_level].size() < 1) { + if (y_lod[ref_level].size() <= 1) { out_first_dim = x_dims[0]; } else { - if (x_lod.size() == 1) { // X is LoDTensor - for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { - int x_seq_len = x_lod[0][i] - x_lod[0][i - 1]; - out_first_dim += - (y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len; - } - } else { // X is normal Tensor - for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { - out_first_dim += y_lod[ref_level][i] - y_lod[ref_level][i - 1]; + for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { + int x_seq_len = 1; + if (x_lod.size() == 1) { + x_seq_len = x_lod[0][i] - x_lod[0][i - 1]; } + out_first_dim += + (y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len; } } ctx->SetOutputDim("Out", {out_first_dim, x_dims[1]}); } else { - framework::VarDesc* in_reader = - boost::get(ctx->GetInputVarPtrs("Y")[0]); - int lod_level_num = in_reader->GetLoDLevels().size(); - - PADDLE_ENFORCE_GE(ref_level, 0, - "Level of referred lod should be greater or " - "equal to 0."); - - PADDLE_ENFORCE_LT(ref_level, lod_level_num, - "Level of referred lod should be smaller than " - "level number of Input(Y)."); - ctx->SetOutputDim("Out", {-1, x_dims[1]}); } } @@ -102,7 +101,7 @@ class SequenceExpandOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "(LodTensor, default LoDTensor) Output LoDTensor which is " "generated from Input(X) by referring lod of Input(Y)."); - AddAttr("ref_level", "Specify lod level of Input(Y)."); + AddAttr("ref_level", "Specify lod level of Input(Y).").SetDefault(-1); AddComment(R"DOC( Sequence Expand Operator. diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h index 2b4fa016f73efc80aecf912e25504fb49bf67941..8cbfdf177e0a2bf762923bbd07de67ae99584be0 100644 --- a/paddle/fluid/operators/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_expand_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/fluid/operators/math/math_function.h" namespace paddle { namespace operators { @@ -32,52 +32,53 @@ class SequenceExpandKernel : public framework::OpKernel { auto* out = context.Output("Out"); int ref_level = context.Attr("ref_level"); + out->mutable_data(context.GetPlace()); auto& x_lod = x->lod(); auto& y_lod = y->lod(); - PADDLE_ENFORCE_GE(ref_level, 0, - "Value of attribute `ref_level` should be greater or " - "equal to 0."); + PADDLE_ENFORCE_GT(y_lod.size(), 0, + "Level number of `Y`'s lod should be greater than 0."); - PADDLE_ENFORCE_LT(ref_level, y_lod.size(), - "Value of attribute `ref_level` should be smaller than " - "level number of Y's lod."); + PADDLE_ENFORCE( + ref_level == -1 || (ref_level >= 0 && ref_level < y_lod.size()), + "Invlid `ref_level`, which should be either equal to -1 " + "or in [0, %d)", + y_lod.size()); - if (y_lod[ref_level].size() < 1) { + if (ref_level == -1) ref_level = y_lod.size() - 1; + + if (y_lod[ref_level].size() <= 1) { framework::TensorCopy(*x, context.GetPlace(), out); return; } - if (x_lod.size() == 0) { - int out_start = 0; - for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { - int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; - auto x_sub_tensor = x->Slice(i - 1, i); - for (size_t j = 0; j < repeat_num; ++j) { - auto out_sub_tensor = out->Slice(out_start, out_start + 1); - framework::TensorCopy(x_sub_tensor, context.GetPlace(), - &out_sub_tensor); - out_start++; - } - } - } else { - auto& out_lod = *out->mutable_lod(); + auto& out_lod = *out->mutable_lod(); + if (x_lod.size() == 1) { out_lod.resize(1); - out_lod[0].resize(1); - out_lod[0][0] = 0; - int out_idx = 0; - for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { - int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; - int x_seq_len = x_lod[0][i] - x_lod[0][i - 1]; - auto x_sub_tensor = x->Slice(x_lod[0][i], x_lod[0][i - 1]); - for (size_t j = 0; j < repeat_num; ++j) { - auto out_sub_tensor = - out->Slice(out_lod[0][out_idx], out_lod[0][out_idx] + x_seq_len); - framework::TensorCopy(x_sub_tensor, context.GetPlace(), - &out_sub_tensor); - out_lod[0].push_back(out_lod[0][out_idx] + x_seq_len); - out_idx++; + out_lod[0] = {0}; + } + + int out_offset = 0; + for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { + int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; + int x_start = i - 1; + int x_end = i; + if (x_lod.size() == 1) { + x_start = x_lod[0][i - 1]; + x_end = x_lod[0][i]; + } + int x_seq_len = x_end - x_start; + auto x_sub_tensor = x->Slice(x_start, x_end); + for (size_t j = 0; j < repeat_num; ++j) { + int out_start = out_offset; + if (x_lod.size() == 1) { + out_start = out_lod[0][out_offset]; + out_lod[0].push_back(x_seq_len); } + auto out_sub_tensor = out->Slice(out_start, out_start + x_seq_len); + framework::TensorCopy(x_sub_tensor, context.GetPlace(), + &out_sub_tensor); + out_offset++; } } } @@ -99,27 +100,49 @@ template class SequenceExpandGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* d_out = context.Input(framework::GradVarName("Out")); + auto* g_out = context.Input(framework::GradVarName("Out")); auto* x = context.Input("X"); - auto* out = context.Input("Out"); - auto* d_x = context.Output(framework::GradVarName("X")); - auto out_last_level = out->lod().back(); - d_x->set_lod(x->lod()); - const T* d_out_data = d_out->data(); - T* d_x_data = d_x->mutable_data(context.GetPlace()); - size_t element_len = d_out->numel() / d_out->dims()[0]; - for (size_t i = 0; i < out_last_level.size() - 1; ++i) { - size_t repeat = out_last_level[i + 1] - out_last_level[i]; - Eigen::TensorMap< - Eigen::Tensor> - d_out_t(d_out_data, static_cast(repeat), element_len); - Eigen::TensorMap> - d_x_t(d_x_data, static_cast(element_len)); - auto place = - context.template device_context().eigen_device(); - d_x_t.device(*place) = d_out_t.sum(Eigen::array({{0}})); - d_out_data += (repeat * element_len); - d_x_data += element_len; + auto* y = context.Input("Y"); + auto* g_x = context.Output(framework::GradVarName("X")); + int ref_level = context.Attr("ref_level"); + + g_x->mutable_data(context.GetPlace()); + g_x->set_lod(x->lod()); + + auto& x_lod = x->lod(); + auto& y_lod = y->lod(); + + if (ref_level == -1) ref_level = y_lod.size() - 1; + + // just copy the gradient + if (y_lod[ref_level].size() <= 1) { + framework::TensorCopy(*g_out, context.GetPlace(), g_x); + return; + } + + auto& dev_ctx = context.template device_context(); + + int g_out_offset = 0; + for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { + int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; + if (repeat_num > 0) { + int x_start = i - 1; + int x_end = i; + if (x_lod.size() == 1) { + x_start = x_lod[0][i - 1]; + x_end = x_lod[0][i]; + } + int x_seq_len = x_end - x_start; + auto column = x_seq_len * x->dims()[1]; + auto g_x_sub = g_x->Slice(x_start, x_end); + g_x_sub = framework::ReshapeToMatrix(g_x_sub, column); + int g_out_end = g_out_offset + repeat_num * x_seq_len; + auto g_out_sub = g_out->Slice(g_out_offset, g_out_end); + g_out_sub = framework::ReshapeToMatrix(g_out_sub, column); + math::ColwiseSum col_sum; + col_sum(dev_ctx, g_out_sub, &g_x_sub); + g_out_offset += repeat_num * x_seq_len; + } } } };