提交 bf3f56e8 编写于 作者: Y yangyaming

Finish adaptation for the backward pass.

上级 352fa41a
...@@ -371,6 +371,8 @@ template struct RowwiseAdd<platform::CPUDeviceContext, double>; ...@@ -371,6 +371,8 @@ template struct RowwiseAdd<platform::CPUDeviceContext, double>;
template struct ColwiseSum<platform::CPUDeviceContext, float>; template struct ColwiseSum<platform::CPUDeviceContext, float>;
template struct ColwiseSum<platform::CPUDeviceContext, double>; template struct ColwiseSum<platform::CPUDeviceContext, double>;
template struct ColwiseSum<platform::CPUDeviceContext, int>;
template struct ColwiseSum<platform::CPUDeviceContext, int64_t>;
template struct RowwiseSum<platform::CPUDeviceContext, float>; template struct RowwiseSum<platform::CPUDeviceContext, float>;
template struct RowwiseSum<platform::CPUDeviceContext, double>; template struct RowwiseSum<platform::CPUDeviceContext, double>;
......
...@@ -422,6 +422,8 @@ struct RowwiseAdd<platform::CUDADeviceContext, T> { ...@@ -422,6 +422,8 @@ struct RowwiseAdd<platform::CUDADeviceContext, T> {
template struct RowwiseAdd<platform::CUDADeviceContext, float>; template struct RowwiseAdd<platform::CUDADeviceContext, float>;
template struct RowwiseAdd<platform::CUDADeviceContext, double>; template struct RowwiseAdd<platform::CUDADeviceContext, double>;
template struct ColwiseSum<platform::CUDADeviceContext, float>; template struct ColwiseSum<platform::CUDADeviceContext, float>;
template struct ColwiseSum<platform::CUDADeviceContext, int>;
template struct ColwiseSum<platform::CUDADeviceContext, int64_t>;
// template struct ColwiseSum<platform::CUDADeviceContext, double>; // template struct ColwiseSum<platform::CUDADeviceContext, double>;
// The ColwiseSum<platform::CUDADeviceContext, double> failed in debug mode, // The ColwiseSum<platform::CUDADeviceContext, double> failed in debug mode,
// and only failed for this case. So reimplemented it. // and only failed for this case. So reimplemented it.
......
...@@ -33,9 +33,10 @@ class SequenceExpandOp : public framework::OperatorWithKernel { ...@@ -33,9 +33,10 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
"Output(Out) of SequenceExpandOp should not be null."); "Output(Out) of SequenceExpandOp should not be null.");
auto x_dims = ctx->GetInputDim("X"); auto x_dims = ctx->GetInputDim("X");
int ref_level = ctx->Attrs().Get<int>("ref_level");
PADDLE_ENFORCE_EQ(x_dims.size(), 2U, PADDLE_ENFORCE_EQ(x_dims.size(), 2U,
"Dimension number of Input(X) should be 2."); "Dimension number of Input(X) should be 2.");
int ref_level = ctx->Attrs().Get<int>("ref_level");
if (ctx->IsRuntime()) { if (ctx->IsRuntime()) {
framework::Variable* x_var = framework::Variable* x_var =
...@@ -51,39 +52,37 @@ class SequenceExpandOp : public framework::OperatorWithKernel { ...@@ -51,39 +52,37 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
"greater than 1."); "greater than 1.");
PADDLE_ENFORCE(x_lod.size() == y_lod.size() || x_lod.size() == 0, PADDLE_ENFORCE(x_lod.size() == y_lod.size() || x_lod.size() == 0,
"Number of lod level of Input(X) either equal to 0 " "Level number of Input(X)'s lod should be either equal "
"or equal to that of Input(Y)."); "to 0 or equal to that of Input(Y).");
PADDLE_ENFORCE_GT(y_lod.size(), 0,
"Level number of Input(Y)'s lod should be "
"greater than 0.");
PADDLE_ENFORCE(
ref_level == -1 ||
(ref_level >= 0 && ref_level < static_cast<int>(y_lod.size())),
"Invlid `ref_level`, which should be either equal to -1 "
"or in [0, %d)",
y_lod.size());
if (ref_level == -1) ref_level = y_lod.size() - 1;
int64_t out_first_dim = 0; int64_t out_first_dim = 0;
if (y_lod[ref_level].size() < 1) { if (y_lod[ref_level].size() <= 1) {
out_first_dim = x_dims[0]; out_first_dim = x_dims[0];
} else { } else {
if (x_lod.size() == 1) { // X is LoDTensor for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { int x_seq_len = 1;
int x_seq_len = x_lod[0][i] - x_lod[0][i - 1]; if (x_lod.size() == 1) {
out_first_dim += x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
(y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len;
}
} else { // X is normal Tensor
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
out_first_dim += y_lod[ref_level][i] - y_lod[ref_level][i - 1];
} }
out_first_dim +=
(y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len;
} }
} }
ctx->SetOutputDim("Out", {out_first_dim, x_dims[1]}); ctx->SetOutputDim("Out", {out_first_dim, x_dims[1]});
} else { } else {
framework::VarDesc* in_reader =
boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Y")[0]);
int lod_level_num = in_reader->GetLoDLevels().size();
PADDLE_ENFORCE_GE(ref_level, 0,
"Level of referred lod should be greater or "
"equal to 0.");
PADDLE_ENFORCE_LT(ref_level, lod_level_num,
"Level of referred lod should be smaller than "
"level number of Input(Y).");
ctx->SetOutputDim("Out", {-1, x_dims[1]}); ctx->SetOutputDim("Out", {-1, x_dims[1]});
} }
} }
...@@ -102,7 +101,7 @@ class SequenceExpandOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -102,7 +101,7 @@ class SequenceExpandOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", AddOutput("Out",
"(LodTensor, default LoDTensor<float>) Output LoDTensor which is " "(LodTensor, default LoDTensor<float>) Output LoDTensor which is "
"generated from Input(X) by referring lod of Input(Y)."); "generated from Input(X) by referring lod of Input(Y).");
AddAttr<int>("ref_level", "Specify lod level of Input(Y)."); AddAttr<int>("ref_level", "Specify lod level of Input(Y).").SetDefault(-1);
AddComment(R"DOC( AddComment(R"DOC(
Sequence Expand Operator. Sequence Expand Operator.
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "unsupported/Eigen/CXX11/Tensor" #include "paddle/fluid/operators/math/math_function.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -32,52 +32,53 @@ class SequenceExpandKernel : public framework::OpKernel<T> { ...@@ -32,52 +32,53 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
auto* out = context.Output<LoDTensor>("Out"); auto* out = context.Output<LoDTensor>("Out");
int ref_level = context.Attr<int>("ref_level"); int ref_level = context.Attr<int>("ref_level");
out->mutable_data<T>(context.GetPlace());
auto& x_lod = x->lod(); auto& x_lod = x->lod();
auto& y_lod = y->lod(); auto& y_lod = y->lod();
PADDLE_ENFORCE_GE(ref_level, 0, PADDLE_ENFORCE_GT(y_lod.size(), 0,
"Value of attribute `ref_level` should be greater or " "Level number of `Y`'s lod should be greater than 0.");
"equal to 0.");
PADDLE_ENFORCE_LT(ref_level, y_lod.size(), PADDLE_ENFORCE(
"Value of attribute `ref_level` should be smaller than " ref_level == -1 || (ref_level >= 0 && ref_level < y_lod.size()),
"level number of Y's lod."); "Invlid `ref_level`, which should be either equal to -1 "
"or in [0, %d)",
y_lod.size());
if (y_lod[ref_level].size() < 1) { if (ref_level == -1) ref_level = y_lod.size() - 1;
if (y_lod[ref_level].size() <= 1) {
framework::TensorCopy(*x, context.GetPlace(), out); framework::TensorCopy(*x, context.GetPlace(), out);
return; return;
} }
if (x_lod.size() == 0) { auto& out_lod = *out->mutable_lod();
int out_start = 0; if (x_lod.size() == 1) {
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
auto x_sub_tensor = x->Slice(i - 1, i);
for (size_t j = 0; j < repeat_num; ++j) {
auto out_sub_tensor = out->Slice(out_start, out_start + 1);
framework::TensorCopy(x_sub_tensor, context.GetPlace(),
&out_sub_tensor);
out_start++;
}
}
} else {
auto& out_lod = *out->mutable_lod();
out_lod.resize(1); out_lod.resize(1);
out_lod[0].resize(1); out_lod[0] = {0};
out_lod[0][0] = 0; }
int out_idx = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { int out_offset = 0;
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int x_seq_len = x_lod[0][i] - x_lod[0][i - 1]; int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
auto x_sub_tensor = x->Slice(x_lod[0][i], x_lod[0][i - 1]); int x_start = i - 1;
for (size_t j = 0; j < repeat_num; ++j) { int x_end = i;
auto out_sub_tensor = if (x_lod.size() == 1) {
out->Slice(out_lod[0][out_idx], out_lod[0][out_idx] + x_seq_len); x_start = x_lod[0][i - 1];
framework::TensorCopy(x_sub_tensor, context.GetPlace(), x_end = x_lod[0][i];
&out_sub_tensor); }
out_lod[0].push_back(out_lod[0][out_idx] + x_seq_len); int x_seq_len = x_end - x_start;
out_idx++; auto x_sub_tensor = x->Slice(x_start, x_end);
for (size_t j = 0; j < repeat_num; ++j) {
int out_start = out_offset;
if (x_lod.size() == 1) {
out_start = out_lod[0][out_offset];
out_lod[0].push_back(x_seq_len);
} }
auto out_sub_tensor = out->Slice(out_start, out_start + x_seq_len);
framework::TensorCopy(x_sub_tensor, context.GetPlace(),
&out_sub_tensor);
out_offset++;
} }
} }
} }
...@@ -99,27 +100,49 @@ template <typename DeviceContext, typename T> ...@@ -99,27 +100,49 @@ template <typename DeviceContext, typename T>
class SequenceExpandGradKernel : public framework::OpKernel<T> { class SequenceExpandGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out")); auto* g_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* x = context.Input<LoDTensor>("X"); auto* x = context.Input<LoDTensor>("X");
auto* out = context.Input<LoDTensor>("Out"); auto* y = context.Input<LoDTensor>("Y");
auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X")); auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
auto out_last_level = out->lod().back(); int ref_level = context.Attr<int>("ref_level");
d_x->set_lod(x->lod());
const T* d_out_data = d_out->data<T>(); g_x->mutable_data<T>(context.GetPlace());
T* d_x_data = d_x->mutable_data<T>(context.GetPlace()); g_x->set_lod(x->lod());
size_t element_len = d_out->numel() / d_out->dims()[0];
for (size_t i = 0; i < out_last_level.size() - 1; ++i) { auto& x_lod = x->lod();
size_t repeat = out_last_level[i + 1] - out_last_level[i]; auto& y_lod = y->lod();
Eigen::TensorMap<
Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>> if (ref_level == -1) ref_level = y_lod.size() - 1;
d_out_t(d_out_data, static_cast<int>(repeat), element_len);
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>> // just copy the gradient
d_x_t(d_x_data, static_cast<int>(element_len)); if (y_lod[ref_level].size() <= 1) {
auto place = framework::TensorCopy(*g_out, context.GetPlace(), g_x);
context.template device_context<DeviceContext>().eigen_device(); return;
d_x_t.device(*place) = d_out_t.sum(Eigen::array<int, 1>({{0}})); }
d_out_data += (repeat * element_len);
d_x_data += element_len; auto& dev_ctx = context.template device_context<DeviceContext>();
int g_out_offset = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
if (repeat_num > 0) {
int x_start = i - 1;
int x_end = i;
if (x_lod.size() == 1) {
x_start = x_lod[0][i - 1];
x_end = x_lod[0][i];
}
int x_seq_len = x_end - x_start;
auto column = x_seq_len * x->dims()[1];
auto g_x_sub = g_x->Slice(x_start, x_end);
g_x_sub = framework::ReshapeToMatrix(g_x_sub, column);
int g_out_end = g_out_offset + repeat_num * x_seq_len;
auto g_out_sub = g_out->Slice(g_out_offset, g_out_end);
g_out_sub = framework::ReshapeToMatrix(g_out_sub, column);
math::ColwiseSum<DeviceContext, T> col_sum;
col_sum(dev_ctx, g_out_sub, &g_x_sub);
g_out_offset += repeat_num * x_seq_len;
}
} }
} }
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册