提交 bf3f56e8 编写于 作者: Y yangyaming

Finish adaptation for backward.

上级 352fa41a
......@@ -371,6 +371,8 @@ template struct RowwiseAdd<platform::CPUDeviceContext, double>;
template struct ColwiseSum<platform::CPUDeviceContext, float>;
template struct ColwiseSum<platform::CPUDeviceContext, double>;
template struct ColwiseSum<platform::CPUDeviceContext, int>;
template struct ColwiseSum<platform::CPUDeviceContext, int64_t>;
template struct RowwiseSum<platform::CPUDeviceContext, float>;
template struct RowwiseSum<platform::CPUDeviceContext, double>;
......
......@@ -422,6 +422,8 @@ struct RowwiseAdd<platform::CUDADeviceContext, T> {
template struct RowwiseAdd<platform::CUDADeviceContext, float>;
template struct RowwiseAdd<platform::CUDADeviceContext, double>;
template struct ColwiseSum<platform::CUDADeviceContext, float>;
template struct ColwiseSum<platform::CUDADeviceContext, int>;
template struct ColwiseSum<platform::CUDADeviceContext, int64_t>;
// template struct ColwiseSum<platform::CUDADeviceContext, double>;
// The generic ColwiseSum<platform::CUDADeviceContext, double> failed in debug
// mode, and only for this case, so it was reimplemented.
......
......@@ -33,9 +33,10 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
"Output(Out) of SequenceExpandOp should not be null.");
auto x_dims = ctx->GetInputDim("X");
int ref_level = ctx->Attrs().Get<int>("ref_level");
PADDLE_ENFORCE_EQ(x_dims.size(), 2U,
"Dimension number of Input(X) should be 2.");
int ref_level = ctx->Attrs().Get<int>("ref_level");
if (ctx->IsRuntime()) {
framework::Variable* x_var =
......@@ -51,39 +52,37 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
"greater than 1.");
PADDLE_ENFORCE(x_lod.size() == y_lod.size() || x_lod.size() == 0,
"Number of lod level of Input(X) either equal to 0 "
"or equal to that of Input(Y).");
"Level number of Input(X)'s lod should be either equal "
"to 0 or equal to that of Input(Y).");
PADDLE_ENFORCE_GT(y_lod.size(), 0,
"Level number of Input(Y)'s lod should be "
"greater than 0.");
PADDLE_ENFORCE(
ref_level == -1 ||
(ref_level >= 0 && ref_level < static_cast<int>(y_lod.size())),
"Invlid `ref_level`, which should be either equal to -1 "
"or in [0, %d)",
y_lod.size());
if (ref_level == -1) ref_level = y_lod.size() - 1;
int64_t out_first_dim = 0;
if (y_lod[ref_level].size() < 1) {
if (y_lod[ref_level].size() <= 1) {
out_first_dim = x_dims[0];
} else {
if (x_lod.size() == 1) { // X is LoDTensor
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
int x_seq_len = 1;
if (x_lod.size() == 1) {
x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
}
out_first_dim +=
(y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len;
}
} else { // X is normal Tensor
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
out_first_dim += y_lod[ref_level][i] - y_lod[ref_level][i - 1];
}
}
}
ctx->SetOutputDim("Out", {out_first_dim, x_dims[1]});
} else {
framework::VarDesc* in_reader =
boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Y")[0]);
int lod_level_num = in_reader->GetLoDLevels().size();
PADDLE_ENFORCE_GE(ref_level, 0,
"Level of referred lod should be greater or "
"equal to 0.");
PADDLE_ENFORCE_LT(ref_level, lod_level_num,
"Level of referred lod should be smaller than "
"level number of Input(Y).");
ctx->SetOutputDim("Out", {-1, x_dims[1]});
}
}
......@@ -102,7 +101,7 @@ class SequenceExpandOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out",
"(LodTensor, default LoDTensor<float>) Output LoDTensor which is "
"generated from Input(X) by referring lod of Input(Y).");
AddAttr<int>("ref_level", "Specify lod level of Input(Y).");
AddAttr<int>("ref_level", "Specify lod level of Input(Y).").SetDefault(-1);
AddComment(R"DOC(
Sequence Expand Operator.
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "unsupported/Eigen/CXX11/Tensor"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
......@@ -32,52 +32,53 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
auto* out = context.Output<LoDTensor>("Out");
int ref_level = context.Attr<int>("ref_level");
out->mutable_data<T>(context.GetPlace());
auto& x_lod = x->lod();
auto& y_lod = y->lod();
PADDLE_ENFORCE_GE(ref_level, 0,
"Value of attribute `ref_level` should be greater or "
"equal to 0.");
PADDLE_ENFORCE_GT(y_lod.size(), 0,
"Level number of `Y`'s lod should be greater than 0.");
PADDLE_ENFORCE_LT(ref_level, y_lod.size(),
"Value of attribute `ref_level` should be smaller than "
"level number of Y's lod.");
PADDLE_ENFORCE(
ref_level == -1 || (ref_level >= 0 && ref_level < y_lod.size()),
"Invlid `ref_level`, which should be either equal to -1 "
"or in [0, %d)",
y_lod.size());
if (y_lod[ref_level].size() < 1) {
if (ref_level == -1) ref_level = y_lod.size() - 1;
if (y_lod[ref_level].size() <= 1) {
framework::TensorCopy(*x, context.GetPlace(), out);
return;
}
if (x_lod.size() == 0) {
int out_start = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
auto x_sub_tensor = x->Slice(i - 1, i);
for (size_t j = 0; j < repeat_num; ++j) {
auto out_sub_tensor = out->Slice(out_start, out_start + 1);
framework::TensorCopy(x_sub_tensor, context.GetPlace(),
&out_sub_tensor);
out_start++;
}
}
} else {
auto& out_lod = *out->mutable_lod();
if (x_lod.size() == 1) {
out_lod.resize(1);
out_lod[0].resize(1);
out_lod[0][0] = 0;
int out_idx = 0;
out_lod[0] = {0};
}
int out_offset = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
int x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
auto x_sub_tensor = x->Slice(x_lod[0][i], x_lod[0][i - 1]);
int x_start = i - 1;
int x_end = i;
if (x_lod.size() == 1) {
x_start = x_lod[0][i - 1];
x_end = x_lod[0][i];
}
int x_seq_len = x_end - x_start;
auto x_sub_tensor = x->Slice(x_start, x_end);
for (size_t j = 0; j < repeat_num; ++j) {
auto out_sub_tensor =
out->Slice(out_lod[0][out_idx], out_lod[0][out_idx] + x_seq_len);
int out_start = out_offset;
if (x_lod.size() == 1) {
out_start = out_lod[0][out_offset];
out_lod[0].push_back(x_seq_len);
}
auto out_sub_tensor = out->Slice(out_start, out_start + x_seq_len);
framework::TensorCopy(x_sub_tensor, context.GetPlace(),
&out_sub_tensor);
out_lod[0].push_back(out_lod[0][out_idx] + x_seq_len);
out_idx++;
}
out_offset++;
}
}
}
......@@ -99,27 +100,49 @@ template <typename DeviceContext, typename T>
class SequenceExpandGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* g_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* x = context.Input<LoDTensor>("X");
auto* out = context.Input<LoDTensor>("Out");
auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
auto out_last_level = out->lod().back();
d_x->set_lod(x->lod());
const T* d_out_data = d_out->data<T>();
T* d_x_data = d_x->mutable_data<T>(context.GetPlace());
size_t element_len = d_out->numel() / d_out->dims()[0];
for (size_t i = 0; i < out_last_level.size() - 1; ++i) {
size_t repeat = out_last_level[i + 1] - out_last_level[i];
Eigen::TensorMap<
Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
d_out_t(d_out_data, static_cast<int>(repeat), element_len);
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>
d_x_t(d_x_data, static_cast<int>(element_len));
auto place =
context.template device_context<DeviceContext>().eigen_device();
d_x_t.device(*place) = d_out_t.sum(Eigen::array<int, 1>({{0}}));
d_out_data += (repeat * element_len);
d_x_data += element_len;
auto* y = context.Input<LoDTensor>("Y");
auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
int ref_level = context.Attr<int>("ref_level");
g_x->mutable_data<T>(context.GetPlace());
g_x->set_lod(x->lod());
auto& x_lod = x->lod();
auto& y_lod = y->lod();
if (ref_level == -1) ref_level = y_lod.size() - 1;
// just copy the gradient
if (y_lod[ref_level].size() <= 1) {
framework::TensorCopy(*g_out, context.GetPlace(), g_x);
return;
}
auto& dev_ctx = context.template device_context<DeviceContext>();
int g_out_offset = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
if (repeat_num > 0) {
int x_start = i - 1;
int x_end = i;
if (x_lod.size() == 1) {
x_start = x_lod[0][i - 1];
x_end = x_lod[0][i];
}
int x_seq_len = x_end - x_start;
auto column = x_seq_len * x->dims()[1];
auto g_x_sub = g_x->Slice(x_start, x_end);
g_x_sub = framework::ReshapeToMatrix(g_x_sub, column);
int g_out_end = g_out_offset + repeat_num * x_seq_len;
auto g_out_sub = g_out->Slice(g_out_offset, g_out_end);
g_out_sub = framework::ReshapeToMatrix(g_out_sub, column);
math::ColwiseSum<DeviceContext, T> col_sum;
col_sum(dev_ctx, g_out_sub, &g_x_sub);
g_out_offset += repeat_num * x_seq_len;
}
}
}
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册