Commit db1b128f, authored by dzhwinter

"add details"

Parent 53c8c36a
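For readers of this diff: judging from the kernels below (this is inferred from the code, not stated in the commit message), sequence_expand repeats the i-th sequence of X as many times as indicated by Y's LoD at level ref_level, and builds the output LoD accordingly. A small illustrative example with hypothetical values:

    X rows           : r0, r1, r2
    x_lod[0]         : {0, 2, 3}          (two sequences: {r0, r1} and {r2})
    y_lod[ref_level] : {0, 2, 5}          (repeat counts 2 and 3)
    Out rows         : r0, r1, r0, r1, r2, r2, r2
    out_lod[0]       : {0, 2, 4, 5, 6, 7}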
@@ -13,15 +13,19 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <numeric>  // std::iota
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/operators/math/math_function.h"

 namespace paddle {
 namespace operators {

 using LoDTensor = framework::LoDTensor;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

 template <typename DeviceContext, typename T>
 struct SequenceExpandFunctor {
@@ -38,23 +42,35 @@ template <typename T>
 struct SequenceExpandFunctor<platform::CPUDeviceContext, T> {
   void operator()(const platform::CPUDeviceContext& context, const LoDTensor& x,
                   LoDTensor* out) {
-    auto x_dims = x.dims();
-    size_t element_len = framework::product(x_dims) / x_dims[0];
-    const T* x_data = x.data<T>();
-    T* out_data = out->mutable_data<T>(context.GetPlace());
-    auto out_starts = out->lod().back();
-
-    for (size_t i = 0; i < out_starts.size() - 1; i++) {
-      int scale = out_starts[i + 1] - out_starts[i];
-      Eigen::TensorMap<
-          Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
-          x_t(x_data, 1, element_len);
-      Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
-          out_t(out_data, scale, element_len);
-      Eigen::array<int, 2> cast({{scale, 1}});
-      out_t.device(*context.eigen_device()) = x_t.broadcast(cast);
-      x_data += element_len;
-      out_data += element_len * scale;
+    auto& out_lod = out->lod()[0];
+    framework::Vector<size_t> x_lod;
+    if (x.lod().size() == 1) {
+      x_lod = x.lod()[0];
+    } else {
+      x_lod.resize(out_lod.size());
+      std::iota(x_lod.begin(), x_lod.end(), 0);  // fill 0 ~ out_lod.size()-1
+    }
+    int out_offset = 0;
+    auto& eigen_place = *context.eigen_device();
+    for (size_t i = 1; i < out_lod.size(); ++i) {
+      int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
+      int x_start = x_lod[i - 1];
+      int x_end = x_lod[i];
+      int x_seq_len = x_end - x_start;
+      if (repeat_num > 0) {
+        auto x_sub_tensor = x.Slice(x_start, x_end);
+        x_sub_tensor.Resize({1, x_sub_tensor.numel()});
+        int out_start = out_offset;
+        if (x_lod.size() == 1) {
+          out_start = out_lod[0][out_offset];
+        }
+        auto out_sub_tensor =
+            out->Slice(out_start, out_start + x_seq_len * repeat_num);
+        out_sub_tensor.Resize({repeat_num, x_sub_tensor.dims()[1]});
+        EigenMatrix<T>::From(out_sub_tensor).device(eigen_place) =
+            EigenMatrix<T>::From(x_sub_tensor)
+                .broadcast(Eigen::array<int, 2>({{repeat_num, 1}}));
+      }
     }
   }
 };
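The new functor body above refers to y_lod and ref_level, which are not among its parameters in this snapshot, so the hunk is not self-contained. Below is a minimal, hedged sketch of the same row-repeat expansion with the source and reference LoDs passed in explicitly; the function name and signature are illustrative, not the committed interface.

    #include <cstddef>
    #include <vector>

    // Illustrative sketch only: expand the rows of `x` (row-major, `width`
    // columns) by repeating the i-th source sequence (rows
    // x_lod[i]..x_lod[i+1]) exactly ref_lod[i+1] - ref_lod[i] times,
    // appending the result to `out`.
    template <typename T>
    void SequenceExpandSketch(const std::vector<T>& x, size_t width,
                              const std::vector<size_t>& x_lod,
                              const std::vector<size_t>& ref_lod,
                              std::vector<T>* out) {
      out->clear();
      for (size_t i = 1; i < ref_lod.size(); ++i) {
        size_t repeat_num = ref_lod[i] - ref_lod[i - 1];
        size_t x_start = x_lod[i - 1];
        size_t x_end = x_lod[i];
        for (size_t r = 0; r < repeat_num; ++r) {
          // Copy one whole source sequence per repeat.
          out->insert(out->end(), x.begin() + x_start * width,
                      x.begin() + x_end * width);
        }
      }
    }

With x_lod = {0, 2, 3} and ref_lod = {0, 2, 5}, this produces the row order shown in the example after the commit header.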
@@ -64,15 +80,42 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* x = context.Input<LoDTensor>("X");
-    auto* out = context.Output<LoDTensor>("Out");
-    auto x_dims = x->dims();
     auto* y = context.Input<LoDTensor>("Y");
-    PADDLE_ENFORCE(!y->lod().empty(), "y should have lod");
-    PADDLE_ENFORCE_EQ(static_cast<size_t>(x_dims[0]),
-                      y->lod().back().size() - 1,
-                      "The size of last lod level in Input(Y)"
-                      "must be equal to dims[0] of Input(X).");
-    out->set_lod(y->lod());
+    auto* out = context.Output<LoDTensor>("Out");
+
+    int ref_level = context.Attr<int>("ref_level");
+    auto& x_lod = x->lod();
+    auto& y_lod = y->lod();
+
+    if (ref_level == -1) ref_level = y_lod.size() - 1;
+
+    out->mutable_data<T>(context.GetPlace());
+
+    if (y_lod[ref_level].size() <= 1) {
+      framework::TensorCopy(*x, context.GetPlace(), out);
+      return;
+    }
+
+    auto& out_lod = *out->mutable_lod();
+    // x lod level is at most 1.
+    if (x_lod.size() == 0) {
+      out_lod.resize(1);
+      out_lod[0] = y_lod[ref_level];
+    } else if (x_lod.size() == 1) {
+      out_lod.resize(1);
+      out_lod[0] = {0};
+      int out_offset = 0;
+      for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
+        int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
+        int x_start = x_lod[0][i - 1];
+        int x_end = x_lod[0][i];
+        int x_seq_len = x_end - x_start;
+        for (int j = 0; j < repeat_num; ++j) {
+          out_lod[0].push_back(out_lod[0].back() + x_seq_len);
+          out_offset++;
+        }
+      }
+    }
+
     SequenceExpandFunctor<DeviceContext, T> functor;
     functor(context.template device_context<DeviceContext>(), *x, out);
   }
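The out_lod construction above walks y_lod[ref_level] once and appends one boundary per repeated copy of each X sequence. A hedged, standalone sketch of that loop (illustrative helper, not the committed API):

    #include <cstddef>
    #include <vector>

    // Illustrative sketch only: build the level-0 LoD of the expanded output
    // from x's level-0 LoD and the reference LoD taken from y at ref_level.
    std::vector<size_t> BuildExpandedLod(const std::vector<size_t>& x_lod0,
                                         const std::vector<size_t>& ref_lod) {
      std::vector<size_t> out_lod{0};
      for (size_t i = 1; i < ref_lod.size(); ++i) {
        size_t repeat_num = ref_lod[i] - ref_lod[i - 1];
        size_t x_seq_len = x_lod0[i] - x_lod0[i - 1];
        for (size_t j = 0; j < repeat_num; ++j) {
          out_lod.push_back(out_lod.back() + x_seq_len);
        }
      }
      return out_lod;  // e.g. {0, 2, 3} and {0, 2, 5} give {0, 2, 4, 5, 6, 7}
    }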
@@ -94,21 +137,31 @@ template <typename T>
 struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> {
   void operator()(const platform::CPUDeviceContext& context, const LoDTensor& x,
                   const LoDTensor& out, const LoDTensor& dout, LoDTensor* dx) {
-    auto out_last_level = out.lod().back();
-    const T* d_out_data = dout.data<T>();
-    T* d_x_data = dx->mutable_data<T>(context.GetPlace());
-    size_t element_len = dout.numel() / dout.dims()[0];
-    for (size_t i = 0; i < out_last_level.size() - 1; ++i) {
-      size_t repeat = out_last_level[i + 1] - out_last_level[i];
-      Eigen::TensorMap<
-          Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
-          d_out_t(d_out_data, static_cast<int>(repeat), element_len);
-      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>
-          d_x_t(d_x_data, static_cast<int>(element_len));
-      d_x_t.device(*context.eigen_device()) =
-          d_out_t.sum(Eigen::array<int, 1>({{0}}));
-      d_out_data += (repeat * element_len);
-      d_x_data += element_len;
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+
+    math::SetConstant<DeviceContext, T> set_zero;
+    set_zero(dev_ctx, g_x, static_cast<T>(0));
+
+    int g_out_offset = 0;
+    for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
+      int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
+      if (repeat_num > 0) {
+        int x_start = i - 1;
+        int x_end = i;
+        if (x_lod.size() == 1) {
+          x_start = x_lod[0][i - 1];
+          x_end = x_lod[0][i];
+        }
+        int x_seq_len = x_end - x_start;
+        auto g_x_sub = g_x->Slice(x_start, x_end);
+        g_x_sub.Resize(flatten_to_1d(g_x_sub.dims()));
+        int g_out_end = g_out_offset + repeat_num * x_seq_len;
+        auto g_out_sub = g_out->Slice(g_out_offset, g_out_end);
+        g_out_sub.Resize({repeat_num, g_x_sub.dims()[0]});
+        math::ColwiseSum<DeviceContext, T> col_sum;
+        col_sum(dev_ctx, g_out_sub, &g_x_sub);
+        g_out_offset += repeat_num * x_seq_len;
+      }
     }
   }
 };
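In the backward pass, every repeated copy of a source sequence contributes additively to that sequence's gradient, which is why the new code flattens the source slice and applies math::ColwiseSum over a {repeat_num, x_seq_len * width} view of the output gradient. A hedged plain-loop sketch of the same reduction (names and signature are illustrative, not the committed interface):

    #include <cstddef>
    #include <vector>

    // Illustrative sketch only: accumulate the gradient of the expanded
    // output (d_out, row-major with `width` columns) back onto d_x.  The
    // block of repeat_num * x_seq_len output rows produced from one source
    // sequence is summed, copy by copy, back onto that sequence's rows.
    template <typename T>
    void SequenceExpandGradSketch(const std::vector<T>& d_out, size_t width,
                                  const std::vector<size_t>& x_lod,
                                  const std::vector<size_t>& ref_lod,
                                  std::vector<T>* d_x) {
      d_x->assign(x_lod.back() * width, T(0));
      size_t out_row = 0;
      for (size_t i = 1; i < ref_lod.size(); ++i) {
        size_t repeat_num = ref_lod[i] - ref_lod[i - 1];
        size_t x_start = x_lod[i - 1];
        size_t x_seq_len = x_lod[i] - x_start;
        for (size_t r = 0; r < repeat_num; ++r) {
          for (size_t row = 0; row < x_seq_len; ++row, ++out_row) {
            for (size_t c = 0; c < width; ++c) {
              (*d_x)[(x_start + row) * width + c] += d_out[out_row * width + c];
            }
          }
        }
      }
    }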
@@ -117,15 +170,29 @@ template <typename DeviceContext, typename T>
 class SequenceExpandGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
+    auto* g_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
     auto* x = context.Input<LoDTensor>("X");
-    auto* out = context.Input<LoDTensor>("Out");
-    auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto* y = context.Input<LoDTensor>("Y");
+    auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
+    int ref_level = context.Attr<int>("ref_level");
+
+    g_x->mutable_data<T>(context.GetPlace());
+    g_x->set_lod(x->lod());
+
+    auto& x_lod = x->lod();
+    auto& y_lod = y->lod();
+
+    if (ref_level == -1) ref_level = y_lod.size() - 1;
+
+    // just copy the gradient
+    if (y_lod[ref_level].size() <= 1) {
+      framework::TensorCopy(*g_out, context.GetPlace(), g_x);
+      return;
+    }
-    auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
-    d_x->set_lod(x->lod());
+
     SequenceExpandGradFunctor<DeviceContext, T> functor;
-    functor(context.template device_context<DeviceContext>(), *x, *out, *d_out,
-            d_x);
+    functor(context.template device_context<DeviceContext>(), *x, *y, *g_out,
+            g_x);
   }
 };
......