Commit 05239b6f authored by chengduoZH

fix functor

Parent 6f02fe7d
@@ -90,108 +90,143 @@ template <typename Place, typename T>
class SequenceProjectFunctor {
 public:
  void operator()(const platform::DeviceContext& context,
                  framework::LoDTensor& in, framework::LoDTensor& padding_data,
                  framework::LoDTensor& col, bool padding_trainable,
                  int context_start, int context_length, int context_stride,
                  int up_pad, int down_pad, bool gradient, bool input_grad,
                  bool pad_grad) {
    auto lod_level_0 = in.lod()[0];

    paddle::operators::math::Im2ColFunctor<
        paddle::operators::math::ColFormat::kOCF, Place, float>
        im2col_ocf;
    paddle::operators::math::Col2ImFunctor<
        paddle::operators::math::ColFormat::kOCF, Place, float>
        col2im_ocf;

    int input_row_begin, input_row_end;
    int sequence_height, sequence_width;
    sequence_width = in.dims()[1];
    input_grad = gradient && input_grad;
    pad_grad = gradient && pad_grad;

    if (!gradient || input_grad) {
      for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
        input_row_begin = (context_start > 0)
                              ? static_cast<int>(lod_level_0[i]) + context_start
                              : static_cast<int>(lod_level_0[i]);
        input_row_end = static_cast<int>(lod_level_0[i + 1]);

        framework::Tensor out_t =
            col.Slice(static_cast<int>(lod_level_0[i]),
                      static_cast<int>(lod_level_0[i + 1]));

        sequence_height = static_cast<int>(out_t.dims()[0]);

        if (input_row_begin < input_row_end) {
          framework::Tensor in_t = in.Slice(input_row_begin, input_row_end);

          std::vector<int64_t> output_shape(
              {sequence_height, 1, 1, context_length,
               sequence_width});  // output_height, output_width,
                                  // input_channels, filter_height, filter_width
          out_t.Resize(framework::make_ddim(output_shape));

          std::vector<int64_t> input_shape(
              {1, input_row_end - input_row_begin,
               sequence_width});  // input_channels, input_height, input_width
          in_t.Resize(framework::make_ddim(input_shape));

          if (gradient) {
            col2im_ocf(context, in_t, out_t,
                       /*stride_height*/ context_stride, /*stride_width*/ 1,
                       up_pad, down_pad, 0, 0);
          } else {
            im2col_ocf(context, in_t, out_t,
                       /*stride_height*/ context_stride, /*stride_width*/ 1,
                       up_pad, down_pad, 0, 0);
          }
          out_t.Resize(framework::make_ddim(
              {sequence_height, context_length * sequence_width}));
        }
      }
    }

    if (!gradient || pad_grad) {
      if (padding_trainable) {
        for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
          framework::Tensor out_t =
              col.Slice(static_cast<int>(lod_level_0[i]),
                        static_cast<int>(lod_level_0[i + 1]));

          sequence_height = static_cast<int>(out_t.dims()[0]);

          // add up trainable data
          out_t.Resize(framework::make_ddim(
              {sequence_height * context_length, sequence_width}));

          if (up_pad > 0) {  // add up pad
            int padding_rows = std::min(
                up_pad, static_cast<int>(lod_level_0[i + 1] - lod_level_0[i]));

            for (int k = 0; k < padding_rows; ++k) {
              int padding_size =
                  k + context_length < up_pad ? context_length : up_pad - k;
              framework::Tensor out_t_sub = out_t.Slice(
                  k * context_length, k * context_length + padding_size);
              framework::Tensor w_sub = padding_data.Slice(k, k + padding_size);
              // in this block, using EigenVector<T>::Flatten is ok too.
              auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
              auto w_sub_e = EigenMatrix<T>::From(w_sub);
              if (gradient) {
                w_sub_e.device(*context.GetEigenDevice<Place>()) =
                    w_sub_e + out_t_sub_e;
              } else {
                out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
              }
            }
          }

          if (down_pad > 0) {  // add down pad
            int down_pad_begin_row =
                std::max(
                    0, (sequence_height - context_start - context_length) + 1) +
                1;
            int padding_begin = std::max(0, context_start - sequence_height);
            int padding_size =
                sequence_height - context_start >= context_length
                    ? 1
                    : context_length - (sequence_height - context_start);
            if (context_start >= sequence_height) padding_size = context_length;
            int padding_idx = padding_begin;
            for (int t = 0; t + down_pad_begin_row <= sequence_height;
                 ++t, ++padding_size) {
              if (context_start >= sequence_height)
                padding_size = context_length;
              if (padding_size > context_length) {
                padding_size = context_length;
                padding_idx++;
              }
              if (padding_begin > 0 || sequence_height == context_start)
                padding_idx = padding_begin + t;
              framework::Tensor out_t_sub = out_t.Slice(
                  (down_pad_begin_row + t) * context_length - padding_size,
                  (down_pad_begin_row + t) * context_length);
              framework::Tensor w_sub = padding_data.Slice(
                  up_pad + padding_idx, up_pad + padding_idx + padding_size);
              auto out_t_sub_e = EigenMatrix<T>::From(out_t_sub);
              auto w_sub_e = EigenMatrix<T>::From(w_sub);
              if (gradient) {
                w_sub_e.device(*context.GetEigenDevice<Place>()) =
                    w_sub_e + out_t_sub_e;
              } else {
                out_t_sub_e.device(*context.GetEigenDevice<Place>()) = w_sub_e;
              }
            }
          }

          out_t.Resize(framework::make_ddim(
              {sequence_height, context_length * sequence_width}));
        }
      }
    }
  }
};
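The down-pad bookkeeping above is easy to misread, so here is a small standalone sketch (plain C++, not part of this commit) that reproduces the same index arithmetic for one made-up sequence and prints which rows of the flattened out_t slice receive which rows of padding_data. All sizes are arbitrary example values; the functor only runs this block when down_pad > 0.

// Standalone illustration only: mirrors the down-pad index arithmetic from
// SequenceProjectFunctor above. Example sizes are made up.
#include <algorithm>
#include <cstdio>

int main() {
  int sequence_height = 5;  // rows of this sequence in col
  int context_start = 0;
  int context_length = 3;
  int up_pad = 0;

  int down_pad_begin_row =
      std::max(0, (sequence_height - context_start - context_length) + 1) + 1;
  int padding_begin = std::max(0, context_start - sequence_height);
  int padding_size = sequence_height - context_start >= context_length
                         ? 1
                         : context_length - (sequence_height - context_start);
  if (context_start >= sequence_height) padding_size = context_length;
  int padding_idx = padding_begin;
  for (int t = 0; t + down_pad_begin_row <= sequence_height;
       ++t, ++padding_size) {
    if (context_start >= sequence_height) padding_size = context_length;
    if (padding_size > context_length) {
      padding_size = context_length;
      padding_idx++;
    }
    if (padding_begin > 0 || sequence_height == context_start)
      padding_idx = padding_begin + t;
    // Same slice bounds the functor uses for out_t_sub and w_sub.
    std::printf("out_t rows [%d, %d) <- padding_data rows [%d, %d)\n",
                (down_pad_begin_row + t) * context_length - padding_size,
                (down_pad_begin_row + t) * context_length,
                up_pad + padding_idx, up_pad + padding_idx + padding_size);
  }
  return 0;
}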
...
@@ -39,6 +39,7 @@ class SequenceConvKernel : public framework::OpKernel<T> {
    auto filter = *context.Input<LoDTensor>("Filter");
    out->mutable_data<T>(context.GetPlace());
    // out->set_lod(in->lod());

    int context_start = context.Attr<int>("context_start");
    int context_length = context.Attr<int>("context_length");
@@ -71,10 +72,12 @@ class SequenceConvKernel : public framework::OpKernel<T> {
    paddle::operators::math::SequenceProjectFunctor<Place, T>
        seq_project_functor;
    LoDTensor* input = const_cast<LoDTensor*>(in);
    LoDTensor* pad_data = const_cast<LoDTensor*>(padding_data);

    seq_project_functor(context.device_context(), *input, *pad_data, col,
                        padding_trainable, context_start, context_length,
                        context_stride, up_pad, down_pad, false, false, false);

    filter.Resize(framework::make_ddim({context_length * sequence_width, 1}));
    math::matmul<Place, T>(context.device_context(), col, false, filter, false,
@@ -95,8 +98,6 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
    auto* in = context.Input<LoDTensor>("X");
    auto* filter = context.Input<LoDTensor>("Filter");

    int context_start = context.Attr<int>("context_start");
    int context_length = context.Attr<int>("context_length");
    int context_stride = context.Attr<int>("context_stride");
@@ -109,10 +110,7 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
    int up_pad = std::max(0, -context_start);
    int down_pad = std::max(0, context_start + context_length - 1);
    int sequence_width = static_cast<int>(in->dims()[1]);

    // use col_shape in the im2col calculation
    framework::DDim col_shape = {in->dims()[0],
@@ -129,50 +127,19 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
      math::matmul<Place, T>(context.device_context(), *out_g, false, *filter,
                             true, T(1.0), &col, T(1.0));
    }
    paddle::operators::math::SequenceProjectFunctor<Place, T>
        seq_project_functor;

    if (in_g) {
      in_g->mutable_data<T>(context.GetPlace());
      in_g->set_lod(in->lod());
      math::SetConstant<Place, T> functor;
      functor(context.device_context(), in_g, 0);

      seq_project_functor(context.device_context(), *in_g, *padding_data_g, col,
                          padding_trainable, context_start, context_length,
                          context_stride, up_pad, down_pad, true, true, false);
    }

    if (padding_trainable && padding_data_g) {
@@ -181,66 +148,10 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
      math::SetConstant<Place, T> functor;
      functor(context.device_context(), padding_data_g, 0);

      LoDTensor* input = const_cast<LoDTensor*>(in);
      seq_project_functor(context.device_context(), *input, *padding_data_g,
                          col, padding_trainable, context_start, context_length,
                          context_stride, up_pad, down_pad, true, false, true);
    }

    if (filter_g) {
@@ -259,12 +170,13 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
      sequence_width = static_cast<int>(in->dims()[1]);

      LoDTensor* input = const_cast<LoDTensor*>(in);
      LoDTensor* pad_data = const_cast<LoDTensor*>(padding_data);

      seq_project_functor(context.device_context(), *input, *pad_data, col,
                          padding_trainable, context_start, context_length,
                          context_stride, up_pad, down_pad, false, false,
                          false);

      filter_grad_.Resize(
          framework::make_ddim({context_length * sequence_width, 1}));
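As a quick cross-check of the three call sites above, the following standalone snippet (plain C++, not part of this commit) mirrors how the functor reduces the new gradient/input_grad/pad_grad flags to its two branches; the three describe() calls correspond to the forward kernel, the input-gradient path, and the padding-gradient path.

// Standalone sketch: mirrors `input_grad = gradient && input_grad;`,
// `pad_grad = gradient && pad_grad;` and the branch conditions
// `if (!gradient || input_grad)` / `if (!gradient || pad_grad)` above.
#include <cstdio>

void describe(bool gradient, bool input_grad, bool pad_grad) {
  input_grad = gradient && input_grad;
  pad_grad = gradient && pad_grad;
  bool image_branch = !gradient || input_grad;   // im2col (fwd) / col2im (bwd)
  bool padding_branch = !gradient || pad_grad;   // copy (fwd) / accumulate (bwd)
  std::printf("gradient=%d input_grad=%d pad_grad=%d -> image:%d padding:%d\n",
              gradient, input_grad, pad_grad, image_branch, padding_branch);
}

int main() {
  describe(false, false, false);  // SequenceConvKernel: forward projection
  describe(true, true, false);    // SequenceConvGradKernel: gradient w.r.t. X
  describe(true, false, true);    // SequenceConvGradKernel: padding gradient
  return 0;
}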
...