diff --git a/paddle/framework/selected_rows.h b/paddle/framework/selected_rows.h index f9f563051e264ae7ed7cf3c07c0065522b2bbe2e..ddc6dec194fcb7dc10886414190b63370df23a1e 100644 --- a/paddle/framework/selected_rows.h +++ b/paddle/framework/selected_rows.h @@ -45,6 +45,9 @@ class SelectedRows { } private: + // Note: rows may contain duplicates, e.g. {0, 4, 7, 0, 5, 7, 9}. + // Adding two SelectedRows simply concatenates their rows; duplicates are + // only resolved when a SelectedRows is added to a Tensor. std::vector rows_; std::unique_ptr value_{nullptr}; int64_t height_; diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc index 306612b65fa38db31f2f37efd967c8a8610d8a30..ed49a0a549130a839fbd875db84111c57520cfed 100644 --- a/paddle/operators/math/math_function.cc +++ b/paddle/operators/math/math_function.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/math/math_function.h" +#include "paddle/framework/eigen.h" +#include "paddle/memory/memcpy.h" namespace paddle { namespace operators { @@ -151,11 +153,17 @@ struct SelectedRowsAdd { framework::SelectedRows* output) { auto in1_height = input1.height(); PADDLE_ENFORCE_EQ(in1_height, input2.height()); - PADDLE_ENFORCE_EQ(in1_height, output->height()); + output->set_height(in1_height); auto& in1_rows = input1.rows(); auto& in2_rows = input2.rows(); - auto& out_rows = output->rows(); + std::vector out_rows; + out_rows.reserve(in1_rows.size() + in2_rows.size()); + + // concat rows + out_rows.insert(out_rows.end(), in1_rows.begin(), in1_rows.end()); + out_rows.insert(out_rows.end(), in2_rows.begin(), in2_rows.end()); + output->set_rows(out_rows); auto* out_value = output->mutable_value(); auto& in1_value = input1.value(); @@ -165,29 +173,59 @@ struct SelectedRowsAdd { PADDLE_ENFORCE_EQ(in1_row_numel, in2_value.numel() / in2_rows.size()); PADDLE_ENFORCE_EQ(in1_row_numel, 
out_value->numel() / out_rows.size()); - SetConstant functor; - functor(context, out_value, 0.0); auto* out_data = out_value->data(); auto* in1_data = in1_value.data(); - for (size_t i = 0; i < in1_rows.size(); i++) { - auto row = detail::FindPos(out_rows, in1_rows[i]); - for (size_t j = 0; j < in1_row_numel; j++) { - out_data[row * in1_row_numel + j] += in1_data[i * in1_row_numel + j]; - } - } + memory::Copy(platform::CPUPlace(), out_data, platform::CPUPlace(), in1_data, + in1_value.numel() * sizeof(T)); auto* in2_data = in2_value.data(); - for (size_t i = 0; i < in2_rows.size(); i++) { - auto row = detail::FindPos(out_rows, in2_rows[i]); - for (size_t j = 0; j < in1_row_numel; j++) { - out_data[row * in1_row_numel + j] += in2_data[i * in1_row_numel + j]; + memory::Copy(platform::CPUPlace(), out_data + in1_value.numel(), + platform::CPUPlace(), in2_data, in2_value.numel() * sizeof(T)); + } +}; + +template struct SelectedRowsAdd; + +template +struct SelectedRowsAddTensor { + void operator()(const platform::DeviceContext& context, + const framework::SelectedRows& input1, + const framework::Tensor& input2, framework::Tensor* output) { + auto in1_height = input1.height(); + auto in2_dims = input2.dims(); + auto out_dims = output->dims(); + PADDLE_ENFORCE_EQ(in1_height, in2_dims[0]); + PADDLE_ENFORCE_EQ(in1_height, out_dims[0]); + + auto& in1_value = input1.value(); + auto& in1_rows = input1.rows(); + + int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); + PADDLE_ENFORCE_EQ(in1_row_numel, input2.numel() / in1_height); + PADDLE_ENFORCE_EQ(in1_row_numel, output->numel() / in1_height); + + SetConstant functor; + functor(context, output, 0.0); + + auto* in1_data = in1_value.data(); + auto* out_data = output->data(); + + for (size_t i = 0; i < in1_rows.size(); i++) { + for (int64_t j = 0; j < in1_row_numel; j++) { + out_data[in1_rows[i] * in1_row_numel + j] += + in1_data[i * in1_row_numel + j]; } } + + auto out_eigen = framework::EigenVector::Flatten(*output); + 
auto in2_eigen = framework::EigenVector::Flatten(input2); + out_eigen.device(*context.GetEigenDevice()) = + out_eigen + in2_eigen; } }; -template struct SelectedRowsAdd; +template struct SelectedRowsAddTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h index f298f34baba9867351601ec4b87c0a0160d2a38d..0d0d4cdd7396362ee154896fe57615ac02ad09ae 100644 --- a/paddle/operators/math/math_function.h +++ b/paddle/operators/math/math_function.h @@ -96,6 +96,8 @@ struct SetConstant { } }; +// SelectedRows + SelectedRows simply concatenates the values and rows. +// The real computation happens when dealing with LoDTensor. template struct SelectedRowsAdd { void operator()(const platform::DeviceContext& context, @@ -104,6 +106,13 @@ struct SelectedRowsAdd { framework::SelectedRows* output); }; +template +struct SelectedRowsAddTensor { + void operator()(const platform::DeviceContext& context, + const framework::SelectedRows& input1, + const framework::Tensor& input2, framework::Tensor* output); +}; + } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/math_function_test.cc b/paddle/operators/math/math_function_test.cc index 43760bc6015839a351622597bdc865ca9a3b6c27..e3186171d19f0fdc2d894bb7b2aff722777bd2b4 100644 --- a/paddle/operators/math/math_function_test.cc +++ b/paddle/operators/math/math_function_test.cc @@ -286,37 +286,76 @@ TEST(math_function, selected_rows_add) { auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( make_ddim({static_cast(rows1.size()), row_numel}), cpu_place); - functor(ctx, in1_value, 2.0); + functor(ctx, in1_value, 1.0); std::vector rows2{0, 5, 7, 9}; std::unique_ptr selected_rows2{new SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( make_ddim({static_cast(rows2.size()), row_numel}), cpu_place); - functor(ctx, in2_value, 1.0); + functor(ctx, 
in2_value, 2.0); std::unique_ptr output{new SelectedRows()}; - output->set_height(height); - std::vector out_rows = {0, 4, 5, 7, 9}; - output->set_rows(out_rows); - auto* out_value = output->mutable_value(); - out_value->mutable_data(make_ddim({5, 10}), cpu_place); + + // simply concatenate two SelectedRows + out_value->mutable_data(make_ddim({7, 10}), cpu_place); SelectedRowsAdd add_functor; add_functor(ctx, *selected_rows1, *selected_rows2, output.get()); - auto* data = output->value().data(); - // out_rows[0] = 0 - EXPECT_EQ(data[0 * row_numel + 0], 3.0); - EXPECT_EQ(data[0 * row_numel + 8], 3.0); - // out_rows[1] = 4 - EXPECT_EQ(data[1 * row_numel + 1], 2.0); - // out_rows[2] = 5 - EXPECT_EQ(data[2 * row_numel + 6], 1.0); - // out_rows[3] = 7 - EXPECT_EQ(data[3 * row_numel + 3], 3.0); - EXPECT_EQ(data[3 * row_numel + 8], 3.0); - // out_rows[4] = 9 - EXPECT_EQ(data[4 * row_numel + 4], 1.0); + auto out_height = output->height(); + EXPECT_EQ(out_height, height); + + auto& out_rows = output->rows(); + + // input1 rows + EXPECT_EQ(out_rows[0], 0); + EXPECT_EQ(out_rows[1], 4); + EXPECT_EQ(out_rows[2], 7); + // input2 rows + EXPECT_EQ(out_rows[3], 0); + EXPECT_EQ(out_rows[4], 5); + EXPECT_EQ(out_rows[5], 7); + EXPECT_EQ(out_rows[6], 9); + + auto* out_data = output->value().data(); + // input1 value + EXPECT_EQ(out_data[0 * row_numel + 0], 1.0); + EXPECT_EQ(out_data[0 * row_numel + 8], 1.0); + EXPECT_EQ(out_data[1 * row_numel + 1], 1.0); + EXPECT_EQ(out_data[2 * row_numel + 6], 1.0); + // input2 value + EXPECT_EQ(out_data[3 * row_numel + 3], 2.0); + EXPECT_EQ(out_data[3 * row_numel + 8], 2.0); + EXPECT_EQ(out_data[4 * row_numel + 4], 2.0); + EXPECT_EQ(out_data[5 * row_numel + 7], 2.0); + EXPECT_EQ(out_data[6 * row_numel + 9], 2.0); + + std::unique_ptr tensor1{new Tensor()}; + tensor1->mutable_data(make_ddim({height, row_numel}), cpu_place); + SetConstant constant_functor; + constant_functor(ctx, tensor1.get(), 3.0); + + std::unique_ptr tensor2{new Tensor()}; + 
tensor2->mutable_data(make_ddim({height, row_numel}), cpu_place); + + SelectedRowsAddTensor add_tensor_functor; + add_tensor_functor(ctx, *output, *tensor1, tensor2.get()); + + auto* tensor2_data = tensor2->data(); + // row0: 1.0 + 2.0 + 3.0 + EXPECT_EQ(tensor2_data[0 * row_numel + 0], 6.0); + // row1: 3.0 + EXPECT_EQ(tensor2_data[1 * row_numel + 1], 3.0); + // row4: 1.0 + 3.0 + EXPECT_EQ(tensor2_data[4 * row_numel + 6], 4.0); + // row5: 2.0 + 3.0 + EXPECT_EQ(tensor2_data[5 * row_numel + 7], 5.0); + // row6: 3.0 + EXPECT_EQ(tensor2_data[6 * row_numel + 1], 3.0); + // row7: 1.0 + 2.0 + 3.0 + EXPECT_EQ(tensor2_data[7 * row_numel + 3], 6.0); + // row9: 2.0 + 3.0 + EXPECT_EQ(tensor2_data[9 * row_numel + 6], 5.0); }