diff --git a/paddle/fluid/operators/sum_mkldnn_op.cc b/paddle/fluid/operators/sum_mkldnn_op.cc index d2035777ee2289291a02594ee289156504df09d9..f9a16ef35ecb9eeb6c8eda9d124ecb17e7f9d5ce 100644 --- a/paddle/fluid/operators/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/sum_mkldnn_op.cc @@ -34,15 +34,15 @@ namespace paddle { namespace operators { -using paddle::framework::Tensor; -using paddle::platform::MKLDNNDeviceContext; -using paddle::platform::CPUDeviceContext; using framework::DataLayout; using mkldnn::memory; using mkldnn::primitive; +using mkldnn::reorder; using mkldnn::stream; using mkldnn::sum; -using mkldnn::reorder; +using paddle::framework::Tensor; +using paddle::platform::CPUDeviceContext; +using paddle::platform::MKLDNNDeviceContext; using platform::to_void_cast; template @@ -175,18 +175,35 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { auto& sel_row = get_selected_row(i); first_dim += sel_row.rows().size(); } - auto in_dim = - framework::vectorize(get_selected_row(N - 1).value().dims()); + + std::vector in_dim; + for (int i = 0; i < N; i++) { + auto& sel_row = get_selected_row(i); + if (sel_row.rows().size() > 0) { + in_dim = framework::vectorize(sel_row.value().dims()); + break; + } + } + + if (in_dim.empty()) { + VLOG(3) << "WARNING: all the inputs are empty"; + in_dim = framework::vectorize(get_selected_row(N - 1).value().dims()); + } else { + in_dim[0] = static_cast(first_dim); + } + in_dim[0] = static_cast(first_dim); out_value->Resize(framework::make_ddim(in_dim)); + out_value->mutable_data(ctx.GetPlace()); + // if all the input sparse vars are empty, no need to // merge these vars. if (first_dim == 0UL) { return; } - out_value->mutable_data(ctx.GetPlace()); + math::SelectedRowsAddTo functor; int64_t offset = 0; for (int i = 0; i < N; i++) { diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h index 49a4afb3a8a19c97e844e66477c6288772ece807..6dffe527c1072ee97fcde1725bfc1a47ed1ad74a 100644 --- a/paddle/fluid/operators/sum_op.h +++ b/paddle/fluid/operators/sum_op.h @@ -105,18 +105,30 @@ class SumKernel : public framework::OpKernel { auto &sel_row = get_selected_row(i); first_dim += sel_row.rows().size(); } - auto in_dim = - framework::vectorize(get_selected_row(N - 1).value().dims()); - in_dim[0] = static_cast(first_dim); + + std::vector in_dim; + for (int i = 0; i < N; i++) { + auto &sel_row = get_selected_row(i); + if (sel_row.rows().size() > 0) { + in_dim = framework::vectorize(sel_row.value().dims()); + break; + } + } + if (in_dim.empty()) { + VLOG(3) << "WARNING: all the inputs are empty"; + in_dim = framework::vectorize(get_selected_row(N - 1).value().dims()); + } else { + in_dim[0] = static_cast(first_dim); + } out_value->Resize(framework::make_ddim(in_dim)); + out_value->mutable_data(context.GetPlace()); // if all the input sparse vars are empty, no need to // merge these vars. if (first_dim == 0UL) { return; } - out_value->mutable_data(context.GetPlace()); math::SelectedRowsAddTo functor; diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 9dc93048e631801acd3b21a3bb140ecc4fcd7732..74797bb65678404b7b35d06eecc7f9a12b2a346e 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -17,6 +17,8 @@ from __future__ import print_function import unittest import numpy as np from op_test import OpTest +import paddle.fluid.core as core +from paddle.fluid.op import Operator class TestSumOp(OpTest): @@ -42,5 +44,66 @@ class TestSumOp(OpTest): pass +class TestSelectedRowsSumOp(OpTest): + def check_with_place(self, place): + scope = core.Scope() + self.check_input_and_optput(scope, place, True, True, True) + self.check_input_and_optput(scope, place, False, True, True) + self.check_input_and_optput(scope, place, False, False, True) + self.check_input_and_optput(scope, place, False, False, False) + + def check_input_and_optput(self, + scope, + place, + w1_has_data=False, + w2_has_data=False, + w3_has_data=False): + + self.create_selected_rows(scope, place, "W1", w1_has_data) + self.create_selected_rows(scope, place, "W2", w2_has_data) + self.create_selected_rows(scope, place, "W3", w3_has_data) + + # create Out Variable + out = scope.var('Out').get_selected_rows() + + # create and run sum operator + sum_op = Operator("sum", X=["W1", "W2", "W3"], Out='Out') + sum_op.run(scope, place) + + has_data_w_num = 0 + for w in [w1_has_data, w2_has_data, w3_has_data]: + if not w: + has_data_w_num += 1 + + self.assertEqual(7 * has_data_w_num, len(out.rows())) + + def create_selected_rows(self, scope, place, var_name, isEmpty): + # create and initialize W Variable + if not isEmpty: + rows = [0, 1, 2, 3, 4, 5, 6] + row_numel = 12 + else: + rows = [] + row_numel = 12 + + var = scope.var(var_name) + w_selected_rows = var.get_selected_rows() + w_selected_rows.set_height(len(rows)) + w_selected_rows.set_rows(rows) + w_array = np.ones((len(rows), row_numel)).astype("float32") + for i in range(len(rows)): + w_array[i] *= i + w_tensor = w_selected_rows.get_tensor() + w_tensor.set(w_array, place) + + return var + + def test_w_is_selected_rows(self): + places = [core.CPUPlace()] + # currently only support CPU + for place in places: + self.check_with_place(place) + + if __name__ == "__main__": unittest.main()