Commit 5d718a58 authored by: qiaolongfei

optimize reduce_sum_grad op

Parent: b643473d
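The change below adds a CPU fast path for the reduce_sum gradient. It rests on the identity that the gradient of a sum reduction is just the upstream gradient broadcast back to the input shape; a minimal NumPy sketch of that identity (illustrative only, not Paddle code):

```python
import numpy as np

# Out = X.sum(axis=0)  =>  d(Out)/d(X[i, j]) = 1, so grad(X)[i, j] = grad(Out)[j].
X = np.random.rand(3, 4)
d_out = np.random.rand(4)              # upstream gradient of Out
d_x = np.broadcast_to(d_out, X.shape)  # gradient w.r.t. X is just a broadcast
assert d_x.shape == X.shape
```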
@@ -88,6 +88,35 @@ class ReduceGradKernel : public framework::OpKernel<T> {
     auto* output = context.Output<Tensor>(framework::GradVarName("X"));
     output->mutable_data<T>(context.GetPlace());
 
+    if (context.GetPlace().type() == typeid(platform::CPUPlace)) {
+      const auto* input2_d = input2->data<T>();
+      auto* output_d = output->data<T>();
+
+      // CPU reduce_all_grad
+      if (reduce_all) {
+        PADDLE_ENFORCE(input2->dims().size() == 1 && input2->dims()[0] == 1,
+                       "output should be a scalar");
+        for (int64_t i = 0; i < framework::product(input0->dims()); ++i) {
+          output_d[i] = input2_d[0];
+        }
+        return;
+      }
+
+      if (input0->dims().size() == 2 && dims.size() == 1) {
+        auto& input_dim = input0->dims();
+        for (int64_t i = 0; i < input_dim[0]; ++i) {
+          for (int64_t j = 0; j < input_dim[1]; ++j) {
+            if (dims[0] == 0) {
+              output_d[i * input_dim[1] + j] = input2_d[j];
+            } else {
+              output_d[i * input_dim[1] + j] = input2_d[i];
+            }
+          }
+        }
+        return;
+      }
+    }
+
     if (reduce_all) {
       auto x = EigenVector<T>::Flatten(*input0);
       auto x_reduce = EigenVector<T>::From(*input1);
...
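A hedged NumPy analogue of the CPU fast path added in the hunk above; the function and variable names here are illustrative, not Paddle APIs. It mirrors the two special cases (reduce_all, and a 2D input reduced along a single dimension) with the same index arithmetic as the C++ loops:

```python
import numpy as np

def reduce_sum_grad_cpu(x_shape, d_out, dims, reduce_all=False):
    """Mirror of the element-wise copy loops in the CPU fast path."""
    d_out = np.asarray(d_out).ravel()
    if reduce_all:
        # grad(Out) is a scalar; every element of grad(X) receives its value
        return np.full(x_shape, d_out[0])
    if len(x_shape) == 2 and len(dims) == 1:
        rows, cols = x_shape
        d_x = np.empty(x_shape)
        for i in range(rows):
            for j in range(cols):
                # dims == [0]: column j copies d_out[j]; dims == [1]: row i copies d_out[i]
                d_x[i, j] = d_out[j] if dims[0] == 0 else d_out[i]
        return d_x
    raise NotImplementedError("other shapes fall through to the Eigen path")

d_out = np.random.rand(10)
d_x = reduce_sum_grad_cpu((20, 10), d_out, dims=[0])
assert np.allclose(d_x, np.broadcast_to(d_out, (20, 10)))
```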
@@ -31,7 +31,7 @@ struct SumGradFunctor {
             typename DY, typename Dim>
   void operator()(const DeviceContext& place, X* x, Y* y, DX* dx, DY* dy,
                   const Dim& dim, int size) {
-    dx->device(place) = dy->broadcast(dim);
+    dx->device(place) = dy->eval().broadcast(dim);
   }
 };
...
@@ -2961,7 +2961,7 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
     # x is a Tensor variable with following elements:
     #    [[0.2, 0.3, 0.5, 0.9]
     #     [0.1, 0.2, 0.6, 0.7]]
-    # Each example is followed by the correspending output tensor.
+    # Each example is followed by the corresponding output tensor.
     fluid.layers.reduce_sum(x)  # [3.5]
     fluid.layers.reduce_sum(x, dim=0)  # [0.3, 0.5, 1.1, 1.6]
     fluid.layers.reduce_sum(x, dim=-1)  # [1.9, 1.6]
@@ -2970,7 +2970,7 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
     # x is a Tensor variable with shape [2, 2, 2] and elements as below:
     #    [[[1, 2], [3, 4]],
     #     [[5, 6], [7, 8]]]
-    # Each example is followed by the correspending output tensor.
+    # Each example is followed by the corresponding output tensor.
     fluid.layers.reduce_sum(x, dim=[1, 2])  # [10, 26]
     fluid.layers.reduce_sum(x, dim=[0, 1])  # [16, 20]
...
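A quick NumPy check of the numbers in the docstring examples above (this only verifies the documented outputs; it does not exercise fluid itself):

```python
import numpy as np

x = np.array([[0.2, 0.3, 0.5, 0.9],
              [0.1, 0.2, 0.6, 0.7]])
assert np.isclose(x.sum(), 3.5)
assert np.allclose(x.sum(axis=0), [0.3, 0.5, 1.1, 1.6])
assert np.allclose(x.sum(axis=-1), [1.9, 1.6])

y = np.array([[[1, 2], [3, 4]],
              [[5, 6], [7, 8]]])
assert np.array_equal(y.sum(axis=(1, 2)), [10, 26])
assert np.array_equal(y.sum(axis=(0, 1)), [16, 20])
```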
@@ -89,15 +89,11 @@ class TestProdOp(OpTest):
         self.check_grad(['X'], 'Out')
 
 
-class TestKeepDimReduce(OpTest):
+class Test1DReduce(OpTest):
     def setUp(self):
         self.op_type = "reduce_sum"
-        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
-        self.attrs = {'dim': [-2], 'keep_dim': True}
-        self.outputs = {
-            'Out':
-            self.inputs['X'].sum(axis=tuple(self.attrs['dim']), keepdims=True)
-        }
+        self.inputs = {'X': np.random.random(20).astype("float64")}
+        self.outputs = {'Out': self.inputs['X'].sum(axis=0)}
 
     def test_check_output(self):
         self.check_output()
@@ -106,32 +102,40 @@ class TestKeepDimReduce(OpTest):
         self.check_grad(['X'], 'Out')
 
 
-class Test1DReduce(OpTest):
+class Test2DReduce0(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_sum"
-        self.inputs = {'X': np.random.random(20).astype("float64")}
+        self.attrs = {'dim': [0]}
+        self.inputs = {'X': np.random.random((20, 10)).astype("float64")}
         self.outputs = {'Out': self.inputs['X'].sum(axis=0)}
 
-    def test_check_output(self):
-        self.check_output()
 
-    def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+class Test2DReduce1(Test1DReduce):
+    def setUp(self):
+        self.op_type = "reduce_sum"
+        self.attrs = {'dim': [1]}
+        self.inputs = {'X': np.random.random((20, 10)).astype("float64")}
+        self.outputs = {'Out': self.inputs['X'].sum(axis=1)}
+
+
+class TestKeepDimReduce(Test1DReduce):
+    def setUp(self):
+        self.op_type = "reduce_sum"
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
+        self.attrs = {'dim': [-2], 'keep_dim': True}
+        self.outputs = {
+            'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']),
+                                        keepdims=self.attrs['keep_dim'])
+        }
 
 
-class TestReduceAll(OpTest):
+class TestReduceAll(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_sum"
         self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")}
         self.attrs = {'reduce_all': True}
         self.outputs = {'Out': self.inputs['X'].sum()}
 
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
-
 
 ## reduction in multi dims
 class TestReduceMeanOpMultiAxises(OpTest):
...
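For reference, a small NumPy sketch of the shapes the refactored test classes expect (OpTest's check_output/check_grad compare the op against these NumPy reference outputs):

```python
import numpy as np

x1 = np.random.random(20).astype("float64")
assert np.ndim(x1.sum(axis=0)) == 0                            # Test1DReduce

x2 = np.random.random((20, 10)).astype("float64")
assert x2.sum(axis=0).shape == (10,)                           # Test2DReduce0, dim=[0]
assert x2.sum(axis=1).shape == (20,)                           # Test2DReduce1, dim=[1]

x3 = np.random.random((5, 6, 10)).astype("float64")
assert x3.sum(axis=(-2,), keepdims=True).shape == (5, 1, 10)   # TestKeepDimReduce
```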