Unverified commit 8d00f76e authored by xiaoguoguo626807, committed by GitHub

【fluid api clear】Remove reduce sum (#48330)

* remove fluid.reduce_sum

* remove fluid.reduce_sum

* modify axis and import paddle

* modify keepdim and out_name

* modify unittest

* modify unittest

* modify CI_static and loss.py

* modify test_mse_loss

* modify static ci

* modify static ci datatype

* add import paddle in test

* fix conflict

* fix conflict

* modify ci

* modify ci

* fix conflict

* fix bug

* code_style
Parent commit: 4527d249
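Most hunks in the diff below apply the same mechanical mapping: `fluid.layers.reduce_sum(x, dim=..., keep_dim=...)` is replaced with `paddle.sum(x, axis=..., keepdim=...)`, i.e. the arguments `dim` and `keep_dim` become `axis` and `keepdim`. A minimal sketch of the replacement call for reference; the tensor names are illustrative and do not come from the diff:

```python
import paddle

x = paddle.rand([2, 3, 4])

# Old fluid API removed by this commit (shown only as a comment):
#   s = fluid.layers.reduce_sum(x, dim=-1, keep_dim=True)
# Replacement used throughout the diff below:
s = paddle.sum(x, axis=-1, keepdim=True)  # reduce the last axis, keep its dimension
total = paddle.sum(x)                     # no axis given: sum over all elements
```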
...@@ -136,6 +136,7 @@ TODO ...@@ -136,6 +136,7 @@ TODO
# Examples # Examples
```python ```python
import paddle
class MyLayer(fluid.imperative.Layer): class MyLayer(fluid.imperative.Layer):
def __init__(self): def __init__(self):
super(MyLayer, self).__init__() super(MyLayer, self).__init__()
...@@ -143,7 +144,7 @@ class MyLayer(fluid.imperative.Layer): ...@@ -143,7 +144,7 @@ class MyLayer(fluid.imperative.Layer):
def forward(self, inputs): def forward(self, inputs):
x = fluid.layers.relu(inputs) x = fluid.layers.relu(inputs)
x = fluid.layers.elementwise_mul(x, x) x = fluid.layers.elementwise_mul(x, x)
x = fluid.layers.reduce_sum(x) x = paddle.sum(x)
return [x] return [x]
...@@ -184,7 +185,7 @@ class MLP(fluid.Layer): ...@@ -184,7 +185,7 @@ class MLP(fluid.Layer):
def forward(self, inputs): def forward(self, inputs):
x = self._linear1(inputs) x = self._linear1(inputs)
x = self._linear2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = paddle.sum(x)
return x return x
......
...@@ -69,7 +69,7 @@ class GroupShardedClipGrad: ...@@ -69,7 +69,7 @@ class GroupShardedClipGrad:
layers.merge_selected_rows(g) layers.merge_selected_rows(g)
) )
square = paddle.square(merge_grad) square = paddle.square(merge_grad)
sum_square = layers.reduce_sum(square) sum_square = paddle.sum(square)
if p.dtype == paddle.float16: if p.dtype == paddle.float16:
if p_slice: if p_slice:
...@@ -87,7 +87,7 @@ class GroupShardedClipGrad: ...@@ -87,7 +87,7 @@ class GroupShardedClipGrad:
global_norm_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32) global_norm_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else: else:
global_norm_fp16 = layers.concat(sum_square_fp16) global_norm_fp16 = layers.concat(sum_square_fp16)
global_norm_fp16 = layers.reduce_sum(global_norm_fp16) global_norm_fp16 = paddle.sum(global_norm_fp16)
global_norm_fp16 = paddle.cast( global_norm_fp16 = paddle.cast(
global_norm_fp16, dtype=paddle.float32 global_norm_fp16, dtype=paddle.float32
) )
...@@ -97,7 +97,7 @@ class GroupShardedClipGrad: ...@@ -97,7 +97,7 @@ class GroupShardedClipGrad:
global_unslice_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32) global_unslice_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else: else:
global_unslice_fp16 = layers.concat(unslice_params_fp16) global_unslice_fp16 = layers.concat(unslice_params_fp16)
global_unslice_fp16 = layers.reduce_sum(global_unslice_fp16) global_unslice_fp16 = paddle.sum(global_unslice_fp16)
global_unslice_fp16 = paddle.cast( global_unslice_fp16 = paddle.cast(
global_unslice_fp16, dtype=paddle.float32 global_unslice_fp16, dtype=paddle.float32
) )
...@@ -108,7 +108,7 @@ class GroupShardedClipGrad: ...@@ -108,7 +108,7 @@ class GroupShardedClipGrad:
if len(sum_square_fp32) != 0 if len(sum_square_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32) else paddle.to_tensor([0.0], dtype=paddle.float32)
) )
global_norm_fp32 = layers.reduce_sum(global_norm_fp32) global_norm_fp32 = paddle.sum(global_norm_fp32)
# global norm of non-distributed FP32 params_and_grads for unslice parameters # global norm of non-distributed FP32 params_and_grads for unslice parameters
global_unslice_fp32 = ( global_unslice_fp32 = (
...@@ -116,7 +116,7 @@ class GroupShardedClipGrad: ...@@ -116,7 +116,7 @@ class GroupShardedClipGrad:
if len(unslice_params_fp32) != 0 if len(unslice_params_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32) else paddle.to_tensor([0.0], dtype=paddle.float32)
) )
global_unslice_fp32 = layers.reduce_sum(global_unslice_fp32) global_unslice_fp32 = paddle.sum(global_unslice_fp32)
global_unslice_var = global_unslice_fp16 + global_unslice_fp32 global_unslice_var = global_unslice_fp16 + global_unslice_fp32
global_norm_var = ( global_norm_var = (
......
...@@ -70,8 +70,7 @@ class ShardingClipGrad: ...@@ -70,8 +70,7 @@ class ShardingClipGrad:
layers.merge_selected_rows(g) layers.merge_selected_rows(g)
) )
square = paddle.square(merge_grad) square = paddle.square(merge_grad)
sum_square = layers.reduce_sum(square) sum_square = paddle.sum(square)
if p.dtype == paddle.float16: if p.dtype == paddle.float16:
if p_slice: if p_slice:
sum_square_fp16.append(sum_square) sum_square_fp16.append(sum_square)
...@@ -88,7 +87,7 @@ class ShardingClipGrad: ...@@ -88,7 +87,7 @@ class ShardingClipGrad:
global_norm_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32) global_norm_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else: else:
global_norm_fp16 = layers.concat(sum_square_fp16) global_norm_fp16 = layers.concat(sum_square_fp16)
global_norm_fp16 = layers.reduce_sum(global_norm_fp16) global_norm_fp16 = paddle.sum(global_norm_fp16)
global_norm_fp16 = paddle.cast( global_norm_fp16 = paddle.cast(
global_norm_fp16, dtype=paddle.float32 global_norm_fp16, dtype=paddle.float32
) )
...@@ -98,7 +97,7 @@ class ShardingClipGrad: ...@@ -98,7 +97,7 @@ class ShardingClipGrad:
global_unslice_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32) global_unslice_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else: else:
global_unslice_fp16 = layers.concat(unslice_params_fp16) global_unslice_fp16 = layers.concat(unslice_params_fp16)
global_unslice_fp16 = layers.reduce_sum(global_unslice_fp16) global_unslice_fp16 = paddle.sum(global_unslice_fp16)
global_unslice_fp16 = paddle.cast( global_unslice_fp16 = paddle.cast(
global_unslice_fp16, dtype=paddle.float32 global_unslice_fp16, dtype=paddle.float32
) )
...@@ -109,7 +108,7 @@ class ShardingClipGrad: ...@@ -109,7 +108,7 @@ class ShardingClipGrad:
if len(sum_square_fp32) != 0 if len(sum_square_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32) else paddle.to_tensor([0.0], dtype=paddle.float32)
) )
global_norm_fp32 = layers.reduce_sum(global_norm_fp32) global_norm_fp32 = paddle.sum(global_norm_fp32)
# global norm of non-distributed FP32 params_and_grads for unslice parameter # global norm of non-distributed FP32 params_and_grads for unslice parameter
global_unslice_fp32 = ( global_unslice_fp32 = (
...@@ -117,7 +116,7 @@ class ShardingClipGrad: ...@@ -117,7 +116,7 @@ class ShardingClipGrad:
if len(unslice_params_fp32) != 0 if len(unslice_params_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32) else paddle.to_tensor([0.0], dtype=paddle.float32)
) )
global_unslice_fp32 = layers.reduce_sum(global_unslice_fp32) global_unslice_fp32 = paddle.sum(global_unslice_fp32)
global_unslice_var = global_unslice_fp16 + global_unslice_fp32 global_unslice_var = global_unslice_fp16 + global_unslice_fp32
global_norm_var = ( global_norm_var = (
......
...@@ -37,7 +37,7 @@ def sum(input, scope=None, util=None): ...@@ -37,7 +37,7 @@ def sum(input, scope=None, util=None):
# in model.py # in model.py
input = fluid.layers.cast(some_input, dtype='float32') input = fluid.layers.cast(some_input, dtype='float32')
cnt = fluid.layers.reduce_sum(input) cnt = paddle.sum(input)
global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
tmp = fluid.layers.elementwise_add(cnt, global_cnt) tmp = fluid.layers.elementwise_add(cnt, global_cnt)
fluid.layers.assign(tmp, global_cnt) fluid.layers.assign(tmp, global_cnt)
...@@ -77,7 +77,7 @@ def max(input, scope=None, util=None): ...@@ -77,7 +77,7 @@ def max(input, scope=None, util=None):
# in model.py # in model.py
input = fluid.layers.cast(some_input, dtype='float32') input = fluid.layers.cast(some_input, dtype='float32')
cnt = fluid.layers.reduce_sum(input) cnt = paddle.sum(input)
global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
tmp = paddle.maximum(cnt, global_cnt) tmp = paddle.maximum(cnt, global_cnt)
fluid.layers.assign(tmp, global_cnt) fluid.layers.assign(tmp, global_cnt)
...@@ -117,7 +117,7 @@ def min(input, scope=None, util=None): ...@@ -117,7 +117,7 @@ def min(input, scope=None, util=None):
# in model.py # in model.py
input = fluid.layers.cast(some_input, dtype='float32') input = fluid.layers.cast(some_input, dtype='float32')
cnt = fluid.layers.reduce_sum(input) cnt = paddle.sum(input)
global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
tmp = fluid.layers.elementwise_min(cnt, global_cnt) tmp = fluid.layers.elementwise_min(cnt, global_cnt)
fluid.layers.assign(tmp, global_cnt) fluid.layers.assign(tmp, global_cnt)
......
...@@ -73,7 +73,7 @@ def _squared_l2_norm(x): ...@@ -73,7 +73,7 @@ def _squared_l2_norm(x):
or x.dtype == core.VarDesc.VarType.BF16 or x.dtype == core.VarDesc.VarType.BF16
): ):
square = paddle.square(x) square = paddle.square(x)
sum_square = layers.reduce_sum(square) sum_square = paddle.sum(square)
return sum_square return sum_square
if in_dygraph_mode(): if in_dygraph_mode():
......
...@@ -64,9 +64,7 @@ class AdaRoundLoss: ...@@ -64,9 +64,7 @@ class AdaRoundLoss:
square_cost = fluid.layers.square_error_cost( square_cost = fluid.layers.square_error_cost(
ada_quantized_output, orig_output ada_quantized_output, orig_output
) )
recon_loss = fluid.layers.reduce_mean( recon_loss = fluid.layers.reduce_mean(paddle.sum(square_cost, axis=-1))
fluid.layers.reduce_sum(square_cost, dim=-1)
)
return recon_loss return recon_loss
def compute_round_loss(self, alpha_v, warm_start, beta): def compute_round_loss(self, alpha_v, warm_start, beta):
...@@ -76,7 +74,7 @@ class AdaRoundLoss: ...@@ -76,7 +74,7 @@ class AdaRoundLoss:
# calculate regularization term - which ensures parameter to converge to exactly zeros and ones # calculate regularization term - which ensures parameter to converge to exactly zeros and ones
# at the end of optimization # at the end of optimization
reg_term = fluid.layers.reduce_sum( reg_term = paddle.sum(
-paddle.pow(paddle.abs(2 * h_v - 1), beta) + 1 -paddle.pow(paddle.abs(2 * h_v - 1), beta) + 1
) )
......
...@@ -113,7 +113,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): ...@@ -113,7 +113,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""):
cost = fluid.layers.softmax_with_cross_entropy( cost = fluid.layers.softmax_with_cross_entropy(
logits, label, return_softmax=False logits, label, return_softmax=False
) )
sum_cost = fluid.layers.reduce_sum(cost) sum_cost = paddle.sum(cost)
# Test program # Test program
test_program = train_program.clone(for_test=True) test_program = train_program.clone(for_test=True)
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import warnings import warnings
import numpy as np import numpy as np
import paddle
from . import layers from . import layers
from .framework import Program, Variable, program_guard from .framework import Program, Variable, program_guard
from . import unique_name from . import unique_name
......
...@@ -1640,6 +1640,7 @@ class Variable(metaclass=VariableMetaClass): ...@@ -1640,6 +1640,7 @@ class Variable(metaclass=VariableMetaClass):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
...@@ -1652,7 +1653,7 @@ class Variable(metaclass=VariableMetaClass): ...@@ -1652,7 +1653,7 @@ class Variable(metaclass=VariableMetaClass):
tmp.stop_gradient=False tmp.stop_gradient=False
inputs2.append(tmp) inputs2.append(tmp)
ret2 = fluid.layers.sums(inputs2) ret2 = fluid.layers.sums(inputs2)
loss2 = fluid.layers.reduce_sum(ret2) loss2 = paddle.sum(ret2)
loss2.backward() loss2.backward()
print(loss2.gradient()) print(loss2.gradient())
...@@ -1687,6 +1688,7 @@ class Variable(metaclass=VariableMetaClass): ...@@ -1687,6 +1688,7 @@ class Variable(metaclass=VariableMetaClass):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
...@@ -1698,7 +1700,7 @@ class Variable(metaclass=VariableMetaClass): ...@@ -1698,7 +1700,7 @@ class Variable(metaclass=VariableMetaClass):
tmp.stop_gradient=False tmp.stop_gradient=False
inputs2.append(tmp) inputs2.append(tmp)
ret2 = fluid.layers.sums(inputs2) ret2 = fluid.layers.sums(inputs2)
loss2 = fluid.layers.reduce_sum(ret2) loss2 = paddle.sum(ret2)
loss2.backward() loss2.backward()
print(loss2.gradient()) print(loss2.gradient())
loss2.clear_gradient() loss2.clear_gradient()
......
...@@ -45,7 +45,7 @@ class SimpleLayer(Layer): ...@@ -45,7 +45,7 @@ class SimpleLayer(Layer):
def forward(self, inputs): def forward(self, inputs):
x = self._linear1(inputs) x = self._linear1(inputs)
x = layers.reduce_sum(x) x = paddle.sum(x)
return x return x
......
...@@ -3399,7 +3399,7 @@ class IfElse: ...@@ -3399,7 +3399,7 @@ class IfElse:
output = ie() # [array([[-7.], [-9.], [ 8.], [ 7.]], dtype=float32)] output = ie() # [array([[-7.], [-9.], [ 8.], [ 7.]], dtype=float32)]
# Get the first Variable in the output List and add all elements. # Get the first Variable in the output List and add all elements.
out = fluid.layers.reduce_sum(output[0]) out = paddle.sum(output[0])
exe = fluid.Executor(fluid.CPUPlace()) exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
......
...@@ -582,6 +582,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25): ...@@ -582,6 +582,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -591,7 +592,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25): ...@@ -591,7 +592,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
batch_size = 32 batch_size = 32
max_iter = 20 max_iter = 20
paddle.enable_static()
def gen_train_data(): def gen_train_data():
x_data = np.random.uniform(0, 255, (batch_size, 3, image_height, x_data = np.random.uniform(0, 255, (batch_size, 3, image_height,
image_width)).astype('float64') image_width)).astype('float64')
...@@ -601,12 +602,12 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25): ...@@ -601,12 +602,12 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
def get_focal_loss(pred, label, fg_num, num_classes): def get_focal_loss(pred, label, fg_num, num_classes):
pred = fluid.layers.reshape(pred, [-1, num_classes]) pred = paddle.reshape(pred, [-1, num_classes])
label = fluid.layers.reshape(label, [-1, 1]) label = paddle.reshape(label, [-1, 1])
label.stop_gradient = True label.stop_gradient = True
loss = fluid.layers.sigmoid_focal_loss( loss = fluid.layers.sigmoid_focal_loss(
pred, label, fg_num, gamma=2.0, alpha=0.25) pred, label, fg_num, gamma=2.0, alpha=0.25)
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss return loss
...@@ -628,7 +629,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25): ...@@ -628,7 +629,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
data = fluid.layers.fill_constant(shape=[1], value=1, dtype='int32') data = fluid.layers.fill_constant(shape=[1], value=1, dtype='int32')
fg_label = fluid.layers.greater_equal(label, data) fg_label = fluid.layers.greater_equal(label, data)
fg_label = fluid.layers.cast(fg_label, dtype='int32') fg_label = fluid.layers.cast(fg_label, dtype='int32')
fg_num = fluid.layers.reduce_sum(fg_label) fg_num = paddle.sum(fg_label, dtype='int32')
fg_num.stop_gradient = True fg_num.stop_gradient = True
avg_loss = get_focal_loss(output, label, fg_num, num_classes) avg_loss = get_focal_loss(output, label, fg_num, num_classes)
return avg_loss return avg_loss
...@@ -1847,9 +1848,9 @@ def ssd_loss( ...@@ -1847,9 +1848,9 @@ def ssd_loss(
# shape=(-1, 0) is set for compile-time, the correct shape is set by # shape=(-1, 0) is set for compile-time, the correct shape is set by
# actual_shape in runtime. # actual_shape in runtime.
loss = paddle.reshape(x=loss, shape=actual_shape) loss = paddle.reshape(x=loss, shape=actual_shape)
loss = nn.reduce_sum(loss, dim=1, keep_dim=True) loss = paddle.sum(loss, axis=1, keepdim=True)
if normalize: if normalize:
normalizer = nn.reduce_sum(target_loc_weight) normalizer = paddle.sum(target_loc_weight)
loss = loss / normalizer loss = loss / normalizer
return loss return loss
......
...@@ -538,13 +538,13 @@ class Categorical(Distribution): ...@@ -538,13 +538,13 @@ class Categorical(Distribution):
) )
e_logits = paddle.exp(logits) e_logits = paddle.exp(logits)
other_e_logits = paddle.exp(other_logits) other_e_logits = paddle.exp(other_logits)
z = nn.reduce_sum(e_logits, dim=-1, keep_dim=True) z = paddle.sum(e_logits, axis=-1, keepdim=True)
other_z = nn.reduce_sum(other_e_logits, dim=-1, keep_dim=True) other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True)
prob = e_logits / z prob = e_logits / z
kl = nn.reduce_sum( kl = paddle.sum(
prob * (logits - nn.log(z) - other_logits + nn.log(other_z)), prob * (logits - nn.log(z) - other_logits + nn.log(other_z)),
dim=-1, axis=-1,
keep_dim=True, keepdim=True,
) )
return kl return kl
...@@ -558,10 +558,11 @@ class Categorical(Distribution): ...@@ -558,10 +558,11 @@ class Categorical(Distribution):
""" """
logits = self.logits - paddle.max(self.logits, axis=-1, keepdim=True) logits = self.logits - paddle.max(self.logits, axis=-1, keepdim=True)
e_logits = paddle.exp(logits) e_logits = paddle.exp(logits)
z = nn.reduce_sum(e_logits, dim=-1, keep_dim=True) z = paddle.sum(e_logits, axis=-1, keepdim=True)
prob = e_logits / z prob = e_logits / z
entropy = -1.0 * nn.reduce_sum( entropy = -1.0 * paddle.sum(
prob * (logits - nn.log(z)), dim=-1, keep_dim=True prob * (logits - nn.log(z)), axis=-1, keepdim=True
) )
return entropy return entropy
...@@ -703,7 +704,7 @@ class MultivariateNormalDiag(Distribution): ...@@ -703,7 +704,7 @@ class MultivariateNormalDiag(Distribution):
""" """
check_type(other, 'other', MultivariateNormalDiag, 'kl_divergence') check_type(other, 'other', MultivariateNormalDiag, 'kl_divergence')
tr_cov_matmul = nn.reduce_sum(self._inv(other.scale) * self.scale) tr_cov_matmul = paddle.sum(self._inv(other.scale) * self.scale)
loc_matmul_cov = nn.matmul( loc_matmul_cov = nn.matmul(
(other.loc - self.loc), self._inv(other.scale) (other.loc - self.loc), self._inv(other.scale)
) )
......
...@@ -75,7 +75,6 @@ __all__ = [ ...@@ -75,7 +75,6 @@ __all__ = [
'batch_norm', 'batch_norm',
'instance_norm', 'instance_norm',
'data_norm', 'data_norm',
'reduce_sum',
'reduce_mean', 'reduce_mean',
'reduce_all', 'reduce_all',
'reduce_any', 'reduce_any',
......
...@@ -1788,14 +1788,16 @@ class LarsMomentumOptimizer(Optimizer): ...@@ -1788,14 +1788,16 @@ class LarsMomentumOptimizer(Optimizer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
inp = fluid.layers.data( inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False) name="inp", shape=[2, 2], append_batch_size=False)
out = fluid.layers.fc(inp, size=3) out = fluid.layers.fc(inp, size=3)
out = fluid.layers.reduce_sum(out) out = paddle.sum(out)
optimizer = fluid.optimizer.LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = fluid.optimizer.LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
optimizer.minimize(out) optimizer.minimize(out)
...@@ -2046,13 +2048,15 @@ class AdagradOptimizer(Optimizer): ...@@ -2046,13 +2048,15 @@ class AdagradOptimizer(Optimizer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
paddle.enable_static()
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
inp = fluid.data(name="inp", shape=[2, 2]) inp = fluid.data(name="inp", shape=[2, 2])
out = fluid.layers.fc(inp, size=3) out = fluid.layers.fc(inp, size=3)
out = fluid.layers.reduce_sum(out) out = paddle.sum(out)
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.2) optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.2)
optimizer.minimize(out) optimizer.minimize(out)
......
...@@ -186,7 +186,7 @@ class TestIfElse(unittest.TestCase): ...@@ -186,7 +186,7 @@ class TestIfElse(unittest.TestCase):
false_target = paddle.tanh(false_target) false_target = paddle.tanh(false_target)
ie.output(false_target) ie.output(false_target)
if_out = ie() if_out = ie()
out = layers.reduce_sum(if_out[0]) out = paddle.sum(if_out[0])
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
......
...@@ -947,8 +947,8 @@ class TransFormer(Layer): ...@@ -947,8 +947,8 @@ class TransFormer(Layer):
soft_label=True if self._label_smooth_eps else False, soft_label=True if self._label_smooth_eps else False,
) )
weighted_cost = cost * weights weighted_cost = cost * weights
sum_cost = fluid.layers.reduce_sum(weighted_cost) sum_cost = paddle.sum(weighted_cost)
token_num = fluid.layers.reduce_sum(weights) token_num = paddle.sum(weights)
token_num.stop_gradient = True token_num.stop_gradient = True
avg_cost = sum_cost / token_num avg_cost = sum_cost / token_num
return sum_cost, avg_cost, predict, token_num return sum_cost, avg_cost, predict, token_num
......
...@@ -56,7 +56,7 @@ def fake_simnet_reader(): ...@@ -56,7 +56,7 @@ def fake_simnet_reader():
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -1591,8 +1591,8 @@ def transformer( ...@@ -1591,8 +1591,8 @@ def transformer(
soft_label=True if label_smooth_eps else False, soft_label=True if label_smooth_eps else False,
) )
weighted_cost = cost * weights weighted_cost = cost * weights
sum_cost = layers.reduce_sum(weighted_cost) sum_cost = paddle.sum(weighted_cost)
token_num = layers.reduce_sum(weights) token_num = paddle.sum(weights)
avg_cost = sum_cost / token_num avg_cost = sum_cost / token_num
avg_cost.stop_gradient = True avg_cost.stop_gradient = True
return sum_cost, avg_cost, predict, token_num return sum_cost, avg_cost, predict, token_num
......
...@@ -302,7 +302,7 @@ class BaseModel(fluid.dygraph.Layer): ...@@ -302,7 +302,7 @@ class BaseModel(fluid.dygraph.Layer):
) )
loss = loss * tar_mask loss = loss * tar_mask
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss return loss
...@@ -405,7 +405,7 @@ class BaseModel(fluid.dygraph.Layer): ...@@ -405,7 +405,7 @@ class BaseModel(fluid.dygraph.Layer):
parent_ids = [] parent_ids = []
for step_idx in range(paddle.to_tensor(self.beam_max_step_num)): for step_idx in range(paddle.to_tensor(self.beam_max_step_num)):
if fluid.layers.reduce_sum(1 - beam_finished).numpy()[0] == 0: if paddle.sum(1 - beam_finished).numpy()[0] == 0:
break break
step_input = self._merge_batch_beams(step_input) step_input = self._merge_batch_beams(step_input)
new_dec_hidden, new_dec_cell = [], [] new_dec_hidden, new_dec_cell = [], []
...@@ -830,6 +830,6 @@ class AttentionModel(fluid.dygraph.Layer): ...@@ -830,6 +830,6 @@ class AttentionModel(fluid.dygraph.Layer):
) )
loss = loss * tar_mask loss = loss * tar_mask
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss return loss
...@@ -510,8 +510,8 @@ class BOW(Layer): ...@@ -510,8 +510,8 @@ class BOW(Layer):
right_emb, shape=[-1, self.seq_len, self.bow_dim] right_emb, shape=[-1, self.seq_len, self.bow_dim]
) )
bow_left = fluid.layers.reduce_sum(left_emb, dim=1) bow_left = paddle.sum(left_emb, axis=1)
bow_right = fluid.layers.reduce_sum(right_emb, dim=1) bow_right = paddle.sum(right_emb, axis=1)
softsign_layer = SoftsignLayer() softsign_layer = SoftsignLayer()
left_soft = softsign_layer.ops(bow_left) left_soft = softsign_layer.ops(bow_left)
right_soft = softsign_layer.ops(bow_right) right_soft = softsign_layer.ops(bow_right)
......
...@@ -497,8 +497,8 @@ class BOW(paddle.nn.Layer): ...@@ -497,8 +497,8 @@ class BOW(paddle.nn.Layer):
right_emb, shape=[-1, self.seq_len, self.bow_dim] right_emb, shape=[-1, self.seq_len, self.bow_dim]
) )
bow_left = paddle.fluid.layers.reduce_sum(left_emb, dim=1) bow_left = paddle.sum(left_emb, axis=1)
bow_right = paddle.fluid.layers.reduce_sum(right_emb, dim=1) bow_right = paddle.sum(right_emb, axis=1)
softsign_layer = SoftsignLayer() softsign_layer = SoftsignLayer()
left_soft = softsign_layer.ops(bow_left) left_soft = softsign_layer.ops(bow_left)
right_soft = softsign_layer.ops(bow_right) right_soft = softsign_layer.ops(bow_right)
......
...@@ -324,9 +324,7 @@ def bmn_loss_func( ...@@ -324,9 +324,7 @@ def bmn_loss_func(
num_entries = fluid.layers.cast( num_entries = fluid.layers.cast(
fluid.layers.shape(pmask), dtype=DATATYPE fluid.layers.shape(pmask), dtype=DATATYPE
) )
num_positive = fluid.layers.cast( num_positive = fluid.layers.cast(paddle.sum(pmask), dtype=DATATYPE)
fluid.layers.reduce_sum(pmask), dtype=DATATYPE
)
ratio = num_entries / num_positive ratio = num_entries / num_positive
coef_0 = 0.5 * ratio / (ratio - 1) coef_0 = 0.5 * ratio / (ratio - 1)
coef_1 = 0.5 * ratio coef_1 = 0.5 * ratio
...@@ -359,15 +357,9 @@ def bmn_loss_func( ...@@ -359,15 +357,9 @@ def bmn_loss_func(
u_lmask = fluid.layers.cast(x=u_lmask, dtype=DATATYPE) u_lmask = fluid.layers.cast(x=u_lmask, dtype=DATATYPE)
u_lmask = fluid.layers.elementwise_mul(u_lmask, mask) u_lmask = fluid.layers.elementwise_mul(u_lmask, mask)
num_h = fluid.layers.cast( num_h = fluid.layers.cast(paddle.sum(u_hmask), dtype=DATATYPE)
fluid.layers.reduce_sum(u_hmask), dtype=DATATYPE num_m = fluid.layers.cast(paddle.sum(u_mmask), dtype=DATATYPE)
) num_l = fluid.layers.cast(paddle.sum(u_lmask), dtype=DATATYPE)
num_m = fluid.layers.cast(
fluid.layers.reduce_sum(u_mmask), dtype=DATATYPE
)
num_l = fluid.layers.cast(
fluid.layers.reduce_sum(u_lmask), dtype=DATATYPE
)
r_m = num_h / num_m r_m = num_h / num_m
u_smmask = fluid.layers.assign( u_smmask = fluid.layers.assign(
...@@ -391,11 +383,7 @@ def bmn_loss_func( ...@@ -391,11 +383,7 @@ def bmn_loss_func(
weights.stop_gradient = True weights.stop_gradient = True
loss = fluid.layers.square_error_cost(pred_score, gt_iou_map) loss = fluid.layers.square_error_cost(pred_score, gt_iou_map)
loss = fluid.layers.elementwise_mul(loss, weights) loss = fluid.layers.elementwise_mul(loss, weights)
loss = ( loss = 0.5 * paddle.sum(loss) / paddle.sum(weights)
0.5
* fluid.layers.reduce_sum(loss)
/ fluid.layers.reduce_sum(weights)
)
return loss return loss
...@@ -406,8 +394,8 @@ def bmn_loss_func( ...@@ -406,8 +394,8 @@ def bmn_loss_func(
nmask = fluid.layers.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE) nmask = fluid.layers.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE)
nmask = fluid.layers.elementwise_mul(nmask, mask) nmask = fluid.layers.elementwise_mul(nmask, mask)
num_positive = fluid.layers.reduce_sum(pmask) num_positive = paddle.sum(pmask)
num_entries = num_positive + fluid.layers.reduce_sum(nmask) num_entries = num_positive + paddle.sum(nmask)
ratio = num_entries / num_positive ratio = num_entries / num_positive
coef_0 = 0.5 * ratio / (ratio - 1) coef_0 = 0.5 * ratio / (ratio - 1)
coef_1 = 0.5 * ratio coef_1 = 0.5 * ratio
...@@ -415,11 +403,11 @@ def bmn_loss_func( ...@@ -415,11 +403,11 @@ def bmn_loss_func(
loss_pos = fluid.layers.elementwise_mul( loss_pos = fluid.layers.elementwise_mul(
fluid.layers.log(pred_score + epsilon), pmask fluid.layers.log(pred_score + epsilon), pmask
) )
loss_pos = coef_1 * fluid.layers.reduce_sum(loss_pos) loss_pos = coef_1 * paddle.sum(loss_pos)
loss_neg = fluid.layers.elementwise_mul( loss_neg = fluid.layers.elementwise_mul(
fluid.layers.log(1.0 - pred_score + epsilon), nmask fluid.layers.log(1.0 - pred_score + epsilon), nmask
) )
loss_neg = coef_0 * fluid.layers.reduce_sum(loss_neg) loss_neg = coef_0 * paddle.sum(loss_neg)
loss = -1 * (loss_pos + loss_neg) / num_entries loss = -1 * (loss_pos + loss_neg) / num_entries
return loss return loss
......
...@@ -220,7 +220,7 @@ class PtbModel(fluid.Layer): ...@@ -220,7 +220,7 @@ class PtbModel(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss, last_hidden, last_cell return loss, last_hidden, last_cell
......
...@@ -214,7 +214,7 @@ class PtbModel(paddle.nn.Layer): ...@@ -214,7 +214,7 @@ class PtbModel(paddle.nn.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = paddle.mean(loss, axis=[0]) loss = paddle.mean(loss, axis=[0])
loss = paddle.fluid.layers.reduce_sum(loss) loss = paddle.paddle.sum(loss)
return loss, last_hidden, last_cell return loss, last_hidden, last_cell
......
...@@ -123,7 +123,7 @@ def train(args, place, to_static): ...@@ -123,7 +123,7 @@ def train(args, place, to_static):
loss_probs = fluid.layers.log(loss_probs) loss_probs = fluid.layers.log(loss_probs)
loss_probs = fluid.layers.elementwise_mul(loss_probs, mask) loss_probs = fluid.layers.elementwise_mul(loss_probs, mask)
loss_probs = fluid.layers.reduce_sum(loss_probs, dim=-1) loss_probs = paddle.sum(loss_probs, axis=-1)
policy.saved_log_probs.append(loss_probs) policy.saved_log_probs.append(loss_probs)
return action, loss_probs return action, loss_probs
...@@ -153,7 +153,7 @@ def train(args, place, to_static): ...@@ -153,7 +153,7 @@ def train(args, place, to_static):
policy_loss.append(cur_loss) policy_loss.append(cur_loss)
policy_loss = fluid.layers.concat(policy_loss) policy_loss = fluid.layers.concat(policy_loss)
policy_loss = fluid.layers.reduce_sum(policy_loss) policy_loss = paddle.sum(policy_loss)
policy_loss.backward() policy_loss.backward()
optimizer.minimize(policy_loss) optimizer.minimize(policy_loss)
......
...@@ -144,7 +144,7 @@ class BOW(fluid.dygraph.Layer): ...@@ -144,7 +144,7 @@ class BOW(fluid.dygraph.Layer):
mask_emb = paddle.expand(o_np_mask, [-1, self.hid_dim]) mask_emb = paddle.expand(o_np_mask, [-1, self.hid_dim])
emb = emb * mask_emb emb = emb * mask_emb
emb = paddle.reshape(emb, shape=[-1, self.seq_len, self.hid_dim]) emb = paddle.reshape(emb, shape=[-1, self.seq_len, self.hid_dim])
bow_1 = fluid.layers.reduce_sum(emb, dim=1) bow_1 = paddle.sum(emb, axis=1)
bow_1 = paddle.tanh(bow_1) bow_1 = paddle.tanh(bow_1)
fc_1 = self._fc1(bow_1) fc_1 = self._fc1(bow_1)
fc_2 = self._fc2(fc_1) fc_2 = self._fc2(fc_1)
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import math import math
import random import random
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import unittest import unittest
...@@ -259,7 +260,7 @@ class SkipGram(fluid.dygraph.Layer): ...@@ -259,7 +260,7 @@ class SkipGram(fluid.dygraph.Layer):
word_sim = fluid.layers.elementwise_mul( word_sim = fluid.layers.elementwise_mul(
center_words_emb, target_words_emb center_words_emb, target_words_emb
) )
word_sim = fluid.layers.reduce_sum(word_sim, dim=-1) word_sim = paddle.sum(word_sim, axis=-1)
pred = paddle.nn.functional.sigmoid(word_sim) pred = paddle.nn.functional.sigmoid(word_sim)
......
...@@ -586,8 +586,8 @@ class CrossEntropyCriterion: ...@@ -586,8 +586,8 @@ class CrossEntropyCriterion:
soft_label=True if self.label_smooth_eps else False, soft_label=True if self.label_smooth_eps else False,
) )
weighted_cost = cost * weights weighted_cost = cost * weights
sum_cost = layers.reduce_sum(weighted_cost) sum_cost = paddle.sum(weighted_cost)
token_num = layers.reduce_sum(weights) token_num = paddle.sum(weights)
token_num.stop_gradient = True token_num.stop_gradient = True
avg_cost = sum_cost / token_num avg_cost = sum_cost / token_num
return sum_cost, avg_cost, token_num return sum_cost, avg_cost, token_num
......
...@@ -134,7 +134,7 @@ class TestMin(TestMean): ...@@ -134,7 +134,7 @@ class TestMin(TestMean):
class TestSum(TestMean): class TestSum(TestMean):
def set_test_op(self): def set_test_op(self):
self.op = paddle.fluid.layers.reduce_sum self.op = paddle.paddle.sum
class TestLogsumexp(TestMean): class TestLogsumexp(TestMean):
......
...@@ -17,6 +17,7 @@ import unittest ...@@ -17,6 +17,7 @@ import unittest
import numpy as np import numpy as np
from inference_pass_test import InferencePassTest from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig, PassVersionChecker from paddle.fluid.core import AnalysisConfig, PassVersionChecker
...@@ -28,9 +29,7 @@ class TRTReduceSumTest(InferencePassTest): ...@@ -28,9 +29,7 @@ class TRTReduceSumTest(InferencePassTest):
data = fluid.data( data = fluid.data(
name="data", shape=[-1, 3, 10, 192], dtype="float32" name="data", shape=[-1, 3, 10, 192], dtype="float32"
) )
reduce_sum = fluid.layers.reduce_sum( reduce_sum = paddle.sum(data, axis=[2, -1], keepdim=True)
data, dim=[2, -1], keep_dim=True
)
out = fluid.layers.batch_norm(reduce_sum, is_test=True) out = fluid.layers.batch_norm(reduce_sum, is_test=True)
self.feeds = { self.feeds = {
...@@ -63,7 +62,7 @@ class TRTReduceSumAllTest(InferencePassTest): ...@@ -63,7 +62,7 @@ class TRTReduceSumAllTest(InferencePassTest):
data = fluid.data( data = fluid.data(
name="data", shape=[-1, 3, 10, 192], dtype="float32" name="data", shape=[-1, 3, 10, 192], dtype="float32"
) )
reduce_sum = fluid.layers.reduce_sum(data, keep_dim=True) reduce_sum = paddle.sum(data, keepdim=True)
out = fluid.layers.batch_norm(reduce_sum, is_test=True) out = fluid.layers.batch_norm(reduce_sum, is_test=True)
self.feeds = { self.feeds = {
......
...@@ -98,7 +98,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): ...@@ -98,7 +98,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
if self.bn_dtype == np.float16: if self.bn_dtype == np.float16:
bn = fluid.layers.cast(bn, 'float32') bn = fluid.layers.cast(bn, 'float32')
sigmoid = paddle.nn.functional.sigmoid(bn) sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid) out = paddle.sum(sigmoid)
# if not sync_bn: # if not sync_bn:
# out = out / core.get_mlu_device_count() # out = out / core.get_mlu_device_count()
if not only_forward: if not only_forward:
......
...@@ -100,7 +100,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): ...@@ -100,7 +100,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
# if self.dtype == np.float16: # if self.dtype == np.float16:
# bn = fluid.layers.cast(bn, 'float32') # bn = fluid.layers.cast(bn, 'float32')
sigmoid = paddle.nn.functional.sigmoid(bn) sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid) out = paddle.sum(sigmoid)
# if not sync_bn: # if not sync_bn:
# out = out / core.get_npu_device_count() # out = out / core.get_npu_device_count()
if not only_forward: if not only_forward:
......
...@@ -85,7 +85,7 @@ class TestReduceSum2(OpTest): ...@@ -85,7 +85,7 @@ class TestReduceSum2(OpTest):
class TestReduceSumNet(unittest.TestCase): class TestReduceSumNet(unittest.TestCase):
def set_reduce_sum_function(self, x): def set_reduce_sum_function(self, x):
# keep_dim = False # keep_dim = False
return paddle.fluid.layers.reduce_sum(x, dim=-1) return paddle.sum(x, axis=-1)
def _test(self, run_npu=True): def _test(self, run_npu=True):
main_prog = paddle.static.Program() main_prog = paddle.static.Program()
...@@ -153,7 +153,7 @@ class TestReduceSumNet(unittest.TestCase): ...@@ -153,7 +153,7 @@ class TestReduceSumNet(unittest.TestCase):
class TestReduceSumNet2(TestReduceSumNet): class TestReduceSumNet2(TestReduceSumNet):
def set_reduce_sum_function(self, x): def set_reduce_sum_function(self, x):
# keep_dim = True # keep_dim = True
return paddle.fluid.layers.reduce_sum(x, dim=-1, keep_dim=True) return paddle.sum(x, axis=-1, keepdim=True)
class TestReduceSumNet3(TestReduceSumNet): class TestReduceSumNet3(TestReduceSumNet):
...@@ -172,7 +172,7 @@ class TestReduceSumNet3(TestReduceSumNet): ...@@ -172,7 +172,7 @@ class TestReduceSumNet3(TestReduceSumNet):
b = paddle.static.data(name="b", shape=[2, 3, 4], dtype='float32') b = paddle.static.data(name="b", shape=[2, 3, 4], dtype='float32')
z = paddle.add(a, b) z = paddle.add(a, b)
loss = fluid.layers.reduce_sum(z) loss = paddle.sum(z)
sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss) sgd.minimize(loss)
......
...@@ -73,7 +73,7 @@ class SimpleNet(fluid.Layer): ...@@ -73,7 +73,7 @@ class SimpleNet(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss return loss
......
...@@ -95,7 +95,7 @@ class TestArgsortOpCPU(unittest.TestCase): ...@@ -95,7 +95,7 @@ class TestArgsortOpCPU(unittest.TestCase):
) )
self.sorted_x.stop_gradient = False self.sorted_x.stop_gradient = False
loss = fluid.layers.elementwise_mul(self.sorted_x, label) loss = fluid.layers.elementwise_mul(self.sorted_x, label)
self.loss = fluid.layers.reduce_sum(loss) self.loss = paddle.sum(loss)
def forward(self): def forward(self):
self.feed_map = { self.feed_map = {
......
...@@ -151,7 +151,7 @@ class TestDistFleetHeterProgram(unittest.TestCase): ...@@ -151,7 +151,7 @@ class TestDistFleetHeterProgram(unittest.TestCase):
with fluid.device_guard("gpu"): with fluid.device_guard("gpu"):
labels = fluid.layers.cast(inputs[-1], dtype="int64") labels = fluid.layers.cast(inputs[-1], dtype="int64")
cost = fluid.layers.cross_entropy(input=predict, label=labels) cost = fluid.layers.cross_entropy(input=predict, label=labels)
avg_cost = fluid.layers.reduce_sum(cost) avg_cost = paddle.sum(cost)
return avg_cost return avg_cost
......
...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -41,7 +41,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -41,7 +41,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -42,7 +42,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -42,7 +42,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -41,7 +41,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -41,7 +41,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64') cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond) cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div( acc = fluid.layers.elementwise_div(
cond_3, cond_3,
fluid.layers.fill_constant( fluid.layers.fill_constant(
......
...@@ -405,7 +405,7 @@ class TestFakeInit(TranspilerTest): ...@@ -405,7 +405,7 @@ class TestFakeInit(TranspilerTest):
neg_emb_b_vec = paddle.reshape(neg_emb_b, shape=[-1, neg_num]) neg_emb_b_vec = paddle.reshape(neg_emb_b, shape=[-1, neg_num])
true_logits = fluid.layers.elementwise_add( true_logits = fluid.layers.elementwise_add(
fluid.layers.reduce_sum( paddle.sum(
fluid.layers.elementwise_mul(input_emb, true_emb_w), fluid.layers.elementwise_mul(input_emb, true_emb_w),
dim=1, dim=1,
keep_dim=True, keep_dim=True,
...@@ -435,8 +435,8 @@ class TestFakeInit(TranspilerTest): ...@@ -435,8 +435,8 @@ class TestFakeInit(TranspilerTest):
neg_logits, label_zeros neg_logits, label_zeros
) )
cost = fluid.layers.elementwise_add( cost = fluid.layers.elementwise_add(
fluid.layers.reduce_sum(true_xent, dim=1), paddle.sum(true_xent, axis=1),
fluid.layers.reduce_sum(neg_xent, dim=1), paddle.sum(neg_xent, axis=1),
) )
avg_cost = fluid.layers.reduce_mean(cost) avg_cost = fluid.layers.reduce_mean(cost)
......
...@@ -468,7 +468,7 @@ def lm_model( ...@@ -468,7 +468,7 @@ def lm_model(
loss = paddle.reshape(loss, shape=[-1, num_steps]) loss = paddle.reshape(loss, shape=[-1, num_steps])
loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_mean(loss, dim=[0])
loss = layers.reduce_sum(loss) loss = paddle.sum(loss)
loss.persistable = True loss.persistable = True
last_cell.persistable = True last_cell.persistable = True
......
...@@ -592,7 +592,7 @@ class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): ...@@ -592,7 +592,7 @@ class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1):
mem_pre = rnn_1.memory(shape=[-1, self.input_dim], batch_ref=x) mem_pre = rnn_1.memory(shape=[-1, self.input_dim], batch_ref=x)
x_t = rnn_1.step_input(x) x_t = rnn_1.step_input(x)
last_rnn_output = rnn_0() last_rnn_output = rnn_0()
last_rnn_sum = fluid.layers.reduce_sum(last_rnn_output) last_rnn_sum = paddle.sum(last_rnn_output)
mem = layers.elementwise_add(x=x_t, y=last_rnn_sum) mem = layers.elementwise_add(x=x_t, y=last_rnn_sum)
y = layers.elementwise_add(x=mem_pre, y=mem) y = layers.elementwise_add(x=mem_pre, y=mem)
rnn_1.update_memory(mem_pre, mem) rnn_1.update_memory(mem_pre, mem)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.incubate.fleet.base.role_maker as role_maker import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import ( from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import (
...@@ -55,7 +56,7 @@ class TestPyramidHashOpApi(unittest.TestCase): ...@@ -55,7 +56,7 @@ class TestPyramidHashOpApi(unittest.TestCase):
name=None, name=None,
) )
cost = fluid.layers.reduce_sum(hash_embd) cost = paddle.sum(hash_embd)
role = role_maker.UserDefinedRoleMaker( role = role_maker.UserDefinedRoleMaker(
current_id=0, current_id=0,
......
...@@ -34,7 +34,7 @@ class MyLayer(fluid.Layer): ...@@ -34,7 +34,7 @@ class MyLayer(fluid.Layer):
x = fluid.layers.relu(inputs) x = fluid.layers.relu(inputs)
self._x_for_debug = x self._x_for_debug = x
x = fluid.layers.elementwise_mul(x, x) x = fluid.layers.elementwise_mul(x, x)
x = fluid.layers.reduce_sum(x) x = paddle.sum(x)
return [x] return [x]
...@@ -65,7 +65,7 @@ class MLP(fluid.Layer): ...@@ -65,7 +65,7 @@ class MLP(fluid.Layer):
def forward(self, inputs): def forward(self, inputs):
x = self._linear1(inputs) x = self._linear1(inputs)
x = self._linear2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = paddle.sum(x)
return x return x
...@@ -108,7 +108,7 @@ class SimpleRNNCell(fluid.Layer): ...@@ -108,7 +108,7 @@ class SimpleRNNCell(fluid.Layer):
hidden = self._helper.append_activation(hidden, act='tanh') hidden = self._helper.append_activation(hidden, act='tanh')
out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w) out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w)
softmax_out = paddle.nn.functional.softmax(out) softmax_out = paddle.nn.functional.softmax(out)
reduce_out = paddle.fluid.layers.nn.reduce_sum(softmax_out) reduce_out = paddle.sum(softmax_out)
return reduce_out, hidden return reduce_out, hidden
...@@ -342,7 +342,7 @@ class TestImperative(unittest.TestCase): ...@@ -342,7 +342,7 @@ class TestImperative(unittest.TestCase):
tmp.stop_gradient = False tmp.stop_gradient = False
inputs.append(tmp) inputs.append(tmp)
ret = paddle.add_n(inputs) ret = paddle.add_n(inputs)
loss = fluid.layers.reduce_sum(ret) loss = paddle.sum(ret)
loss.backward() loss.backward()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inputs2 = [] inputs2 = []
...@@ -351,7 +351,7 @@ class TestImperative(unittest.TestCase): ...@@ -351,7 +351,7 @@ class TestImperative(unittest.TestCase):
tmp.stop_gradient = False tmp.stop_gradient = False
inputs2.append(tmp) inputs2.append(tmp)
ret2 = paddle.add_n(inputs2) ret2 = paddle.add_n(inputs2)
loss2 = fluid.layers.reduce_sum(ret2) loss2 = paddle.sum(ret2)
fluid.set_flags({'FLAGS_sort_sum_gradient': True}) fluid.set_flags({'FLAGS_sort_sum_gradient': True})
loss2.backward() loss2.backward()
...@@ -739,11 +739,11 @@ class TestImperative(unittest.TestCase): ...@@ -739,11 +739,11 @@ class TestImperative(unittest.TestCase):
) )
a = paddle.expand( a = paddle.expand(
paddle.reshape(fluid.layers.reduce_sum(inp_data1), [1, 1]), paddle.reshape(paddle.sum(inp_data1), [1, 1]),
[4, -1], [4, -1],
) )
b = paddle.expand( b = paddle.expand(
paddle.reshape(fluid.layers.reduce_sum(inp_data2), [1, 1]), paddle.reshape(paddle.sum(inp_data2), [1, 1]),
[4, -1], [4, -1],
) )
cond = fluid.layers.less_than(x=a, y=b) cond = fluid.layers.less_than(x=a, y=b)
......
...@@ -254,9 +254,7 @@ class TestDygraphDeepCF(unittest.TestCase): ...@@ -254,9 +254,7 @@ class TestDygraphDeepCF(unittest.TestCase):
deepcf = DeepCF(num_users, num_items, matrix) deepcf = DeepCF(num_users, num_items, matrix)
prediction = deepcf(users, items) prediction = deepcf(users, items)
loss = fluid.layers.reduce_sum( loss = paddle.sum(fluid.layers.log_loss(prediction, labels))
fluid.layers.log_loss(prediction, labels)
)
adam = fluid.optimizer.AdamOptimizer(0.01) adam = fluid.optimizer.AdamOptimizer(0.01)
adam.minimize(loss) adam.minimize(loss)
...@@ -309,7 +307,7 @@ class TestDygraphDeepCF(unittest.TestCase): ...@@ -309,7 +307,7 @@ class TestDygraphDeepCF(unittest.TestCase):
to_variable(users_np[slice : slice + self.batch_size]), to_variable(users_np[slice : slice + self.batch_size]),
to_variable(items_np[slice : slice + self.batch_size]), to_variable(items_np[slice : slice + self.batch_size]),
) )
loss = fluid.layers.reduce_sum( loss = paddle.sum(
fluid.layers.log_loss( fluid.layers.log_loss(
prediction, prediction,
to_variable( to_variable(
...@@ -343,7 +341,7 @@ class TestDygraphDeepCF(unittest.TestCase): ...@@ -343,7 +341,7 @@ class TestDygraphDeepCF(unittest.TestCase):
to_variable(users_np[slice : slice + self.batch_size]), to_variable(users_np[slice : slice + self.batch_size]),
to_variable(items_np[slice : slice + self.batch_size]), to_variable(items_np[slice : slice + self.batch_size]),
) )
loss2 = fluid.layers.reduce_sum( loss2 = paddle.sum(
fluid.layers.log_loss( fluid.layers.log_loss(
prediction2, prediction2,
to_variable( to_variable(
...@@ -386,7 +384,7 @@ class TestDygraphDeepCF(unittest.TestCase): ...@@ -386,7 +384,7 @@ class TestDygraphDeepCF(unittest.TestCase):
items_np[slice : slice + self.batch_size] items_np[slice : slice + self.batch_size]
), ),
) )
loss = fluid.layers.reduce_sum( loss = paddle.sum(
fluid.layers.log_loss( fluid.layers.log_loss(
prediction, prediction,
to_variable( to_variable(
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
...@@ -46,7 +47,7 @@ class MLP(fluid.Layer): ...@@ -46,7 +47,7 @@ class MLP(fluid.Layer):
def forward(self, inputs): def forward(self, inputs):
x = self._linear1(inputs) x = self._linear1(inputs)
x = self._linear2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = paddle.sum(x)
return x return x
......
...@@ -96,7 +96,7 @@ class TestDygraphGNN(unittest.TestCase): ...@@ -96,7 +96,7 @@ class TestDygraphGNN(unittest.TestCase):
# In other example, it's nll with log_softmax. However, paddle's # In other example, it's nll with log_softmax. However, paddle's
# log_loss only supports binary classification now. # log_loss only supports binary classification now.
loss = fluid.layers.softmax_with_cross_entropy(logits, labels) loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
adam = AdamOptimizer(learning_rate=1e-3) adam = AdamOptimizer(learning_rate=1e-3)
adam.minimize(loss) adam.minimize(loss)
...@@ -136,7 +136,7 @@ class TestDygraphGNN(unittest.TestCase): ...@@ -136,7 +136,7 @@ class TestDygraphGNN(unittest.TestCase):
loss = fluid.layers.softmax_with_cross_entropy( loss = fluid.layers.softmax_with_cross_entropy(
logits, to_variable(labels) logits, to_variable(labels)
) )
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
loss.backward() loss.backward()
adam = AdamOptimizer( adam = AdamOptimizer(
learning_rate=1e-3, parameter_list=model.parameters() learning_rate=1e-3, parameter_list=model.parameters()
...@@ -164,7 +164,7 @@ class TestDygraphGNN(unittest.TestCase): ...@@ -164,7 +164,7 @@ class TestDygraphGNN(unittest.TestCase):
loss2 = fluid.layers.softmax_with_cross_entropy( loss2 = fluid.layers.softmax_with_cross_entropy(
logits2, to_variable(labels2) logits2, to_variable(labels2)
) )
loss2 = fluid.layers.reduce_sum(loss2) loss2 = paddle.sum(loss2)
loss2.backward() loss2.backward()
adam2 = AdamOptimizer( adam2 = AdamOptimizer(
learning_rate=1e-3, parameter_list=model2.parameters() learning_rate=1e-3, parameter_list=model2.parameters()
......
...@@ -72,7 +72,7 @@ class SimpleNet(fluid.Layer): ...@@ -72,7 +72,7 @@ class SimpleNet(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss return loss
......
...@@ -325,7 +325,7 @@ class SimpleAttention(fluid.dygraph.Layer): ...@@ -325,7 +325,7 @@ class SimpleAttention(fluid.dygraph.Layer):
scaled = fluid.layers.elementwise_mul( scaled = fluid.layers.elementwise_mul(
x=encoder_vec, y=weights_reshape, axis=0 x=encoder_vec, y=weights_reshape, axis=0
) )
context = fluid.layers.reduce_sum(scaled, dim=1) context = paddle.sum(scaled, axis=1)
return context return context
...@@ -498,7 +498,7 @@ class TestDygraphOCRAttention(unittest.TestCase): ...@@ -498,7 +498,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
loss = fluid.layers.cross_entropy( loss = fluid.layers.cross_entropy(
input=dy_prediction, label=label_out input=dy_prediction, label=label_out
) )
avg_loss = fluid.layers.reduce_sum(loss) avg_loss = paddle.sum(loss)
dy_out = avg_loss.numpy() dy_out = avg_loss.numpy()
...@@ -576,7 +576,7 @@ class TestDygraphOCRAttention(unittest.TestCase): ...@@ -576,7 +576,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
cost = fluid.layers.cross_entropy( cost = fluid.layers.cross_entropy(
input=static_prediction, label=static_label_out input=static_prediction, label=static_label_out
) )
static_avg_loss = fluid.layers.reduce_sum(cost) static_avg_loss = paddle.sum(cost)
# param_grad_list = fluid.backward.append_backward(static_avg_loss) # param_grad_list = fluid.backward.append_backward(static_avg_loss)
optimizer.minimize(static_avg_loss) optimizer.minimize(static_avg_loss)
......
...@@ -16,6 +16,7 @@ import unittest ...@@ -16,6 +16,7 @@ import unittest
import numpy as np import numpy as np
from collections import OrderedDict from collections import OrderedDict
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.dygraph.parallel import DataParallel from paddle.fluid.dygraph.parallel import DataParallel
...@@ -34,7 +35,7 @@ class MyLayer(fluid.Layer): ...@@ -34,7 +35,7 @@ class MyLayer(fluid.Layer):
def forward(self, inputs): def forward(self, inputs):
x = fluid.layers.relu(inputs) x = fluid.layers.relu(inputs)
x = fluid.layers.elementwise_mul(x, x) x = fluid.layers.elementwise_mul(x, x)
x = fluid.layers.reduce_sum(x) x = paddle.sum(x)
return [x] return [x]
......
...@@ -231,7 +231,7 @@ class PtbModel(fluid.Layer): ...@@ -231,7 +231,7 @@ class PtbModel(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss, last_hidden, last_cell return loss, last_hidden, last_cell
......
...@@ -28,7 +28,7 @@ class RecurrentTest(fluid.Layer): ...@@ -28,7 +28,7 @@ class RecurrentTest(fluid.Layer):
def forward(self, in1, in2): def forward(self, in1, in2):
out = fluid.layers.mul(in1, in2) out = fluid.layers.mul(in1, in2)
sum_out = fluid.layers.reduce_sum(out) sum_out = paddle.sum(out)
return sum_out, out return sum_out, out
......
...@@ -73,13 +73,13 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -73,13 +73,13 @@ class TestImperativeMnist(unittest.TestCase):
loss_probs = fluid.layers.log(loss_probs) loss_probs = fluid.layers.log(loss_probs)
loss_probs = fluid.layers.elementwise_mul(loss_probs, dy_mask) loss_probs = fluid.layers.elementwise_mul(loss_probs, dy_mask)
loss_probs = fluid.layers.reduce_sum(loss_probs, dim=-1) loss_probs = paddle.sum(loss_probs, axis=-1)
dy_reward = fluid.dygraph.base.to_variable(reward) dy_reward = fluid.dygraph.base.to_variable(reward)
dy_reward.stop_gradient = True dy_reward.stop_gradient = True
loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs) loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs)
loss = fluid.layers.reduce_sum(loss_probs) loss = paddle.sum(loss_probs)
sgd = SGDOptimizer( sgd = SGDOptimizer(
learning_rate=1e-3, parameter_list=policy.parameters() learning_rate=1e-3, parameter_list=policy.parameters()
...@@ -141,12 +141,12 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -141,12 +141,12 @@ class TestImperativeMnist(unittest.TestCase):
st_loss_probs = fluid.layers.log(st_loss_probs) st_loss_probs = fluid.layers.log(st_loss_probs)
st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs, st_mask) st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs, st_mask)
st_loss_probs = fluid.layers.reduce_sum(st_loss_probs, dim=-1) st_loss_probs = paddle.sum(st_loss_probs, axis=-1)
st_loss_probs = fluid.layers.elementwise_mul( st_loss_probs = fluid.layers.elementwise_mul(
st_reward, st_loss_probs st_reward, st_loss_probs
) )
st_loss = fluid.layers.reduce_sum(st_loss_probs) st_loss = paddle.sum(st_loss_probs)
st_sgd.minimize(st_loss) st_sgd.minimize(st_loss)
......
...@@ -227,7 +227,7 @@ class PtbModel(fluid.Layer): ...@@ -227,7 +227,7 @@ class PtbModel(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss, last_hidden, last_cell return loss, last_hidden, last_cell
......
...@@ -230,7 +230,7 @@ class PtbModel(fluid.Layer): ...@@ -230,7 +230,7 @@ class PtbModel(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss, last_hidden, last_cell return loss, last_hidden, last_cell
......
...@@ -81,7 +81,7 @@ class SimpleNet(fluid.Layer): ...@@ -81,7 +81,7 @@ class SimpleNet(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss return loss
......
...@@ -379,9 +379,7 @@ def loss_cls(cls, label, cfg): ...@@ -379,9 +379,7 @@ def loss_cls(cls, label, cfg):
cls_shape = cls.shape cls_shape = cls.shape
cls = paddle.reshape(cls, [-1, cls_shape[1] * cls_shape[2] * cls_shape[3]]) cls = paddle.reshape(cls, [-1, cls_shape[1] * cls_shape[2] * cls_shape[3]])
return ( return (
fluid.layers.reduce_sum( paddle.sum(fluid.layers.sigmoid_cross_entropy_with_logits(cls, label))
fluid.layers.sigmoid_cross_entropy_with_logits(cls, label)
)
/ cfg.batch_size / cfg.batch_size
) )
......
...@@ -1102,8 +1102,8 @@ class TransFormer(Layer): ...@@ -1102,8 +1102,8 @@ class TransFormer(Layer):
soft_label=True if self._label_smooth_eps else False, soft_label=True if self._label_smooth_eps else False,
) )
weighted_cost = cost * weights weighted_cost = cost * weights
sum_cost = fluid.layers.reduce_sum(weighted_cost) sum_cost = paddle.sum(weighted_cost)
token_num = fluid.layers.reduce_sum(weights) token_num = paddle.sum(weights)
token_num.stop_gradient = True token_num.stop_gradient = True
avg_cost = sum_cost / token_num avg_cost = sum_cost / token_num
return sum_cost, avg_cost, predict, token_num return sum_cost, avg_cost, predict, token_num
......
...@@ -73,9 +73,8 @@ class TestInplaceANBOpTraining(unittest.TestCase): ...@@ -73,9 +73,8 @@ class TestInplaceANBOpTraining(unittest.TestCase):
# may have same name, multiply 1. to generate # may have same name, multiply 1. to generate
# a new Variable for fetch # a new Variable for fetch
bn = bn * 1.0 bn = bn * 1.0
sigmoid = paddle.nn.functional.sigmoid(bn) sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid) out = paddle.sum(sigmoid)
if not only_forward: if not only_forward:
sgd_opt = fluid.optimizer.SGD(learning_rate=0.0) sgd_opt = fluid.optimizer.SGD(learning_rate=0.0)
sgd_opt.backward(out) sgd_opt.backward(out)
......
...@@ -69,7 +69,7 @@ def create_program(data_format="NCHW"): ...@@ -69,7 +69,7 @@ def create_program(data_format="NCHW"):
) )
y = conv(x) + x y = conv(x) + x
loss = fluid.layers.reduce_sum(y) loss = paddle.sum(y)
sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss) sgd.minimize(loss)
......
...@@ -213,7 +213,7 @@ class TestLookupTableIsSparse(unittest.TestCase): ...@@ -213,7 +213,7 @@ class TestLookupTableIsSparse(unittest.TestCase):
), ),
is_sparse=is_sparse, is_sparse=is_sparse,
) )
y = fluid.layers.reduce_sum(emb, dim=-1) y = paddle.sum(emb, axis=-1)
loss = fluid.layers.square_error_cost(input=y, label=y_) loss = fluid.layers.square_error_cost(input=y, label=y_)
loss = paddle.mean(loss) loss = paddle.mean(loss)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
...@@ -50,7 +51,7 @@ class TestMultiheadAttention(unittest.TestCase): ...@@ -50,7 +51,7 @@ class TestMultiheadAttention(unittest.TestCase):
num_heads=8, num_heads=8,
dropout_rate=0.0, dropout_rate=0.0,
) )
out = fluid.layers.reduce_sum(contexts, dim=None) out = paddle.sum(contexts, axis=None)
fluid.backward.append_backward(loss=out) fluid.backward.append_backward(loss=out)
self.fetch_list = [contexts] self.fetch_list = [contexts]
......
...@@ -98,7 +98,7 @@ class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase): ...@@ -98,7 +98,7 @@ class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase):
x = layers.data('x', shape, False, dtype) x = layers.data('x', shape, False, dtype)
x.persistable = True x.persistable = True
y = layers.reduce_sum(x, dim=0) y = paddle.sum(x, axis=0)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype) x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check( gradient_checker.double_grad_check(
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
...@@ -37,7 +38,7 @@ class TestNormalization(unittest.TestCase): ...@@ -37,7 +38,7 @@ class TestNormalization(unittest.TestCase):
) )
data.stop_gradient = False data.stop_gradient = False
l2_norm = fluid.layers.l2_normalize(x=data, axis=axis, epsilon=epsilon) l2_norm = fluid.layers.l2_normalize(x=data, axis=axis, epsilon=epsilon)
out = fluid.layers.reduce_sum(l2_norm, dim=None) out = paddle.sum(l2_norm, axis=None)
fluid.backward.append_backward(loss=out) fluid.backward.append_backward(loss=out)
self.fetch_list = [l2_norm] self.fetch_list = [l2_norm]
......
...@@ -914,10 +914,10 @@ class TestReduceSumOpError(unittest.TestCase): ...@@ -914,10 +914,10 @@ class TestReduceSumOpError(unittest.TestCase):
x1 = fluid.create_lod_tensor( x1 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.CPUPlace() np.array([[-1]]), [[1]], fluid.CPUPlace()
) )
self.assertRaises(TypeError, fluid.layers.reduce_sum, x1) self.assertRaises(TypeError, paddle.sum, x1)
# The input dtype of reduce_sum_op must be float32 or float64 or int32 or int64. # The input dtype of reduce_sum_op must be float32 or float64 or int32 or int64.
x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8")
self.assertRaises(TypeError, fluid.layers.reduce_sum, x2) self.assertRaises(TypeError, paddle.sum, x2)
class API_TestSumOp(unittest.TestCase): class API_TestSumOp(unittest.TestCase):
......
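As a quick reference for the substitution applied throughout this PR, a minimal sketch of the argument mapping (`dim`/`keep_dim` become `axis`/`keepdim`); the input tensor is illustrative, and the `uint8` comment only restates the expectation of the static-graph test above:

```python
import paddle

x = paddle.ones([2, 3], dtype='float32')

# old: fluid.layers.reduce_sum(x, dim=-1, keep_dim=True)
y = paddle.sum(x, axis=-1, keepdim=True)   # shape [2, 1]

# old: fluid.layers.reduce_sum(x)
total = paddle.sum(x)                      # sum over all elements

# the static-graph test above expects TypeError for unsupported dtypes such as uint8
```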
...@@ -226,7 +226,7 @@ class TestRegularizer(unittest.TestCase): ...@@ -226,7 +226,7 @@ class TestRegularizer(unittest.TestCase):
para_sum = [] para_sum = []
for para in param_list: for para in param_list:
para_mul = paddle.square(x=para) para_mul = paddle.square(x=para)
para_sum.append(fluid.layers.reduce_sum(input=para_mul)) para_sum.append(paddle.sum(para_mul))
avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5 avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5
optimizer = fluid.optimizer.Adagrad(learning_rate=0.1) optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
...@@ -261,7 +261,7 @@ class TestRegularizer(unittest.TestCase): ...@@ -261,7 +261,7 @@ class TestRegularizer(unittest.TestCase):
with fluid.program_guard(fluid.Program(), fluid.Program()): with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.uniform_random([2, 2, 3]) x = fluid.layers.uniform_random([2, 2, 3])
out = fluid.layers.fc(x, 5, param_attr=fc_param_attr) out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
loss = fluid.layers.reduce_sum(out) loss = paddle.sum(out)
sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2) sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
sgd.minimize(loss) sgd.minimize(loss)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
......
...@@ -134,7 +134,7 @@ class TestRegularizer(unittest.TestCase): ...@@ -134,7 +134,7 @@ class TestRegularizer(unittest.TestCase):
para_sum = [] para_sum = []
for para in param_list: for para in param_list:
para_mul = paddle.square(x=para) para_mul = paddle.square(x=para)
para_sum.append(fluid.layers.reduce_sum(input=para_mul)) para_sum.append(paddle.sum(para_mul))
avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5 avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5
optimizer = fluid.optimizer.Adagrad(learning_rate=0.1) optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
...@@ -171,7 +171,7 @@ class TestRegularizer(unittest.TestCase): ...@@ -171,7 +171,7 @@ class TestRegularizer(unittest.TestCase):
with fluid.program_guard(fluid.Program(), fluid.Program()): with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.uniform_random([2, 2, 3]) x = fluid.layers.uniform_random([2, 2, 3])
out = fluid.layers.fc(x, 5, param_attr=fc_param_attr) out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
loss = fluid.layers.reduce_sum(out) loss = paddle.sum(out)
sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2) sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
sgd.minimize(loss) sgd.minimize(loss)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.layers.control_flow import lod_rank_table from paddle.fluid.layers.control_flow import lod_rank_table
...@@ -51,7 +52,7 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -51,7 +52,7 @@ class TestReorderLoDTensor(unittest.TestCase):
new_dat = fluid.layers.reorder_lod_tensor_by_rank( new_dat = fluid.layers.reorder_lod_tensor_by_rank(
x=dat, rank_table=table x=dat, rank_table=table
) )
loss = fluid.layers.reduce_sum(new_dat) loss = paddle.sum(new_dat)
fluid.backward.append_backward(loss=loss) fluid.backward.append_backward(loss=loss)
cls.fetch_list = [new_dat, cls.data_desc[0][0] + '@GRAD'] cls.fetch_list = [new_dat, cls.data_desc[0][0] + '@GRAD']
......
...@@ -148,7 +148,7 @@ class TestReverseLoDTensorArray(unittest.TestCase): ...@@ -148,7 +148,7 @@ class TestReverseLoDTensorArray(unittest.TestCase):
reverse_array = fluid.layers.reverse(tensor_array, axis=axis) reverse_array = fluid.layers.reverse(tensor_array, axis=axis)
output, _ = fluid.layers.tensor_array_to_tensor(reverse_array) output, _ = fluid.layers.tensor_array_to_tensor(reverse_array)
loss = fluid.layers.reduce_sum(output) loss = paddle.sum(output)
fluid.backward.append_backward(loss) fluid.backward.append_backward(loss)
input_grads = list( input_grads = list(
map( map(
......
...@@ -644,7 +644,7 @@ def def_seq2seq_model( ...@@ -644,7 +644,7 @@ def def_seq2seq_model(
) )
loss = loss * tar_mask loss = loss * tar_mask
loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_mean(loss, dim=[0])
loss = layers.reduce_sum(loss) loss = paddle.sum(loss)
# optimizer # optimizer
optimizer = fluid.optimizer.Adam(0.001) optimizer = fluid.optimizer.Adam(0.001)
......
...@@ -318,7 +318,7 @@ class PolicyGradient: ...@@ -318,7 +318,7 @@ class PolicyGradient:
neg_log_prob = layers.cross_entropy(act_prob, action) neg_log_prob = layers.cross_entropy(act_prob, action)
cost = neg_log_prob * reward cost = neg_log_prob * reward
cost = ( cost = (
(layers.reduce_sum(cost) / layers.reduce_sum(length)) (paddle.sum(cost) / paddle.sum(length))
if length is not None if length is not None
else layers.reduce_mean(cost) else layers.reduce_mean(cost)
) )
...@@ -407,7 +407,7 @@ class MLE: ...@@ -407,7 +407,7 @@ class MLE:
mask = layers.sequence_mask(length, maxlen=max_seq_len, dtype="float32") mask = layers.sequence_mask(length, maxlen=max_seq_len, dtype="float32")
loss = loss * mask loss = loss * mask
loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_mean(loss, dim=[0])
loss = layers.reduce_sum(loss) loss = paddle.sum(loss)
optimizer = fluid.optimizer.Adam(self.lr) optimizer = fluid.optimizer.Adam(self.lr)
optimizer.minimize(loss) optimizer.minimize(loss)
return loss return loss
......
...@@ -417,7 +417,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): ...@@ -417,7 +417,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest):
self.op_type = "run_program" self.op_type = "run_program"
self.dtype = np.float32 self.dtype = np.float32
self.input_names = {'X': ['x'], 'Params': ['emb_weight']} self.input_names = {'X': ['x'], 'Params': ['emb_weight']}
self.output_names = {'Out': ['reduce_sum_0.tmp_0']} self.output_names = {'Out': ['sum_0.tmp_0']}
self.inputs = { self.inputs = {
'X': {'x': np.array([[1, 3, 0, 4, 7]]).astype("int64")}, 'X': {'x': np.array([[1, 3, 0, 4, 7]]).astype("int64")},
...@@ -456,7 +456,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): ...@@ -456,7 +456,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest):
), ),
is_sparse=True, is_sparse=True,
) )
y = fluid.layers.reduce_sum(emb, dim=-1) y = paddle.sum(emb, axis=-1)
# 2. get forward op num # 2. get forward op num
fwd_op_num = fluid.default_main_program().global_block().desc.op_size() fwd_op_num = fluid.default_main_program().global_block().desc.op_size()
# 3. append backward # 3. append backward
......
...@@ -1436,7 +1436,7 @@ class TestGradientTruncated(unittest.TestCase): ...@@ -1436,7 +1436,7 @@ class TestGradientTruncated(unittest.TestCase):
# set_value_grad_op will not be run during backward. # set_value_grad_op will not be run during backward.
y, value = op(x) y, value = op(x)
y2 = y + 1 y2 = y + 1
loss = paddle.fluid.layers.reduce_sum(y2) loss = paddle.sum(y2)
sgd = paddle.optimizer.Adam() sgd = paddle.optimizer.Adam()
sgd.minimize(loss) sgd.minimize(loss)
place = ( place = (
......
...@@ -716,7 +716,7 @@ class TestSliceApiWithLoDTensorArray(unittest.TestCase): ...@@ -716,7 +716,7 @@ class TestSliceApiWithLoDTensorArray(unittest.TestCase):
slice_arr, axis=self.axis, use_stack=True slice_arr, axis=self.axis, use_stack=True
) )
loss = fluid.layers.reduce_sum(output) loss = paddle.sum(output)
fluid.backward.append_backward(loss) fluid.backward.append_backward(loss)
g_vars = list( g_vars = list(
map( map(
......
...@@ -241,7 +241,7 @@ class PtbModel(fluid.Layer): ...@@ -241,7 +241,7 @@ class PtbModel(fluid.Layer):
) )
loss = paddle.reshape(loss, shape=[-1, self.num_steps]) loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss) loss = paddle.sum(loss)
return loss, last_hidden, last_cell return loss, last_hidden, last_cell
......
...@@ -95,7 +95,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase): ...@@ -95,7 +95,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
else: else:
bn = fluid.layers.cast(bn, 'float64') bn = fluid.layers.cast(bn, 'float64')
sigmoid = paddle.nn.functional.sigmoid(bn) sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid) out = paddle.sum(sigmoid)
if not sync_bn: if not sync_bn:
out = out / core.get_cuda_device_count() out = out / core.get_cuda_device_count()
if not only_forward: if not only_forward:
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid import Program, program_guard from paddle.fluid import Program, program_guard
...@@ -196,7 +197,7 @@ class TestLoDTensorArrayStack(unittest.TestCase): ...@@ -196,7 +197,7 @@ class TestLoDTensorArrayStack(unittest.TestCase):
output, output_index = fluid.layers.tensor_array_to_tensor( output, output_index = fluid.layers.tensor_array_to_tensor(
input=array, **self.attrs input=array, **self.attrs
) )
loss = fluid.layers.reduce_sum(output) loss = paddle.sum(output)
fluid.backward.append_backward(loss) fluid.backward.append_backward(loss)
self.output_vars = [output, output_index] self.output_vars = [output, output_index]
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
import collections import collections
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.initializer import ConstantInitializer from paddle.fluid.initializer import ConstantInitializer
...@@ -46,7 +47,7 @@ class TestWeightNormalization(unittest.TestCase): ...@@ -46,7 +47,7 @@ class TestWeightNormalization(unittest.TestCase):
bias_attr=False, bias_attr=False,
act=None, act=None,
) )
loss = fluid.layers.reduce_sum(out) loss = paddle.sum(out)
fluid.backward.append_backward(loss=loss) fluid.backward.append_backward(loss=loss)
cls.fetch_list = [ cls.fetch_list = [
'weight_norm_param_g', 'weight_norm_param_g',
......
...@@ -158,7 +158,7 @@ def multi_head_attention( ...@@ -158,7 +158,7 @@ def multi_head_attention(
def __softmax(x, eps=1e-9): def __softmax(x, eps=1e-9):
exp_out = paddle.exp(x=x) exp_out = paddle.exp(x=x)
sum_out = layers.reduce_sum(exp_out, dim=-1, keep_dim=False) sum_out = paddle.sum(exp_out, axis=-1, keepdim=False)
return layers.elementwise_div(x=exp_out, y=sum_out, axis=0) return layers.elementwise_div(x=exp_out, y=sum_out, axis=0)
scaled_q = paddle.scale(x=q, scale=d_model**-0.5) scaled_q = paddle.scale(x=q, scale=d_model**-0.5)
...@@ -595,4 +595,4 @@ def transformer( ...@@ -595,4 +595,4 @@ def transformer(
cost = layers.cross_entropy(input=predict, label=gold) cost = layers.cross_entropy(input=predict, label=gold)
weighted_cost = cost * weights weighted_cost = cost * weights
return layers.reduce_sum(weighted_cost) return paddle.sum(weighted_cost)
...@@ -143,21 +143,21 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase): ...@@ -143,21 +143,21 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
global_norm_var = [] global_norm_var = []
if len(sum_square_list_fp16) > 0: if len(sum_square_list_fp16) > 0:
global_norm_var_fp16 = layers.concat(sum_square_list_fp16) global_norm_var_fp16 = layers.concat(sum_square_list_fp16)
global_norm_var_fp16 = layers.reduce_sum(global_norm_var_fp16) global_norm_var_fp16 = paddle.sum(global_norm_var_fp16)
global_norm_var.append(global_norm_var_fp16.astype(sum_dtype)) global_norm_var.append(global_norm_var_fp16.astype(sum_dtype))
if len(sum_square_list_fp32) > 0: if len(sum_square_list_fp32) > 0:
global_norm_var_fp32 = layers.concat(sum_square_list_fp32) global_norm_var_fp32 = layers.concat(sum_square_list_fp32)
global_norm_var_fp32 = layers.reduce_sum(global_norm_var_fp32) global_norm_var_fp32 = paddle.sum(global_norm_var_fp32)
if sum_dtype == 'float32': if sum_dtype == 'float32':
global_norm_var.append(global_norm_var_fp32) global_norm_var.append(global_norm_var_fp32)
else: else:
global_norm_var.append(global_norm_var_fp32.astype(sum_dtype)) global_norm_var.append(global_norm_var_fp32.astype(sum_dtype))
if len(sum_square_list) > 0: if len(sum_square_list) > 0:
global_norm_var_fp64 = layers.concat(sum_square_list) global_norm_var_fp64 = layers.concat(sum_square_list)
global_norm_var_fp64 = layers.reduce_sum(global_norm_var_fp64) global_norm_var_fp64 = paddle.sum(global_norm_var_fp64)
global_norm_var.append(global_norm_var_fp64) global_norm_var.append(global_norm_var_fp64)
global_norm_var = layers.concat(global_norm_var) global_norm_var = layers.concat(global_norm_var)
global_norm_var = layers.reduce_sum(global_norm_var) global_norm_var = paddle.sum(global_norm_var)
return global_norm_var, sum_dtype return global_norm_var, sum_dtype
@imperative_base.no_grad @imperative_base.no_grad
......
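A minimal standalone sketch of the accumulation pattern in the clip-grad hunks: per-parameter squared sums are concatenated and reduced to a single scalar with `paddle.sum`. The sample gradients are assumptions, and the final `sqrt` is the usual last step of a global-norm computation that happens outside this excerpt:

```python
import paddle

grads = [paddle.rand([3, 4]), paddle.rand([5])]                # assumed sample gradients
sum_square_list = [paddle.sum(paddle.square(g)) for g in grads]

stacked = paddle.concat([paddle.reshape(s, [1]) for s in sum_square_list])
global_norm_sq = paddle.sum(stacked)                           # single scalar, as in the hunks above
global_norm = paddle.sqrt(global_norm_sq)
```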
...@@ -593,7 +593,8 @@ class MSELoss(Layer): ...@@ -593,7 +593,8 @@ class MSELoss(Layer):
reduce_op = 'reduce_mean' reduce_op = 'reduce_mean'
if self.reduction == 'sum': if self.reduction == 'sum':
reduce_op = 'reduce_sum' square_out = paddle.sum(square_out)
return square_out
return getattr(fluid.layers, reduce_op)(square_out) return getattr(fluid.layers, reduce_op)(square_out)
......
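For the `MSELoss` change above, a hedged usage sketch checking that `reduction='sum'` matches a plain `paddle.sum` over the squared error; the input values are illustrative:

```python
import paddle

pred = paddle.to_tensor([1.0, 2.0, 3.0])
label = paddle.to_tensor([1.5, 2.0, 2.0])

loss_sum = paddle.nn.MSELoss(reduction='sum')(pred, label)
manual = paddle.sum(paddle.square(pred - label))
# the two results should agree up to floating-point error
```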
...@@ -163,7 +163,7 @@ def dynamic_train(model, dataloader): ...@@ -163,7 +163,7 @@ def dynamic_train(model, dataloader):
for inputs, labels in dataloader: for inputs, labels in dataloader:
outputs = model(inputs) outputs = model(inputs)
loss = CrossEntropyLoss(reduction="sum")(outputs, labels) loss = CrossEntropyLoss(reduction="sum")(outputs, labels)
avg_loss = fluid.layers.reduce_sum(loss) avg_loss = paddle.sum(loss)
avg_loss.backward() avg_loss.backward()
optim.minimize(avg_loss) optim.minimize(avg_loss)
model.clear_gradients() model.clear_gradients()
...@@ -510,7 +510,7 @@ class TestModelFunction(unittest.TestCase): ...@@ -510,7 +510,7 @@ class TestModelFunction(unittest.TestCase):
m.train() m.train()
output = m(to_tensor(data)) output = m(to_tensor(data))
loss = CrossEntropyLoss(reduction='sum')(output, to_tensor(label)) loss = CrossEntropyLoss(reduction='sum')(output, to_tensor(label))
avg_loss = fluid.layers.reduce_sum(loss) avg_loss = paddle.sum(loss)
avg_loss.backward() avg_loss.backward()
optim.minimize(avg_loss) optim.minimize(avg_loss)
m.clear_gradients() m.clear_gradients()
......