Unverified · Commit 8d00f76e authored by xiaoguoguo626807, committed by GitHub

【fluid api clear】Remove reduce sum (#48330)

* remove fluid.reduce_sum

* remove fluid.reduce_sum

* modify axis and import paddle

* modify keepdim and out_name

* modify unittest

* modify unittest

* modify CI_static and loss.py

* modify test_mse_loss

* modify static ci

* modify static ci datatype

* add import paddle in test

* fix conflict

* fix conflict

* modify ci

* modify ci

* fix_conflict

* fix bug

* code_style
Parent 4527d249
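For reference, the API mapping applied throughout the diffs below is sketched here. This is a minimal illustration and not part of the commit: the tensor name `x` and its shape are made up, and it assumes a Paddle 2.x install where `paddle.sum` replaces `fluid.layers.reduce_sum` and the `dim` / `keep_dim` arguments become `axis` / `keepdim`.

```python
import paddle

paddle.enable_static()

# illustrative static-graph input; any numeric tensor behaves the same way
x = paddle.static.data(name="x", shape=[2, 3, 4], dtype="float32")

# before this PR (fluid 1.x style):
#   total   = fluid.layers.reduce_sum(x)
#   per_row = fluid.layers.reduce_sum(x, dim=-1, keep_dim=True)

# after this PR (paddle 2.x API):
total = paddle.sum(x)                           # sum over all elements
per_row = paddle.sum(x, axis=-1, keepdim=True)  # dim -> axis, keep_dim -> keepdim
```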
......@@ -136,6 +136,7 @@ TODO
# Examples
```python
import paddle
class MyLayer(fluid.imperative.Layer):
def __init__(self):
super(MyLayer, self).__init__()
......@@ -143,7 +144,7 @@ class MyLayer(fluid.imperative.Layer):
def forward(self, inputs):
x = fluid.layers.relu(inputs)
x = fluid.layers.elementwise_mul(x, x)
x = fluid.layers.reduce_sum(x)
x = paddle.sum(x)
return [x]
......@@ -184,7 +185,7 @@ class MLP(fluid.Layer):
def forward(self, inputs):
x = self._linear1(inputs)
x = self._linear2(x)
x = fluid.layers.reduce_sum(x)
x = paddle.sum(x)
return x
......
......@@ -69,7 +69,7 @@ class GroupShardedClipGrad:
layers.merge_selected_rows(g)
)
square = paddle.square(merge_grad)
sum_square = layers.reduce_sum(square)
sum_square = paddle.sum(square)
if p.dtype == paddle.float16:
if p_slice:
......@@ -87,7 +87,7 @@ class GroupShardedClipGrad:
global_norm_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else:
global_norm_fp16 = layers.concat(sum_square_fp16)
global_norm_fp16 = layers.reduce_sum(global_norm_fp16)
global_norm_fp16 = paddle.sum(global_norm_fp16)
global_norm_fp16 = paddle.cast(
global_norm_fp16, dtype=paddle.float32
)
......@@ -97,7 +97,7 @@ class GroupShardedClipGrad:
global_unslice_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else:
global_unslice_fp16 = layers.concat(unslice_params_fp16)
global_unslice_fp16 = layers.reduce_sum(global_unslice_fp16)
global_unslice_fp16 = paddle.sum(global_unslice_fp16)
global_unslice_fp16 = paddle.cast(
global_unslice_fp16, dtype=paddle.float32
)
......@@ -108,7 +108,7 @@ class GroupShardedClipGrad:
if len(sum_square_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32)
)
global_norm_fp32 = layers.reduce_sum(global_norm_fp32)
global_norm_fp32 = paddle.sum(global_norm_fp32)
# global norm of non-distributed FP32 params_and_grads for unslice parameters
global_unslice_fp32 = (
......@@ -116,7 +116,7 @@ class GroupShardedClipGrad:
if len(unslice_params_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32)
)
global_unslice_fp32 = layers.reduce_sum(global_unslice_fp32)
global_unslice_fp32 = paddle.sum(global_unslice_fp32)
global_unslice_var = global_unslice_fp16 + global_unslice_fp32
global_norm_var = (
......
......@@ -70,8 +70,7 @@ class ShardingClipGrad:
layers.merge_selected_rows(g)
)
square = paddle.square(merge_grad)
sum_square = layers.reduce_sum(square)
sum_square = paddle.sum(square)
if p.dtype == paddle.float16:
if p_slice:
sum_square_fp16.append(sum_square)
......@@ -88,7 +87,7 @@ class ShardingClipGrad:
global_norm_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else:
global_norm_fp16 = layers.concat(sum_square_fp16)
global_norm_fp16 = layers.reduce_sum(global_norm_fp16)
global_norm_fp16 = paddle.sum(global_norm_fp16)
global_norm_fp16 = paddle.cast(
global_norm_fp16, dtype=paddle.float32
)
......@@ -98,7 +97,7 @@ class ShardingClipGrad:
global_unslice_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
else:
global_unslice_fp16 = layers.concat(unslice_params_fp16)
global_unslice_fp16 = layers.reduce_sum(global_unslice_fp16)
global_unslice_fp16 = paddle.sum(global_unslice_fp16)
global_unslice_fp16 = paddle.cast(
global_unslice_fp16, dtype=paddle.float32
)
......@@ -109,7 +108,7 @@ class ShardingClipGrad:
if len(sum_square_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32)
)
global_norm_fp32 = layers.reduce_sum(global_norm_fp32)
global_norm_fp32 = paddle.sum(global_norm_fp32)
# global norm of non-distributed FP32 params_and_grads for unslice parameter
global_unslice_fp32 = (
......@@ -117,7 +116,7 @@ class ShardingClipGrad:
if len(unslice_params_fp32) != 0
else paddle.to_tensor([0.0], dtype=paddle.float32)
)
global_unslice_fp32 = layers.reduce_sum(global_unslice_fp32)
global_unslice_fp32 = paddle.sum(global_unslice_fp32)
global_unslice_var = global_unslice_fp16 + global_unslice_fp32
global_norm_var = (
......
......@@ -37,7 +37,7 @@ def sum(input, scope=None, util=None):
# in model.py
input = fluid.layers.cast(some_input, dtype='float32')
cnt = fluid.layers.reduce_sum(input)
cnt = paddle.sum(input)
global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
tmp = fluid.layers.elementwise_add(cnt, global_cnt)
fluid.layers.assign(tmp, global_cnt)
......@@ -77,7 +77,7 @@ def max(input, scope=None, util=None):
# in model.py
input = fluid.layers.cast(some_input, dtype='float32')
cnt = fluid.layers.reduce_sum(input)
cnt = paddle.sum(input)
global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
tmp = paddle.maximum(cnt, global_cnt)
fluid.layers.assign(tmp, global_cnt)
......@@ -117,7 +117,7 @@ def min(input, scope=None, util=None):
# in model.py
input = fluid.layers.cast(some_input, dtype='float32')
cnt = fluid.layers.reduce_sum(input)
cnt = paddle.sum(input)
global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
tmp = fluid.layers.elementwise_min(cnt, global_cnt)
fluid.layers.assign(tmp, global_cnt)
......
......@@ -73,7 +73,7 @@ def _squared_l2_norm(x):
or x.dtype == core.VarDesc.VarType.BF16
):
square = paddle.square(x)
sum_square = layers.reduce_sum(square)
sum_square = paddle.sum(square)
return sum_square
if in_dygraph_mode():
......
......@@ -64,9 +64,7 @@ class AdaRoundLoss:
square_cost = fluid.layers.square_error_cost(
ada_quantized_output, orig_output
)
recon_loss = fluid.layers.reduce_mean(
fluid.layers.reduce_sum(square_cost, dim=-1)
)
recon_loss = fluid.layers.reduce_mean(paddle.sum(square_cost, axis=-1))
return recon_loss
def compute_round_loss(self, alpha_v, warm_start, beta):
......@@ -76,7 +74,7 @@ class AdaRoundLoss:
# calculate regularization term, which ensures the parameters converge to exactly zeros and ones
# at the end of optimization
reg_term = fluid.layers.reduce_sum(
reg_term = paddle.sum(
-paddle.pow(paddle.abs(2 * h_v - 1), beta) + 1
)
......
......@@ -113,7 +113,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""):
cost = fluid.layers.softmax_with_cross_entropy(
logits, label, return_softmax=False
)
sum_cost = fluid.layers.reduce_sum(cost)
sum_cost = paddle.sum(cost)
# Test program
test_program = train_program.clone(for_test=True)
......
......@@ -15,6 +15,7 @@
import warnings
import numpy as np
import paddle
from . import layers
from .framework import Program, Variable, program_guard
from . import unique_name
......
......@@ -1640,6 +1640,7 @@ class Variable(metaclass=VariableMetaClass):
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import numpy as np
......@@ -1652,7 +1653,7 @@ class Variable(metaclass=VariableMetaClass):
tmp.stop_gradient=False
inputs2.append(tmp)
ret2 = fluid.layers.sums(inputs2)
loss2 = fluid.layers.reduce_sum(ret2)
loss2 = paddle.sum(ret2)
loss2.backward()
print(loss2.gradient())
......@@ -1687,6 +1688,7 @@ class Variable(metaclass=VariableMetaClass):
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import numpy as np
......@@ -1698,7 +1700,7 @@ class Variable(metaclass=VariableMetaClass):
tmp.stop_gradient=False
inputs2.append(tmp)
ret2 = fluid.layers.sums(inputs2)
loss2 = fluid.layers.reduce_sum(ret2)
loss2 = paddle.sum(ret2)
loss2.backward()
print(loss2.gradient())
loss2.clear_gradient()
......
......@@ -45,7 +45,7 @@ class SimpleLayer(Layer):
def forward(self, inputs):
x = self._linear1(inputs)
x = layers.reduce_sum(x)
x = paddle.sum(x)
return x
......
......@@ -3399,7 +3399,7 @@ class IfElse:
output = ie() # [array([[-7.], [-9.], [ 8.], [ 7.]], dtype=float32)]
# Get the first Variable in the output List and add all elements.
out = fluid.layers.reduce_sum(output[0])
out = paddle.sum(output[0])
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
......
......@@ -582,6 +582,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
Examples:
.. code-block:: python
import paddle
import numpy as np
import paddle.fluid as fluid
......@@ -591,7 +592,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
batch_size = 32
max_iter = 20
paddle.enable_static()
def gen_train_data():
x_data = np.random.uniform(0, 255, (batch_size, 3, image_height,
image_width)).astype('float64')
......@@ -601,12 +602,12 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
def get_focal_loss(pred, label, fg_num, num_classes):
pred = fluid.layers.reshape(pred, [-1, num_classes])
label = fluid.layers.reshape(label, [-1, 1])
pred = paddle.reshape(pred, [-1, num_classes])
label = paddle.reshape(label, [-1, 1])
label.stop_gradient = True
loss = fluid.layers.sigmoid_focal_loss(
pred, label, fg_num, gamma=2.0, alpha=0.25)
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss
......@@ -628,7 +629,7 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
data = fluid.layers.fill_constant(shape=[1], value=1, dtype='int32')
fg_label = fluid.layers.greater_equal(label, data)
fg_label = fluid.layers.cast(fg_label, dtype='int32')
fg_num = fluid.layers.reduce_sum(fg_label)
fg_num = paddle.sum(fg_label, dtype='int32')
fg_num.stop_gradient = True
avg_loss = get_focal_loss(output, label, fg_num, num_classes)
return avg_loss
......@@ -1847,9 +1848,9 @@ def ssd_loss(
# shape=(-1, 0) is set for compile-time, the correct shape is set by
# actual_shape in runtime.
loss = paddle.reshape(x=loss, shape=actual_shape)
loss = nn.reduce_sum(loss, dim=1, keep_dim=True)
loss = paddle.sum(loss, axis=1, keepdim=True)
if normalize:
normalizer = nn.reduce_sum(target_loc_weight)
normalizer = paddle.sum(target_loc_weight)
loss = loss / normalizer
return loss
......
......@@ -538,13 +538,13 @@ class Categorical(Distribution):
)
e_logits = paddle.exp(logits)
other_e_logits = paddle.exp(other_logits)
z = nn.reduce_sum(e_logits, dim=-1, keep_dim=True)
other_z = nn.reduce_sum(other_e_logits, dim=-1, keep_dim=True)
z = paddle.sum(e_logits, axis=-1, keepdim=True)
other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True)
prob = e_logits / z
kl = nn.reduce_sum(
kl = paddle.sum(
prob * (logits - nn.log(z) - other_logits + nn.log(other_z)),
dim=-1,
keep_dim=True,
axis=-1,
keepdim=True,
)
return kl
......@@ -558,10 +558,11 @@ class Categorical(Distribution):
"""
logits = self.logits - paddle.max(self.logits, axis=-1, keepdim=True)
e_logits = paddle.exp(logits)
z = nn.reduce_sum(e_logits, dim=-1, keep_dim=True)
z = paddle.sum(e_logits, axis=-1, keepdim=True)
prob = e_logits / z
entropy = -1.0 * nn.reduce_sum(
prob * (logits - nn.log(z)), dim=-1, keep_dim=True
entropy = -1.0 * paddle.sum(
prob * (logits - nn.log(z)), axis=-1, keepdim=True
)
return entropy
......@@ -703,7 +704,7 @@ class MultivariateNormalDiag(Distribution):
"""
check_type(other, 'other', MultivariateNormalDiag, 'kl_divergence')
tr_cov_matmul = nn.reduce_sum(self._inv(other.scale) * self.scale)
tr_cov_matmul = paddle.sum(self._inv(other.scale) * self.scale)
loc_matmul_cov = nn.matmul(
(other.loc - self.loc), self._inv(other.scale)
)
......
......@@ -75,7 +75,6 @@ __all__ = [
'batch_norm',
'instance_norm',
'data_norm',
'reduce_sum',
'reduce_mean',
'reduce_all',
'reduce_any',
......
......@@ -1788,14 +1788,16 @@ class LarsMomentumOptimizer(Optimizer):
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
out = fluid.layers.fc(inp, size=3)
out = fluid.layers.reduce_sum(out)
out = paddle.sum(out)
optimizer = fluid.optimizer.LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
optimizer.minimize(out)
......@@ -2046,13 +2048,15 @@ class AdagradOptimizer(Optimizer):
Examples:
.. code-block:: python
import paddle
import numpy as np
import paddle.fluid as fluid
paddle.enable_static()
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
inp = fluid.data(name="inp", shape=[2, 2])
out = fluid.layers.fc(inp, size=3)
out = fluid.layers.reduce_sum(out)
out = paddle.sum(out)
optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.2)
optimizer.minimize(out)
......
......@@ -186,7 +186,7 @@ class TestIfElse(unittest.TestCase):
false_target = paddle.tanh(false_target)
ie.output(false_target)
if_out = ie()
out = layers.reduce_sum(if_out[0])
out = paddle.sum(if_out[0])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
......
......@@ -947,8 +947,8 @@ class TransFormer(Layer):
soft_label=True if self._label_smooth_eps else False,
)
weighted_cost = cost * weights
sum_cost = fluid.layers.reduce_sum(weighted_cost)
token_num = fluid.layers.reduce_sum(weights)
sum_cost = paddle.sum(weighted_cost)
token_num = paddle.sum(weights)
token_num.stop_gradient = True
avg_cost = sum_cost / token_num
return sum_cost, avg_cost, predict, token_num
......
......@@ -56,7 +56,7 @@ def fake_simnet_reader():
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -1591,8 +1591,8 @@ def transformer(
soft_label=True if label_smooth_eps else False,
)
weighted_cost = cost * weights
sum_cost = layers.reduce_sum(weighted_cost)
token_num = layers.reduce_sum(weights)
sum_cost = paddle.sum(weighted_cost)
token_num = paddle.sum(weights)
avg_cost = sum_cost / token_num
avg_cost.stop_gradient = True
return sum_cost, avg_cost, predict, token_num
......
......@@ -302,7 +302,7 @@ class BaseModel(fluid.dygraph.Layer):
)
loss = loss * tar_mask
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss
......@@ -405,7 +405,7 @@ class BaseModel(fluid.dygraph.Layer):
parent_ids = []
for step_idx in range(paddle.to_tensor(self.beam_max_step_num)):
if fluid.layers.reduce_sum(1 - beam_finished).numpy()[0] == 0:
if paddle.sum(1 - beam_finished).numpy()[0] == 0:
break
step_input = self._merge_batch_beams(step_input)
new_dec_hidden, new_dec_cell = [], []
......@@ -830,6 +830,6 @@ class AttentionModel(fluid.dygraph.Layer):
)
loss = loss * tar_mask
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss
......@@ -510,8 +510,8 @@ class BOW(Layer):
right_emb, shape=[-1, self.seq_len, self.bow_dim]
)
bow_left = fluid.layers.reduce_sum(left_emb, dim=1)
bow_right = fluid.layers.reduce_sum(right_emb, dim=1)
bow_left = paddle.sum(left_emb, axis=1)
bow_right = paddle.sum(right_emb, axis=1)
softsign_layer = SoftsignLayer()
left_soft = softsign_layer.ops(bow_left)
right_soft = softsign_layer.ops(bow_right)
......
......@@ -497,8 +497,8 @@ class BOW(paddle.nn.Layer):
right_emb, shape=[-1, self.seq_len, self.bow_dim]
)
bow_left = paddle.fluid.layers.reduce_sum(left_emb, dim=1)
bow_right = paddle.fluid.layers.reduce_sum(right_emb, dim=1)
bow_left = paddle.sum(left_emb, axis=1)
bow_right = paddle.sum(right_emb, axis=1)
softsign_layer = SoftsignLayer()
left_soft = softsign_layer.ops(bow_left)
right_soft = softsign_layer.ops(bow_right)
......
......@@ -324,9 +324,7 @@ def bmn_loss_func(
num_entries = fluid.layers.cast(
fluid.layers.shape(pmask), dtype=DATATYPE
)
num_positive = fluid.layers.cast(
fluid.layers.reduce_sum(pmask), dtype=DATATYPE
)
num_positive = fluid.layers.cast(paddle.sum(pmask), dtype=DATATYPE)
ratio = num_entries / num_positive
coef_0 = 0.5 * ratio / (ratio - 1)
coef_1 = 0.5 * ratio
......@@ -359,15 +357,9 @@ def bmn_loss_func(
u_lmask = fluid.layers.cast(x=u_lmask, dtype=DATATYPE)
u_lmask = fluid.layers.elementwise_mul(u_lmask, mask)
num_h = fluid.layers.cast(
fluid.layers.reduce_sum(u_hmask), dtype=DATATYPE
)
num_m = fluid.layers.cast(
fluid.layers.reduce_sum(u_mmask), dtype=DATATYPE
)
num_l = fluid.layers.cast(
fluid.layers.reduce_sum(u_lmask), dtype=DATATYPE
)
num_h = fluid.layers.cast(paddle.sum(u_hmask), dtype=DATATYPE)
num_m = fluid.layers.cast(paddle.sum(u_mmask), dtype=DATATYPE)
num_l = fluid.layers.cast(paddle.sum(u_lmask), dtype=DATATYPE)
r_m = num_h / num_m
u_smmask = fluid.layers.assign(
......@@ -391,11 +383,7 @@ def bmn_loss_func(
weights.stop_gradient = True
loss = fluid.layers.square_error_cost(pred_score, gt_iou_map)
loss = fluid.layers.elementwise_mul(loss, weights)
loss = (
0.5
* fluid.layers.reduce_sum(loss)
/ fluid.layers.reduce_sum(weights)
)
loss = 0.5 * paddle.sum(loss) / paddle.sum(weights)
return loss
......@@ -406,8 +394,8 @@ def bmn_loss_func(
nmask = fluid.layers.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE)
nmask = fluid.layers.elementwise_mul(nmask, mask)
num_positive = fluid.layers.reduce_sum(pmask)
num_entries = num_positive + fluid.layers.reduce_sum(nmask)
num_positive = paddle.sum(pmask)
num_entries = num_positive + paddle.sum(nmask)
ratio = num_entries / num_positive
coef_0 = 0.5 * ratio / (ratio - 1)
coef_1 = 0.5 * ratio
......@@ -415,11 +403,11 @@ def bmn_loss_func(
loss_pos = fluid.layers.elementwise_mul(
fluid.layers.log(pred_score + epsilon), pmask
)
loss_pos = coef_1 * fluid.layers.reduce_sum(loss_pos)
loss_pos = coef_1 * paddle.sum(loss_pos)
loss_neg = fluid.layers.elementwise_mul(
fluid.layers.log(1.0 - pred_score + epsilon), nmask
)
loss_neg = coef_0 * fluid.layers.reduce_sum(loss_neg)
loss_neg = coef_0 * paddle.sum(loss_neg)
loss = -1 * (loss_pos + loss_neg) / num_entries
return loss
......
......@@ -220,7 +220,7 @@ class PtbModel(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss, last_hidden, last_cell
......
......@@ -214,7 +214,7 @@ class PtbModel(paddle.nn.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = paddle.mean(loss, axis=[0])
loss = paddle.fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss, last_hidden, last_cell
......
......@@ -123,7 +123,7 @@ def train(args, place, to_static):
loss_probs = fluid.layers.log(loss_probs)
loss_probs = fluid.layers.elementwise_mul(loss_probs, mask)
loss_probs = fluid.layers.reduce_sum(loss_probs, dim=-1)
loss_probs = paddle.sum(loss_probs, axis=-1)
policy.saved_log_probs.append(loss_probs)
return action, loss_probs
......@@ -153,7 +153,7 @@ def train(args, place, to_static):
policy_loss.append(cur_loss)
policy_loss = fluid.layers.concat(policy_loss)
policy_loss = fluid.layers.reduce_sum(policy_loss)
policy_loss = paddle.sum(policy_loss)
policy_loss.backward()
optimizer.minimize(policy_loss)
......
......@@ -144,7 +144,7 @@ class BOW(fluid.dygraph.Layer):
mask_emb = paddle.expand(o_np_mask, [-1, self.hid_dim])
emb = emb * mask_emb
emb = paddle.reshape(emb, shape=[-1, self.seq_len, self.hid_dim])
bow_1 = fluid.layers.reduce_sum(emb, dim=1)
bow_1 = paddle.sum(emb, axis=1)
bow_1 = paddle.tanh(bow_1)
fc_1 = self._fc1(bow_1)
fc_2 = self._fc2(fc_1)
......
......@@ -15,6 +15,7 @@
import math
import random
import numpy as np
import paddle
import paddle.fluid as fluid
import unittest
......@@ -259,7 +260,7 @@ class SkipGram(fluid.dygraph.Layer):
word_sim = fluid.layers.elementwise_mul(
center_words_emb, target_words_emb
)
word_sim = fluid.layers.reduce_sum(word_sim, dim=-1)
word_sim = paddle.sum(word_sim, axis=-1)
pred = paddle.nn.functional.sigmoid(word_sim)
......
......@@ -586,8 +586,8 @@ class CrossEntropyCriterion:
soft_label=True if self.label_smooth_eps else False,
)
weighted_cost = cost * weights
sum_cost = layers.reduce_sum(weighted_cost)
token_num = layers.reduce_sum(weights)
sum_cost = paddle.sum(weighted_cost)
token_num = paddle.sum(weights)
token_num.stop_gradient = True
avg_cost = sum_cost / token_num
return sum_cost, avg_cost, token_num
......
......@@ -134,7 +134,7 @@ class TestMin(TestMean):
class TestSum(TestMean):
def set_test_op(self):
self.op = paddle.fluid.layers.reduce_sum
self.op = paddle.sum
class TestLogsumexp(TestMean):
......
......@@ -17,6 +17,7 @@ import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
......@@ -28,9 +29,7 @@ class TRTReduceSumTest(InferencePassTest):
data = fluid.data(
name="data", shape=[-1, 3, 10, 192], dtype="float32"
)
reduce_sum = fluid.layers.reduce_sum(
data, dim=[2, -1], keep_dim=True
)
reduce_sum = paddle.sum(data, axis=[2, -1], keepdim=True)
out = fluid.layers.batch_norm(reduce_sum, is_test=True)
self.feeds = {
......@@ -63,7 +62,7 @@ class TRTReduceSumAllTest(InferencePassTest):
data = fluid.data(
name="data", shape=[-1, 3, 10, 192], dtype="float32"
)
reduce_sum = fluid.layers.reduce_sum(data, keep_dim=True)
reduce_sum = paddle.sum(data, keepdim=True)
out = fluid.layers.batch_norm(reduce_sum, is_test=True)
self.feeds = {
......
......@@ -98,7 +98,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
if self.bn_dtype == np.float16:
bn = fluid.layers.cast(bn, 'float32')
sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid)
out = paddle.sum(sigmoid)
# if not sync_bn:
# out = out / core.get_mlu_device_count()
if not only_forward:
......
......@@ -100,7 +100,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
# if self.dtype == np.float16:
# bn = fluid.layers.cast(bn, 'float32')
sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid)
out = paddle.sum(sigmoid)
# if not sync_bn:
# out = out / core.get_npu_device_count()
if not only_forward:
......
......@@ -85,7 +85,7 @@ class TestReduceSum2(OpTest):
class TestReduceSumNet(unittest.TestCase):
def set_reduce_sum_function(self, x):
# keep_dim = False
return paddle.fluid.layers.reduce_sum(x, dim=-1)
return paddle.sum(x, axis=-1)
def _test(self, run_npu=True):
main_prog = paddle.static.Program()
......@@ -153,7 +153,7 @@ class TestReduceSumNet(unittest.TestCase):
class TestReduceSumNet2(TestReduceSumNet):
def set_reduce_sum_function(self, x):
# keep_dim = True
return paddle.fluid.layers.reduce_sum(x, dim=-1, keep_dim=True)
return paddle.sum(x, axis=-1, keepdim=True)
class TestReduceSumNet3(TestReduceSumNet):
......@@ -172,7 +172,7 @@ class TestReduceSumNet3(TestReduceSumNet):
b = paddle.static.data(name="b", shape=[2, 3, 4], dtype='float32')
z = paddle.add(a, b)
loss = fluid.layers.reduce_sum(z)
loss = paddle.sum(z)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
......
......@@ -73,7 +73,7 @@ class SimpleNet(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss
......
......@@ -95,7 +95,7 @@ class TestArgsortOpCPU(unittest.TestCase):
)
self.sorted_x.stop_gradient = False
loss = fluid.layers.elementwise_mul(self.sorted_x, label)
self.loss = fluid.layers.reduce_sum(loss)
self.loss = paddle.sum(loss)
def forward(self):
self.feed_map = {
......
......@@ -151,7 +151,7 @@ class TestDistFleetHeterProgram(unittest.TestCase):
with fluid.device_guard("gpu"):
labels = fluid.layers.cast(inputs[-1], dtype="int64")
cost = fluid.layers.cross_entropy(input=predict, label=labels)
avg_cost = fluid.layers.reduce_sum(cost)
avg_cost = paddle.sum(cost)
return avg_cost
......
......@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -41,7 +41,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -42,7 +42,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -41,7 +41,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -38,7 +38,7 @@ class TestPSPassWithBow(unittest.TestCase):
def get_acc(cos_q_nt, cos_q_pt, batch_size):
cond = fluid.layers.less_than(cos_q_nt, cos_q_pt)
cond = fluid.layers.cast(cond, dtype='float64')
cond_3 = fluid.layers.reduce_sum(cond)
cond_3 = paddle.sum(cond)
acc = fluid.layers.elementwise_div(
cond_3,
fluid.layers.fill_constant(
......
......@@ -405,7 +405,7 @@ class TestFakeInit(TranspilerTest):
neg_emb_b_vec = paddle.reshape(neg_emb_b, shape=[-1, neg_num])
true_logits = fluid.layers.elementwise_add(
fluid.layers.reduce_sum(
paddle.sum(
fluid.layers.elementwise_mul(input_emb, true_emb_w),
                axis=1,
                keepdim=True,
......@@ -435,8 +435,8 @@ class TestFakeInit(TranspilerTest):
neg_logits, label_zeros
)
cost = fluid.layers.elementwise_add(
fluid.layers.reduce_sum(true_xent, dim=1),
fluid.layers.reduce_sum(neg_xent, dim=1),
paddle.sum(true_xent, axis=1),
paddle.sum(neg_xent, axis=1),
)
avg_cost = fluid.layers.reduce_mean(cost)
......
......@@ -468,7 +468,7 @@ def lm_model(
loss = paddle.reshape(loss, shape=[-1, num_steps])
loss = layers.reduce_mean(loss, dim=[0])
loss = layers.reduce_sum(loss)
loss = paddle.sum(loss)
loss.persistable = True
last_cell.persistable = True
......
......@@ -592,7 +592,7 @@ class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1):
mem_pre = rnn_1.memory(shape=[-1, self.input_dim], batch_ref=x)
x_t = rnn_1.step_input(x)
last_rnn_output = rnn_0()
last_rnn_sum = fluid.layers.reduce_sum(last_rnn_output)
last_rnn_sum = paddle.sum(last_rnn_output)
mem = layers.elementwise_add(x=x_t, y=last_rnn_sum)
y = layers.elementwise_add(x=mem_pre, y=mem)
rnn_1.update_memory(mem_pre, mem)
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import (
......@@ -55,7 +56,7 @@ class TestPyramidHashOpApi(unittest.TestCase):
name=None,
)
cost = fluid.layers.reduce_sum(hash_embd)
cost = paddle.sum(hash_embd)
role = role_maker.UserDefinedRoleMaker(
current_id=0,
......
......@@ -34,7 +34,7 @@ class MyLayer(fluid.Layer):
x = fluid.layers.relu(inputs)
self._x_for_debug = x
x = fluid.layers.elementwise_mul(x, x)
x = fluid.layers.reduce_sum(x)
x = paddle.sum(x)
return [x]
......@@ -65,7 +65,7 @@ class MLP(fluid.Layer):
def forward(self, inputs):
x = self._linear1(inputs)
x = self._linear2(x)
x = fluid.layers.reduce_sum(x)
x = paddle.sum(x)
return x
......@@ -108,7 +108,7 @@ class SimpleRNNCell(fluid.Layer):
hidden = self._helper.append_activation(hidden, act='tanh')
out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w)
softmax_out = paddle.nn.functional.softmax(out)
reduce_out = paddle.fluid.layers.nn.reduce_sum(softmax_out)
reduce_out = paddle.sum(softmax_out)
return reduce_out, hidden
......@@ -342,7 +342,7 @@ class TestImperative(unittest.TestCase):
tmp.stop_gradient = False
inputs.append(tmp)
ret = paddle.add_n(inputs)
loss = fluid.layers.reduce_sum(ret)
loss = paddle.sum(ret)
loss.backward()
with fluid.dygraph.guard():
inputs2 = []
......@@ -351,7 +351,7 @@ class TestImperative(unittest.TestCase):
tmp.stop_gradient = False
inputs2.append(tmp)
ret2 = paddle.add_n(inputs2)
loss2 = fluid.layers.reduce_sum(ret2)
loss2 = paddle.sum(ret2)
fluid.set_flags({'FLAGS_sort_sum_gradient': True})
loss2.backward()
......@@ -739,11 +739,11 @@ class TestImperative(unittest.TestCase):
)
a = paddle.expand(
paddle.reshape(fluid.layers.reduce_sum(inp_data1), [1, 1]),
paddle.reshape(paddle.sum(inp_data1), [1, 1]),
[4, -1],
)
b = paddle.expand(
paddle.reshape(fluid.layers.reduce_sum(inp_data2), [1, 1]),
paddle.reshape(paddle.sum(inp_data2), [1, 1]),
[4, -1],
)
cond = fluid.layers.less_than(x=a, y=b)
......
......@@ -254,9 +254,7 @@ class TestDygraphDeepCF(unittest.TestCase):
deepcf = DeepCF(num_users, num_items, matrix)
prediction = deepcf(users, items)
loss = fluid.layers.reduce_sum(
fluid.layers.log_loss(prediction, labels)
)
loss = paddle.sum(fluid.layers.log_loss(prediction, labels))
adam = fluid.optimizer.AdamOptimizer(0.01)
adam.minimize(loss)
......@@ -309,7 +307,7 @@ class TestDygraphDeepCF(unittest.TestCase):
to_variable(users_np[slice : slice + self.batch_size]),
to_variable(items_np[slice : slice + self.batch_size]),
)
loss = fluid.layers.reduce_sum(
loss = paddle.sum(
fluid.layers.log_loss(
prediction,
to_variable(
......@@ -343,7 +341,7 @@ class TestDygraphDeepCF(unittest.TestCase):
to_variable(users_np[slice : slice + self.batch_size]),
to_variable(items_np[slice : slice + self.batch_size]),
)
loss2 = fluid.layers.reduce_sum(
loss2 = paddle.sum(
fluid.layers.log_loss(
prediction2,
to_variable(
......@@ -386,7 +384,7 @@ class TestDygraphDeepCF(unittest.TestCase):
items_np[slice : slice + self.batch_size]
),
)
loss = fluid.layers.reduce_sum(
loss = paddle.sum(
fluid.layers.log_loss(
prediction,
to_variable(
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import numpy as np
from test_imperative_base import new_program_scope
......@@ -46,7 +47,7 @@ class MLP(fluid.Layer):
def forward(self, inputs):
x = self._linear1(inputs)
x = self._linear2(x)
x = fluid.layers.reduce_sum(x)
x = paddle.sum(x)
return x
......
......@@ -96,7 +96,7 @@ class TestDygraphGNN(unittest.TestCase):
# In other example, it's nll with log_softmax. However, paddle's
# log_loss only supports binary classification now.
loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
adam = AdamOptimizer(learning_rate=1e-3)
adam.minimize(loss)
......@@ -136,7 +136,7 @@ class TestDygraphGNN(unittest.TestCase):
loss = fluid.layers.softmax_with_cross_entropy(
logits, to_variable(labels)
)
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
loss.backward()
adam = AdamOptimizer(
learning_rate=1e-3, parameter_list=model.parameters()
......@@ -164,7 +164,7 @@ class TestDygraphGNN(unittest.TestCase):
loss2 = fluid.layers.softmax_with_cross_entropy(
logits2, to_variable(labels2)
)
loss2 = fluid.layers.reduce_sum(loss2)
loss2 = paddle.sum(loss2)
loss2.backward()
adam2 = AdamOptimizer(
learning_rate=1e-3, parameter_list=model2.parameters()
......
......@@ -72,7 +72,7 @@ class SimpleNet(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss
......
......@@ -325,7 +325,7 @@ class SimpleAttention(fluid.dygraph.Layer):
scaled = fluid.layers.elementwise_mul(
x=encoder_vec, y=weights_reshape, axis=0
)
context = fluid.layers.reduce_sum(scaled, dim=1)
context = paddle.sum(scaled, axis=1)
return context
......@@ -498,7 +498,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
loss = fluid.layers.cross_entropy(
input=dy_prediction, label=label_out
)
avg_loss = fluid.layers.reduce_sum(loss)
avg_loss = paddle.sum(loss)
dy_out = avg_loss.numpy()
......@@ -576,7 +576,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
cost = fluid.layers.cross_entropy(
input=static_prediction, label=static_label_out
)
static_avg_loss = fluid.layers.reduce_sum(cost)
static_avg_loss = paddle.sum(cost)
# param_grad_list = fluid.backward.append_backward(static_avg_loss)
optimizer.minimize(static_avg_loss)
......
......@@ -16,6 +16,7 @@ import unittest
import numpy as np
from collections import OrderedDict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.dygraph.parallel import DataParallel
......@@ -34,7 +35,7 @@ class MyLayer(fluid.Layer):
def forward(self, inputs):
x = fluid.layers.relu(inputs)
x = fluid.layers.elementwise_mul(x, x)
x = fluid.layers.reduce_sum(x)
x = paddle.sum(x)
return [x]
......
......@@ -231,7 +231,7 @@ class PtbModel(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss, last_hidden, last_cell
......
......@@ -28,7 +28,7 @@ class RecurrentTest(fluid.Layer):
def forward(self, in1, in2):
out = fluid.layers.mul(in1, in2)
sum_out = fluid.layers.reduce_sum(out)
sum_out = paddle.sum(out)
return sum_out, out
......
......@@ -73,13 +73,13 @@ class TestImperativeMnist(unittest.TestCase):
loss_probs = fluid.layers.log(loss_probs)
loss_probs = fluid.layers.elementwise_mul(loss_probs, dy_mask)
loss_probs = fluid.layers.reduce_sum(loss_probs, dim=-1)
loss_probs = paddle.sum(loss_probs, axis=-1)
dy_reward = fluid.dygraph.base.to_variable(reward)
dy_reward.stop_gradient = True
loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs)
loss = fluid.layers.reduce_sum(loss_probs)
loss = paddle.sum(loss_probs)
sgd = SGDOptimizer(
learning_rate=1e-3, parameter_list=policy.parameters()
......@@ -141,12 +141,12 @@ class TestImperativeMnist(unittest.TestCase):
st_loss_probs = fluid.layers.log(st_loss_probs)
st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs, st_mask)
st_loss_probs = fluid.layers.reduce_sum(st_loss_probs, dim=-1)
st_loss_probs = paddle.sum(st_loss_probs, axis=-1)
st_loss_probs = fluid.layers.elementwise_mul(
st_reward, st_loss_probs
)
st_loss = fluid.layers.reduce_sum(st_loss_probs)
st_loss = paddle.sum(st_loss_probs)
st_sgd.minimize(st_loss)
......
......@@ -227,7 +227,7 @@ class PtbModel(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss, last_hidden, last_cell
......
......@@ -230,7 +230,7 @@ class PtbModel(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss, last_hidden, last_cell
......
......@@ -81,7 +81,7 @@ class SimpleNet(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss
......
......@@ -379,9 +379,7 @@ def loss_cls(cls, label, cfg):
cls_shape = cls.shape
cls = paddle.reshape(cls, [-1, cls_shape[1] * cls_shape[2] * cls_shape[3]])
return (
fluid.layers.reduce_sum(
fluid.layers.sigmoid_cross_entropy_with_logits(cls, label)
)
paddle.sum(fluid.layers.sigmoid_cross_entropy_with_logits(cls, label))
/ cfg.batch_size
)
......
......@@ -1102,8 +1102,8 @@ class TransFormer(Layer):
soft_label=True if self._label_smooth_eps else False,
)
weighted_cost = cost * weights
sum_cost = fluid.layers.reduce_sum(weighted_cost)
token_num = fluid.layers.reduce_sum(weights)
sum_cost = paddle.sum(weighted_cost)
token_num = paddle.sum(weights)
token_num.stop_gradient = True
avg_cost = sum_cost / token_num
return sum_cost, avg_cost, predict, token_num
......
......@@ -73,9 +73,8 @@ class TestInplaceANBOpTraining(unittest.TestCase):
# may have same name, multiply 1. to generate
# a new Variable for fetch
bn = bn * 1.0
sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid)
out = paddle.sum(sigmoid)
if not only_forward:
sgd_opt = fluid.optimizer.SGD(learning_rate=0.0)
sgd_opt.backward(out)
......
......@@ -69,7 +69,7 @@ def create_program(data_format="NCHW"):
)
y = conv(x) + x
loss = fluid.layers.reduce_sum(y)
loss = paddle.sum(y)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
......
......@@ -213,7 +213,7 @@ class TestLookupTableIsSparse(unittest.TestCase):
),
is_sparse=is_sparse,
)
y = fluid.layers.reduce_sum(emb, dim=-1)
y = paddle.sum(emb, axis=-1)
loss = fluid.layers.square_error_cost(input=y, label=y_)
loss = paddle.mean(loss)
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
......@@ -50,7 +51,7 @@ class TestMultiheadAttention(unittest.TestCase):
num_heads=8,
dropout_rate=0.0,
)
out = fluid.layers.reduce_sum(contexts, dim=None)
out = paddle.sum(contexts, axis=None)
fluid.backward.append_backward(loss=out)
self.fetch_list = [contexts]
......
......@@ -98,7 +98,7 @@ class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase):
x = layers.data('x', shape, False, dtype)
x.persistable = True
y = layers.reduce_sum(x, dim=0)
y = paddle.sum(x, axis=0)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check(
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
......@@ -37,7 +38,7 @@ class TestNormalization(unittest.TestCase):
)
data.stop_gradient = False
l2_norm = fluid.layers.l2_normalize(x=data, axis=axis, epsilon=epsilon)
out = fluid.layers.reduce_sum(l2_norm, dim=None)
out = paddle.sum(l2_norm, axis=None)
fluid.backward.append_backward(loss=out)
self.fetch_list = [l2_norm]
......
......@@ -914,10 +914,10 @@ class TestReduceSumOpError(unittest.TestCase):
x1 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.CPUPlace()
)
self.assertRaises(TypeError, fluid.layers.reduce_sum, x1)
self.assertRaises(TypeError, paddle.sum, x1)
# The input dtype of reduce_sum_op must be float32 or float64 or int32 or int64.
x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8")
self.assertRaises(TypeError, fluid.layers.reduce_sum, x2)
self.assertRaises(TypeError, paddle.sum, x2)
class API_TestSumOp(unittest.TestCase):
......
......@@ -226,7 +226,7 @@ class TestRegularizer(unittest.TestCase):
para_sum = []
for para in param_list:
para_mul = paddle.square(x=para)
para_sum.append(fluid.layers.reduce_sum(input=para_mul))
para_sum.append(paddle.sum(para_mul))
avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5
optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
......@@ -261,7 +261,7 @@ class TestRegularizer(unittest.TestCase):
with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.uniform_random([2, 2, 3])
out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
loss = fluid.layers.reduce_sum(out)
loss = paddle.sum(out)
sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
sgd.minimize(loss)
with fluid.dygraph.guard():
......
......@@ -134,7 +134,7 @@ class TestRegularizer(unittest.TestCase):
para_sum = []
for para in param_list:
para_mul = paddle.square(x=para)
para_sum.append(fluid.layers.reduce_sum(input=para_mul))
para_sum.append(paddle.sum(para_mul))
avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5
optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
......@@ -171,7 +171,7 @@ class TestRegularizer(unittest.TestCase):
with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.uniform_random([2, 2, 3])
out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
loss = fluid.layers.reduce_sum(out)
loss = paddle.sum(out)
sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
sgd.minimize(loss)
with fluid.dygraph.guard():
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.layers.control_flow import lod_rank_table
......@@ -51,7 +52,7 @@ class TestReorderLoDTensor(unittest.TestCase):
new_dat = fluid.layers.reorder_lod_tensor_by_rank(
x=dat, rank_table=table
)
loss = fluid.layers.reduce_sum(new_dat)
loss = paddle.sum(new_dat)
fluid.backward.append_backward(loss=loss)
cls.fetch_list = [new_dat, cls.data_desc[0][0] + '@GRAD']
......
......@@ -148,7 +148,7 @@ class TestReverseLoDTensorArray(unittest.TestCase):
reverse_array = fluid.layers.reverse(tensor_array, axis=axis)
output, _ = fluid.layers.tensor_array_to_tensor(reverse_array)
loss = fluid.layers.reduce_sum(output)
loss = paddle.sum(output)
fluid.backward.append_backward(loss)
input_grads = list(
map(
......
......@@ -644,7 +644,7 @@ def def_seq2seq_model(
)
loss = loss * tar_mask
loss = layers.reduce_mean(loss, dim=[0])
loss = layers.reduce_sum(loss)
loss = paddle.sum(loss)
# optimizer
optimizer = fluid.optimizer.Adam(0.001)
......
......@@ -318,7 +318,7 @@ class PolicyGradient:
neg_log_prob = layers.cross_entropy(act_prob, action)
cost = neg_log_prob * reward
cost = (
(layers.reduce_sum(cost) / layers.reduce_sum(length))
(paddle.sum(cost) / paddle.sum(length))
if length is not None
else layers.reduce_mean(cost)
)
......@@ -407,7 +407,7 @@ class MLE:
mask = layers.sequence_mask(length, maxlen=max_seq_len, dtype="float32")
loss = loss * mask
loss = layers.reduce_mean(loss, dim=[0])
loss = layers.reduce_sum(loss)
loss = paddle.sum(loss)
optimizer = fluid.optimizer.Adam(self.lr)
optimizer.minimize(loss)
return loss
......
......@@ -417,7 +417,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest):
self.op_type = "run_program"
self.dtype = np.float32
self.input_names = {'X': ['x'], 'Params': ['emb_weight']}
self.output_names = {'Out': ['reduce_sum_0.tmp_0']}
self.output_names = {'Out': ['sum_0.tmp_0']}
self.inputs = {
'X': {'x': np.array([[1, 3, 0, 4, 7]]).astype("int64")},
......@@ -456,7 +456,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest):
),
is_sparse=True,
)
y = fluid.layers.reduce_sum(emb, dim=-1)
y = paddle.sum(emb, axis=-1)
# 2. get forward op num
fwd_op_num = fluid.default_main_program().global_block().desc.op_size()
# 3. append backward
......
......@@ -1436,7 +1436,7 @@ class TestGradientTruncated(unittest.TestCase):
# set_value_grad_op will not be run during backward.
y, value = op(x)
y2 = y + 1
loss = paddle.fluid.layers.reduce_sum(y2)
loss = paddle.sum(y2)
sgd = paddle.optimizer.Adam()
sgd.minimize(loss)
place = (
......
......@@ -716,7 +716,7 @@ class TestSliceApiWithLoDTensorArray(unittest.TestCase):
slice_arr, axis=self.axis, use_stack=True
)
loss = fluid.layers.reduce_sum(output)
loss = paddle.sum(output)
fluid.backward.append_backward(loss)
g_vars = list(
map(
......
......@@ -241,7 +241,7 @@ class PtbModel(fluid.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss = paddle.sum(loss)
return loss, last_hidden, last_cell
......
......@@ -95,7 +95,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
else:
bn = fluid.layers.cast(bn, 'float64')
sigmoid = paddle.nn.functional.sigmoid(bn)
out = fluid.layers.reduce_sum(sigmoid)
out = paddle.sum(sigmoid)
if not sync_bn:
out = out / core.get_cuda_device_count()
if not only_forward:
......
......@@ -14,6 +14,7 @@
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid import Program, program_guard
......@@ -196,7 +197,7 @@ class TestLoDTensorArrayStack(unittest.TestCase):
output, output_index = fluid.layers.tensor_array_to_tensor(
input=array, **self.attrs
)
loss = fluid.layers.reduce_sum(output)
loss = paddle.sum(output)
fluid.backward.append_backward(loss)
self.output_vars = [output, output_index]
......
......@@ -15,6 +15,7 @@
import unittest
import numpy as np
import collections
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.initializer import ConstantInitializer
......@@ -46,7 +47,7 @@ class TestWeightNormalization(unittest.TestCase):
bias_attr=False,
act=None,
)
loss = fluid.layers.reduce_sum(out)
loss = paddle.sum(out)
fluid.backward.append_backward(loss=loss)
cls.fetch_list = [
'weight_norm_param_g',
......
......@@ -158,7 +158,7 @@ def multi_head_attention(
def __softmax(x, eps=1e-9):
exp_out = paddle.exp(x=x)
sum_out = layers.reduce_sum(exp_out, dim=-1, keep_dim=False)
sum_out = paddle.sum(exp_out, axis=-1, keepdim=False)
return layers.elementwise_div(x=exp_out, y=sum_out, axis=0)
scaled_q = paddle.scale(x=q, scale=d_model**-0.5)
......@@ -595,4 +595,4 @@ def transformer(
cost = layers.cross_entropy(input=predict, label=gold)
weighted_cost = cost * weights
return layers.reduce_sum(weighted_cost)
return paddle.sum(weighted_cost)
......@@ -143,21 +143,21 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
global_norm_var = []
if len(sum_square_list_fp16) > 0:
global_norm_var_fp16 = layers.concat(sum_square_list_fp16)
global_norm_var_fp16 = layers.reduce_sum(global_norm_var_fp16)
global_norm_var_fp16 = paddle.sum(global_norm_var_fp16)
global_norm_var.append(global_norm_var_fp16.astype(sum_dtype))
if len(sum_square_list_fp32) > 0:
global_norm_var_fp32 = layers.concat(sum_square_list_fp32)
global_norm_var_fp32 = layers.reduce_sum(global_norm_var_fp32)
global_norm_var_fp32 = paddle.sum(global_norm_var_fp32)
if sum_dtype == 'float32':
global_norm_var.append(global_norm_var_fp32)
else:
global_norm_var.append(global_norm_var_fp32.astype(sum_dtype))
if len(sum_square_list) > 0:
global_norm_var_fp64 = layers.concat(sum_square_list)
global_norm_var_fp64 = layers.reduce_sum(global_norm_var_fp64)
global_norm_var_fp64 = paddle.sum(global_norm_var_fp64)
global_norm_var.append(global_norm_var_fp64)
global_norm_var = layers.concat(global_norm_var)
global_norm_var = layers.reduce_sum(global_norm_var)
global_norm_var = paddle.sum(global_norm_var)
return global_norm_var, sum_dtype
@imperative_base.no_grad
......
......@@ -593,7 +593,8 @@ class MSELoss(Layer):
reduce_op = 'reduce_mean'
if self.reduction == 'sum':
reduce_op = 'reduce_sum'
square_out = paddle.sum(square_out)
return square_out
return getattr(fluid.layers, reduce_op)(square_out)
......
......@@ -163,7 +163,7 @@ def dynamic_train(model, dataloader):
for inputs, labels in dataloader:
outputs = model(inputs)
loss = CrossEntropyLoss(reduction="sum")(outputs, labels)
avg_loss = fluid.layers.reduce_sum(loss)
avg_loss = paddle.sum(loss)
avg_loss.backward()
optim.minimize(avg_loss)
model.clear_gradients()
......@@ -510,7 +510,7 @@ class TestModelFunction(unittest.TestCase):
m.train()
output = m(to_tensor(data))
loss = CrossEntropyLoss(reduction='sum')(output, to_tensor(label))
avg_loss = fluid.layers.reduce_sum(loss)
avg_loss = paddle.sum(loss)
avg_loss.backward()
optim.minimize(avg_loss)
m.clear_gradients()
......