Unverified · Commit 048e0c55 · Author: HongyuJia · Committer: GitHub

clean elem_arithmetic not test.py (#48460)

Parent 41f15537
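The hunks below swap Paddle's legacy fluid.layers.elementwise_* wrappers for the public paddle.* tensor ops. For reference, a minimal sketch (not part of this commit) of the one-to-one mapping, assuming a Paddle 2.x install in dygraph mode; the legacy wrappers themselves may no longer be importable in newer releases:

# Renamed element-wise ops used throughout this commit (illustrative values only).
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0])
y = paddle.to_tensor([4.0, 5.0, 6.0])

print(paddle.add(x, y))       # replaces fluid.layers.elementwise_add
print(paddle.subtract(x, y))  # replaces fluid.layers.elementwise_sub
print(paddle.multiply(x, y))  # replaces fluid.layers.elementwise_mul
print(paddle.divide(x, y))    # replaces fluid.layers.elementwise_div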
......@@ -138,7 +138,7 @@ class GroupShardedClipGrad:
shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm
)
clip_var = layers.elementwise_div(
clip_var = paddle.divide(
x=max_global_norm,
y=paddle.maximum(x=global_norm_var, y=max_global_norm),
)
......
......@@ -135,7 +135,7 @@ class ShardingClipGrad:
shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm
)
clip_var = layers.elementwise_div(
clip_var = paddle.divide(
x=max_global_norm,
y=paddle.maximum(x=global_norm_var, y=max_global_norm),
)
......
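Both sharding clip-grad hunks above compute the same clip coefficient: max_global_norm divided by the larger of the global norm and the limit, so the coefficient is 1.0 whenever the gradients are already within the norm. A minimal sketch with illustrative values, assuming Paddle 2.x dygraph:

import paddle

global_norm_var = paddle.to_tensor([7.5])
max_global_norm = paddle.full(shape=[1], fill_value=5.0, dtype=global_norm_var.dtype)
clip_var = paddle.divide(
    x=max_global_norm,
    y=paddle.maximum(x=global_norm_var, y=max_global_norm),
)
print(clip_var)  # [0.6666667] -> every gradient gets scaled by 2/3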
......@@ -22,9 +22,6 @@ from paddle.distribution import distribution
from paddle.fluid.data_feeder import check_type, convert_dtype
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.layers import (
elementwise_add,
elementwise_div,
elementwise_sub,
nn,
tensor,
)
......@@ -191,14 +188,14 @@ class Normal(distribution.Distribution):
zero_tmp_shape, mean=0.0, std=1.0, seed=seed, dtype=self.dtype
)
output = normal_random_tmp * (zero_tmp_reshape + self.scale)
output = elementwise_add(output, self.loc, name=name)
output = paddle.add(output, self.loc, name=name)
return output
else:
output_shape = shape + batch_shape
output = nn.gaussian_random(
output_shape, mean=0.0, std=1.0, seed=seed, dtype=self.dtype
) * (tensor.zeros(output_shape, dtype=self.dtype) + self.scale)
output = elementwise_add(output, self.loc, name=name)
output = paddle.add(output, self.loc, name=name)
if self.all_arg_is_float:
return paddle.reshape(output, shape, name=name)
else:
......@@ -243,7 +240,7 @@ class Normal(distribution.Distribution):
zero_tmp = tensor.fill_constant_batch_size_like(
self.loc + self.scale, batch_shape, self.dtype, 0.0
)
return elementwise_add(
return paddle.add(
0.5 + zero_tmp,
0.5 * math.log(2 * math.pi) + nn.log((self.scale + zero_tmp)),
name=name,
......@@ -264,7 +261,7 @@ class Normal(distribution.Distribution):
var = self.scale * self.scale
log_scale = nn.log(self.scale)
return elementwise_sub(
return paddle.subtract(
-1.0 * ((value - self.loc) * (value - self.loc)) / (2.0 * var),
log_scale + math.log(math.sqrt(2.0 * math.pi)),
name=name,
......@@ -284,7 +281,7 @@ class Normal(distribution.Distribution):
value = self._check_values_dtype_in_probs(self.loc, value)
var = self.scale * self.scale
return elementwise_div(
return paddle.divide(
paddle.exp(
-1.0 * ((value - self.loc) * (value - self.loc)) / (2.0 * var)
),
......@@ -333,6 +330,6 @@ class Normal(distribution.Distribution):
var_ratio = var_ratio * var_ratio
t1 = (self.loc - other.loc) / other.scale
t1 = t1 * t1
return elementwise_add(
return paddle.add(
0.5 * var_ratio, 0.5 * (t1 - 1.0 - nn.log(var_ratio)), name=name
)
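The Normal hunks above rewrite the sampling, entropy, log_prob, probs, and kl_divergence arithmetic with paddle.add, paddle.subtract, and paddle.divide. A minimal sketch of the rewritten log_prob expression with scalar illustrative values, checked against the closed form -(x - mu)^2 / (2 * sigma^2) - log(sigma) - 0.5 * log(2 * pi):

import math
import paddle

loc, scale, value = 0.0, 2.0, 1.0
var = scale * scale
log_prob = paddle.subtract(
    paddle.to_tensor(-1.0 * ((value - loc) * (value - loc)) / (2.0 * var)),
    paddle.to_tensor(math.log(scale) + math.log(math.sqrt(2.0 * math.pi))),
)
print(float(log_prob))  # ~ -1.7371, the log-density of N(0, 2) at x = 1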
......@@ -24,9 +24,6 @@ from paddle.fluid.framework import (
in_dygraph_mode,
)
from paddle.fluid.layers import (
elementwise_add,
elementwise_div,
elementwise_sub,
nn,
tensor,
)
......@@ -184,7 +181,7 @@ class Uniform(distribution.Distribution):
output = uniform_random_tmp_reshape * (
zero_tmp_reshape + self.high - self.low
)
output = elementwise_add(output, self.low, name=name)
output = paddle.add(output, self.low, name=name)
return output
else:
output_shape = shape + batch_shape
......@@ -194,7 +191,7 @@ class Uniform(distribution.Distribution):
tensor.zeros(output_shape, dtype=self.dtype)
+ (self.high - self.low)
)
output = elementwise_add(output, self.low, name=name)
output = paddle.add(output, self.low, name=name)
if self.all_arg_is_float:
return paddle.reshape(output, shape, name=name)
else:
......@@ -235,7 +232,7 @@ class Uniform(distribution.Distribution):
ub_bool = value < self.high
lb = tensor.cast(lb_bool, dtype=value.dtype)
ub = tensor.cast(ub_bool, dtype=value.dtype)
return elementwise_sub(
return paddle.subtract(
nn.log(lb * ub), nn.log(self.high - self.low), name=name
)
......@@ -273,7 +270,7 @@ class Uniform(distribution.Distribution):
ub_bool = value < self.high
lb = tensor.cast(lb_bool, dtype=value.dtype)
ub = tensor.cast(ub_bool, dtype=value.dtype)
return elementwise_div((lb * ub), (self.high - self.low), name=name)
return paddle.divide((lb * ub), (self.high - self.low), name=name)
def entropy(self):
r"""Shannon entropy in nats.
......
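The Uniform hunks above build the density from two 0/1 masks: probs = (lb * ub) / (high - low), which is 1 / (high - low) inside the support and 0 outside. A minimal sketch with illustrative values, assuming Paddle 2.x dygraph:

import paddle

low, high = paddle.to_tensor(0.0), paddle.to_tensor(4.0)
value = paddle.to_tensor([-1.0, 1.0, 3.0, 5.0])
lb = paddle.cast(value > low, dtype=value.dtype)   # inside lower bound
ub = paddle.cast(value < high, dtype=value.dtype)  # inside upper bound
probs = paddle.divide(lb * ub, high - low)
print(probs)  # [0., 0.25, 0.25, 0.]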
......@@ -548,16 +548,14 @@ class ClipGradByGlobalNorm(ClipGradBase):
need_clip = False
if not self.auto_skip_clip: # always apply clip
need_clip = True
clip_var = layers.elementwise_div(
clip_var = paddle.divide(
x=max_global_norm,
y=paddle.maximum(x=global_norm_var, y=max_global_norm),
)
elif global_norm_var > max_global_norm:
# only when global_norm_var > max_global_norm, grad need clip
need_clip = True
clip_var = layers.elementwise_div(
x=max_global_norm, y=global_norm_var
)
clip_var = paddle.divide(x=max_global_norm, y=global_norm_var)
for p, g in params_grads:
if g is None:
......@@ -572,7 +570,7 @@ class ClipGradByGlobalNorm(ClipGradBase):
if clip_var.dtype != g.dtype
else clip_var
)
new_grad = layers.elementwise_mul(g, clip_input)
new_grad = paddle.multiply(g, clip_input)
params_and_grads.append((p, new_grad))
else:
params_and_grads.append((p, g))
......@@ -652,7 +650,7 @@ class ClipGradByGlobalNorm(ClipGradBase):
max_global_norm = layers.fill_constant(
shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm
)
scale_var = layers.elementwise_div(
scale_var = paddle.divide(
x=max_global_norm,
y=paddle.maximum(x=max_global_norm, y=global_norm_var),
)
......@@ -729,7 +727,7 @@ class ClipGradByGlobalNorm(ClipGradBase):
group_norm_var = layers.sums(input=self.context[self.group_name])
group_norm_var = paddle.sqrt(x=group_norm_var)
clip_var = self.context[self.group_name + "_clip"]
group_scale_var = layers.elementwise_div(
group_scale_var = paddle.divide(
x=clip_var,
y=paddle.maximum(x=clip_var, y=group_norm_var),
)
......
......@@ -95,9 +95,7 @@ class DecoupledWeightDecay:
with param.block.program._optimized_guard(
[param, grad]
), framework.name_scope('weight decay'):
updated_param = paddle.fluid.layers.elementwise_sub(
x=param, y=scaled_param
)
updated_param = paddle.subtract(x=param, y=scaled_param)
paddle.fluid.layers.assign(input=updated_param, output=param)
optimize_ops = self.apply_optimize(
......
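The DecoupledWeightDecay hunk above applies the decayed weights with paddle.subtract. How scaled_param is produced is outside this hunk; the sketch below simply assumes it is coeff * param, for illustration only:

import paddle

coeff = 0.01
param = paddle.to_tensor([1.0, -2.0, 3.0])
scaled_param = param * coeff                        # assumed decay term
updated_param = paddle.subtract(x=param, y=scaled_param)
print(updated_param)  # [0.99, -1.98, 2.97]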
......@@ -153,7 +153,7 @@ class BasicGRUUnit(Layer):
gate_input = layers.matmul(x=concat_input_hidden, y=self._gate_weight)
gate_input = layers.elementwise_add(gate_input, self._gate_bias)
gate_input = paddle.add(gate_input, self._gate_bias)
gate_input = self._gate_activation(gate_input)
r, u = layers.split(gate_input, num_or_sections=2, dim=1)
......@@ -163,7 +163,7 @@ class BasicGRUUnit(Layer):
candidate = layers.matmul(
layers.concat([input, r_hidden], 1), self._candidate_weight
)
candidate = layers.elementwise_add(candidate, self._candidate_bias)
candidate = paddle.add(candidate, self._candidate_bias)
c = self._activation(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
......@@ -876,18 +876,14 @@ class BasicLSTMUnit(Layer):
concat_input_hidden = layers.concat([input, pre_hidden], 1)
gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
gate_input = layers.elementwise_add(gate_input, self._bias)
gate_input = paddle.add(gate_input, self._bias)
i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
new_cell = layers.elementwise_add(
    layers.elementwise_mul(
        pre_cell,
        paddle.nn.functional.sigmoid(
            layers.elementwise_add(f, self._forget_bias)
        ),
    ),
    layers.elementwise_mul(
        paddle.nn.functional.sigmoid(i), paddle.tanh(j)
    ),
)
new_cell = paddle.add(
    paddle.multiply(
        pre_cell,
        paddle.nn.functional.sigmoid(paddle.add(f, self._forget_bias)),
    ),
    paddle.multiply(paddle.nn.functional.sigmoid(i), paddle.tanh(j)),
)
new_hidden = paddle.tanh(new_cell) * paddle.nn.functional.sigmoid(o)
......
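The BasicLSTMUnit hunk above is the standard LSTM cell-state update, new_cell = sigmoid(f + forget_bias) * pre_cell + sigmoid(i) * tanh(j), now spelled with paddle.add and paddle.multiply. A minimal sketch with toy gate pre-activations (illustrative values only):

import paddle
import paddle.nn.functional as F

forget_bias = paddle.to_tensor(1.0)
pre_cell = paddle.to_tensor([[0.5, -0.5]])
i, j, f, o = (paddle.to_tensor([[0.1, 0.2]]) for _ in range(4))
new_cell = paddle.add(
    paddle.multiply(pre_cell, F.sigmoid(paddle.add(f, forget_bias))),
    paddle.multiply(F.sigmoid(i), paddle.tanh(j)),
)
new_hidden = paddle.tanh(new_cell) * F.sigmoid(o)
print(new_cell.numpy(), new_hidden.numpy())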
......@@ -18,7 +18,6 @@ from ..layers import (
concat,
fill_constant,
matmul,
elementwise_add,
elementwise_mul,
split,
)
......@@ -217,23 +216,23 @@ class LSTMCell(Layer):
if self._use_cudnn_impl:
igates = matmul(input, y=self._weight_ih, transpose_y=True)
igates = elementwise_add(igates, self._bias_ih)
igates = paddle.add(igates, self._bias_ih)
hgates = matmul(pre_hidden, self._weight_hh, transpose_y=True)
hgates = elementwise_add(hgates, self._bias_hh)
hgates = paddle.add(hgates, self._bias_hh)
chunked_igates = split(igates, num_or_sections=4, dim=1)
chunked_hgates = split(hgates, num_or_sections=4, dim=1)
ingate = elementwise_add(chunked_igates[0], chunked_hgates[0])
ingate = paddle.add(chunked_igates[0], chunked_hgates[0])
ingate = self._gate_activation(ingate)
forgetgate = elementwise_add(chunked_igates[1], chunked_hgates[1])
forgetgate = paddle.add(chunked_igates[1], chunked_hgates[1])
forgetgate = self._gate_activation(forgetgate)
cellgate = elementwise_add(chunked_igates[2], chunked_hgates[2])
cellgate = paddle.add(chunked_igates[2], chunked_hgates[2])
cellgate = self._activation(cellgate)
outgate = elementwise_add(chunked_igates[3], chunked_hgates[3])
outgate = paddle.add(chunked_igates[3], chunked_hgates[3])
outgate = self._gate_activation(outgate)
new_cell = (forgetgate * pre_cell) + (ingate * cellgate)
......@@ -244,16 +243,14 @@ class LSTMCell(Layer):
concat_input_hidden = concat([input, pre_hidden], 1)
gate_input = matmul(x=concat_input_hidden, y=self._weight)
gate_input = elementwise_add(gate_input, self._bias)
gate_input = paddle.add(gate_input, self._bias)
i, j, f, o = split(gate_input, num_or_sections=4, dim=-1)
new_cell = elementwise_add(
    elementwise_mul(
        pre_cell,
        self._gate_activation(
            elementwise_add(f, self._forget_bias)
        ),
    ),
    elementwise_mul(
        paddle.nn.functional.sigmoid(i), paddle.tanh(j)
    ),
)
new_cell = paddle.add(
    paddle.multiply(
        pre_cell,
        self._gate_activation(paddle.add(f, self._forget_bias)),
    ),
    paddle.multiply(
        paddle.nn.functional.sigmoid(i), paddle.tanh(j)
    ),
)
......@@ -466,21 +463,21 @@ class GRUCell(Layer):
if self._use_cudnn_impl:
igates = matmul(input, y=self._weight_ih, transpose_y=True)
igates = elementwise_add(igates, self._bias_ih)
igates = paddle.add(igates, self._bias_ih)
hgates = matmul(pre_hidden, self._weight_hh, transpose_y=True)
hgates = elementwise_add(hgates, self._bias_hh)
hgates = paddle.add(hgates, self._bias_hh)
chunked_igates = split(igates, num_or_sections=3, dim=1)
chunked_hgates = split(hgates, num_or_sections=3, dim=1)
reset_gate = elementwise_add(chunked_igates[0], chunked_hgates[0])
reset_gate = paddle.add(chunked_igates[0], chunked_hgates[0])
reset_gate = self._gate_activation(reset_gate)
input_gate = elementwise_add(chunked_igates[1], chunked_hgates[1])
input_gate = paddle.add(chunked_igates[1], chunked_hgates[1])
input_gate = self._gate_activation(input_gate)
_temp = reset_gate * chunked_hgates[2]
new_gate = elementwise_add(chunked_igates[2], _temp)
new_gate = paddle.add(chunked_igates[2], _temp)
new_gate = self._activation(new_gate)
new_hidden = (pre_hidden - new_gate) * input_gate + new_gate
......@@ -491,7 +488,7 @@ class GRUCell(Layer):
gate_input = matmul(x=concat_input_hidden, y=self._gate_weight)
gate_input = elementwise_add(gate_input, self._gate_bias)
gate_input = paddle.add(gate_input, self._gate_bias)
gate_input = self._gate_activation(gate_input)
r, u = split(gate_input, num_or_sections=2, dim=1)
......@@ -500,7 +497,7 @@ class GRUCell(Layer):
candidate = matmul(
concat([input, r_hidden], 1), self._candidate_weight
)
candidate = elementwise_add(candidate, self._candidate_bias)
candidate = paddle.add(candidate, self._candidate_bias)
c = self._activation(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
......
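The GRUCell hunks above keep the usual gate interpolation, new_hidden = u * pre_hidden + (1 - u) * c; only the op spelling changes. A minimal sketch with illustrative values:

import paddle
import paddle.nn.functional as F

pre_hidden = paddle.to_tensor([[0.2, -0.4]])
u = F.sigmoid(paddle.to_tensor([[1.0, -1.0]]))    # update gate
c = paddle.tanh(paddle.to_tensor([[0.3, 0.7]]))   # candidate state
new_hidden = u * pre_hidden + (1 - u) * c
print(new_hidden.numpy())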
......@@ -115,7 +115,7 @@ class LayerHelperBase:
)
def _create_weight_normalize(self, attr, shape, dtype):
from .layers import elementwise_mul, elementwise_div
from .layers import elementwise_mul
# Remove these ops when LayerHelper and layers support indicating
# program and block.
......@@ -266,7 +266,7 @@ class LayerHelperBase:
norm = __norm_except_dim(
v, dim=dim, block=self.main_program.current_block()
)
scale = elementwise_div(
scale = paddle.divide(
x=g, y=norm
) # The shapes of g and norm are the same.
# Currently, elementwise_mul only support broadcast when the shape
......
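The weight-normalization hunk above divides the learnable norm g by the norm of the direction v, so the reparameterized weight scale * v has norm g (the shapes of g and norm match, as the original comment notes). A minimal sketch with illustrative values:

import paddle

v = paddle.to_tensor([3.0, 4.0])
g = paddle.to_tensor([2.0])
norm = paddle.sqrt(paddle.sum(v * v))      # ||v|| = 5.0
scale = paddle.divide(x=g, y=norm)         # 0.4
w = scale * v
print(paddle.sqrt(paddle.sum(w * w)))      # 2.0, i.e. the learned norm g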
......@@ -1125,10 +1125,9 @@ class BeamSearchDecoder(Decoder):
)
# TODO: use where_op
finished = tensor.cast(finished, dtype=probs.dtype)
probs = nn.elementwise_mul(
probs = paddle.multiply(
paddle.tile(nn.unsqueeze(finished, [2]), [1, 1, self.vocab_size]),
self.noend_mask_tensor,
axis=-1,
) - nn.elementwise_mul(probs, (finished - 1), axis=0)
return probs
......@@ -1503,7 +1502,7 @@ def _dynamic_decode_imperative(
# To confirm states.finished/finished be consistent with
# next_finished.
tensor.assign(next_finished, finished)
next_sequence_lengths = nn.elementwise_add(
next_sequence_lengths = paddle.add(
sequence_lengths,
tensor.cast(
paddle.logical_not(finished), sequence_lengths.dtype
......@@ -1663,7 +1662,7 @@ def _dynamic_decode_declarative(
# Otherwise, perform logical OR which would not change the already
# finished.
next_finished = paddle.logical_or(next_finished, global_finished)
next_sequence_lengths = nn.elementwise_add(
next_sequence_lengths = paddle.add(
sequence_lengths,
tensor.cast(
paddle.logical_not(global_finished),
......
......@@ -390,7 +390,7 @@ def glu(input, dim=-1):
)
a, b = layers.split(input, num_or_sections=2, dim=dim)
act_b = paddle.nn.functional.sigmoid(x=b)
out = layers.elementwise_mul(x=a, y=act_b)
out = paddle.multiply(x=a, y=act_b)
return out
......
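The glu hunk above gates one half of the input with the sigmoid of the other half: out = a * sigmoid(b). A minimal sketch with illustrative values, assuming Paddle 2.x dygraph:

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([[1.0, 2.0, 3.0, 4.0]])
a, b = paddle.split(x, num_or_sections=2, axis=-1)
out = paddle.multiply(x=a, y=F.sigmoid(b))
print(out.numpy())  # [[0.9526, 1.9640]]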
......@@ -7298,10 +7298,10 @@ class LookaheadOptimizer:
for param_name in params:
fast_var = main_block.var(param_name)
slow_var = param_to_slow[param_name]
tmp_var = layers.elementwise_add(
layers.elementwise_mul(fast_var, alpha),
layers.elementwise_mul(
slow_var, layers.elementwise_sub(one_var, alpha)
tmp_var = paddle.add(
paddle.multiply(fast_var, alpha),
paddle.multiply(
slow_var, paddle.subtract(one_var, alpha)
),
)
layers.assign(input=tmp_var, output=slow_var)
......
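The LookaheadOptimizer hunk above interpolates the slow weights toward the fast weights, slow <- alpha * fast + (1 - alpha) * slow, before copying them back. A minimal sketch with illustrative values:

import paddle

alpha = paddle.to_tensor(0.5)
one_var = paddle.to_tensor(1.0)
fast_var = paddle.to_tensor([2.0, 4.0])
slow_var = paddle.to_tensor([1.0, 1.0])
tmp_var = paddle.add(
    paddle.multiply(fast_var, alpha),
    paddle.multiply(slow_var, paddle.subtract(one_var, alpha)),
)
print(tmp_var.numpy())  # [1.5, 2.5]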
......@@ -212,7 +212,7 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
max_global_norm = layers.fill_constant(
shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm
)
clip_var = layers.elementwise_div(
clip_var = paddle.divide(
x=max_global_norm,
y=paddle.maximum(x=global_norm_var, y=max_global_norm),
)
......@@ -228,7 +228,7 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
if g.dtype == core.VarDesc.VarType.FP16
else clip_var
)
new_grad = layers.elementwise_mul(x=g, y=clip_input)
new_grad = paddle.multiply(x=g, y=clip_input)
params_and_grads.append((p, new_grad))
return params_and_grads
......