Unverified commit 50f0acc0 authored by zhouweiwei2014, committed by GitHub

[Zero-Dim] update 0d tensor api en doc, test=document_fix (#53823)

Parent 640cff0a
...@@ -393,7 +393,7 @@ def get_data_parallel_group(dist_ctx, op, act_grad_names, rank): ...@@ -393,7 +393,7 @@ def get_data_parallel_group(dist_ctx, op, act_grad_names, rank):
for var_name in act_grad_names: for var_name in act_grad_names:
var_dim_mapping = op_dist_attr.get_input_dims_mapping(var_name) var_dim_mapping = op_dist_attr.get_input_dims_mapping(var_name)
# consider that the variable's shape is [], which is 0D # consider that the variable's shape is [], which is 0-D
# TODO utilize the batch_dim attr instead of "0" in future # TODO utilize the batch_dim attr instead of "0" in future
batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1 batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
......
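For context on the hunk above: a 0-D variable has shape [] and an empty dims_mapping, so there is no batch axis to read and the code falls back to -1. A plain-Python sketch of that fallback (the name mirrors the diff and is not a Paddle API):

    def batch_size_axis(var_dim_mapping):
        # var_dim_mapping plays the role of op_dist_attr.get_input_dims_mapping(var_name)
        return var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1

    batch_size_axis([0, -1])  # 0  -> first axis is the batch axis
    batch_size_axis([])       # -1 -> 0-D variable, no batch axis at all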
...@@ -108,7 +108,7 @@ def _all_gather_in_static_mode(tensor_list, tensor, group, sync_op): ...@@ -108,7 +108,7 @@ def _all_gather_in_static_mode(tensor_list, tensor, group, sync_op):
}, },
) )
tensor_list.clear() tensor_list.clear()
# 0D use stack/unstack while others use concat/split # 0-D use stack/unstack while others use concat/split
if len(tensor.shape) == 0: if len(tensor.shape) == 0:
tensor_list.extend(paddle.unstack(out, 0)) tensor_list.extend(paddle.unstack(out, 0))
else: else:
......
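The same stack/unstack dispatch appears in _all_to_all and _scatter below; the reason is that concat/split need an existing axis to operate on, which a 0-D tensor (shape []) does not have. A small standalone illustration, not part of this change:

    import paddle

    scalars = [paddle.to_tensor(1.0), paddle.to_tensor(2.0)]  # two 0-D tensors
    packed = paddle.stack(scalars, axis=0)                    # shape [2]
    unpacked = paddle.unstack(packed, axis=0)                 # back to two 0-D tensors
    print(packed.shape, [t.shape for t in unpacked])          # [2] [[], []]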
...@@ -78,7 +78,7 @@ def _all_to_all_in_static_mode( ...@@ -78,7 +78,7 @@ def _all_to_all_in_static_mode(
if isinstance(in_tensor_or_tensor_list, list): if isinstance(in_tensor_or_tensor_list, list):
if len(in_tensor_or_tensor_list) == 0: if len(in_tensor_or_tensor_list) == 0:
raise RuntimeError("The input tensor_list should not be empty.") raise RuntimeError("The input tensor_list should not be empty.")
# 0D use stack/unstack while others use concat/split # 0-D use stack/unstack while others use concat/split
if len(in_tensor_or_tensor_list[0].shape) == 0: if len(in_tensor_or_tensor_list[0].shape) == 0:
in_tensor = paddle.stack(in_tensor_or_tensor_list, axis=0) in_tensor = paddle.stack(in_tensor_or_tensor_list, axis=0)
else: else:
...@@ -115,7 +115,7 @@ def _all_to_all_in_static_mode( ...@@ -115,7 +115,7 @@ def _all_to_all_in_static_mode(
if isinstance(out_tensor_or_tensor_list, list): if isinstance(out_tensor_or_tensor_list, list):
if not sync_op: if not sync_op:
dist.wait(out_tensor, use_calc_stream=False) dist.wait(out_tensor, use_calc_stream=False)
# 0D use stack/unstack while others use concat/split # 0-D use stack/unstack while others use concat/split
if len(in_tensor_or_tensor_list[0].shape) == 0: if len(in_tensor_or_tensor_list[0].shape) == 0:
out_tensor_or_tensor_list.extend(paddle.unstack(out_tensor, 0)) out_tensor_or_tensor_list.extend(paddle.unstack(out_tensor, 0))
else: else:
......
...@@ -91,7 +91,7 @@ def _scatter_in_static_mode( ...@@ -91,7 +91,7 @@ def _scatter_in_static_mode(
) )
else: else:
tensor_list = [tensor for _ in range(nranks)] tensor_list = [tensor for _ in range(nranks)]
# 0D use stack/unstack while others use concat/split # 0-D use stack/unstack while others use concat/split
if len(tensor_list[0].shape) == 0: if len(tensor_list[0].shape) == 0:
input_tensor = paddle.stack(tensor_list, axis=0) input_tensor = paddle.stack(tensor_list, axis=0)
else: else:
......
...@@ -79,16 +79,16 @@ class Bernoulli(exponential_family.ExponentialFamily): ...@@ -79,16 +79,16 @@ class Bernoulli(exponential_family.ExponentialFamily):
rv = Bernoulli(probs=0.3) rv = Bernoulli(probs=0.3)
print(rv.mean) print(rv.mean)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.30000001]) # 0.30000001)
print(rv.variance) print(rv.variance)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.21000001]) # 0.21000001)
print(rv.entropy()) print(rv.entropy())
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.61086434]) # 0.61086434)
""" """
def __init__(self, probs, name=None): def __init__(self, probs, name=None):
...@@ -247,12 +247,12 @@ class Bernoulli(exponential_family.ExponentialFamily): ...@@ -247,12 +247,12 @@ class Bernoulli(exponential_family.ExponentialFamily):
# The smaller the `temperature`, the distribution of `rsample` closer to `sample`, with `probs` of 0.3. # The smaller the `temperature`, the distribution of `rsample` closer to `sample`, with `probs` of 0.3.
print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=1.0)).sum()) print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=1.0)).sum())
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [361.06829834]) # 361.06829834)
print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=0.1)).sum()) print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=0.1)).sum())
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [288.66418457]) # 288.66418457)
""" """
name = self.name + '_rsample' name = self.name + '_rsample'
if not _non_static_mode(): if not _non_static_mode():
...@@ -420,8 +420,8 @@ class Bernoulli(exponential_family.ExponentialFamily): ...@@ -420,8 +420,8 @@ class Bernoulli(exponential_family.ExponentialFamily):
rv = Bernoulli(0.3) rv = Bernoulli(0.3)
print(rv.entropy()) print(rv.entropy())
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.61086434]) # 0.61086434)
""" """
name = self.name + '_entropy' name = self.name + '_entropy'
...@@ -455,8 +455,8 @@ class Bernoulli(exponential_family.ExponentialFamily): ...@@ -455,8 +455,8 @@ class Bernoulli(exponential_family.ExponentialFamily):
rv_other = Bernoulli(0.7) rv_other = Bernoulli(0.7)
print(rv.kl_divergence(rv_other)) print(rv.kl_divergence(rv_other))
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.33891910]) # 0.33891910)
""" """
name = self.name + '_kl_divergence' name = self.name + '_kl_divergence'
if not _non_static_mode(): if not _non_static_mode():
......
...@@ -61,13 +61,13 @@ class Beta(exponential_family.ExponentialFamily): ...@@ -61,13 +61,13 @@ class Beta(exponential_family.ExponentialFamily):
beta = paddle.distribution.Beta(alpha=0.5, beta=0.5) beta = paddle.distribution.Beta(alpha=0.5, beta=0.5)
print(beta.mean) print(beta.mean)
# Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [0.50000000]) # 0.50000000)
print(beta.variance) print(beta.variance)
# Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [0.12500000]) # 0.12500000)
print(beta.entropy()) print(beta.entropy())
# Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [0.12500000]) # 0.12500000)
# tensor input with broadcast # tensor input with broadcast
beta = paddle.distribution.Beta(alpha=paddle.to_tensor([0.2, 0.4]), beta=0.6) beta = paddle.distribution.Beta(alpha=paddle.to_tensor([0.2, 0.4]), beta=0.6)
......
...@@ -45,7 +45,7 @@ class Cauchy(distribution.Distribution): ...@@ -45,7 +45,7 @@ class Cauchy(distribution.Distribution):
# init Cauchy with float # init Cauchy with float
rv = Cauchy(loc=0.1, scale=1.2) rv = Cauchy(loc=0.1, scale=1.2)
print(rv.entropy()) print(rv.entropy())
# Tensor(shape=1, dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# 2.71334577) # 2.71334577)
# init Cauchy with N-Dim tensor # init Cauchy with N-Dim tensor
...@@ -228,8 +228,8 @@ class Cauchy(distribution.Distribution): ...@@ -228,8 +228,8 @@ class Cauchy(distribution.Distribution):
# init Cauchy with float # init Cauchy with float
rv = Cauchy(loc=0.1, scale=1.2) rv = Cauchy(loc=0.1, scale=1.2)
print(rv.prob(paddle.to_tensor(1.5))) print(rv.prob(paddle.to_tensor(1.5)))
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.11234467]) # 0.11234467)
# broadcast to value # broadcast to value
rv = Cauchy(loc=0.1, scale=1.2) rv = Cauchy(loc=0.1, scale=1.2)
...@@ -277,8 +277,8 @@ class Cauchy(distribution.Distribution): ...@@ -277,8 +277,8 @@ class Cauchy(distribution.Distribution):
# init Cauchy with float # init Cauchy with float
rv = Cauchy(loc=0.1, scale=1.2) rv = Cauchy(loc=0.1, scale=1.2)
print(rv.log_prob(paddle.to_tensor(1.5))) print(rv.log_prob(paddle.to_tensor(1.5)))
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-2.18618369]) # -2.18618369)
# broadcast to value # broadcast to value
rv = Cauchy(loc=0.1, scale=1.2) rv = Cauchy(loc=0.1, scale=1.2)
...@@ -344,8 +344,8 @@ class Cauchy(distribution.Distribution): ...@@ -344,8 +344,8 @@ class Cauchy(distribution.Distribution):
# init Cauchy with float # init Cauchy with float
rv = Cauchy(loc=0.1, scale=1.2) rv = Cauchy(loc=0.1, scale=1.2)
print(rv.cdf(paddle.to_tensor(1.5))) print(rv.cdf(paddle.to_tensor(1.5)))
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.77443725]) # 0.77443725)
# broadcast to value # broadcast to value
rv = Cauchy(loc=0.1, scale=1.2) rv = Cauchy(loc=0.1, scale=1.2)
......
...@@ -63,10 +63,10 @@ class Dirichlet(exponential_family.ExponentialFamily): ...@@ -63,10 +63,10 @@ class Dirichlet(exponential_family.ExponentialFamily):
print(dirichlet.entropy()) print(dirichlet.entropy())
# Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [-1.24434423]) # -1.24434423)
print(dirichlet.prob(paddle.to_tensor([.3, .5, .6]))) print(dirichlet.prob(paddle.to_tensor([.3, .5, .6])))
# Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [10.80000114]) # 10.80000114)
""" """
......
...@@ -55,16 +55,16 @@ class Geometric(distribution.Distribution): ...@@ -55,16 +55,16 @@ class Geometric(distribution.Distribution):
geom = Geometric(0.5) geom = Geometric(0.5)
geom.mean geom.mean
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [2.]) # 2.)
geom.variance geom.variance
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [2.]) # 2.)
geom.stddev geom.stddev
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [1.41421354]) # 1.41421354)
""" """
def __init__(self, probs): def __init__(self, probs):
...@@ -145,8 +145,8 @@ class Geometric(distribution.Distribution): ...@@ -145,8 +145,8 @@ class Geometric(distribution.Distribution):
geom = Geometric(0.5) geom = Geometric(0.5)
geom.pmf(2) geom.pmf(2)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.25000000]) # 0.25000000)
""" """
if isinstance(k, (numbers.Integral, framework.Variable)): if isinstance(k, (numbers.Integral, framework.Variable)):
return paddle.pow((1.0 - self.probs), k - 1.0) * self.probs return paddle.pow((1.0 - self.probs), k - 1.0) * self.probs
...@@ -176,8 +176,8 @@ class Geometric(distribution.Distribution): ...@@ -176,8 +176,8 @@ class Geometric(distribution.Distribution):
geom = Geometric(0.5) geom = Geometric(0.5)
geom.log_pmf(2) geom.log_pmf(2)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-1.38629436]) # -1.38629436)
""" """
if isinstance(k, (numbers.Integral, framework.Variable)): if isinstance(k, (numbers.Integral, framework.Variable)):
return paddle.log(self.pmf(k)) return paddle.log(self.pmf(k))
...@@ -266,8 +266,8 @@ class Geometric(distribution.Distribution): ...@@ -266,8 +266,8 @@ class Geometric(distribution.Distribution):
geom = Geometric(0.5) geom = Geometric(0.5)
geom.entropy() geom.entropy()
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [1.38629436]) # 1.38629436)
""" """
x = (1.0 - self.probs) * paddle.log(1.0 - self.probs) x = (1.0 - self.probs) * paddle.log(1.0 - self.probs)
y = self.probs * paddle.log(self.probs) y = self.probs * paddle.log(self.probs)
...@@ -296,8 +296,8 @@ class Geometric(distribution.Distribution): ...@@ -296,8 +296,8 @@ class Geometric(distribution.Distribution):
geom = Geometric(0.5) geom = Geometric(0.5)
geom.cdf(4) geom.cdf(4)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.93750000]) # 0.93750000)
""" """
if isinstance(k, (numbers.Integral, framework.Variable)): if isinstance(k, (numbers.Integral, framework.Variable)):
return 1.0 - paddle.pow((1.0 - self.probs), k) return 1.0 - paddle.pow((1.0 - self.probs), k)
...@@ -329,8 +329,8 @@ class Geometric(distribution.Distribution): ...@@ -329,8 +329,8 @@ class Geometric(distribution.Distribution):
geom_p = Geometric(0.5) geom_p = Geometric(0.5)
geom_q = Geometric(0.1) geom_q = Geometric(0.1)
geom_p.kl_divergence(geom_q) geom_p.kl_divergence(geom_q)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.51082563]) # 0.51082563)
""" """
if isinstance(other, Geometric): if isinstance(other, Geometric):
p, q = self.probs, other.probs p, q = self.probs, other.probs
......
...@@ -61,7 +61,7 @@ class Gumbel(TransformedDistribution): ...@@ -61,7 +61,7 @@ class Gumbel(TransformedDistribution):
dist.cdf(value) dist.cdf(value)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [0.54523915]) # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [0.54523915])
dist.entropy() dist.entropy()
# Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, [1.57721567]) # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [1.57721567])
dist.rsample([2]) dist.rsample([2])
# Tensor(shape=[2, 1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [[0.80463481], [0.91893655]]) # Tensor(shape=[2, 1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [[0.80463481], [0.91893655]])
......
...@@ -44,8 +44,8 @@ class Independent(distribution.Distribution): ...@@ -44,8 +44,8 @@ class Independent(distribution.Distribution):
print(reinterpreted_beta.batch_shape, reinterpreted_beta.event_shape) print(reinterpreted_beta.batch_shape, reinterpreted_beta.event_shape)
# () (2,) # () (2,)
print(reinterpreted_beta.log_prob(paddle.to_tensor([0.2, 0.2]))) print(reinterpreted_beta.log_prob(paddle.to_tensor([0.2, 0.2])))
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.45687842]) # -0.45687842)
""" """
def __init__(self, base, reinterpreted_batch_rank): def __init__(self, base, reinterpreted_batch_rank):
......
...@@ -60,7 +60,7 @@ def kl_divergence(p, q): ...@@ -60,7 +60,7 @@ def kl_divergence(p, q):
print(paddle.distribution.kl_divergence(p, q)) print(paddle.distribution.kl_divergence(p, q))
# Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [0.21193528]) # 0.21193528)
""" """
return _dispatch(type(p), type(q))(p, q) return _dispatch(type(p), type(q))(p, q)
......
...@@ -44,12 +44,12 @@ class Laplace(distribution.Distribution): ...@@ -44,12 +44,12 @@ class Laplace(distribution.Distribution):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0))
m.sample() # Laplace distributed with loc=0, scale=1 m.sample() # Laplace distributed with loc=0, scale=1
# Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [3.68546247]) # 3.68546247)
""" """
...@@ -173,13 +173,13 @@ class Laplace(distribution.Distribution): ...@@ -173,13 +173,13 @@ class Laplace(distribution.Distribution):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0))
value = paddle.to_tensor([0.1]) value = paddle.to_tensor(0.1)
m.log_prob(value) m.log_prob(value)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-0.79314721]) # -0.79314721)
""" """
loc, scale, value = self._validate_value(value) loc, scale, value = self._validate_value(value)
...@@ -205,12 +205,12 @@ class Laplace(distribution.Distribution): ...@@ -205,12 +205,12 @@ class Laplace(distribution.Distribution):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0))
m.entropy() m.entropy()
# Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [1.69314718]) # 1.69314718)
""" """
return 1 + paddle.log(2 * self.scale) return 1 + paddle.log(2 * self.scale)
...@@ -236,13 +236,13 @@ class Laplace(distribution.Distribution): ...@@ -236,13 +236,13 @@ class Laplace(distribution.Distribution):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0))
value = paddle.to_tensor([0.1]) value = paddle.to_tensor(0.1)
m.cdf(value) m.cdf(value)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.54758132]) # 0.54758132)
""" """
loc, scale, value = self._validate_value(value) loc, scale, value = self._validate_value(value)
iterm = ( iterm = (
...@@ -277,11 +277,11 @@ class Laplace(distribution.Distribution): ...@@ -277,11 +277,11 @@ class Laplace(distribution.Distribution):
import paddle import paddle
m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0))
value = paddle.to_tensor([0.1]) value = paddle.to_tensor(0.1)
m.icdf(value) m.icdf(value)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-1.60943794]) # -1.60943794)
""" """
loc, scale, value = self._validate_value(value) loc, scale, value = self._validate_value(value)
term = value - 0.5 term = value - 0.5
...@@ -302,10 +302,10 @@ class Laplace(distribution.Distribution): ...@@ -302,10 +302,10 @@ class Laplace(distribution.Distribution):
import paddle import paddle
m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0))
m.sample() # Laplace distributed with loc=0, scale=1 m.sample() # Laplace distributed with loc=0, scale=1
# Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [3.68546247]) # 3.68546247)
""" """
shape = shape if isinstance(shape, tuple) else tuple(shape) shape = shape if isinstance(shape, tuple) else tuple(shape)
with paddle.no_grad(): with paddle.no_grad():
...@@ -395,13 +395,13 @@ class Laplace(distribution.Distribution): ...@@ -395,13 +395,13 @@ class Laplace(distribution.Distribution):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
m1 = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) m1 = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0]))
m2 = paddle.distribution.Laplace(paddle.to_tensor([1.0]), paddle.to_tensor([0.5])) m2 = paddle.distribution.Laplace(paddle.to_tensor([1.0]), paddle.to_tensor([0.5]))
m1.kl_divergence(m2) m1.kl_divergence(m2)
# Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
# [1.04261160]) # [1.04261160])
""" """
var_ratio = other.scale / self.scale var_ratio = other.scale / self.scale
......
...@@ -72,13 +72,13 @@ class LogNormal(TransformedDistribution): ...@@ -72,13 +72,13 @@ class LogNormal(TransformedDistribution):
sample = lognormal_a.sample((2, )) sample = lognormal_a.sample((2, ))
# a random tensor created by lognormal distribution with shape: [2, 1] # a random tensor created by lognormal distribution with shape: [2, 1]
entropy = lognormal_a.entropy() entropy = lognormal_a.entropy()
# [1.4189385] with shape: [] # [1.4189385] with shape: [1]
lp = lognormal_a.log_prob(value_tensor) lp = lognormal_a.log_prob(value_tensor)
# [-0.72069150] with shape: [1] # [-0.72069150] with shape: [1]
p = lognormal_a.probs(value_tensor) p = lognormal_a.probs(value_tensor)
# [0.48641577] with shape: [1] # [0.48641577] with shape: [1]
kl = lognormal_a.kl_divergence(lognormal_b) kl = lognormal_a.kl_divergence(lognormal_b)
# [0.34939718] with shape: [] # [0.34939718] with shape: [1]
""" """
def __init__(self, loc, scale): def __init__(self, loc, scale):
......
...@@ -77,13 +77,13 @@ class Normal(distribution.Distribution): ...@@ -77,13 +77,13 @@ class Normal(distribution.Distribution):
sample = normal_a.sample([2]) sample = normal_a.sample([2])
# a random tensor created by normal distribution with shape: [2, 1] # a random tensor created by normal distribution with shape: [2, 1]
entropy = normal_a.entropy() entropy = normal_a.entropy()
# [1.4189385] with shape: [] # [1.4189385] with shape: [1]
lp = normal_a.log_prob(value_tensor) lp = normal_a.log_prob(value_tensor)
# [-1.2389386] with shape: [1] # [-1.2389386] with shape: [1]
p = normal_a.probs(value_tensor) p = normal_a.probs(value_tensor)
# [0.28969154] with shape: [1] # [0.28969154] with shape: [1]
kl = normal_a.kl_divergence(normal_b) kl = normal_a.kl_divergence(normal_b)
# [0.34939718] with shape: [] # [0.34939718] with shape: [1]
""" """
def __init__(self, loc, scale, name=None): def __init__(self, loc, scale, name=None):
......
...@@ -435,8 +435,8 @@ class AffineTransform(Transform): ...@@ -435,8 +435,8 @@ class AffineTransform(Transform):
# Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1., 2.]) # [1., 2.])
print(affine.forward_log_det_jacobian(x)) print(affine.forward_log_det_jacobian(x))
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.]) # 0.)
""" """
_type = Type.BIJECTION _type = Type.BIJECTION
...@@ -1189,8 +1189,8 @@ class StickBreakingTransform(Transform): ...@@ -1189,8 +1189,8 @@ class StickBreakingTransform(Transform):
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.99999988, 2. , 2.99999881]) # [0.99999988, 2. , 2.99999881])
print(t.forward_log_det_jacobian(x)) print(t.forward_log_det_jacobian(x))
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-9.10835075]) # -9.10835075)
""" """
_type = Type.BIJECTION _type = Type.BIJECTION
......
...@@ -42,8 +42,8 @@ class TransformedDistribution(distribution.Distribution): ...@@ -42,8 +42,8 @@ class TransformedDistribution(distribution.Distribution):
# [-0.10697651, 3.33609009, -0.86234951, 5.07457638, 0.75925219, # [-0.10697651, 3.33609009, -0.86234951, 5.07457638, 0.75925219,
# -4.17087793, 2.22579336, -0.93845034, 0.66054249, 1.50957513]) # -4.17087793, 2.22579336, -0.93845034, 0.66054249, 1.50957513])
print(d.log_prob(paddle.to_tensor(0.5))) print(d.log_prob(paddle.to_tensor(0.5)))
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-1.64333570]) # -1.64333570)
""" """
def __init__(self, base, transforms): def __init__(self, base, transforms):
......
...@@ -84,7 +84,7 @@ class Uniform(distribution.Distribution): ...@@ -84,7 +84,7 @@ class Uniform(distribution.Distribution):
sample = uniform.sample([2]) sample = uniform.sample([2])
# a random tensor created by uniform distribution with shape: [2, 1] # a random tensor created by uniform distribution with shape: [2, 1]
entropy = uniform.entropy() entropy = uniform.entropy()
# [0.6931472] with shape: [] # [0.6931472] with shape: [1]
lp = uniform.log_prob(value_tensor) lp = uniform.log_prob(value_tensor)
# [-0.6931472] with shape: [1] # [-0.6931472] with shape: [1]
p = uniform.probs(value_tensor) p = uniform.probs(value_tensor)
......
...@@ -387,7 +387,7 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): ...@@ -387,7 +387,7 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):
def _create_loss_op_desc_(loss): def _create_loss_op_desc_(loss):
# 0D Tensor or 0-Size Tensor # 0-D Tensor or 0-Size Tensor
if len(loss.shape) == 0 or 0 in loss.shape: if len(loss.shape) == 0 or 0 in loss.shape:
create_shape = loss.shape create_shape = loss.shape
else: else:
......
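For context, the two cases the updated comment distinguishes: a 0-D Tensor has shape [] (rank 0, exactly one element), while a 0-Size Tensor has a 0 somewhere in its shape (no elements). A quick illustration, independent of this diff:

    import paddle

    loss_0d = paddle.to_tensor(1.0)     # 0-D Tensor: shape [], one element
    loss_0size = paddle.zeros([0, 3])   # 0-Size Tensor: shape [0, 3], no elements

    print(len(loss_0d.shape) == 0)      # True -> takes the 0-D branch above
    print(0 in loss_0size.shape)        # True -> takes the 0-Size branch above

In both cases the loss's original shape is kept (presumably for the created loss-gradient op) instead of being forced to [1].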
...@@ -789,7 +789,7 @@ class ReduceLROnPlateau(LearningRateDecay): ...@@ -789,7 +789,7 @@ class ReduceLROnPlateau(LearningRateDecay):
Reduce learning rate when ``loss`` has stopped descending. Models often benefit from reducing the learning rate Reduce learning rate when ``loss`` has stopped descending. Models often benefit from reducing the learning rate
by 2 to 10 times once model performance has no longer improvement. by 2 to 10 times once model performance has no longer improvement.
The ``loss`` is the one which has been pass into ``step`` , it must be 1-D Tensor with shape [1]. When ``loss`` The ``loss`` is the one which has been pass into ``step`` , it must be 0-D Tensor with shape []. When ``loss``
stop descending for a ``patience`` number of epochs, the learning rate will be reduced to ``learning_rate * decay_rate`` . stop descending for a ``patience`` number of epochs, the learning rate will be reduced to ``learning_rate * decay_rate`` .
(Specially, ``mode`` can also be set to ``'max`` , in this case, when ``loss`` stop ascending for a ``patience`` number (Specially, ``mode`` can also be set to ``'max`` , in this case, when ``loss`` stop ascending for a ``patience`` number
of epochs, the learning rate will be reduced.) of epochs, the learning rate will be reduced.)
...@@ -943,7 +943,7 @@ class ReduceLROnPlateau(LearningRateDecay): ...@@ -943,7 +943,7 @@ class ReduceLROnPlateau(LearningRateDecay):
Args: Args:
loss (Variable): A ``Variable`` that will be monitored to determine whether the learning rate will reduce. loss (Variable): A ``Variable`` that will be monitored to determine whether the learning rate will reduce.
If it stop descending for a ``patience`` number of epochs, the learning rate will reduce. It should If it stop descending for a ``patience`` number of epochs, the learning rate will reduce. It should
be 1-D Tensor with shape [1]. be 0-D Tensor with shape [].
Specially, if ``mode`` has been set to ``'max'`` , the learning rate will reduce when it stops ascending. Specially, if ``mode`` has been set to ``'max'`` , the learning rate will reduce when it stops ascending.
Returns: Returns:
None None
...@@ -952,7 +952,7 @@ class ReduceLROnPlateau(LearningRateDecay): ...@@ -952,7 +952,7 @@ class ReduceLROnPlateau(LearningRateDecay):
Please refer to the example of current LearningRateDecay. Please refer to the example of current LearningRateDecay.
""" """
# loss must be 1-D Tensor with shape [1] # loss.size must be 1
check_type(loss, 'loss', Variable, 'ReduceLROnPlateau.step') check_type(loss, 'loss', Variable, 'ReduceLROnPlateau.step')
assert np.prod(loss.shape) == 1, ( assert np.prod(loss.shape) == 1, (
"The number of elements of loss should be 1, but the current loss.shape is {}, whose number of elements is not 1. " "The number of elements of loss should be 1, but the current loss.shape is {}, whose number of elements is not 1. "
......
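The size check above admits both the old and the new convention: the product over an empty shape is 1, so a 0-D loss (shape []) passes the same assert as a shape-[1] loss. For example:

    import numpy as np

    print(np.prod(()))      # 1.0 -> 0-D Tensor, shape []
    print(np.prod((1,)))    # 1   -> 1-D Tensor with shape [1]
    print(np.prod((2, 3)))  # 6   -> more than one element, rejected by the assert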
...@@ -131,7 +131,7 @@ def monkey_patch_math_tensor(): ...@@ -131,7 +131,7 @@ def monkey_patch_math_tensor():
return int(np.array(var).flatten()[0]) return int(np.array(var).flatten()[0])
def _len_(var): def _len_(var):
assert var.ndim > 0, "len() of a 0D tensor is wrong" assert var.ndim > 0, "len() of a 0-D tensor is wrong"
if var.type == core.VarDesc.VarType.VOCAB: if var.type == core.VarDesc.VarType.VOCAB:
return len(var.value().get_map_tensor()) return len(var.value().get_map_tensor())
elif var.type == core.VarDesc.VarType.STRINGS: elif var.type == core.VarDesc.VarType.STRINGS:
......
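On the assert in _len_: a 0-D tensor has no leading axis, so its length is undefined (neither 0 nor 1), and only tensors with ndim > 0 support len(). The same guard is kept in convert_len further down. A short illustration, not taken from the diff:

    import paddle

    x1 = paddle.to_tensor([3.14])   # 1-D, shape [1]
    x0 = paddle.to_tensor(3.14)     # 0-D, shape []

    print(len(x1))                  # 1
    # len(x0) would hit the assert above: a 0-D tensor has no axis to count along.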
...@@ -516,7 +516,7 @@ def monkey_patch_tensor(): ...@@ -516,7 +516,7 @@ def monkey_patch_tensor():
y = paddle.pow(x, 4.0) y = paddle.pow(x, 4.0)
y.backward() y.backward()
print("grad of x: {}".format(x.grad)) print("grad of x: {}".format(x.grad))
# Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False, [500.]) # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=False, 500.)
""" """
msg = ( msg = (
...@@ -638,12 +638,12 @@ def monkey_patch_tensor(): ...@@ -638,12 +638,12 @@ def monkey_patch_tensor():
y = copy.deepcopy(x) y = copy.deepcopy(x)
print(x) print(x)
# Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True,
# [2.]) # 2.)
print(y) print(y)
# Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True,
# [2.]) # 2.)
""" """
if not self.is_leaf: if not self.is_leaf:
......
...@@ -2476,7 +2476,7 @@ class Variable(metaclass=VariableMetaClass): ...@@ -2476,7 +2476,7 @@ class Variable(metaclass=VariableMetaClass):
def size(self): def size(self):
""" """
Returns the number of elements for current Variable, which is a int64 Variable with shape [1] Returns the number of elements for current Variable, which is a int64 Variable with shape [] .
Returns: Returns:
Variable, the number of elements for current Variable Variable, the number of elements for current Variable
......
...@@ -120,7 +120,6 @@ class TestNumelAPI(unittest.TestCase): ...@@ -120,7 +120,6 @@ class TestNumelAPI(unittest.TestCase):
}, },
fetch_list=[out_1, out_2], fetch_list=[out_1, out_2],
) )
# TODO(zhouwei): will change shape [1] to [] to support zero-dim
assert np.array_equal( assert np.array_equal(
res_1, np.array(np.size(input_1)).astype("int64") res_1, np.array(np.size(input_1)).astype("int64")
) )
......
...@@ -83,7 +83,6 @@ class TestSizeAPI(unittest.TestCase): ...@@ -83,7 +83,6 @@ class TestSizeAPI(unittest.TestCase):
}, },
fetch_list=[out_1, out_2], fetch_list=[out_1, out_2],
) )
# TODO(zhouwei): will change shape [1] to [] to support zero-dim
assert np.array_equal( assert np.array_equal(
res_1, np.array(np.size(input_1)).astype("int64") res_1, np.array(np.size(input_1)).astype("int64")
) )
......
...@@ -517,7 +517,7 @@ def convert_len(var): ...@@ -517,7 +517,7 @@ def convert_len(var):
`shape_op` in var.block. `shape_op` in var.block.
""" """
if isinstance(var, Variable): if isinstance(var, Variable):
assert var.ndim > 0, "len() of a 0D tensor is wrong" assert var.ndim > 0, "len() of a 0-D tensor is wrong"
if var.type in [ if var.type in [
core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.LOD_TENSOR,
core.VarDesc.VarType.SELECTED_ROWS, core.VarDesc.VarType.SELECTED_ROWS,
......
...@@ -798,7 +798,7 @@ def accuracy(input, label, k=1, correct=None, total=None, name=None): ...@@ -798,7 +798,7 @@ def accuracy(input, label, k=1, correct=None, total=None, name=None):
predictions = paddle.to_tensor([[0.2, 0.1, 0.4, 0.1, 0.1], [0.2, 0.3, 0.1, 0.15, 0.25]], dtype='float32') predictions = paddle.to_tensor([[0.2, 0.1, 0.4, 0.1, 0.1], [0.2, 0.3, 0.1, 0.15, 0.25]], dtype='float32')
label = paddle.to_tensor([[2], [0]], dtype="int64") label = paddle.to_tensor([[2], [0]], dtype="int64")
result = paddle.metric.accuracy(input=predictions, label=label, k=1) result = paddle.metric.accuracy(input=predictions, label=label, k=1)
# [0.5] # 0.5
""" """
if label.dtype == paddle.int32: if label.dtype == paddle.int32:
label = paddle.cast(label, paddle.int64) label = paddle.cast(label, paddle.int64)
......
...@@ -61,7 +61,7 @@ def dice_loss(input, label, epsilon=0.00001, name=None): ...@@ -61,7 +61,7 @@ def dice_loss(input, label, epsilon=0.00001, name=None):
For more information, please refer to :ref:`api_guide_Name` For more information, please refer to :ref:`api_guide_Name`
Returns: Returns:
Tensor, which shape is [1], data type is the same as `input` . 0-D Tensor, which shape is [], data type is the same as `input` .
Example: Example:
.. code-block:: python .. code-block:: python
...@@ -327,7 +327,7 @@ def npair_loss(anchor, positive, labels, l2_reg=0.002): ...@@ -327,7 +327,7 @@ def npair_loss(anchor, positive, labels, l2_reg=0.002):
Returns: Returns:
A Tensor representing the npair loss, the data type is the same as anchor, the shape is [1]. A 0-D Tensor representing the npair loss, the data type is the same as anchor, the shape is [].
Examples: Examples:
...@@ -634,7 +634,7 @@ def binary_cross_entropy( ...@@ -634,7 +634,7 @@ def binary_cross_entropy(
input = paddle.to_tensor([0.5, 0.6, 0.7], 'float32') input = paddle.to_tensor([0.5, 0.6, 0.7], 'float32')
label = paddle.to_tensor([1.0, 0.0, 1.0], 'float32') label = paddle.to_tensor([1.0, 0.0, 1.0], 'float32')
output = paddle.nn.functional.binary_cross_entropy(input, label) output = paddle.nn.functional.binary_cross_entropy(input, label)
print(output) # [0.65537095] print(output) # 0.65537095
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
...@@ -774,7 +774,7 @@ def binary_cross_entropy_with_logits( ...@@ -774,7 +774,7 @@ def binary_cross_entropy_with_logits(
logit = paddle.to_tensor([5.0, 1.0, 3.0]) logit = paddle.to_tensor([5.0, 1.0, 3.0])
label = paddle.to_tensor([1.0, 0.0, 1.0]) label = paddle.to_tensor([1.0, 0.0, 1.0])
output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label) output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label)
print(output) # [0.45618808] print(output) # 0.45618808
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
...@@ -1077,7 +1077,7 @@ def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None): ...@@ -1077,7 +1077,7 @@ def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None):
label = paddle.rand([3, 3]).astype('float32') label = paddle.rand([3, 3]).astype('float32')
output = paddle.nn.functional.smooth_l1_loss(input, label) output = paddle.nn.functional.smooth_l1_loss(input, label)
print(output) print(output)
# [0.068004] # 0.068004
""" """
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -1147,7 +1147,7 @@ def margin_ranking_loss( ...@@ -1147,7 +1147,7 @@ def margin_ranking_loss(
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[1]`, otherwise the shape is the same as `input` .The same dtype as input tensor. Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[]`, otherwise the shape is the same as `input` .The same dtype as input tensor.
Examples: Examples:
...@@ -1159,7 +1159,7 @@ def margin_ranking_loss( ...@@ -1159,7 +1159,7 @@ def margin_ranking_loss(
other = paddle.to_tensor([[2, 1], [2, 4]], dtype='float32') other = paddle.to_tensor([[2, 1], [2, 4]], dtype='float32')
label = paddle.to_tensor([[1, -1], [-1, -1]], dtype='float32') label = paddle.to_tensor([[1, -1], [-1, -1]], dtype='float32')
loss = paddle.nn.functional.margin_ranking_loss(input, other, label) loss = paddle.nn.functional.margin_ranking_loss(input, other, label)
print(loss) # [0.75] print(loss) # 0.75
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
raise ValueError( raise ValueError(
...@@ -1264,7 +1264,7 @@ def l1_loss(input, label, reduction='mean', name=None): ...@@ -1264,7 +1264,7 @@ def l1_loss(input, label, reduction='mean', name=None):
Returns: Returns:
Tensor, the L1 Loss of Tensor ``input`` and ``label``. Tensor, the L1 Loss of Tensor ``input`` and ``label``.
If `reduction` is ``'none'``, the shape of output loss is :math:`[N, *]`, the same as ``input`` . If `reduction` is ``'none'``, the shape of output loss is :math:`[N, *]`, the same as ``input`` .
If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [].
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1276,8 +1276,8 @@ def l1_loss(input, label, reduction='mean', name=None): ...@@ -1276,8 +1276,8 @@ def l1_loss(input, label, reduction='mean', name=None):
l1_loss = paddle.nn.functional.l1_loss(input, label) l1_loss = paddle.nn.functional.l1_loss(input, label)
print(l1_loss) print(l1_loss)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.34999999]) # 0.34999999)
l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='none') l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='none')
print(l1_loss) print(l1_loss)
...@@ -1287,8 +1287,8 @@ def l1_loss(input, label, reduction='mean', name=None): ...@@ -1287,8 +1287,8 @@ def l1_loss(input, label, reduction='mean', name=None):
l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='sum') l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='sum')
print(l1_loss) print(l1_loss)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.39999998]) # 1.39999998)
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
...@@ -1377,7 +1377,7 @@ def nll_loss( ...@@ -1377,7 +1377,7 @@ def nll_loss(
log_out = log_softmax(input) log_out = log_softmax(input)
label = paddle.to_tensor([0, 2, 1, 1, 0], "int64") label = paddle.to_tensor([0, 2, 1, 1, 0], "int64")
result = nll_loss(log_out, label) result = nll_loss(log_out, label)
print(result) # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, [1.07202101]) print(result) # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, 1.07202101)
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
raise ValueError( raise ValueError(
...@@ -1578,9 +1578,9 @@ def kl_div(input, label, reduction='mean', name=None): ...@@ -1578,9 +1578,9 @@ def kl_div(input, label, reduction='mean', name=None):
If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result. If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result.
If `reduction` is ``'mean'``, the output loss is the shape of [1], and the output is the average of all losses. If `reduction` is ``'mean'``, the output loss is the shape of [], and the output is the average of all losses.
If `reduction` is ``'sum'``, the output loss is the shape of [1], and the output is the sum of all losses. If `reduction` is ``'sum'``, the output loss is the shape of [], and the output is the sum of all losses.
If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size. If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size.
...@@ -1611,17 +1611,17 @@ def kl_div(input, label, reduction='mean', name=None): ...@@ -1611,17 +1611,17 @@ def kl_div(input, label, reduction='mean', name=None):
x = paddle.uniform(shape, min=-10, max=10).astype('float32') x = paddle.uniform(shape, min=-10, max=10).astype('float32')
target = paddle.uniform(shape, min=-10, max=10).astype('float32') target = paddle.uniform(shape, min=-10, max=10).astype('float32')
# 'batchmean' reduction, loss shape will be [1] # 'batchmean' reduction, loss shape will be [], who is 0-D Tensor
pred_loss = F.kl_div(x, target, reduction='batchmean') pred_loss = F.kl_div(x, target, reduction='batchmean')
# shape=[1] # shape=[]
# 'mean' reduction, loss shape will be [1] # 'mean' reduction, loss shape will be [], who is 0-D Tensor
pred_loss = F.kl_div(x, target, reduction='mean') pred_loss = F.kl_div(x, target, reduction='mean')
# shape=[1] # shape=[]
# 'sum' reduction, loss shape will be [1] # 'sum' reduction, loss shape will be [], who is 0-D Tensor
pred_loss = F.kl_div(x, target, reduction='sum') pred_loss = F.kl_div(x, target, reduction='sum')
# shape=[1] # shape=[]
# 'none' reduction, loss shape is same with input shape # 'none' reduction, loss shape is same with input shape
pred_loss = F.kl_div(x, target, reduction='none') pred_loss = F.kl_div(x, target, reduction='none')
...@@ -1724,7 +1724,7 @@ def mse_loss(input, label, reduction='mean', name=None): ...@@ -1724,7 +1724,7 @@ def mse_loss(input, label, reduction='mean', name=None):
label = paddle.to_tensor(1.7) label = paddle.to_tensor(1.7)
output = mse_loss(input, label) output = mse_loss(input, label)
print(output) print(output)
# [0.04000002] # 0.04000002
""" """
...@@ -1780,7 +1780,7 @@ def ctc_loss( ...@@ -1780,7 +1780,7 @@ def ctc_loss(
norm_by_times (bool, optional): Whether to normalize the gradients by the number of time-step, which is also the sequence's length. There is no need to normalize the gradients if reduction mode is 'mean'. Default: False. norm_by_times (bool, optional): Whether to normalize the gradients by the number of time-step, which is also the sequence's length. There is no need to normalize the gradients if reduction mode is 'mean'. Default: False.
Returns: Returns:
Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``log_probs``. Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``log_probs``.
Examples: Examples:
...@@ -1834,8 +1834,8 @@ def ctc_loss( ...@@ -1834,8 +1834,8 @@ def ctc_loss(
blank=0, blank=0,
reduction='mean') reduction='mean')
print(loss) print(loss)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.13760614]) # 1.13760614)
""" """
...@@ -1929,7 +1929,7 @@ def rnnt_loss( ...@@ -1929,7 +1929,7 @@ def rnnt_loss(
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``logprobs``. Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``logprobs``.
Examples: Examples:
...@@ -1961,8 +1961,8 @@ def rnnt_loss( ...@@ -1961,8 +1961,8 @@ def rnnt_loss(
costs = fn(acts, labels, lengths, label_lengths) costs = fn(acts, labels, lengths, label_lengths)
print(costs) print(costs)
# Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=False, # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False,
# [4.49566677]) # 4.49566677)
""" """
def warprnnt( def warprnnt(
...@@ -2078,7 +2078,7 @@ def margin_cross_entropy( ...@@ -2078,7 +2078,7 @@ def margin_cross_entropy(
softmax is shard_softmax when using model parallel, otherwise softmax is shard_softmax when using model parallel, otherwise
softmax is in the same shape with input logits. If softmax is in the same shape with input logits. If
``reduction == None``, the shape of loss is ``[N, 1]``, otherwise ``reduction == None``, the shape of loss is ``[N, 1]``, otherwise
the shape is ``[1]``. the shape is ``[]``.
Examples: Examples:
...@@ -2633,8 +2633,8 @@ def cross_entropy( ...@@ -2633,8 +2633,8 @@ def cross_entropy(
input, input,
label) label)
print(dy_ret) print(dy_ret)
# Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
# [5.34043430]) # 5.34043430)
.. code-block:: python .. code-block:: python
...@@ -2659,8 +2659,8 @@ def cross_entropy( ...@@ -2659,8 +2659,8 @@ def cross_entropy(
weight=weight, weight=weight,
reduction=reduction) reduction=reduction)
print(paddle_loss_mean) print(paddle_loss_mean)
# Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
# [1.11043464]) # 1.11043464)
""" """
...@@ -3012,7 +3012,7 @@ def sigmoid_focal_loss( ...@@ -3012,7 +3012,7 @@ def sigmoid_focal_loss(
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[1]`, otherwise the shape is the same as ``logit``. The same dtype as ``logit`` tensor. Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[]`, otherwise the shape is the same as ``logit``. The same dtype as ``logit`` tensor.
Examples: Examples:
...@@ -3026,7 +3026,7 @@ def sigmoid_focal_loss( ...@@ -3026,7 +3026,7 @@ def sigmoid_focal_loss(
fg_label = paddle.greater_equal(label, one) fg_label = paddle.greater_equal(label, one)
fg_num = paddle.sum(paddle.cast(fg_label, dtype='float32')) fg_num = paddle.sum(paddle.cast(fg_label, dtype='float32'))
output = paddle.nn.functional.sigmoid_focal_loss(logit, label, normalizer=fg_num) output = paddle.nn.functional.sigmoid_focal_loss(logit, label, normalizer=fg_num)
print(output) # [0.65782464] print(output) # 0.65782464
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
...@@ -3183,7 +3183,7 @@ def multi_label_soft_margin_loss( ...@@ -3183,7 +3183,7 @@ def multi_label_soft_margin_loss(
# Tensor([3.49625897, 0.71111226, 0.43989015]) # Tensor([3.49625897, 0.71111226, 0.43989015])
loss = F.multi_label_soft_margin_loss(input, label, reduction='mean') loss = F.multi_label_soft_margin_loss(input, label, reduction='mean')
print(loss) print(loss)
# Tensor([1.54908717]) # Tensor(1.54908717)
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
raise ValueError( raise ValueError(
...@@ -3307,7 +3307,7 @@ def hinge_embedding_loss(input, label, margin=1.0, reduction='mean', name=None): ...@@ -3307,7 +3307,7 @@ def hinge_embedding_loss(input, label, margin=1.0, reduction='mean', name=None):
loss = F.hinge_embedding_loss(input, label, margin=1.0, reduction='mean') loss = F.hinge_embedding_loss(input, label, margin=1.0, reduction='mean')
print(loss) print(loss)
# Tensor([0.22222222]) # Tensor(0.22222222)
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
...@@ -3377,7 +3377,7 @@ def cosine_embedding_loss( ...@@ -3377,7 +3377,7 @@ def cosine_embedding_loss(
Returns: Returns:
Tensor, the cosine embedding Loss of Tensor ``input1`` ``input2`` and ``label``. Tensor, the cosine embedding Loss of Tensor ``input1`` ``input2`` and ``label``.
If `reduction` is ``'none'``, the shape of output loss is [N], the same as ``input`` . If `reduction` is ``'none'``, the shape of output loss is [N], the same as ``input`` .
If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [].
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -3389,10 +3389,10 @@ def cosine_embedding_loss( ...@@ -3389,10 +3389,10 @@ def cosine_embedding_loss(
label = paddle.to_tensor([1, -1], 'int64') label = paddle.to_tensor([1, -1], 'int64')
output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='mean') output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='mean')
print(output) # [0.21155193] print(output) # 0.21155193
output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='sum') output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='sum')
print(output) # [0.42310387] print(output) # 0.42310387
output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='none') output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='none')
print(output) # [0.42310387, 0. ] print(output) # [0.42310387, 0. ]
...@@ -3528,7 +3528,7 @@ def triplet_margin_with_distance_loss( ...@@ -3528,7 +3528,7 @@ def triplet_margin_with_distance_loss(
loss = F.triplet_margin_with_distance_loss(input, positive, negative, margin=1.0, reduction='mean') loss = F.triplet_margin_with_distance_loss(input, positive, negative, margin=1.0, reduction='mean')
print(loss) print(loss)
# Tensor([0.19165580]) # Tensor(0.19165580)
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
...@@ -3678,7 +3678,7 @@ def triplet_margin_loss( ...@@ -3678,7 +3678,7 @@ def triplet_margin_loss(
loss = F.triplet_margin_loss(input, positive, negative, margin=1.0, reduction='mean') loss = F.triplet_margin_loss(input, positive, negative, margin=1.0, reduction='mean')
print(loss) print(loss)
# Tensor([0.19165580]) # Tensor(0.19165580)
""" """
if reduction not in ['sum', 'mean', 'none']: if reduction not in ['sum', 'mean', 'none']:
...@@ -3886,7 +3886,7 @@ def soft_margin_loss(input, label, reduction='mean', name=None): ...@@ -3886,7 +3886,7 @@ def soft_margin_loss(input, label, reduction='mean', name=None):
Returns: Returns:
Output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is [1]. Output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is [].
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -3897,8 +3897,8 @@ def soft_margin_loss(input, label, reduction='mean', name=None): ...@@ -3897,8 +3897,8 @@ def soft_margin_loss(input, label, reduction='mean', name=None):
label = paddle.to_tensor([[1.0, -1.0, 1.0],[-1.0, 1.0, 1.0]], 'float32') label = paddle.to_tensor([[1.0, -1.0, 1.0],[-1.0, 1.0, 1.0]], 'float32')
output = paddle.nn.functional.soft_margin_loss(input, label) output = paddle.nn.functional.soft_margin_loss(input, label)
print(output) print(output)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.64022040]) # 0.64022040)
input = paddle.uniform(shape=(5, 5), dtype="float32", min=0.1, max=0.8) input = paddle.uniform(shape=(5, 5), dtype="float32", min=0.1, max=0.8)
label = paddle.randint(0, 2, shape=(5, 5), dtype="int64") label = paddle.randint(0, 2, shape=(5, 5), dtype="int64")
...@@ -3998,7 +3998,7 @@ def gaussian_nll_loss( ...@@ -3998,7 +3998,7 @@ def gaussian_nll_loss(
Returns: Returns:
output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is [1]. output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is [].
Examples:: Examples::
.. code-block:: python .. code-block:: python
......
...@@ -98,8 +98,8 @@ class BCEWithLogitsLoss(Layer): ...@@ -98,8 +98,8 @@ class BCEWithLogitsLoss(Layer):
bce_logit_loss = paddle.nn.BCEWithLogitsLoss() bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
output = bce_logit_loss(logit, label) output = bce_logit_loss(logit, label)
print(output) print(output)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.45618814]) # 0.45618814)
""" """
...@@ -319,8 +319,8 @@ class CrossEntropyLoss(Layer): ...@@ -319,8 +319,8 @@ class CrossEntropyLoss(Layer):
input, input,
label) label)
print(dy_ret) print(dy_ret)
# Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
# [5.34043430]) # 5.34043430)
.. code-block:: python .. code-block:: python
...@@ -345,8 +345,8 @@ class CrossEntropyLoss(Layer): ...@@ -345,8 +345,8 @@ class CrossEntropyLoss(Layer):
weight=weight, weight=weight,
reduction=reduction) reduction=reduction)
print(paddle_loss_mean) print(paddle_loss_mean)
# Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
# [1.11043464]) # 1.11043464)
""" """
...@@ -564,7 +564,7 @@ class MSELoss(Layer): ...@@ -564,7 +564,7 @@ class MSELoss(Layer):
label = paddle.to_tensor([1.7]) label = paddle.to_tensor([1.7])
output = mse_loss(input, label) output = mse_loss(input, label)
print(output) print(output)
# [0.04000002] # 0.04000002
""" """
...@@ -637,7 +637,7 @@ class L1Loss(Layer): ...@@ -637,7 +637,7 @@ class L1Loss(Layer):
- label (Tensor): label. The shapes is ``[N, *]``, same shape as ``input`` . It's data type should be float32, float64, int32, int64. - label (Tensor): label. The shapes is ``[N, *]``, same shape as ``input`` . It's data type should be float32, float64, int32, int64.
- output (Tensor): The L1 Loss of ``input`` and ``label``. - output (Tensor): The L1 Loss of ``input`` and ``label``.
If `reduction` is ``'none'``, the shape of output loss is ``[N, *]``, the same as ``input`` . If `reduction` is ``'none'``, the shape of output loss is ``[N, *]``, the same as ``input`` .
If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [].
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -650,14 +650,14 @@ class L1Loss(Layer): ...@@ -650,14 +650,14 @@ class L1Loss(Layer):
l1_loss = paddle.nn.L1Loss() l1_loss = paddle.nn.L1Loss()
output = l1_loss(input, label) output = l1_loss(input, label)
print(output) print(output)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.34999999]) # 0.34999999)
l1_loss = paddle.nn.L1Loss(reduction='sum') l1_loss = paddle.nn.L1Loss(reduction='sum')
output = l1_loss(input, label) output = l1_loss(input, label)
print(output) print(output)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.39999998]) # 1.39999998)
l1_loss = paddle.nn.L1Loss(reduction='none') l1_loss = paddle.nn.L1Loss(reduction='none')
output = l1_loss(input, label) output = l1_loss(input, label)
...@@ -747,8 +747,8 @@ class BCELoss(Layer): ...@@ -747,8 +747,8 @@ class BCELoss(Layer):
bce_loss = paddle.nn.BCELoss() bce_loss = paddle.nn.BCELoss()
output = bce_loss(input, label) output = bce_loss(input, label)
print(output) print(output)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.65537101]) # 0.65537101)
""" """
...@@ -835,7 +835,7 @@ class NLLLoss(Layer): ...@@ -835,7 +835,7 @@ class NLLLoss(Layer):
The data type is int64. The data type is int64.
- output (Tensor): the `negative log likelihood loss` between input `x` and `label`. - output (Tensor): the `negative log likelihood loss` between input `x` and `label`.
If `reduction` is `'none'`, the shape is `[N, *]`. If `reduction` is `'none'`, the shape is `[N, *]`.
If `reduction` is `'sum'` or `'mean'`, the shape is `[1]`. If `reduction` is `'sum'` or `'mean'`, the shape is `[]`.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -853,7 +853,7 @@ class NLLLoss(Layer): ...@@ -853,7 +853,7 @@ class NLLLoss(Layer):
log_out = log_softmax(input) log_out = log_softmax(input)
label = paddle.to_tensor([0, 2, 1, 1, 0], "int64") label = paddle.to_tensor([0, 2, 1, 1, 0], "int64")
result = nll_loss(log_out, label) result = nll_loss(log_out, label)
print(result) # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, [1.07202101]) print(result) # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, 1.07202101)
""" """
...@@ -991,9 +991,9 @@ class KLDivLoss(Layer): ...@@ -991,9 +991,9 @@ class KLDivLoss(Layer):
If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result. If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result.
If `reduction` is ``'mean'``, the output loss is the shape of [1], and the output is the average of all losses. If `reduction` is ``'mean'``, the output loss is the shape of [], and the output is the average of all losses.
If `reduction` is ``'sum'``, the output loss is the shape of [1], and the output is the sum of all losses. If `reduction` is ``'sum'``, the output loss is the shape of [], and the output is the sum of all losses.
If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size. If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size.
...@@ -1012,7 +1012,7 @@ class KLDivLoss(Layer): ...@@ -1012,7 +1012,7 @@ class KLDivLoss(Layer):
label (Tensor): ``(N, *)``, same shape as input. label (Tensor): ``(N, *)``, same shape as input.
output (Tensor): tensor with shape: [1] by default. output (Tensor): tensor with shape: [] by default.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1024,20 +1024,20 @@ class KLDivLoss(Layer): ...@@ -1024,20 +1024,20 @@ class KLDivLoss(Layer):
x = paddle.uniform(shape, min=-10, max=10).astype('float32') x = paddle.uniform(shape, min=-10, max=10).astype('float32')
target = paddle.uniform(shape, min=-10, max=10).astype('float32') target = paddle.uniform(shape, min=-10, max=10).astype('float32')
# 'batchmean' reduction, loss shape will be [1] # 'batchmean' reduction, loss shape will be []
kldiv_criterion = nn.KLDivLoss(reduction='batchmean') kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
pred_loss = kldiv_criterion(x, target) pred_loss = kldiv_criterion(x, target)
# shape=[1] # shape=[]
# 'mean' reduction, loss shape will be [1] # 'mean' reduction, loss shape will be []
kldiv_criterion = nn.KLDivLoss(reduction='mean') kldiv_criterion = nn.KLDivLoss(reduction='mean')
pred_loss = kldiv_criterion(x, target) pred_loss = kldiv_criterion(x, target)
# shape=[1] # shape=[]
# 'sum' reduction, loss shape will be [1] # 'sum' reduction, loss shape will be []
kldiv_criterion = nn.KLDivLoss(reduction='sum') kldiv_criterion = nn.KLDivLoss(reduction='sum')
pred_loss = kldiv_criterion(x, target) pred_loss = kldiv_criterion(x, target)
# shape=[1] # shape=[]
# 'none' reduction, loss shape is same with X shape # 'none' reduction, loss shape is same with X shape
kldiv_criterion = nn.KLDivLoss(reduction='none') kldiv_criterion = nn.KLDivLoss(reduction='none')
...@@ -1090,7 +1090,7 @@ class MarginRankingLoss(Layer): ...@@ -1090,7 +1090,7 @@ class MarginRankingLoss(Layer):
label: N-D Tensor, label have the same shape and dtype as `input`. label: N-D Tensor, label has the same shape and dtype as `input`.
output: If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the out shape is :math:`[1]`, otherwise the shape is the same as `input` .The same dtype as input tensor. output: If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the out shape is :math:`[]`, otherwise the shape is the same as `input`. The same dtype as the input tensor.
Returns: Returns:
A callable object of MarginRankingLoss. A callable object of MarginRankingLoss.
...@@ -1108,7 +1108,7 @@ class MarginRankingLoss(Layer): ...@@ -1108,7 +1108,7 @@ class MarginRankingLoss(Layer):
loss = margin_rank_loss(input, other, label) loss = margin_rank_loss(input, other, label)
print(loss) print(loss)
# [0.75] # 0.75
""" """
def __init__(self, margin=0.0, reduction='mean', name=None): def __init__(self, margin=0.0, reduction='mean', name=None):
...@@ -1149,7 +1149,7 @@ class CTCLoss(Layer): ...@@ -1149,7 +1149,7 @@ class CTCLoss(Layer):
- norm_by_times (bool, optional): Whether to normalize the gradients by the number of time-step, which is also the sequence's length. There is no need to normalize the gradients if reduction mode is 'mean'. Default: False. - norm_by_times (bool, optional): Whether to normalize the gradients by the number of time-step, which is also the sequence's length. There is no need to normalize the gradients if reduction mode is 'mean'. Default: False.
Returns: Returns:
Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``log_probs``. Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If :attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``log_probs``.
Examples: Examples:
...@@ -1197,8 +1197,8 @@ class CTCLoss(Layer): ...@@ -1197,8 +1197,8 @@ class CTCLoss(Layer):
input_lengths, input_lengths,
label_lengths) label_lengths)
print(loss) print(loss)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.13760614]) # 1.13760614)
""" """
def __init__(self, blank=0, reduction='mean'): def __init__(self, blank=0, reduction='mean'):
...@@ -1242,7 +1242,7 @@ class RNNTLoss(Layer): ...@@ -1242,7 +1242,7 @@ class RNNTLoss(Layer):
label_lengths: Tensor of (batch) containing label length of each example label_lengths: Tensor of (batch) containing label length of each example
Returns: Returns:
Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``logprobs``. Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If :attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``logprobs``.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1272,8 +1272,8 @@ class RNNTLoss(Layer): ...@@ -1272,8 +1272,8 @@ class RNNTLoss(Layer):
costs = fn(acts, labels, lengths, label_lengths) costs = fn(acts, labels, lengths, label_lengths)
print(costs) print(costs)
# Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=False, # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False,
# [4.49566677]) # 4.49566677)
""" """
def __init__( def __init__(
...@@ -1352,7 +1352,7 @@ class SmoothL1Loss(Layer): ...@@ -1352,7 +1352,7 @@ class SmoothL1Loss(Layer):
loss = paddle.nn.SmoothL1Loss() loss = paddle.nn.SmoothL1Loss()
output = loss(input, label) output = loss(input, label)
print(output) print(output)
# [0.049606] # 0.049606
""" """
def __init__(self, reduction='mean', delta=1.0, name=None): def __init__(self, reduction='mean', delta=1.0, name=None):
...@@ -1428,7 +1428,7 @@ class MultiLabelSoftMarginLoss(Layer): ...@@ -1428,7 +1428,7 @@ class MultiLabelSoftMarginLoss(Layer):
multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='mean') multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='mean')
loss = multi_label_soft_margin_loss(input, label) loss = multi_label_soft_margin_loss(input, label)
print(loss) print(loss)
# Tensor([1.54908717]) # Tensor(1.54908717)
""" """
def __init__(self, weight=None, reduction="mean", name=None): def __init__(self, weight=None, reduction="mean", name=None):
...@@ -1529,7 +1529,7 @@ class HingeEmbeddingLoss(Layer): ...@@ -1529,7 +1529,7 @@ class HingeEmbeddingLoss(Layer):
hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean') hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean')
loss = hinge_embedding_loss(input, label) loss = hinge_embedding_loss(input, label)
print(loss) print(loss)
# Tensor([0.22222222]) # Tensor(0.22222222)
""" """
def __init__(self, margin=1.0, reduction="mean", name=None): def __init__(self, margin=1.0, reduction="mean", name=None):
...@@ -1590,7 +1590,7 @@ class CosineEmbeddingLoss(Layer): ...@@ -1590,7 +1590,7 @@ class CosineEmbeddingLoss(Layer):
Available dtypes are int32, int64, float32, float64. Available dtypes are int32, int64, float32, float64.
output (Tensor): Tensor, the cosine embedding Loss of Tensor ``input1`` ``input2`` and ``label``. output (Tensor): Tensor, the cosine embedding Loss of Tensor ``input1`` ``input2`` and ``label``.
If `reduction` is ``'none'``, the shape of output loss is [N], the same as ``input`` . If `reduction` is ``'none'``, the shape of output loss is [N], the same as ``input`` .
If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [].
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1603,11 +1603,11 @@ class CosineEmbeddingLoss(Layer): ...@@ -1603,11 +1603,11 @@ class CosineEmbeddingLoss(Layer):
cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='mean') cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='mean')
output = cosine_embedding_loss(input1, input2, label) output = cosine_embedding_loss(input1, input2, label)
print(output) # [0.21155193] print(output) # 0.21155193
cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='sum') cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='sum')
output = cosine_embedding_loss(input1, input2, label) output = cosine_embedding_loss(input1, input2, label)
print(output) # [0.42310387] print(output) # 0.42310387
cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='none') cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='none')
output = cosine_embedding_loss(input1, input2, label) output = cosine_embedding_loss(input1, input2, label)
...@@ -1717,7 +1717,7 @@ class TripletMarginWithDistanceLoss(Layer): ...@@ -1717,7 +1717,7 @@ class TripletMarginWithDistanceLoss(Layer):
triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='mean') triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='mean')
loss = triplet_margin_with_distance_loss(input, positive, negative,) loss = triplet_margin_with_distance_loss(input, positive, negative,)
print(loss) print(loss)
# Tensor([0.19165580]) # Tensor(0.19165580)
""" """
...@@ -1825,7 +1825,7 @@ class TripletMarginLoss(Layer): ...@@ -1825,7 +1825,7 @@ class TripletMarginLoss(Layer):
triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean', ) triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean', )
loss = triplet_margin_loss(input, positive, negative,) loss = triplet_margin_loss(input, positive, negative,)
print(loss) print(loss)
# Tensor([0.19165580]) # Tensor(0.19165580)
""" """
...@@ -1995,7 +1995,7 @@ class SoftMarginLoss(Layer): ...@@ -1995,7 +1995,7 @@ class SoftMarginLoss(Layer):
``input``. The target labels which values should be numbers -1 or 1. ``input``. The target labels which values should be numbers -1 or 1.
Available dtype is int32, int64, float32, float64. Available dtype is int32, int64, float32, float64.
- Output (Tensor): If ``reduction`` is ``'none'``, the shape of output is - Output (Tensor): If ``reduction`` is ``'none'``, the shape of output is
same as ``input`` , else the shape of output is [1]. same as ``input`` , else the shape of output is [].
Returns: Returns:
A callable object of SoftMarginLoss. A callable object of SoftMarginLoss.
...@@ -2010,8 +2010,8 @@ class SoftMarginLoss(Layer): ...@@ -2010,8 +2010,8 @@ class SoftMarginLoss(Layer):
soft_margin_loss = paddle.nn.SoftMarginLoss() soft_margin_loss = paddle.nn.SoftMarginLoss()
output = soft_margin_loss(input, label) output = soft_margin_loss(input, label)
print(output) print(output)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.64022040]) # 0.64022040)
input_np = paddle.uniform(shape=(5, 5), min=0.1, max=0.8, dtype="float64") input_np = paddle.uniform(shape=(5, 5), min=0.1, max=0.8, dtype="float64")
label_np = paddle.randint(high=2, shape=(5, 5), dtype="int64") label_np = paddle.randint(high=2, shape=(5, 5), dtype="int64")
......
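The pattern above repeats across the loss APIs: with ``reduction='mean'`` or ``'sum'`` the result is now a 0-D Tensor with shape [], while ``reduction='none'`` keeps the elementwise shape. A minimal dygraph sketch of that behavior (assuming Paddle 2.5+ with 0-D Tensor support; values shown are illustrative):

    import paddle

    input = paddle.to_tensor([0.5, 0.6, 0.7])
    label = paddle.to_tensor([1.0, 0.5, 0.25])

    loss = paddle.nn.L1Loss()(input, label)           # default reduction='mean'
    print(loss.shape)                                  # [] -> 0-D Tensor, not [1]
    print(float(loss))                                 # 0.35, a plain Python scalar

    loss_none = paddle.nn.L1Loss(reduction='none')(input, label)
    print(loss_none.shape)                             # [3] -> same shape as input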
...@@ -66,10 +66,10 @@ def accuracy(input, label, k=1, correct=None, total=None): ...@@ -66,10 +66,10 @@ def accuracy(input, label, k=1, correct=None, total=None):
exe.run(static.default_startup_program()) exe.run(static.default_startup_program())
x = np.random.rand(3, 32, 32).astype("float32") x = np.random.rand(3, 32, 32).astype("float32")
y = np.array([[1],[0],[1]]) y = np.array([[1],[0],[1]])
output= exe.run(feed={"input": x,"label": y}, output = exe.run(feed={"input": x,"label": y},
fetch_list=[result[0]]) fetch_list=[result])
print(output) print(output)
#[array([0.], dtype=float32)] # [array(0.33333334, dtype=float32)]
""" """
if _non_static_mode(): if _non_static_mode():
......
...@@ -717,10 +717,10 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): ...@@ -717,10 +717,10 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
We use the dtype conversion rules following this: We use the dtype conversion rules following this:
Keep dtype Keep dtype
np.number ───────────► paddle.Tensor np.number ───────────► paddle.Tensor
(0D-Tensor) (0-D Tensor)
default_dtype default_dtype
Python Number ───────────────► paddle.Tensor Python Number ───────────────► paddle.Tensor
(0D-Tensor) (0-D Tensor)
Keep dtype Keep dtype
np.ndarray ───────────► paddle.Tensor np.ndarray ───────────► paddle.Tensor
...@@ -753,7 +753,6 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): ...@@ -753,7 +753,6 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
# 1) # 1)
x = paddle.to_tensor(1, stop_gradient=False) x = paddle.to_tensor(1, stop_gradient=False)
print(x)
# Tensor(shape=[], dtype=int64, place=CPUPlace, stop_gradient=False, # Tensor(shape=[], dtype=int64, place=CPUPlace, stop_gradient=False,
# 1) # 1)
......
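Both conversion rules shown above land on a 0-D Tensor. A small sketch (assuming Paddle 2.5+; numpy is only used to show the dtype-keeping branch):

    import numpy as np
    import paddle

    a = paddle.to_tensor(1)                 # Python number -> default dtype, 0-D
    b = paddle.to_tensor(np.float64(1.0))   # numpy scalar -> keeps float64, 0-D
    print(a.shape, a.dtype)                 # [] paddle.int64
    print(b.shape, b.dtype)                 # [] paddle.float64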
...@@ -333,8 +333,8 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): ...@@ -333,8 +333,8 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None):
# compute inf-order norm # compute inf-order norm
out_pnorm = paddle.linalg.norm(x, p=float("inf")) out_pnorm = paddle.linalg.norm(x, p=float("inf"))
# out_pnorm = Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # out_pnorm = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [12.]) # 12.)
out_pnorm = paddle.linalg.norm(x, p=float("inf"), axis=0) out_pnorm = paddle.linalg.norm(x, p=float("inf"), axis=0)
# out_pnorm: Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, # out_pnorm: Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
...@@ -344,8 +344,8 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): ...@@ -344,8 +344,8 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None):
# compute -inf-order norm # compute -inf-order norm
out_pnorm = paddle.linalg.norm(x, p=-float("inf")) out_pnorm = paddle.linalg.norm(x, p=-float("inf"))
# out_pnorm: Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # out_pnorm: Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.]) # 0.)
out_pnorm = paddle.linalg.norm(x, p=-float("inf"), axis=0) out_pnorm = paddle.linalg.norm(x, p=-float("inf"), axis=0)
# out_pnorm: Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, # out_pnorm: Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
...@@ -690,16 +690,16 @@ def dist(x, y, p=2, name=None): ...@@ -690,16 +690,16 @@ def dist(x, y, p=2, name=None):
x = paddle.to_tensor([[3, 3],[3, 3]], dtype="float32") x = paddle.to_tensor([[3, 3],[3, 3]], dtype="float32")
y = paddle.to_tensor([[3, 3],[3, 1]], dtype="float32") y = paddle.to_tensor([[3, 3],[3, 1]], dtype="float32")
out = paddle.dist(x, y, 0) out = paddle.dist(x, y, 0)
print(out) # out = [1.] print(out) # out = 1.
out = paddle.dist(x, y, 2) out = paddle.dist(x, y, 2)
print(out) # out = [2.] print(out) # out = 2.
out = paddle.dist(x, y, float("inf")) out = paddle.dist(x, y, float("inf"))
print(out) # out = [2.] print(out) # out = 2.
out = paddle.dist(x, y, float("-inf")) out = paddle.dist(x, y, float("-inf"))
print(out) # out = [0.] print(out) # out = 0.
""" """
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.dist(x, y, p) return _C_ops.dist(x, y, p)
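Full-reduction norms behave the same way: ``paddle.dist`` and ``paddle.linalg.norm`` without an ``axis`` now return 0-D Tensors. A rough sketch (assuming the same Paddle 2.5+ dygraph setup):

    import paddle

    x = paddle.to_tensor([[3., 3.], [3., 3.]])
    y = paddle.to_tensor([[3., 3.], [3., 1.]])

    d = paddle.dist(x, y, 2)
    print(d.shape)           # [] -> 0-D scalar result
    print(d.item())          # 2.0

    n = paddle.linalg.norm(x, p=float("inf"))
    print(n.shape)           # [] as well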
...@@ -745,48 +745,48 @@ def cond(x, p=None, name=None): ...@@ -745,48 +745,48 @@ def cond(x, p=None, name=None):
# compute conditional number when p is None # compute conditional number when p is None
out = paddle.linalg.cond(x) out = paddle.linalg.cond(x)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.41421342]) # 1.41421342)
# compute conditional number when order of the norm is 'fro' # compute conditional number when order of the norm is 'fro'
out_fro = paddle.linalg.cond(x, p='fro') out_fro = paddle.linalg.cond(x, p='fro')
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [3.16227770]) # 3.16227770)
# compute conditional number when order of the norm is 'nuc' # compute conditional number when order of the norm is 'nuc'
out_nuc = paddle.linalg.cond(x, p='nuc') out_nuc = paddle.linalg.cond(x, p='nuc')
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [9.24263859]) # 9.24263859)
# compute conditional number when order of the norm is 1 # compute conditional number when order of the norm is 1
out_1 = paddle.linalg.cond(x, p=1) out_1 = paddle.linalg.cond(x, p=1)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [2.]) # 2.)
# compute conditional number when order of the norm is -1 # compute conditional number when order of the norm is -1
out_minus_1 = paddle.linalg.cond(x, p=-1) out_minus_1 = paddle.linalg.cond(x, p=-1)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.]) # 1.)
# compute conditional number when order of the norm is 2 # compute conditional number when order of the norm is 2
out_2 = paddle.linalg.cond(x, p=2) out_2 = paddle.linalg.cond(x, p=2)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.41421342]) # 1.41421342)
# compute conditional number when order of the norm is -1 # compute conditional number when order of the norm is -1
out_minus_2 = paddle.linalg.cond(x, p=-2) out_minus_2 = paddle.linalg.cond(x, p=-2)
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [0.70710683]) # 0.70710683)
# compute conditional number when order of the norm is inf # compute conditional number when order of the norm is inf
out_inf = paddle.linalg.cond(x, p=float("inf")) out_inf = paddle.linalg.cond(x, p=float("inf"))
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [2.]) # 2.)
# compute conditional number when order of the norm is -inf # compute conditional number when order of the norm is -inf
out_minus_inf = paddle.linalg.cond(x, p=-float("inf")) out_minus_inf = paddle.linalg.cond(x, p=-float("inf"))
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.]) # 1.)
a = paddle.randn([2, 4, 4]) a = paddle.randn([2, 4, 4])
# Tensor(shape=[2, 4, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[2, 4, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
...@@ -1095,13 +1095,13 @@ def dot(x, y, name=None): ...@@ -1095,13 +1095,13 @@ def dot(x, y, name=None):
x = paddle.to_tensor([1, 2, 3]) x = paddle.to_tensor([1, 2, 3])
y = paddle.to_tensor([4, 5, 6]) y = paddle.to_tensor([4, 5, 6])
z = paddle.dot(x, y) z = paddle.dot(x, y)
print(z) # [32] print(z) # 32
# 2-D Tensor * 2-D Tensor # 2-D Tensor * 2-D Tensor
x = paddle.to_tensor([[1, 2, 3], [2, 4, 6]]) x = paddle.to_tensor([[1, 2, 3], [2, 4, 6]])
y = paddle.to_tensor([[4, 5, 6], [4, 5, 6]]) y = paddle.to_tensor([[4, 5, 6], [4, 5, 6]])
z = paddle.dot(x, y) z = paddle.dot(x, y)
print(z) # [[32], [64]] print(z) # [32, 64]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
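For ``paddle.dot``, 1-D inputs now reduce all the way to a 0-D Tensor, while 2-D inputs keep one scalar per row. A short sketch (same assumptions as above):

    import paddle

    x = paddle.to_tensor([1., 2., 3.])
    y = paddle.to_tensor([4., 5., 6.])
    print(paddle.dot(x, y).shape)    # [] -> 32. as a 0-D Tensor

    x2 = paddle.to_tensor([[1., 2., 3.], [2., 4., 6.]])
    y2 = paddle.to_tensor([[4., 5., 6.], [4., 5., 6.]])
    print(paddle.dot(x2, y2).shape)  # [2] -> [32., 64.]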
...@@ -1163,7 +1163,7 @@ def cov(x, rowvar=True, ddof=True, fweights=None, aweights=None, name=None): ...@@ -1163,7 +1163,7 @@ def cov(x, rowvar=True, ddof=True, fweights=None, aweights=None, name=None):
import paddle import paddle
xt = paddle.rand((3,4)) xt = paddle.rand((3, 4))
paddle.linalg.cov(xt) paddle.linalg.cov(xt)
''' '''
...@@ -1485,7 +1485,7 @@ def matrix_rank(x, tol=None, hermitian=False, name=None): ...@@ -1485,7 +1485,7 @@ def matrix_rank(x, tol=None, hermitian=False, name=None):
a = paddle.eye(10) a = paddle.eye(10)
b = paddle.linalg.matrix_rank(a) b = paddle.linalg.matrix_rank(a)
print(b) print(b)
# b = [10] # b = 10
c = paddle.ones(shape=[3, 4, 5, 5]) c = paddle.ones(shape=[3, 4, 5, 5])
d = paddle.linalg.matrix_rank(c, tol=0.01, hermitian=True) d = paddle.linalg.matrix_rank(c, tol=0.01, hermitian=True)
......
...@@ -288,13 +288,8 @@ def is_empty(x, name=None): ...@@ -288,13 +288,8 @@ def is_empty(x, name=None):
input = paddle.rand(shape=[4, 32, 32], dtype='float32') input = paddle.rand(shape=[4, 32, 32], dtype='float32')
res = paddle.is_empty(x=input) res = paddle.is_empty(x=input)
print("res:", res) # res: Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True,
# ('res:', Tensor: eager_tmp_1 # False)
# - place: CPUPlace
# - shape: [1]
# - layout: NCHW
# - dtype: bool
# - data: [0])
""" """
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -339,9 +334,9 @@ def equal_all(x, y, name=None): ...@@ -339,9 +334,9 @@ def equal_all(x, y, name=None):
y = paddle.to_tensor([1, 2, 3]) y = paddle.to_tensor([1, 2, 3])
z = paddle.to_tensor([1, 4, 3]) z = paddle.to_tensor([1, 4, 3])
result1 = paddle.equal_all(x, y) result1 = paddle.equal_all(x, y)
print(result1) # result1 = [True ] print(result1) # result1 = True
result2 = paddle.equal_all(x, z) result2 = paddle.equal_all(x, z)
print(result2) # result2 = [False ] print(result2) # result2 = False
""" """
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.equal_all(x, y) return _C_ops.equal_all(x, y)
...@@ -388,21 +383,21 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): ...@@ -388,21 +383,21 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None):
y = paddle.to_tensor([10000.1, 1e-08]) y = paddle.to_tensor([10000.1, 1e-08])
result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08,
equal_nan=False, name="ignore_nan") equal_nan=False, name="ignore_nan")
# [False] # False
result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08,
equal_nan=True, name="equal_nan") equal_nan=True, name="equal_nan")
# [False] # False
x = paddle.to_tensor([1.0, float('nan')]) x = paddle.to_tensor([1.0, float('nan')])
y = paddle.to_tensor([1.0, float('nan')]) y = paddle.to_tensor([1.0, float('nan')])
result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08,
equal_nan=False, name="ignore_nan") equal_nan=False, name="ignore_nan")
# [False] # False
result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08,
equal_nan=True, name="equal_nan") equal_nan=True, name="equal_nan")
# [True] # True
""" """
if in_dygraph_mode(): if in_dygraph_mode():
......
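``equal_all`` and ``allclose`` likewise return 0-D bool Tensors, which convert directly to Python booleans. A minimal sketch (assumptions as above):

    import paddle

    x = paddle.to_tensor([1, 2, 3])
    y = paddle.to_tensor([1, 2, 3])

    r = paddle.equal_all(x, y)
    print(r.shape)      # [] -> 0-D bool Tensor
    if bool(r):         # a 0-D bool converts cleanly to a Python bool
        print("all elements equal")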
...@@ -268,11 +268,11 @@ def slice(input, axes, starts, ends): ...@@ -268,11 +268,11 @@ def slice(input, axes, starts, ends):
Args: Args:
input (Tensor): A ``Tensor`` . The data type is ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``. input (Tensor): A ``Tensor`` . The data type is ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``.
axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to . axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to .
starts (list|tuple|Tensor): The data type is ``int32`` . If ``starts`` is a list or tuple, the elements of starts (list|tuple|Tensor): The data type is ``int32`` . If ``starts`` is a list or tuple, each element of
it should be integers or Tensors with shape [1]. If ``starts`` is an Tensor, it should be an 1-D Tensor. it should be an integer or a 0-D int Tensor with shape []. If ``starts`` is a Tensor, it should be a 1-D Tensor.
It represents starting indices of corresponding axis in ``axes``. It represents starting indices of corresponding axis in ``axes``.
ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, the elements of ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, each element of
it should be integers or Tensors with shape [1]. If ``ends`` is an Tensor, it should be an 1-D Tensor . it should be an integer or a 0-D int Tensor with shape []. If ``ends`` is a Tensor, it should be a 1-D Tensor.
It represents ending indices of corresponding axis in ``axes``. It represents ending indices of corresponding axis in ``axes``.
Returns: Returns:
...@@ -1065,21 +1065,21 @@ def tolist(x): ...@@ -1065,21 +1065,21 @@ def tolist(x):
print(expectlist) #[0, 1, 2, 3, 4] print(expectlist) #[0, 1, 2, 3, 4]
""" """
# TODO(zhouwei): will remove 0D Tensor.numpy() hack # TODO(zhouwei): will remove 0-D Tensor.numpy() hack
return x.numpy(False).tolist() return x.numpy(False).tolist()
def concat(x, axis=0, name=None): def concat(x, axis=0, name=None):
""" """
Concatenates the input along the axis. Concatenates the input along the axis. It does not support 0-D Tensors, because concatenation requires an axis and a 0-D Tensor
has no axes.
Args: Args:
x (list|tuple): ``x`` is a Tensor list or Tensor tuple which is with data type bool, float16, x (list|tuple): ``x`` is a Tensor list or Tensor tuple which is with data type bool, float16,
float32, float64, int32, int64, int8, uint8. All the Tensors in ``x`` must have same data type. float32, float64, int32, int64, int8, uint8. All the Tensors in ``x`` must have same data type.
axis (int|Tensor, optional): Specify the axis to operate on the input Tensors. axis (int|Tensor, optional): Specify the axis to operate on the input Tensors.
It's a scalar with data type int or a Tensor with shape [1] and data type int32 It should be an integer or a 0-D int Tensor with shape []. The effective range is [-R, R), where R is Rank(x). When ``axis < 0``,
or int64. The effective range is [-R, R), where R is Rank(x). When ``axis < 0``,
it works the same way as ``axis+R``. Default is 0. it works the same way as ``axis+R``. Default is 0.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
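In the same spirit, ``axis`` arguments that used to be shape-[1] Tensors are now documented as 0-D int Tensors. A quick sketch for ``concat`` (assumptions as above):

    import paddle

    x = paddle.to_tensor([[1, 2], [3, 4]])
    y = paddle.to_tensor([[5, 6], [7, 8]])

    axis = paddle.to_tensor(1)               # a 0-D int Tensor with shape []
    out = paddle.concat([x, y], axis=axis)
    print(out.shape)                          # [2, 4]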
...@@ -1550,11 +1550,11 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): ...@@ -1550,11 +1550,11 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None):
if x_dim == 0: if x_dim == 0:
if not (isinstance(start_axis, int)) or start_axis not in [0, -1]: if not (isinstance(start_axis, int)) or start_axis not in [0, -1]:
raise ValueError( raise ValueError(
"The start_axis should be int, and should be 0 or -1 when the input tensor is a 0D-Tensor" "The start_axis should be int, and should be 0 or -1 when the input tensor is a 0-D-Tensor"
) )
if not (isinstance(stop_axis, int)) or stop_axis not in [0, -1]: if not (isinstance(stop_axis, int)) or stop_axis not in [0, -1]:
raise ValueError( raise ValueError(
"The stop_axis should be int, and should be 0 or -1 when the input tensor is a 0D-Tensor" "The stop_axis should be int, and should be 0 or -1 when the input tensor is a 0-D-Tensor"
) )
else: else:
if ( if (
...@@ -1913,8 +1913,8 @@ def split(x, num_or_sections, axis=0, name=None): ...@@ -1913,8 +1913,8 @@ def split(x, num_or_sections, axis=0, name=None):
If ``num_or_sections`` is a list or tuple, the length of it indicates the number of If ``num_or_sections`` is a list or tuple, the length of it indicates the number of
sub-Tensors and the elements in it indicate the sizes of sub-Tensors' dimension orderly. sub-Tensors and the elements in it indicate the sizes of sub-Tensors' dimension orderly.
The length of the list must not be larger than the ``x`` 's size of specified ``axis``. The length of the list must not be larger than the ``x`` 's size of specified ``axis``.
axis (int|Tensor, optional): The axis along which to split, it can be a scalar with type axis (int|Tensor, optional): The axis along which to split, it can be an integer or a ``0-D Tensor``
``int`` or a ``Tensor`` with shape [1] and data type ``int32`` or ``int64``. with shape [] and data type ``int32`` or ``int64``.
If :math::`axis < 0`, the axis to split along is :math:`rank(x) + axis`. Default is 0. If :math:`axis < 0`, the axis to split along is :math:`rank(x) + axis`. Default is 0.
name (str, optional): The default value is None. Normally there is no need for user to set this property. name (str, optional): The default value is None. Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name` . For more information, please refer to :ref:`api_guide_Name` .
...@@ -2557,7 +2557,7 @@ def unsqueeze(x, axis, name=None): ...@@ -2557,7 +2557,7 @@ def unsqueeze(x, axis, name=None):
Args: Args:
x (Tensor): The input Tensor to be unsqueezed. Supported data type: bfloat16, float16, float32, float64, bool, int8, int32, int64. x (Tensor): The input Tensor to be unsqueezed. Supported data type: bfloat16, float16, float32, float64, bool, int8, int32, int64.
axis (int|list|tuple|Tensor): Indicates the dimensions to be inserted. The data type is ``int32`` . axis (int|list|tuple|Tensor): Indicates the dimensions to be inserted. The data type is ``int32`` .
If ``axis`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``axis`` is a list or tuple, each element of it should be an integer or a 0-D Tensor with shape [].
If ``axis`` is a Tensor, it should be an 1-D Tensor . If ``axis`` is a Tensor, it should be a 1-D Tensor.
If ``axis`` is negative, ``axis = axis + ndim(x) + 1``. If ``axis`` is negative, ``axis = axis + ndim(x) + 1``.
name (str|None): Name for this layer. Please refer to :ref:`api_guide_Name`, Default None. name (str|None): Name for this layer. Please refer to :ref:`api_guide_Name`, Default None.
...@@ -3083,8 +3083,8 @@ def chunk(x, chunks, axis=0, name=None): ...@@ -3083,8 +3083,8 @@ def chunk(x, chunks, axis=0, name=None):
Args: Args:
x (Tensor): A N-D Tensor. The data type is bool, float16, float32, float64, int32 or int64. x (Tensor): A N-D Tensor. The data type is bool, float16, float32, float64, int32 or int64.
chunks(int): The number of tensor to be split along the certain axis. chunks(int): The number of tensor to be split along the certain axis.
axis (int|Tensor, optional): The axis along which to split, it can be a scalar with type axis (int|Tensor, optional): The axis along which to split, it can be an integer or a ``0-D Tensor``
``int`` or a ``Tensor`` with shape [1] and data type ``int32`` or ``int64``. with shape [] and data type ``int32`` or ``int64``.
If :math::`axis < 0`, the axis to split along is :math:`rank(x) + axis`. Default is 0. If :math:`axis < 0`, the axis to split along is :math:`rank(x) + axis`. Default is 0.
name (str, optional): The default value is None. Normally there is no need for user to set this property. name (str, optional): The default value is None. Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name` . For more information, please refer to :ref:`api_guide_Name` .
...@@ -3523,7 +3523,7 @@ def reshape(x, shape, name=None): ...@@ -3523,7 +3523,7 @@ def reshape(x, shape, name=None):
Args: Args:
x (Tensor): An N-D Tensor. The data type is ``float32``, ``float64``, ``int32``, ``int64`` or ``bool`` x (Tensor): An N-D Tensor. The data type is ``float32``, ``float64``, ``int32``, ``int64`` or ``bool``
shape (list|tuple|Tensor): Define the target shape. At most one dimension of the target shape can be -1. shape (list|tuple|Tensor): Define the target shape. At most one dimension of the target shape can be -1.
The data type is ``int32`` . If ``shape`` is a list or tuple, the elements of it should be integers or Tensors with shape []. The data type is ``int32`` . If ``shape`` is a list or tuple, each element of it should be an integer or a Tensor with shape [].
If ``shape`` is an Tensor, it should be an 1-D Tensor . If ``shape`` is a Tensor, it should be a 1-D Tensor.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
...@@ -3843,11 +3843,15 @@ def strided_slice(x, axes, starts, ends, strides, name=None): ...@@ -3843,11 +3843,15 @@ def strided_slice(x, axes, starts, ends, strides, name=None):
x (Tensor): An N-D ``Tensor``. The data type is ``bool``, ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``. x (Tensor): An N-D ``Tensor``. The data type is ``bool``, ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``.
axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to. axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to.
It's optional. If it is not provides, it will be treated as :math:`[0,1,...,len(starts)-1]`. It's optional. If it is not provided, it will be treated as :math:`[0,1,...,len(starts)-1]`.
starts (list|tuple|Tensor): The data type is ``int32`` . If ``starts`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``starts`` is an Tensor, it should be an 1-D Tensor. It represents starting indices of corresponding axis in ``axes``. starts (list|tuple|Tensor): The data type is ``int32`` . If ``starts`` is a list or tuple, the elements of it should be
ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, the elements of integers or Tensors with shape []. If ``starts`` is a Tensor, it should be a 1-D Tensor.
it should be integers or Tensors with shape [1]. If ``ends`` is an Tensor, it should be an 1-D Tensor . It represents ending indices of corresponding axis in ``axes``. It represents starting indices of corresponding axis in ``axes``.
strides (list|tuple|Tensor): The data type is ``int32`` . If ``strides`` is a list or tuple, the elements of ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, the elements of it should be
it should be integers or Tensors with shape [1]. If ``strides`` is an Tensor, it should be an 1-D Tensor . It represents slice step of corresponding axis in ``axes``. integers or Tensors with shape []. If ``ends`` is a Tensor, it should be a 1-D Tensor.
It represents ending indices of corresponding axis in ``axes``.
strides (list|tuple|Tensor): The data type is ``int32`` . If ``strides`` is a list or tuple, the elements of it should be
integers or Tensors with shape []. If ``strides`` is a Tensor, it should be a 1-D Tensor.
It represents slice step of corresponding axis in ``axes``.
name(str, optional): The default value is None. Normally there is no need for user to set this property. name(str, optional): The default value is None. Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name` . For more information, please refer to :ref:`api_guide_Name` .
...@@ -4074,7 +4078,7 @@ def tensordot(x, y, axes=2, name=None): ...@@ -4074,7 +4078,7 @@ def tensordot(x, y, axes=2, name=None):
y = paddle.arange(10, dtype=data_type) y = paddle.arange(10, dtype=data_type)
z1 = paddle.tensordot(x, y, axes=1) z1 = paddle.tensordot(x, y, axes=1)
z2 = paddle.dot(x, y) z2 = paddle.dot(x, y)
# z1 = z2 = [285.] # z1 = z2 = 285.
# For two 2-d tensor x and y, the case axes=1 is equivalent to matrix multiplication. # For two 2-d tensor x and y, the case axes=1 is equivalent to matrix multiplication.
......
...@@ -187,7 +187,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): ...@@ -187,7 +187,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
Args: Args:
x (Tensor): Input N-D Tensor of scale operator. Data type can be float32, float64, int8, int16, int32, int64, uint8. x (Tensor): Input N-D Tensor of scale operator. Data type can be float32, float64, int8, int16, int32, int64, uint8.
scale (float|Tensor): The scale factor of the input, it should be a float number or a Tensor with shape [1] and data type as float32. scale (float|Tensor): The scale factor of the input, it should be a float number or a 0-D Tensor with shape [] and data type as float32.
bias (float): The bias to be put on the input. bias (float): The bias to be put on the input.
bias_after_scale (bool): Apply bias addition after or before scaling. It is useful for numeric stability in some circumstances. bias_after_scale (bool): Apply bias addition after or before scaling. It is useful for numeric stability in some circumstances.
act (str, optional): Activation applied to the output such as tanh, softmax, sigmoid, relu. act (str, optional): Activation applied to the output such as tanh, softmax, sigmoid, relu.
...@@ -1337,9 +1337,9 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None): ...@@ -1337,9 +1337,9 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None):
# Each example is followed by the corresponding output tensor. # Each example is followed by the corresponding output tensor.
x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9],
[0.1, 0.2, 0.6, 0.7]]) [0.1, 0.2, 0.6, 0.7]])
out1 = paddle.sum(x) # [3.5] out1 = paddle.sum(x) # 3.5
out2 = paddle.sum(x, axis=0) # [0.3, 0.5, 1.1, 1.6] out2 = paddle.sum(x, axis=0) # [0.3, 0.5, 1.1, 1.6]
out3 = paddle.sum(x, axis=-1) # [1.9, 1.6] out3 = paddle.sum(x, axis=-1) # [1.9, 1.6]
out4 = paddle.sum(x, axis=1, keepdim=True) # [[1.9], [1.6]] out4 = paddle.sum(x, axis=1, keepdim=True) # [[1.9], [1.6]]
# y is a Tensor with shape [2, 2, 2] and elements as below: # y is a Tensor with shape [2, 2, 2] and elements as below:
...@@ -1357,7 +1357,7 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None): ...@@ -1357,7 +1357,7 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None):
# Each example is followed by the corresponding output tensor. # Each example is followed by the corresponding output tensor.
x = paddle.to_tensor([[True, True, True, True], x = paddle.to_tensor([[True, True, True, True],
[False, False, False, False]]) [False, False, False, False]])
out7 = paddle.sum(x) # [4] out7 = paddle.sum(x) # 4
out8 = paddle.sum(x, axis=0) # [1, 1, 1, 1] out8 = paddle.sum(x, axis=0) # [1, 1, 1, 1]
out9 = paddle.sum(x, axis=1) # [4, 0] out9 = paddle.sum(x, axis=1) # [4, 0]
""" """
...@@ -1493,9 +1493,9 @@ def nansum(x, axis=None, dtype=None, keepdim=False, name=None): ...@@ -1493,9 +1493,9 @@ def nansum(x, axis=None, dtype=None, keepdim=False, name=None):
# Each example is followed by the corresponding output tensor. # Each example is followed by the corresponding output tensor.
x = paddle.to_tensor([[float('nan'), 0.3, 0.5, 0.9], x = paddle.to_tensor([[float('nan'), 0.3, 0.5, 0.9],
[0.1, 0.2, float('-nan'), 0.7]],dtype="float32") [0.1, 0.2, float('-nan'), 0.7]],dtype="float32")
out1 = paddle.nansum(x) # [2.7] out1 = paddle.nansum(x) # 2.7
out2 = paddle.nansum(x, axis=0) # [0.1, 0.5, 0.5, 1.6] out2 = paddle.nansum(x, axis=0) # [0.1, 0.5, 0.5, 1.6]
out3 = paddle.nansum(x, axis=-1) # [1.7, 1.0] out3 = paddle.nansum(x, axis=-1) # [1.7, 1.0]
out4 = paddle.nansum(x, axis=1, keepdim=True) # [[1.7], [1.0]] out4 = paddle.nansum(x, axis=1, keepdim=True) # [[1.7], [1.0]]
# y is a Tensor with shape [2, 2, 2] and elements as below: # y is a Tensor with shape [2, 2, 2] and elements as below:
...@@ -1553,7 +1553,7 @@ def nanmean(x, axis=None, keepdim=False, name=None): ...@@ -1553,7 +1553,7 @@ def nanmean(x, axis=None, keepdim=False, name=None):
x = paddle.to_tensor([[float('nan'), 0.3, 0.5, 0.9], x = paddle.to_tensor([[float('nan'), 0.3, 0.5, 0.9],
[0.1, 0.2, float('-nan'), 0.7]]) [0.1, 0.2, float('-nan'), 0.7]])
out1 = paddle.nanmean(x) out1 = paddle.nanmean(x)
# [0.44999996] # 0.44999996
out2 = paddle.nanmean(x, axis=0) out2 = paddle.nanmean(x, axis=0)
# [0.1, 0.25, 0.5, 0.79999995] # [0.1, 0.25, 0.5, 0.79999995]
out3 = paddle.nanmean(x, axis=0, keepdim=True) out3 = paddle.nanmean(x, axis=0, keepdim=True)
...@@ -2263,7 +2263,7 @@ def logsumexp(x, axis=None, keepdim=False, name=None): ...@@ -2263,7 +2263,7 @@ def logsumexp(x, axis=None, keepdim=False, name=None):
import paddle import paddle
x = paddle.to_tensor([[-1.5, 0., 2.], [3., 1.2, -2.4]]) x = paddle.to_tensor([[-1.5, 0., 2.], [3., 1.2, -2.4]])
out1 = paddle.logsumexp(x) # [3.4691226] out1 = paddle.logsumexp(x) # 3.4691226
out2 = paddle.logsumexp(x, 1) # [2.15317821, 3.15684602] out2 = paddle.logsumexp(x, 1) # [2.15317821, 3.15684602]
""" """
...@@ -2375,7 +2375,7 @@ def max(x, axis=None, keepdim=False, name=None): ...@@ -2375,7 +2375,7 @@ def max(x, axis=None, keepdim=False, name=None):
result1 = paddle.max(x) result1 = paddle.max(x)
result1.backward() result1.backward()
print(result1, x.grad) print(result1, x.grad)
#[0.9], [[0., 0., 0., 1.], [0., 0., 0., 0.]] # 0.9, [[0., 0., 0., 1.], [0., 0., 0., 0.]]
x.clear_grad() x.clear_grad()
result2 = paddle.max(x, axis=0) result2 = paddle.max(x, axis=0)
...@@ -2476,7 +2476,7 @@ def min(x, axis=None, keepdim=False, name=None): ...@@ -2476,7 +2476,7 @@ def min(x, axis=None, keepdim=False, name=None):
result1 = paddle.min(x) result1 = paddle.min(x)
result1.backward() result1.backward()
print(result1, x.grad) print(result1, x.grad)
#[0.1], [[0., 0., 0., 0.], [1., 0., 0., 0.]] # 0.1, [[0., 0., 0., 0.], [1., 0., 0., 0.]]
x.clear_grad() x.clear_grad()
result2 = paddle.min(x, axis=0) result2 = paddle.min(x, axis=0)
...@@ -2580,13 +2580,13 @@ def amax(x, axis=None, keepdim=False, name=None): ...@@ -2580,13 +2580,13 @@ def amax(x, axis=None, keepdim=False, name=None):
result1 = paddle.amax(x) result1 = paddle.amax(x)
result1.backward() result1.backward()
print(result1, x.grad) print(result1, x.grad)
#[0.9], [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]] # 0.9, [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]]
x.clear_grad() x.clear_grad()
result1_max = paddle.max(x) result1_max = paddle.max(x)
result1_max.backward() result1_max.backward()
print(result1_max, x.grad) print(result1_max, x.grad)
#[0.9], [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]] # 0.9, [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]]
############################### ###############################
...@@ -2690,13 +2690,13 @@ def amin(x, axis=None, keepdim=False, name=None): ...@@ -2690,13 +2690,13 @@ def amin(x, axis=None, keepdim=False, name=None):
result1 = paddle.amin(x) result1 = paddle.amin(x)
result1.backward() result1.backward()
print(result1, x.grad) print(result1, x.grad)
#[0.1], [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]] # 0.1, [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]]
x.clear_grad() x.clear_grad()
result1_min = paddle.min(x) result1_min = paddle.min(x)
result1_min.backward() result1_min.backward()
print(result1_min, x.grad) print(result1_min, x.grad)
#[0.1], [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]] # 0.1, [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]]
############################### ###############################
...@@ -2907,10 +2907,10 @@ def clip(x, min=None, max=None, name=None): ...@@ -2907,10 +2907,10 @@ def clip(x, min=None, max=None, name=None):
Args: Args:
x (Tensor): An N-D Tensor with data type float16, float32, float64, int32 or int64. x (Tensor): An N-D Tensor with data type float16, float32, float64, int32 or int64.
min (float|int|Tensor, optional): The lower bound with type ``float`` , ``int`` or a ``Tensor`` min (float|int|Tensor, optional): The lower bound with type ``float`` , ``int`` or a ``0-D Tensor``
with shape [1] and type ``int32``, ``float16``, ``float32``, ``float64``. with shape [] and type ``int32``, ``float16``, ``float32``, ``float64``.
max (float|int|Tensor, optional): The upper bound with type ``float``, ``int`` or a ``Tensor`` max (float|int|Tensor, optional): The upper bound with type ``float``, ``int`` or a ``0-D Tensor``
with shape [1] and type ``int32``, ``float16``, ``float32``, ``float64``. with shape [] and type ``int32``, ``float16``, ``float32``, ``float64``.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
...@@ -3064,7 +3064,7 @@ def trace(x, offset=0, axis1=0, axis2=1, name=None): ...@@ -3064,7 +3064,7 @@ def trace(x, offset=0, axis1=0, axis2=1, name=None):
case1 = paddle.randn([2, 3]) case1 = paddle.randn([2, 3])
case2 = paddle.randn([3, 10, 10]) case2 = paddle.randn([3, 10, 10])
case3 = paddle.randn([3, 10, 5, 10]) case3 = paddle.randn([3, 10, 5, 10])
data1 = paddle.trace(case1) # data1.shape = [1] data1 = paddle.trace(case1) # data1.shape = []
data2 = paddle.trace(case2, offset=1, axis1=1, axis2=2) # data2.shape = [3] data2 = paddle.trace(case2, offset=1, axis1=1, axis2=2) # data2.shape = [3]
data3 = paddle.trace(case3, offset=-3, axis1=1, axis2=-1) # data2.shape = [3, 5] data3 = paddle.trace(case3, offset=-3, axis1=1, axis2=-1) # data3.shape = [3, 5]
""" """
...@@ -3692,7 +3692,7 @@ def prod(x, axis=None, keepdim=False, dtype=None, name=None): ...@@ -3692,7 +3692,7 @@ def prod(x, axis=None, keepdim=False, dtype=None, name=None):
x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9],
[0.1, 0.2, 0.6, 0.7]]) [0.1, 0.2, 0.6, 0.7]])
out1 = paddle.prod(x) out1 = paddle.prod(x)
# [0.0002268] # 0.0002268
out2 = paddle.prod(x, -1) out2 = paddle.prod(x, -1)
# [0.027 0.0084] # [0.027 0.0084]
...@@ -3898,8 +3898,8 @@ def all(x, axis=None, keepdim=False, name=None): ...@@ -3898,8 +3898,8 @@ def all(x, axis=None, keepdim=False, name=None):
print(x) print(x)
x = paddle.cast(x, 'bool') x = paddle.cast(x, 'bool')
# out1 should be [False] # out1 should be False
out1 = paddle.all(x) # [False] out1 = paddle.all(x) # False
print(out1) print(out1)
# out2 should be [True, False] # out2 should be [True, False]
...@@ -3907,7 +3907,7 @@ def all(x, axis=None, keepdim=False, name=None): ...@@ -3907,7 +3907,7 @@ def all(x, axis=None, keepdim=False, name=None):
print(out2) print(out2)
# keepdim=False, out3 should be [False, True], out.shape should be (2,) # keepdim=False, out3 should be [False, True], out.shape should be (2,)
out3 = paddle.all(x, axis=-1) # [False, True] out3 = paddle.all(x, axis=-1) # [False, True]
print(out3) print(out3)
# keepdim=True, out4 should be [[False], [True]], out.shape should be (2,1) # keepdim=True, out4 should be [[False], [True]], out.shape should be (2,1)
...@@ -3972,12 +3972,12 @@ def any(x, axis=None, keepdim=False, name=None): ...@@ -3972,12 +3972,12 @@ def any(x, axis=None, keepdim=False, name=None):
# [[True, False] # [[True, False]
# [True, True]] # [True, True]]
# out1 should be [True] # out1 should be True
out1 = paddle.any(x) # [True] out1 = paddle.any(x) # True
print(out1) print(out1)
# out2 should be [True, True] # out2 should be [True, True]
out2 = paddle.any(x, axis=0) # [True, True] out2 = paddle.any(x, axis=0) # [True, True]
print(out2) print(out2)
# keepdim=False, out3 should be [True, True], out.shape should be (2,) # keepdim=False, out3 should be [True, True], out.shape should be (2,)
...@@ -4481,8 +4481,8 @@ def rad2deg(x, name=None): ...@@ -4481,8 +4481,8 @@ def rad2deg(x, name=None):
x2 = paddle.to_tensor(math.pi/2) x2 = paddle.to_tensor(math.pi/2)
result2 = paddle.rad2deg(x2) result2 = paddle.rad2deg(x2)
print(result2) print(result2)
# Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [90.]) # 90.)
x3 = paddle.to_tensor(1) x3 = paddle.to_tensor(1)
result3 = paddle.rad2deg(x3) result3 = paddle.rad2deg(x3)
...@@ -5382,27 +5382,27 @@ def trapezoid(y, x=None, dx=None, axis=-1, name=None): ...@@ -5382,27 +5382,27 @@ def trapezoid(y, x=None, dx=None, axis=-1, name=None):
y = paddle.to_tensor([4, 5, 6], dtype='float32') y = paddle.to_tensor([4, 5, 6], dtype='float32')
print(paddle.trapezoid(y)) print(paddle.trapezoid(y))
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [10.]) # 10.)
print(paddle.trapezoid(y, dx=2.)) print(paddle.trapezoid(y, dx=2.))
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [20.]) # 20.)
y = paddle.to_tensor([4, 5, 6], dtype='float32') y = paddle.to_tensor([4, 5, 6], dtype='float32')
x = paddle.to_tensor([1, 2, 3], dtype='float32') x = paddle.to_tensor([1, 2, 3], dtype='float32')
print(paddle.trapezoid(y, x)) print(paddle.trapezoid(y, x))
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [10.]) # 10.)
y = paddle.to_tensor([1, 2, 3], dtype='float64') y = paddle.to_tensor([1, 2, 3], dtype='float64')
x = paddle.to_tensor([8, 6, 4], dtype='float64') x = paddle.to_tensor([8, 6, 4], dtype='float64')
print(paddle.trapezoid(y, x)) print(paddle.trapezoid(y, x))
# Tensor(shape=[1], dtype=float64, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True,
# [-8.]) # -8.)
y = paddle.arange(6).reshape((2, 3)).astype('float32') y = paddle.arange(6).reshape((2, 3)).astype('float32')
print(paddle.trapezoid(y, axis=0)) print(paddle.trapezoid(y, axis=0))
......
...@@ -65,7 +65,7 @@ def mean(x, axis=None, keepdim=False, name=None): ...@@ -65,7 +65,7 @@ def mean(x, axis=None, keepdim=False, name=None):
[17., 18., 19., 20.], [17., 18., 19., 20.],
[21., 22., 23., 24.]]]) [21., 22., 23., 24.]]])
out1 = paddle.mean(x) out1 = paddle.mean(x)
# [12.5] # 12.5
out2 = paddle.mean(x, axis=-1) out2 = paddle.mean(x, axis=-1)
# [[ 2.5 6.5 10.5] # [[ 2.5 6.5 10.5]
# [14.5 18.5 22.5]] # [14.5 18.5 22.5]]
...@@ -140,7 +140,7 @@ def var(x, axis=None, unbiased=True, keepdim=False, name=None): ...@@ -140,7 +140,7 @@ def var(x, axis=None, unbiased=True, keepdim=False, name=None):
x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
out1 = paddle.var(x) out1 = paddle.var(x)
# [2.66666667] # 2.66666667
out2 = paddle.var(x, axis=1) out2 = paddle.var(x, axis=1)
# [1. 4.33333333] # [1. 4.33333333]
""" """
...@@ -205,9 +205,9 @@ def std(x, axis=None, unbiased=True, keepdim=False, name=None): ...@@ -205,9 +205,9 @@ def std(x, axis=None, unbiased=True, keepdim=False, name=None):
x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
out1 = paddle.std(x) out1 = paddle.std(x)
# [1.63299316] # 1.63299316
out2 = paddle.std(x, unbiased=False) out2 = paddle.std(x, unbiased=False)
# [1.49071205] # 1.49071205
out3 = paddle.std(x, axis=1) out3 = paddle.std(x, axis=1)
# [1. 2.081666] # [1. 2.081666]
...@@ -222,8 +222,7 @@ def std(x, axis=None, unbiased=True, keepdim=False, name=None): ...@@ -222,8 +222,7 @@ def std(x, axis=None, unbiased=True, keepdim=False, name=None):
def numel(x, name=None): def numel(x, name=None):
""" """
Returns the number of elements for a tensor, which is a int64 Tensor with shape [1] in static graph mode Returns the number of elements for a tensor, which is a 0-D int64 Tensor with shape [].
or a scalar value in imperative mode.
Args: Args:
x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64. x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64.
...@@ -231,7 +230,7 @@ def numel(x, name=None): ...@@ -231,7 +230,7 @@ def numel(x, name=None):
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
Tensor: The number of elements for the input Tensor. Tensor: The number of elements for the input Tensor, whose shape is [].
Examples: Examples:
.. code-block:: python .. code-block:: python
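Under the new convention the result of ``paddle.numel`` is a 0-D int64 Tensor with shape []; a rough sketch of dygraph usage (assumptions as above):

    import paddle

    x = paddle.full([3, 4], 1.0)
    n = paddle.numel(x)
    print(n.shape)    # [] -> 0-D int64 Tensor
    print(int(n))     # 12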
...@@ -387,8 +386,8 @@ def median(x, axis=None, keepdim=False, name=None): ...@@ -387,8 +386,8 @@ def median(x, axis=None, keepdim=False, name=None):
# [8 , 9 , 10, 11]]) # [8 , 9 , 10, 11]])
y1 = paddle.median(x) y1 = paddle.median(x)
# Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
# [5.50000000]) # 5.50000000)
y2 = paddle.median(x, axis=0) y2 = paddle.median(x, axis=0)
# Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, # Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
...@@ -416,7 +415,7 @@ def median(x, axis=None, keepdim=False, name=None): ...@@ -416,7 +415,7 @@ def median(x, axis=None, keepdim=False, name=None):
-1, -1,
0, 0,
None, None,
], 'when input 0D, axis can only be [-1, 0] or default None' ], 'when input is 0-D, axis can only be [-1, 0] or default None'
is_flatten = True is_flatten = True
if axis is None: if axis is None:
......
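``median`` without an ``axis`` is another full reduction, so it also yields a 0-D Tensor; a short sketch (assumptions as above):

    import paddle

    x = paddle.arange(12, dtype='float32').reshape([3, 4])
    m = paddle.median(x)
    print(m.shape)    # [] -> 0-D result
    print(m.item())   # 5.5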
...@@ -181,8 +181,7 @@ def _format_tensor(var, summary, indent=0, max_width=0, signed=False): ...@@ -181,8 +181,7 @@ def _format_tensor(var, summary, indent=0, max_width=0, signed=False):
linewidth = DEFAULT_PRINT_OPTIONS.linewidth linewidth = DEFAULT_PRINT_OPTIONS.linewidth
if len(var.shape) == 0: if len(var.shape) == 0:
# currently, shape = [], i.e., scaler tensor is not supported. # 0-D Tensor, whose shape = [], should be formatted like this.
# If it is supported, it should be formatted like this.
return _format_item(var, max_width, signed) return _format_item(var, max_width, signed)
elif len(var.shape) == 1: elif len(var.shape) == 1:
item_length = max_width + 2 item_length = max_width + 2
...@@ -291,7 +290,7 @@ def _format_dense_tensor(tensor, indent): ...@@ -291,7 +290,7 @@ def _format_dense_tensor(tensor, indent):
if tensor.dtype == core.VarDesc.VarType.BF16: if tensor.dtype == core.VarDesc.VarType.BF16:
tensor = tensor.astype('float32') tensor = tensor.astype('float32')
# TODO(zhouwei): will remove 0D Tensor.numpy() hack # TODO(zhouwei): will remove 0-D Tensor.numpy() hack
np_tensor = tensor.numpy(False) np_tensor = tensor.numpy(False)
if len(tensor.shape) == 0: if len(tensor.shape) == 0:
......
...@@ -562,8 +562,6 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): ...@@ -562,8 +562,6 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
extra_compile_args[compiler] = [] extra_compile_args[compiler] = []
if IS_WINDOWS: if IS_WINDOWS:
# TODO(zhouwei): may append compile flags in future
pass
# append link flags # append link flags
extra_link_args = kwargs.get('extra_link_args', []) extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.extend(MSVC_LINK_FLAGS) extra_link_args.extend(MSVC_LINK_FLAGS)
......