diff --git a/python/paddle/distributed/auto_parallel/operators/common.py b/python/paddle/distributed/auto_parallel/operators/common.py index 987c533a012eb9f5314f6c1f720d8dd1e1cc358c..5d7ff78fa2c53c4c755088321f4a3f440ef8d6cb 100644 --- a/python/paddle/distributed/auto_parallel/operators/common.py +++ b/python/paddle/distributed/auto_parallel/operators/common.py @@ -393,7 +393,7 @@ def get_data_parallel_group(dist_ctx, op, act_grad_names, rank): for var_name in act_grad_names: var_dim_mapping = op_dist_attr.get_input_dims_mapping(var_name) - # consider that the variable's shape is [], which is 0D + # consider that the variable's shape is [], which is 0-D # TODO utilize the batch_dim attr instead of "0" in future batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1 diff --git a/python/paddle/distributed/communication/stream/all_gather.py b/python/paddle/distributed/communication/stream/all_gather.py index 69d9c5d52e080205776d5ecd1135dd0dc22b65f3..9f4b19fd5cee5b629c0a26c953b7b8bf9d3f4b68 100644 --- a/python/paddle/distributed/communication/stream/all_gather.py +++ b/python/paddle/distributed/communication/stream/all_gather.py @@ -108,7 +108,7 @@ def _all_gather_in_static_mode(tensor_list, tensor, group, sync_op): }, ) tensor_list.clear() - # 0D use stack/unstack while others use concat/split + # 0-D use stack/unstack while others use concat/split if len(tensor.shape) == 0: tensor_list.extend(paddle.unstack(out, 0)) else: diff --git a/python/paddle/distributed/communication/stream/all_to_all.py b/python/paddle/distributed/communication/stream/all_to_all.py index 38b1d2fcb3e8228f186919ca676c0ed8841b54e7..df9c72c1da44ebcf1ec147e2ecd81018460d1c6a 100644 --- a/python/paddle/distributed/communication/stream/all_to_all.py +++ b/python/paddle/distributed/communication/stream/all_to_all.py @@ -78,7 +78,7 @@ def _all_to_all_in_static_mode( if isinstance(in_tensor_or_tensor_list, list): if len(in_tensor_or_tensor_list) == 0: raise RuntimeError("The input tensor_list should not be empty.") - # 0D use stack/unstack while others use concat/split + # 0-D use stack/unstack while others use concat/split if len(in_tensor_or_tensor_list[0].shape) == 0: in_tensor = paddle.stack(in_tensor_or_tensor_list, axis=0) else: @@ -115,7 +115,7 @@ def _all_to_all_in_static_mode( if isinstance(out_tensor_or_tensor_list, list): if not sync_op: dist.wait(out_tensor, use_calc_stream=False) - # 0D use stack/unstack while others use concat/split + # 0-D use stack/unstack while others use concat/split if len(in_tensor_or_tensor_list[0].shape) == 0: out_tensor_or_tensor_list.extend(paddle.unstack(out_tensor, 0)) else: diff --git a/python/paddle/distributed/communication/stream/scatter.py b/python/paddle/distributed/communication/stream/scatter.py index c112516a1fc106859e3d7e176395cbe1c1258403..13f9c3ecf641e6cae1fcf89e2c8233eacc734274 100644 --- a/python/paddle/distributed/communication/stream/scatter.py +++ b/python/paddle/distributed/communication/stream/scatter.py @@ -91,7 +91,7 @@ def _scatter_in_static_mode( ) else: tensor_list = [tensor for _ in range(nranks)] - # 0D use stack/unstack while others use concat/split + # 0-D use stack/unstack while others use concat/split if len(tensor_list[0].shape) == 0: input_tensor = paddle.stack(tensor_list, axis=0) else: diff --git a/python/paddle/distribution/bernoulli.py b/python/paddle/distribution/bernoulli.py index d6c6551b0c5ced31a1a78f71e8150ba106324f75..e961ee9d0186870e98b1d6a63e6c5118ad79509e 100644 --- a/python/paddle/distribution/bernoulli.py +++ 
b/python/paddle/distribution/bernoulli.py @@ -79,16 +79,16 @@ class Bernoulli(exponential_family.ExponentialFamily): rv = Bernoulli(probs=0.3) print(rv.mean) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.30000001]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.30000001) print(rv.variance) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.21000001]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.21000001) print(rv.entropy()) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.61086434]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.61086434) """ def __init__(self, probs, name=None): @@ -247,12 +247,12 @@ class Bernoulli(exponential_family.ExponentialFamily): # The smaller the `temperature`, the distribution of `rsample` closer to `sample`, with `probs` of 0.3. print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=1.0)).sum()) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [361.06829834]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 361.06829834) print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=0.1)).sum()) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [288.66418457]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 288.66418457) """ name = self.name + '_rsample' if not _non_static_mode(): @@ -420,8 +420,8 @@ class Bernoulli(exponential_family.ExponentialFamily): rv = Bernoulli(0.3) print(rv.entropy()) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.61086434]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.61086434) """ name = self.name + '_entropy' @@ -455,8 +455,8 @@ class Bernoulli(exponential_family.ExponentialFamily): rv_other = Bernoulli(0.7) print(rv.kl_divergence(rv_other)) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.33891910]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.33891910) """ name = self.name + '_kl_divergence' if not _non_static_mode(): diff --git a/python/paddle/distribution/beta.py b/python/paddle/distribution/beta.py index 07cbf9155c70148a931ad8a628ce200a5e686103..ebf373bf6b1153a8285fcbc0fdc1384b010b6d52 100644 --- a/python/paddle/distribution/beta.py +++ b/python/paddle/distribution/beta.py @@ -61,13 +61,13 @@ class Beta(exponential_family.ExponentialFamily): beta = paddle.distribution.Beta(alpha=0.5, beta=0.5) print(beta.mean) # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [0.50000000]) + # 0.50000000) print(beta.variance) # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [0.12500000]) + # 0.12500000) print(beta.entropy()) # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [0.12500000]) + # 0.12500000) # tensor input with broadcast beta = paddle.distribution.Beta(alpha=paddle.to_tensor([0.2, 0.4]), beta=0.6) diff --git a/python/paddle/distribution/cauchy.py b/python/paddle/distribution/cauchy.py index dde50d8a2287ef5fcff06582dae9363942672892..e149bf0fd8939023b675c1a1b13435dac99e7e99 100644 --- a/python/paddle/distribution/cauchy.py +++ b/python/paddle/distribution/cauchy.py @@ -45,7 +45,7 @@ class Cauchy(distribution.Distribution): # init Cauchy with float rv = Cauchy(loc=0.1, scale=1.2) 
print(rv.entropy()) - # Tensor(shape=1, dtype=float32, place=Place(cpu), stop_gradient=True, + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, # 2.71334577) # init Cauchy with N-Dim tensor @@ -228,8 +228,8 @@ class Cauchy(distribution.Distribution): # init Cauchy with float rv = Cauchy(loc=0.1, scale=1.2) print(rv.prob(paddle.to_tensor(1.5))) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.11234467]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.11234467) # broadcast to value rv = Cauchy(loc=0.1, scale=1.2) @@ -277,8 +277,8 @@ class Cauchy(distribution.Distribution): # init Cauchy with float rv = Cauchy(loc=0.1, scale=1.2) print(rv.log_prob(paddle.to_tensor(1.5))) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [-2.18618369]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # -2.18618369) # broadcast to value rv = Cauchy(loc=0.1, scale=1.2) @@ -344,8 +344,8 @@ class Cauchy(distribution.Distribution): # init Cauchy with float rv = Cauchy(loc=0.1, scale=1.2) print(rv.cdf(paddle.to_tensor(1.5))) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.77443725]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.77443725) # broadcast to value rv = Cauchy(loc=0.1, scale=1.2) diff --git a/python/paddle/distribution/dirichlet.py b/python/paddle/distribution/dirichlet.py index e4773af7109a62d73044dc005ba06111f46769c4..ca55285540d79a61695a2d0efc4618311e63b2a1 100644 --- a/python/paddle/distribution/dirichlet.py +++ b/python/paddle/distribution/dirichlet.py @@ -63,10 +63,10 @@ class Dirichlet(exponential_family.ExponentialFamily): print(dirichlet.entropy()) # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [-1.24434423]) + # -1.24434423) print(dirichlet.prob(paddle.to_tensor([.3, .5, .6]))) # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [10.80000114]) + # 10.80000114) """ diff --git a/python/paddle/distribution/geometric.py b/python/paddle/distribution/geometric.py index 599b78c1f9e4282592e5085531de61a25118134c..d338ee6f35ac73aaf8065bf827cf93a9855b46ab 100644 --- a/python/paddle/distribution/geometric.py +++ b/python/paddle/distribution/geometric.py @@ -55,16 +55,16 @@ class Geometric(distribution.Distribution): geom = Geometric(0.5) geom.mean - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [2.]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 2.) geom.variance - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [2.]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 2.) 
geom.stddev - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [1.41421354]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 1.41421354) """ def __init__(self, probs): @@ -145,8 +145,8 @@ class Geometric(distribution.Distribution): geom = Geometric(0.5) geom.pmf(2) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.25000000]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.25000000) """ if isinstance(k, (numbers.Integral, framework.Variable)): return paddle.pow((1.0 - self.probs), k - 1.0) * self.probs @@ -176,8 +176,8 @@ class Geometric(distribution.Distribution): geom = Geometric(0.5) geom.log_pmf(2) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [-1.38629436]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # -1.38629436) """ if isinstance(k, (numbers.Integral, framework.Variable)): return paddle.log(self.pmf(k)) @@ -266,8 +266,8 @@ class Geometric(distribution.Distribution): geom = Geometric(0.5) geom.entropy() - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [1.38629436]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 1.38629436) """ x = (1.0 - self.probs) * paddle.log(1.0 - self.probs) y = self.probs * paddle.log(self.probs) @@ -296,8 +296,8 @@ class Geometric(distribution.Distribution): geom = Geometric(0.5) geom.cdf(4) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.93750000]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.93750000) """ if isinstance(k, (numbers.Integral, framework.Variable)): return 1.0 - paddle.pow((1.0 - self.probs), k) @@ -329,8 +329,8 @@ class Geometric(distribution.Distribution): geom_p = Geometric(0.5) geom_q = Geometric(0.1) geom_p.kl_divergence(geom_q) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.51082563]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.51082563) """ if isinstance(other, Geometric): p, q = self.probs, other.probs diff --git a/python/paddle/distribution/gumbel.py b/python/paddle/distribution/gumbel.py index dc0ad391685a5a25de9648bc66224a912a101536..7513046212c56ac3e4a1a5022057c49296294d76 100644 --- a/python/paddle/distribution/gumbel.py +++ b/python/paddle/distribution/gumbel.py @@ -61,7 +61,7 @@ class Gumbel(TransformedDistribution): dist.cdf(value) # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [0.54523915]) dist.entropy() - # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, [1.57721567]) + # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [1.57721567]) dist.rsample([2]) # Tensor(shape=[2, 1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [[0.80463481], [0.91893655]]) diff --git a/python/paddle/distribution/independent.py b/python/paddle/distribution/independent.py index 4119941b94c59aaf2ce44d2da8e98180b2ccce5f..3d180814b4aee5a7c64fc1805e76b8dc6905678a 100644 --- a/python/paddle/distribution/independent.py +++ b/python/paddle/distribution/independent.py @@ -44,8 +44,8 @@ class Independent(distribution.Distribution): print(reinterpreted_beta.batch_shape, reinterpreted_beta.event_shape) # () (2,) print(reinterpreted_beta.log_prob(paddle.to_tensor([0.2, 0.2]))) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-0.45687842]) + # Tensor(shape=[], 
dtype=float32, place=Place(gpu:0), stop_gradient=True, + # -0.45687842) """ def __init__(self, base, reinterpreted_batch_rank): diff --git a/python/paddle/distribution/kl.py b/python/paddle/distribution/kl.py index 2e9b5e878536dfa25655a4452e815151805be295..a47eefc7893e14a8e96dc794e5bece3cedbecff6 100644 --- a/python/paddle/distribution/kl.py +++ b/python/paddle/distribution/kl.py @@ -60,7 +60,7 @@ def kl_divergence(p, q): print(paddle.distribution.kl_divergence(p, q)) # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [0.21193528]) + # 0.21193528) """ return _dispatch(type(p), type(q))(p, q) diff --git a/python/paddle/distribution/laplace.py b/python/paddle/distribution/laplace.py index 124da20576a8f54c61df19d22e61cc6bb3da5357..eebf72cc16943adb3f322934a3a0ba7fb9d0caf8 100644 --- a/python/paddle/distribution/laplace.py +++ b/python/paddle/distribution/laplace.py @@ -44,12 +44,12 @@ class Laplace(distribution.Distribution): Examples: .. code-block:: python - import paddle + import paddle - m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) - m.sample() # Laplace distributed with loc=0, scale=1 - # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, - # [3.68546247]) + m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0)) + m.sample() # Laplace distributed with loc=0, scale=1 + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 3.68546247) """ @@ -173,13 +173,13 @@ class Laplace(distribution.Distribution): Examples: .. code-block:: python - import paddle + import paddle - m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) - value = paddle.to_tensor([0.1]) - m.log_prob(value) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [-0.79314721]) + m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0)) + value = paddle.to_tensor(0.1) + m.log_prob(value) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # -0.79314721) """ loc, scale, value = self._validate_value(value) @@ -205,12 +205,12 @@ class Laplace(distribution.Distribution): Examples: .. code-block:: python - import paddle + import paddle - m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) - m.entropy() - # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, - # [1.69314718]) + m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0)) + m.entropy() + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 1.69314718) """ return 1 + paddle.log(2 * self.scale) @@ -236,13 +236,13 @@ class Laplace(distribution.Distribution): Examples: .. 
code-block:: python - import paddle + import paddle - m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) - value = paddle.to_tensor([0.1]) - m.cdf(value) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.54758132]) + m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0)) + value = paddle.to_tensor(0.1) + m.cdf(value) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.54758132) """ loc, scale, value = self._validate_value(value) iterm = ( @@ -277,11 +277,11 @@ class Laplace(distribution.Distribution): import paddle - m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) - value = paddle.to_tensor([0.1]) + m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0)) + value = paddle.to_tensor(0.1) m.icdf(value) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [-1.60943794]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # -1.60943794) """ loc, scale, value = self._validate_value(value) term = value - 0.5 @@ -302,10 +302,10 @@ class Laplace(distribution.Distribution): import paddle - m = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) + m = paddle.distribution.Laplace(paddle.to_tensor(0.0), paddle.to_tensor(1.0)) m.sample() # Laplace distributed with loc=0, scale=1 # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, - # [3.68546247]) + # 3.68546247) """ shape = shape if isinstance(shape, tuple) else tuple(shape) with paddle.no_grad(): @@ -395,13 +395,13 @@ class Laplace(distribution.Distribution): Examples: .. code-block:: python - import paddle + import paddle - m1 = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) - m2 = paddle.distribution.Laplace(paddle.to_tensor([1.0]), paddle.to_tensor([0.5])) - m1.kl_divergence(m2) - # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, - # [1.04261160]) + m1 = paddle.distribution.Laplace(paddle.to_tensor([0.0]), paddle.to_tensor([1.0])) + m2 = paddle.distribution.Laplace(paddle.to_tensor([1.0]), paddle.to_tensor([0.5])) + m1.kl_divergence(m2) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [1.04261160]) """ var_ratio = other.scale / self.scale diff --git a/python/paddle/distribution/lognormal.py b/python/paddle/distribution/lognormal.py index c69a8a6cf91137fc661a9e1d8bf75aa38b5aea97..f437be3ab8ac596ec16e0894c0d48f4a9f627276 100644 --- a/python/paddle/distribution/lognormal.py +++ b/python/paddle/distribution/lognormal.py @@ -72,13 +72,13 @@ class LogNormal(TransformedDistribution): sample = lognormal_a.sample((2, )) # a random tensor created by lognormal distribution with shape: [2, 1] entropy = lognormal_a.entropy() - # [1.4189385] with shape: [] + # [1.4189385] with shape: [1] lp = lognormal_a.log_prob(value_tensor) # [-0.72069150] with shape: [1] p = lognormal_a.probs(value_tensor) # [0.48641577] with shape: [1] kl = lognormal_a.kl_divergence(lognormal_b) - # [0.34939718] with shape: [] + # [0.34939718] with shape: [1] """ def __init__(self, loc, scale): diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index 5538c41458f04538aa1e63beb1ec959d21f4a498..31a3750a0481978a2fc0652b171b0c83bf82b979 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -77,13 +77,13 @@ class Normal(distribution.Distribution): sample = 
normal_a.sample([2]) # a random tensor created by normal distribution with shape: [2, 1] entropy = normal_a.entropy() - # [1.4189385] with shape: [] + # [1.4189385] with shape: [1] lp = normal_a.log_prob(value_tensor) # [-1.2389386] with shape: [1] p = normal_a.probs(value_tensor) # [0.28969154] with shape: [1] kl = normal_a.kl_divergence(normal_b) - # [0.34939718] with shape: [] + # [0.34939718] with shape: [1] """ def __init__(self, loc, scale, name=None): diff --git a/python/paddle/distribution/transform.py b/python/paddle/distribution/transform.py index 85575b3c61a153b2e2651e9d1434b49fcf0a1582..f1ee702c15b66e2d7ee2e4ac70dac07f7fc13a91 100644 --- a/python/paddle/distribution/transform.py +++ b/python/paddle/distribution/transform.py @@ -435,8 +435,8 @@ class AffineTransform(Transform): # Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True, # [1., 2.]) print(affine.forward_log_det_jacobian(x)) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.) """ _type = Type.BIJECTION @@ -1189,8 +1189,8 @@ class StickBreakingTransform(Transform): # Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, # [0.99999988, 2. , 2.99999881]) print(t.forward_log_det_jacobian(x)) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-9.10835075]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # -9.10835075) """ _type = Type.BIJECTION diff --git a/python/paddle/distribution/transformed_distribution.py b/python/paddle/distribution/transformed_distribution.py index 95f7670c563a325f5a93c465c6c80352c0e03ac9..6fa8e2aed482aa937c596d01d749456e1d328f93 100644 --- a/python/paddle/distribution/transformed_distribution.py +++ b/python/paddle/distribution/transformed_distribution.py @@ -42,8 +42,8 @@ class TransformedDistribution(distribution.Distribution): # [-0.10697651, 3.33609009, -0.86234951, 5.07457638, 0.75925219, # -4.17087793, 2.22579336, -0.93845034, 0.66054249, 1.50957513]) print(d.log_prob(paddle.to_tensor(0.5))) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-1.64333570]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # -1.64333570) """ def __init__(self, base, transforms): diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index 1edc9920ac280b153127a679b3e62a227ac4df6f..8547c6f9900ebe342c9801bcacecefbaa0750db5 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -84,7 +84,7 @@ class Uniform(distribution.Distribution): sample = uniform.sample([2]) # a random tensor created by uniform distribution with shape: [2, 1] entropy = uniform.entropy() - # [0.6931472] with shape: [] + # [0.6931472] with shape: [1] lp = uniform.log_prob(value_tensor) # [-0.6931472] with shape: [1] p = uniform.probs(value_tensor) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index f7b8531aee4d94b1fdadc9d0564251c684a7fd01..1635c7d5d211bb292df06ce20aa331ece1efd1b6 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -387,7 +387,7 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): def _create_loss_op_desc_(loss): - # 0D Tensor or 0-Size Tensor + # 0-D Tensor or 0-Size Tensor if len(loss.shape) == 0 or 0 in loss.shape: create_shape = loss.shape else: diff --git 
a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index 27840c8b61ad449a5290f5d56703f51b172fa7a6..9951f5a7c40f4a7785a22998cb6b431a4e4c8dc4 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -789,7 +789,7 @@ class ReduceLROnPlateau(LearningRateDecay): Reduce learning rate when ``loss`` has stopped descending. Models often benefit from reducing the learning rate by 2 to 10 times once model performance has no longer improvement. - The ``loss`` is the one which has been pass into ``step`` , it must be 1-D Tensor with shape [1]. When ``loss`` + The ``loss`` is the one which has been pass into ``step`` , it must be 0-D Tensor with shape []. When ``loss`` stop descending for a ``patience`` number of epochs, the learning rate will be reduced to ``learning_rate * decay_rate`` . (Specially, ``mode`` can also be set to ``'max`` , in this case, when ``loss`` stop ascending for a ``patience`` number of epochs, the learning rate will be reduced.) @@ -943,7 +943,7 @@ class ReduceLROnPlateau(LearningRateDecay): Args: loss (Variable): A ``Variable`` that will be monitored to determine whether the learning rate will reduce. If it stop descending for a ``patience`` number of epochs, the learning rate will reduce. It should - be 1-D Tensor with shape [1]. + be 0-D Tensor with shape []. Specially, if ``mode`` has been set to ``'max'`` , the learning rate will reduce when it stops ascending. Returns: None @@ -952,7 +952,7 @@ class ReduceLROnPlateau(LearningRateDecay): Please refer to the example of current LearningRateDecay. """ - # loss must be 1-D Tensor with shape [1] + # loss.size must be 1 check_type(loss, 'loss', Variable, 'ReduceLROnPlateau.step') assert np.prod(loss.shape) == 1, ( "The number of elements of loss should be 1, but the current loss.shape is {}, whose number of elements is not 1. " diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index 5d8fa6493ee720b31cb702845ce7bc4bb2afd3e5..fc26715d7cc4e82723ee2ab6b02c68dfc7dc3279 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -131,7 +131,7 @@ def monkey_patch_math_tensor(): return int(np.array(var).flatten()[0]) def _len_(var): - assert var.ndim > 0, "len() of a 0D tensor is wrong" + assert var.ndim > 0, "len() of a 0-D tensor is wrong" if var.type == core.VarDesc.VarType.VOCAB: return len(var.value().get_map_tensor()) elif var.type == core.VarDesc.VarType.STRINGS: diff --git a/python/paddle/fluid/dygraph/tensor_patch_methods.py b/python/paddle/fluid/dygraph/tensor_patch_methods.py index a081c6ca294ee40afb1a13a0ee64d0449cc92b6c..ad51d8ff31aff5e775d0badf818c8b195cce1ab5 100644 --- a/python/paddle/fluid/dygraph/tensor_patch_methods.py +++ b/python/paddle/fluid/dygraph/tensor_patch_methods.py @@ -516,7 +516,7 @@ def monkey_patch_tensor(): y = paddle.pow(x, 4.0) y.backward() print("grad of x: {}".format(x.grad)) - # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False, [500.]) + # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=False, 500.) """ msg = ( @@ -638,12 +638,12 @@ def monkey_patch_tensor(): y = copy.deepcopy(x) print(x) - # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, - # [2.]) + # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, + # 2.) 
print(y) - # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, - # [2.]) + # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, + # 2.) """ if not self.is_leaf: diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index e440cf4c9e6239b47bbc54f732a954fe35d35ffb..2ba5b7c0d5ecc53ed37f6ebcbdf5a95221ab37b2 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -2476,7 +2476,7 @@ class Variable(metaclass=VariableMetaClass): def size(self): """ - Returns the number of elements for current Variable, which is a int64 Variable with shape [1] + Returns the number of elements for current Variable, which is a int64 Variable with shape [] . Returns: Variable, the number of elements for current Variable diff --git a/python/paddle/fluid/tests/unittests/test_numel_op.py b/python/paddle/fluid/tests/unittests/test_numel_op.py index 81ddbdc45a02fffcf294aef011f0f202f8e05a14..c841cde6cbb1fcdddb96f5df5251ca7bea614de6 100644 --- a/python/paddle/fluid/tests/unittests/test_numel_op.py +++ b/python/paddle/fluid/tests/unittests/test_numel_op.py @@ -120,7 +120,6 @@ class TestNumelAPI(unittest.TestCase): }, fetch_list=[out_1, out_2], ) - # TODO(zhouwei): will change shape [1] to [] to support zero-dim assert np.array_equal( res_1, np.array(np.size(input_1)).astype("int64") ) diff --git a/python/paddle/fluid/tests/unittests/test_size_op.py b/python/paddle/fluid/tests/unittests/test_size_op.py index 610ee4eaa5f0ee2fa0a5142edce91c25248e9daa..dfff90b742fcae869ce843551e201b972c9bab63 100644 --- a/python/paddle/fluid/tests/unittests/test_size_op.py +++ b/python/paddle/fluid/tests/unittests/test_size_op.py @@ -83,7 +83,6 @@ class TestSizeAPI(unittest.TestCase): }, fetch_list=[out_1, out_2], ) - # TODO(zhouwei): will change shape [1] to [] to support zero-dim assert np.array_equal( res_1, np.array(np.size(input_1)).astype("int64") ) diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index 52d6b7cb854d988c742ed4bdbb4a029ce75d9360..b5d5b3da4c3c81f011a057b34b6b24b16de993d7 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -517,7 +517,7 @@ def convert_len(var): `shape_op` in var.block. 
""" if isinstance(var, Variable): - assert var.ndim > 0, "len() of a 0D tensor is wrong" + assert var.ndim > 0, "len() of a 0-D tensor is wrong" if var.type in [ core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.SELECTED_ROWS, diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index 430b82c07ae0cf8c25989a181b378cac3050a62f..bf3f164c3381323dc7858f76080747e03cdcfc11 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -798,7 +798,7 @@ def accuracy(input, label, k=1, correct=None, total=None, name=None): predictions = paddle.to_tensor([[0.2, 0.1, 0.4, 0.1, 0.1], [0.2, 0.3, 0.1, 0.15, 0.25]], dtype='float32') label = paddle.to_tensor([[2], [0]], dtype="int64") result = paddle.metric.accuracy(input=predictions, label=label, k=1) - # [0.5] + # 0.5 """ if label.dtype == paddle.int32: label = paddle.cast(label, paddle.int64) diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index f27fa360609af221b491a089f1cf79299b6b0e9f..578a5da900e143e35162f0addda69611fb7279e2 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -61,7 +61,7 @@ def dice_loss(input, label, epsilon=0.00001, name=None): For more information, please refer to :ref:`api_guide_Name` Returns: - Tensor, which shape is [1], data type is the same as `input` . + 0-D Tensor, which shape is [], data type is the same as `input` . Example: .. code-block:: python @@ -327,7 +327,7 @@ def npair_loss(anchor, positive, labels, l2_reg=0.002): Returns: - A Tensor representing the npair loss, the data type is the same as anchor, the shape is [1]. + A 0-D Tensor representing the npair loss, the data type is the same as anchor, the shape is []. Examples: @@ -634,7 +634,7 @@ def binary_cross_entropy( input = paddle.to_tensor([0.5, 0.6, 0.7], 'float32') label = paddle.to_tensor([1.0, 0.0, 1.0], 'float32') output = paddle.nn.functional.binary_cross_entropy(input, label) - print(output) # [0.65537095] + print(output) # 0.65537095 """ if reduction not in ['sum', 'mean', 'none']: @@ -774,7 +774,7 @@ def binary_cross_entropy_with_logits( logit = paddle.to_tensor([5.0, 1.0, 3.0]) label = paddle.to_tensor([1.0, 0.0, 1.0]) output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label) - print(output) # [0.45618808] + print(output) # 0.45618808 """ if reduction not in ['sum', 'mean', 'none']: @@ -1077,7 +1077,7 @@ def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None): label = paddle.rand([3, 3]).astype('float32') output = paddle.nn.functional.smooth_l1_loss(input, label) print(output) - # [0.068004] + # 0.068004 """ if in_dygraph_mode(): @@ -1147,7 +1147,7 @@ def margin_ranking_loss( name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: - Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[1]`, otherwise the shape is the same as `input` .The same dtype as input tensor. + Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[]`, otherwise the shape is the same as `input` .The same dtype as input tensor. 
Examples: @@ -1159,7 +1159,7 @@ def margin_ranking_loss( other = paddle.to_tensor([[2, 1], [2, 4]], dtype='float32') label = paddle.to_tensor([[1, -1], [-1, -1]], dtype='float32') loss = paddle.nn.functional.margin_ranking_loss(input, other, label) - print(loss) # [0.75] + print(loss) # 0.75 """ if reduction not in ['sum', 'mean', 'none']: raise ValueError( @@ -1264,7 +1264,7 @@ def l1_loss(input, label, reduction='mean', name=None): Returns: Tensor, the L1 Loss of Tensor ``input`` and ``label``. If `reduction` is ``'none'``, the shape of output loss is :math:`[N, *]`, the same as ``input`` . - If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. + If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is []. Examples: .. code-block:: python @@ -1276,8 +1276,8 @@ def l1_loss(input, label, reduction='mean', name=None): l1_loss = paddle.nn.functional.l1_loss(input, label) print(l1_loss) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.34999999]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.34999999) l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='none') print(l1_loss) @@ -1287,8 +1287,8 @@ def l1_loss(input, label, reduction='mean', name=None): l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='sum') print(l1_loss) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.39999998]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.39999998) """ if reduction not in ['sum', 'mean', 'none']: @@ -1377,7 +1377,7 @@ def nll_loss( log_out = log_softmax(input) label = paddle.to_tensor([0, 2, 1, 1, 0], "int64") result = nll_loss(log_out, label) - print(result) # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, [1.07202101]) + print(result) # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, 1.07202101) """ if reduction not in ['sum', 'mean', 'none']: raise ValueError( @@ -1578,9 +1578,9 @@ def kl_div(input, label, reduction='mean', name=None): If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result. - If `reduction` is ``'mean'``, the output loss is the shape of [1], and the output is the average of all losses. + If `reduction` is ``'mean'``, the output loss is the shape of [], and the output is the average of all losses. - If `reduction` is ``'sum'``, the output loss is the shape of [1], and the output is the sum of all losses. + If `reduction` is ``'sum'``, the output loss is the shape of [], and the output is the sum of all losses. If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size. 
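Note (illustrative only, not part of this patch): a minimal sketch of the reduced-loss shapes described above, assuming a Paddle build with the 0-D Tensor behavior this change documents and running in dygraph mode. With 'mean' or 'sum' reduction the loss comes back as a 0-D Tensor (shape []), while 'none' keeps the input shape:

    import paddle
    import paddle.nn.functional as F

    x = paddle.uniform([3, 4], min=-10, max=10)
    target = paddle.uniform([3, 4], min=-10, max=10)

    print(F.kl_div(x, target, reduction='mean').shape)   # []      -> 0-D Tensor
    print(F.kl_div(x, target, reduction='sum').shape)    # []      -> 0-D Tensor
    print(F.kl_div(x, target, reduction='none').shape)   # [3, 4]  -> same shape as input
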
@@ -1611,17 +1611,17 @@ def kl_div(input, label, reduction='mean', name=None): x = paddle.uniform(shape, min=-10, max=10).astype('float32') target = paddle.uniform(shape, min=-10, max=10).astype('float32') - # 'batchmean' reduction, loss shape will be [1] + # 'batchmean' reduction, loss shape will be [], who is 0-D Tensor pred_loss = F.kl_div(x, target, reduction='batchmean') - # shape=[1] + # shape=[] - # 'mean' reduction, loss shape will be [1] + # 'mean' reduction, loss shape will be [], who is 0-D Tensor pred_loss = F.kl_div(x, target, reduction='mean') - # shape=[1] + # shape=[] - # 'sum' reduction, loss shape will be [1] + # 'sum' reduction, loss shape will be [], who is 0-D Tensor pred_loss = F.kl_div(x, target, reduction='sum') - # shape=[1] + # shape=[] # 'none' reduction, loss shape is same with input shape pred_loss = F.kl_div(x, target, reduction='none') @@ -1724,7 +1724,7 @@ def mse_loss(input, label, reduction='mean', name=None): label = paddle.to_tensor(1.7) output = mse_loss(input, label) print(output) - # [0.04000002] + # 0.04000002 """ @@ -1780,7 +1780,7 @@ def ctc_loss( norm_by_times (bool, optional): Whether to normalize the gradients by the number of time-step, which is also the sequence's length. There is no need to normalize the gradients if reduction mode is 'mean'. Default: False. Returns: - Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``log_probs``. + Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``log_probs``. Examples: @@ -1834,8 +1834,8 @@ def ctc_loss( blank=0, reduction='mean') print(loss) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.13760614]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.13760614) """ @@ -1929,7 +1929,7 @@ def rnnt_loss( name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: - Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``logprobs``. + Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``logprobs``. Examples: @@ -1961,8 +1961,8 @@ def rnnt_loss( costs = fn(acts, labels, lengths, label_lengths) print(costs) - # Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=False, - # [4.49566677]) + # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False, + # 4.49566677) """ def warprnnt( @@ -2078,7 +2078,7 @@ def margin_cross_entropy( softmax is shard_softmax when using model parallel, otherwise softmax is in the same shape with input logits. If ``reduction == None``, the shape of loss is ``[N, 1]``, otherwise - the shape is ``[1]``. + the shape is ``[]``. 
Examples: @@ -2633,8 +2633,8 @@ def cross_entropy( input, label) print(dy_ret) - # Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, - # [5.34043430]) + # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # 5.34043430) .. code-block:: python @@ -2659,8 +2659,8 @@ def cross_entropy( weight=weight, reduction=reduction) print(paddle_loss_mean) - # Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, - # [1.11043464]) + # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # 1.11043464) """ @@ -3012,7 +3012,7 @@ def sigmoid_focal_loss( For more information, please refer to :ref:`api_guide_Name`. Returns: - Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[1]`, otherwise the shape is the same as ``logit``. The same dtype as ``logit`` tensor. + Tensor, if :attr:`reduction` is ``'mean'`` or ``'sum'``, the out shape is :math:`[]`, otherwise the shape is the same as ``logit``. The same dtype as ``logit`` tensor. Examples: @@ -3026,7 +3026,7 @@ def sigmoid_focal_loss( fg_label = paddle.greater_equal(label, one) fg_num = paddle.sum(paddle.cast(fg_label, dtype='float32')) output = paddle.nn.functional.sigmoid_focal_loss(logit, label, normalizer=fg_num) - print(output) # [0.65782464] + print(output) # 0.65782464 """ if reduction not in ['sum', 'mean', 'none']: @@ -3183,7 +3183,7 @@ def multi_label_soft_margin_loss( # Tensor([3.49625897, 0.71111226, 0.43989015]) loss = F.multi_label_soft_margin_loss(input, label, reduction='mean') print(loss) - # Tensor([1.54908717]) + # Tensor(1.54908717) """ if reduction not in ['sum', 'mean', 'none']: raise ValueError( @@ -3307,7 +3307,7 @@ def hinge_embedding_loss(input, label, margin=1.0, reduction='mean', name=None): loss = F.hinge_embedding_loss(input, label, margin=1.0, reduction='mean') print(loss) - # Tensor([0.22222222]) + # Tensor(0.22222222) """ if reduction not in ['sum', 'mean', 'none']: @@ -3377,7 +3377,7 @@ def cosine_embedding_loss( Returns: Tensor, the cosine embedding Loss of Tensor ``input1`` ``input2`` and ``label``. If `reduction` is ``'none'``, the shape of output loss is [N], the same as ``input`` . - If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. + If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is []. Examples: .. code-block:: python @@ -3389,10 +3389,10 @@ def cosine_embedding_loss( label = paddle.to_tensor([1, -1], 'int64') output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='mean') - print(output) # [0.21155193] + print(output) # 0.21155193 output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='sum') - print(output) # [0.42310387] + print(output) # 0.42310387 output = paddle.nn.functional.cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='none') print(output) # [0.42310387, 0. 
] @@ -3528,7 +3528,7 @@ def triplet_margin_with_distance_loss( loss = F.triplet_margin_with_distance_loss(input, positive, negative, margin=1.0, reduction='mean') print(loss) - # Tensor([0.19165580]) + # Tensor(0.19165580) """ if reduction not in ['sum', 'mean', 'none']: @@ -3678,7 +3678,7 @@ def triplet_margin_loss( loss = F.triplet_margin_loss(input, positive, negative, margin=1.0, reduction='mean') print(loss) - # Tensor([0.19165580]) + # Tensor(0.19165580) """ if reduction not in ['sum', 'mean', 'none']: @@ -3886,7 +3886,7 @@ def soft_margin_loss(input, label, reduction='mean', name=None): Returns: - Output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is [1]. + Output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is []. Examples: .. code-block:: python @@ -3897,8 +3897,8 @@ def soft_margin_loss(input, label, reduction='mean', name=None): label = paddle.to_tensor([[1.0, -1.0, 1.0],[-1.0, 1.0, 1.0]], 'float32') output = paddle.nn.functional.soft_margin_loss(input, label) print(output) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.64022040]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.64022040) input = paddle.uniform(shape=(5, 5), dtype="float32", min=0.1, max=0.8) label = paddle.randint(0, 2, shape=(5, 5), dtype="int64") @@ -3998,7 +3998,7 @@ def gaussian_nll_loss( Returns: - output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is [1]. + output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is []. Examples:: .. code-block:: python diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 967d490897f6e43c9103e37ed6f9d14d38b14548..15dbeb54c0e0a1b625027f21bd3837bbe55d0e42 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -98,8 +98,8 @@ class BCEWithLogitsLoss(Layer): bce_logit_loss = paddle.nn.BCEWithLogitsLoss() output = bce_logit_loss(logit, label) print(output) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.45618814]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.45618814) """ @@ -319,8 +319,8 @@ class CrossEntropyLoss(Layer): input, label) print(dy_ret) - # Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, - # [5.34043430]) + # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # 5.34043430) .. code-block:: python @@ -345,8 +345,8 @@ class CrossEntropyLoss(Layer): weight=weight, reduction=reduction) print(paddle_loss_mean) - # Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=True, - # [1.11043464]) + # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # 1.11043464) """ @@ -564,7 +564,7 @@ class MSELoss(Layer): label = paddle.to_tensor([1.7]) output = mse_loss(input, label) print(output) - # [0.04000002] + # 0.04000002 """ @@ -637,7 +637,7 @@ class L1Loss(Layer): - label (Tensor): label. The shapes is ``[N, *]``, same shape as ``input`` . It's data type should be float32, float64, int32, int64. - output (Tensor): The L1 Loss of ``input`` and ``label``. If `reduction` is ``'none'``, the shape of output loss is ``[N, *]``, the same as ``input`` . - If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. 
+ If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is []. Examples: .. code-block:: python @@ -650,14 +650,14 @@ class L1Loss(Layer): l1_loss = paddle.nn.L1Loss() output = l1_loss(input, label) print(output) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.34999999]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.34999999) l1_loss = paddle.nn.L1Loss(reduction='sum') output = l1_loss(input, label) print(output) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.39999998]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.39999998) l1_loss = paddle.nn.L1Loss(reduction='none') output = l1_loss(input, label) @@ -747,8 +747,8 @@ class BCELoss(Layer): bce_loss = paddle.nn.BCELoss() output = bce_loss(input, label) print(output) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.65537101]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.65537101) """ @@ -835,7 +835,7 @@ class NLLLoss(Layer): The data type is int64. - output (Tensor): the `negative log likelihood loss` between input `x` and `label`. If `reduction` is `'none'`, the shape is `[N, *]`. - If `reduction` is `'sum'` or `'mean'`, the shape is `[1]`. + If `reduction` is `'sum'` or `'mean'`, the shape is `[]`. Examples: .. code-block:: python @@ -853,7 +853,7 @@ class NLLLoss(Layer): log_out = log_softmax(input) label = paddle.to_tensor([0, 2, 1, 1, 0], "int64") result = nll_loss(log_out, label) - print(result) # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True, [1.07202101]) + print(result) # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, 1.07202101) """ @@ -991,9 +991,9 @@ class KLDivLoss(Layer): If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result. - If `reduction` is ``'mean'``, the output loss is the shape of [1], and the output is the average of all losses. + If `reduction` is ``'mean'``, the output loss is the shape of [], and the output is the average of all losses. - If `reduction` is ``'sum'``, the output loss is the shape of [1], and the output is the sum of all losses. + If `reduction` is ``'sum'``, the output loss is the shape of [], and the output is the sum of all losses. If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size. @@ -1012,7 +1012,7 @@ class KLDivLoss(Layer): label (Tensor): ``(N, *)``, same shape as input. - output (Tensor): tensor with shape: [1] by default. + output (Tensor): tensor with shape: [] by default. Examples: .. 
code-block:: python @@ -1024,20 +1024,20 @@ class KLDivLoss(Layer): x = paddle.uniform(shape, min=-10, max=10).astype('float32') target = paddle.uniform(shape, min=-10, max=10).astype('float32') - # 'batchmean' reduction, loss shape will be [1] + # 'batchmean' reduction, loss shape will be [] kldiv_criterion = nn.KLDivLoss(reduction='batchmean') pred_loss = kldiv_criterion(x, target) - # shape=[1] + # shape=[] - # 'mean' reduction, loss shape will be [1] + # 'mean' reduction, loss shape will be [] kldiv_criterion = nn.KLDivLoss(reduction='mean') pred_loss = kldiv_criterion(x, target) - # shape=[1] + # shape=[] - # 'sum' reduction, loss shape will be [1] + # 'sum' reduction, loss shape will be [] kldiv_criterion = nn.KLDivLoss(reduction='sum') pred_loss = kldiv_criterion(x, target) - # shape=[1] + # shape=[] # 'none' reduction, loss shape is same with X shape kldiv_criterion = nn.KLDivLoss(reduction='none') @@ -1090,7 +1090,7 @@ class MarginRankingLoss(Layer): label: N-D Tensor, label have the same shape and dtype as `input`. - output: If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the out shape is :math:`[1]`, otherwise the shape is the same as `input` .The same dtype as input tensor. + output: If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the out shape is :math:`[]`, otherwise the shape is the same as `input` .The same dtype as input tensor. Returns: A callable object of MarginRankingLoss. @@ -1108,7 +1108,7 @@ class MarginRankingLoss(Layer): loss = margin_rank_loss(input, other, label) print(loss) - # [0.75] + # 0.75 """ def __init__(self, margin=0.0, reduction='mean', name=None): @@ -1149,7 +1149,7 @@ class CTCLoss(Layer): - norm_by_times (bool, optional): Whether to normalize the gradients by the number of time-step, which is also the sequence's length. There is no need to normalize the gradients if reduction mode is 'mean'. Default: False. Returns: - Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``log_probs``. + Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``log_probs``. Examples: @@ -1197,8 +1197,8 @@ class CTCLoss(Layer): input_lengths, label_lengths) print(loss) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.13760614]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.13760614) """ def __init__(self, blank=0, reduction='mean'): @@ -1242,7 +1242,7 @@ class RNNTLoss(Layer): label_lengths: Tensor of (batch) containing label length of each example Returns: - Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``logprobs``. + Tensor, The RNN-T loss between ``logprobs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is []. Data type is the same as ``logprobs``. Examples: .. 
code-block:: python @@ -1272,8 +1272,8 @@ class RNNTLoss(Layer): costs = fn(acts, labels, lengths, label_lengths) print(costs) - # Tensor(shape=[1], dtype=float64, place=Place(gpu:0), stop_gradient=False, - # [4.49566677]) + # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False, + # 4.49566677) """ def __init__( @@ -1352,7 +1352,7 @@ class SmoothL1Loss(Layer): loss = paddle.nn.SmoothL1Loss() output = loss(input, label) print(output) - # [0.049606] + # 0.049606 """ def __init__(self, reduction='mean', delta=1.0, name=None): @@ -1428,7 +1428,7 @@ class MultiLabelSoftMarginLoss(Layer): multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='mean') loss = multi_label_soft_margin_loss(input, label) print(loss) - # Tensor([1.54908717]) + # Tensor(1.54908717) """ def __init__(self, weight=None, reduction="mean", name=None): @@ -1529,7 +1529,7 @@ class HingeEmbeddingLoss(Layer): hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean') loss = hinge_embedding_loss(input, label) print(loss) - # Tensor([0.22222222]) + # Tensor(0.22222222) """ def __init__(self, margin=1.0, reduction="mean", name=None): @@ -1590,7 +1590,7 @@ class CosineEmbeddingLoss(Layer): Available dtypes are int32, int64, float32, float64. output (Tensor): Tensor, the cosine embedding Loss of Tensor ``input1`` ``input2`` and ``label``. If `reduction` is ``'none'``, the shape of output loss is [N], the same as ``input`` . - If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. + If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is []. Examples: .. code-block:: python @@ -1603,11 +1603,11 @@ class CosineEmbeddingLoss(Layer): cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='mean') output = cosine_embedding_loss(input1, input2, label) - print(output) # [0.21155193] + print(output) # 0.21155193 cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='sum') output = cosine_embedding_loss(input1, input2, label) - print(output) # [0.42310387] + print(output) # 0.42310387 cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='none') output = cosine_embedding_loss(input1, input2, label) @@ -1717,7 +1717,7 @@ class TripletMarginWithDistanceLoss(Layer): triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='mean') loss = triplet_margin_with_distance_loss(input, positive, negative,) print(loss) - # Tensor([0.19165580]) + # Tensor(0.19165580) """ @@ -1825,7 +1825,7 @@ class TripletMarginLoss(Layer): triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean', ) loss = triplet_margin_loss(input, positive, negative,) print(loss) - # Tensor([0.19165580]) + # Tensor(0.19165580) """ @@ -1995,7 +1995,7 @@ class SoftMarginLoss(Layer): ``input``. The target labels which values should be numbers -1 or 1. Available dtype is int32, int64, float32, float64. - Output (Tensor): If ``reduction`` is ``'none'``, the shape of output is - same as ``input`` , else the shape of output is [1]. + same as ``input`` , else the shape of output is []. Returns: A callable object of SoftMarginLoss. 
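Note (illustrative only, not part of this patch): a small sketch, under the same assumption of a Paddle build with 0-D Tensor support, showing that the loss Layers in this file return a 0-D Tensor when reduction is 'mean' or 'sum'; the inputs and expected values below are chosen here and hand-checked, not taken from the patch:

    import paddle

    input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
    label = paddle.to_tensor([[1.7, 1.0], [0.4, 0.5]])

    loss = paddle.nn.L1Loss()(input, label)                     # default reduction='mean'
    print(loss.shape)                                           # [] -> 0-D Tensor
    print(float(loss))                                          # ~0.35, mean absolute error

    loss_sum = paddle.nn.L1Loss(reduction='sum')(input, label)
    print(loss_sum.shape)                                       # []
    print(float(loss_sum))                                      # ~1.40, sum of absolute errors
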
@@ -2010,8 +2010,8 @@ class SoftMarginLoss(Layer): soft_margin_loss = paddle.nn.SoftMarginLoss() output = soft_margin_loss(input, label) print(output) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.64022040]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.64022040) input_np = paddle.uniform(shape=(5, 5), min=0.1, max=0.8, dtype="float64") label_np = paddle.randint(high=2, shape=(5, 5), dtype="int64") diff --git a/python/paddle/static/nn/metric.py b/python/paddle/static/nn/metric.py index ee1163db828256edc9a2cd7e22d06dd60456c305..3c1e0bf3852700e040870b377791209f75fd3d37 100644 --- a/python/paddle/static/nn/metric.py +++ b/python/paddle/static/nn/metric.py @@ -66,10 +66,10 @@ def accuracy(input, label, k=1, correct=None, total=None): exe.run(static.default_startup_program()) x = np.random.rand(3, 32, 32).astype("float32") y = np.array([[1],[0],[1]]) - output= exe.run(feed={"input": x,"label": y}, - fetch_list=[result[0]]) + output = exe.run(feed={"input": x,"label": y}, + fetch_list=[result]) print(output) - #[array([0.], dtype=float32)] + # [array(0.33333334, dtype=float32)] """ if _non_static_mode(): diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 59ce9d6b3677cefa8e983e87ea24d2fe8ff60e86..b6c9cce21ff96af772df27de749ccdb28f0cbae3 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -717,10 +717,10 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): We use the dtype conversion rules following this: Keep dtype np.number ───────────► paddle.Tensor - (0D-Tensor) + (0-D Tensor) default_dtype Python Number ───────────────► paddle.Tensor - (0D-Tensor) + (0-D Tensor) Keep dtype np.ndarray ───────────► paddle.Tensor @@ -753,7 +753,6 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): # 1) x = paddle.to_tensor(1, stop_gradient=False) - print(x) # Tensor(shape=[], dtype=int64, place=CPUPlace, stop_gradient=False, # 1) diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 3aebc68dc61bf7ecc23ee58c5436fa471556b1cd..3b3c7565ab31fe6ca8ac72c5edb484798ff3a546 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -333,8 +333,8 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): # compute inf-order norm out_pnorm = paddle.linalg.norm(x, p=float("inf")) - # out_pnorm = Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [12.]) + # out_pnorm = Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 12.) out_pnorm = paddle.linalg.norm(x, p=float("inf"), axis=0) # out_pnorm: Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, @@ -344,8 +344,8 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): # compute -inf-order norm out_pnorm = paddle.linalg.norm(x, p=-float("inf")) - # out_pnorm: Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [0.]) + # out_pnorm: Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 0.) out_pnorm = paddle.linalg.norm(x, p=-float("inf"), axis=0) # out_pnorm: Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, @@ -690,16 +690,16 @@ def dist(x, y, p=2, name=None): x = paddle.to_tensor([[3, 3],[3, 3]], dtype="float32") y = paddle.to_tensor([[3, 3],[3, 1]], dtype="float32") out = paddle.dist(x, y, 0) - print(out) # out = [1.] + print(out) # out = 1. out = paddle.dist(x, y, 2) - print(out) # out = [2.] 
+ print(out) # out = 2. out = paddle.dist(x, y, float("inf")) - print(out) # out = [2.] + print(out) # out = 2. out = paddle.dist(x, y, float("-inf")) - print(out) # out = [0.] + print(out) # out = 0. """ if in_dygraph_mode(): return _C_ops.dist(x, y, p) @@ -745,48 +745,48 @@ def cond(x, p=None, name=None): # compute conditional number when p is None out = paddle.linalg.cond(x) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.41421342]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.41421342) # compute conditional number when order of the norm is 'fro' out_fro = paddle.linalg.cond(x, p='fro') - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [3.16227770]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 3.16227770) # compute conditional number when order of the norm is 'nuc' out_nuc = paddle.linalg.cond(x, p='nuc') - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [9.24263859]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 9.24263859) # compute conditional number when order of the norm is 1 out_1 = paddle.linalg.cond(x, p=1) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [2.]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 2.) # compute conditional number when order of the norm is -1 out_minus_1 = paddle.linalg.cond(x, p=-1) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.) # compute conditional number when order of the norm is 2 out_2 = paddle.linalg.cond(x, p=2) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.41421342]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.41421342) # compute conditional number when order of the norm is -1 out_minus_2 = paddle.linalg.cond(x, p=-2) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.70710683]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 0.70710683) # compute conditional number when order of the norm is inf out_inf = paddle.linalg.cond(x, p=float("inf")) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [2.]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 2.) # compute conditional number when order of the norm is -inf out_minus_inf = paddle.linalg.cond(x, p=-float("inf")) - # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [1.]) + # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # 1.) 
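Every ``paddle.linalg.cond`` variant above now reports its condition number as a 0-D Tensor, matching the ``norm`` and ``dist`` hunks earlier in this file. A quick check, sketched under the assumption that the patched build is installed (the matrix below is just an arbitrary non-singular example):

.. code-block:: python

    import paddle

    x = paddle.to_tensor([[1., 0., 3.], [2., 1., 0.], [0., 0., 1.]])
    out = paddle.linalg.cond(x)              # default p=None
    out_fro = paddle.linalg.cond(x, p='fro')
    print(out.shape, out_fro.shape)          # [] [] -- both are 0-D
    print(paddle.dist(x, paddle.zeros_like(x), 2).shape)  # [] as well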
a = paddle.randn([2, 4, 4]) # Tensor(shape=[2, 4, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True, @@ -1095,13 +1095,13 @@ def dot(x, y, name=None): x = paddle.to_tensor([1, 2, 3]) y = paddle.to_tensor([4, 5, 6]) z = paddle.dot(x, y) - print(z) # [32] + print(z) # 32 # 2-D Tensor * 2-D Tensor x = paddle.to_tensor([[1, 2, 3], [2, 4, 6]]) y = paddle.to_tensor([[4, 5, 6], [4, 5, 6]]) z = paddle.dot(x, y) - print(z) # [[32], [64]] + print(z) # [32, 64] """ if in_dygraph_mode(): @@ -1163,7 +1163,7 @@ def cov(x, rowvar=True, ddof=True, fweights=None, aweights=None, name=None): import paddle - xt = paddle.rand((3,4)) + xt = paddle.rand((3, 4)) paddle.linalg.cov(xt) ''' @@ -1485,7 +1485,7 @@ def matrix_rank(x, tol=None, hermitian=False, name=None): a = paddle.eye(10) b = paddle.linalg.matrix_rank(a) print(b) - # b = [10] + # b = 10 c = paddle.ones(shape=[3, 4, 5, 5]) d = paddle.linalg.matrix_rank(c, tol=0.01, hermitian=True) diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index 8a69e4fd2a65467389ae5e9ff04fa5113cfe8937..cf3c1f5284dd45f939341bd775917236a7580805 100755 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -288,13 +288,8 @@ def is_empty(x, name=None): input = paddle.rand(shape=[4, 32, 32], dtype='float32') res = paddle.is_empty(x=input) - print("res:", res) - # ('res:', Tensor: eager_tmp_1 - # - place: CPUPlace - # - shape: [1] - # - layout: NCHW - # - dtype: bool - # - data: [0]) + # res: Tensor(shape=[], dtype=bool, place=Place(cpu), stop_gradient=True, + # False) """ if in_dygraph_mode(): @@ -339,9 +334,9 @@ def equal_all(x, y, name=None): y = paddle.to_tensor([1, 2, 3]) z = paddle.to_tensor([1, 4, 3]) result1 = paddle.equal_all(x, y) - print(result1) # result1 = [True ] + print(result1) # result1 = True result2 = paddle.equal_all(x, z) - print(result2) # result2 = [False ] + print(result2) # result2 = False """ if in_dygraph_mode(): return _C_ops.equal_all(x, y) @@ -388,21 +383,21 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): y = paddle.to_tensor([10000.1, 1e-08]) result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name="ignore_nan") - # [False] + # False result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=True, name="equal_nan") - # [False] + # False x = paddle.to_tensor([1.0, float('nan')]) y = paddle.to_tensor([1.0, float('nan')]) result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name="ignore_nan") - # [False] + # False result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=True, name="equal_nan") - # [True] + # True """ if in_dygraph_mode(): diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index e6bf626cb1a4c1fc25a41554d8ffcb2fa790f660..6b94843b65c134e1bb51c5dd876af868d4f6e39c 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -268,11 +268,11 @@ def slice(input, axes, starts, ends): Args: input (Tensor): A ``Tensor`` . The data type is ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``. axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to . - starts (list|tuple|Tensor): The data type is ``int32`` . If ``starts`` is a list or tuple, the elements of - it should be integers or Tensors with shape [1]. If ``starts`` is an Tensor, it should be an 1-D Tensor. + starts (list|tuple|Tensor): The data type is ``int32`` . 
If ``starts`` is a list or tuple, each element of + it should be an integer or a 0-D int Tensor with shape []. If ``starts`` is a Tensor, it should be a 1-D Tensor. It represents starting indices of corresponding axis in ``axes``. - ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, the elements of - it should be integers or Tensors with shape [1]. If ``ends`` is an Tensor, it should be an 1-D Tensor . + ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, each element of + it should be an integer or a 0-D int Tensor with shape []. If ``ends`` is a Tensor, it should be a 1-D Tensor . It represents ending indices of corresponding axis in ``axes``. Returns: @@ -1065,21 +1065,21 @@ def tolist(x): print(expectlist) #[0, 1, 2, 3, 4] """ - # TODO(zhouwei): will remove 0D Tensor.numpy() hack + # TODO(zhouwei): will remove 0-D Tensor.numpy() hack return x.numpy(False).tolist() def concat(x, axis=0, name=None): """ - Concatenates the input along the axis. + Concatenates the input along the axis. It doesn't support 0-D Tensors because concatenation requires an axis, and a 0-D Tensor + doesn't have any axis. Args: x (list|tuple): ``x`` is a Tensor list or Tensor tuple which is with data type bool, float16, float32, float64, int32, int64, int8, uint8. All the Tensors in ``x`` must have same data type. axis (int|Tensor, optional): Specify the axis to operate on the input Tensors. - It's a scalar with data type int or a Tensor with shape [1] and data type int32 - or int64. The effective range is [-R, R), where R is Rank(x). When ``axis < 0``, + It should be an integer or a 0-D int Tensor with shape []. The effective range is [-R, R), where R is Rank(x). When ``axis < 0``, it works the same way as ``axis+R``. Default is 0. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -1550,11 +1550,11 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): if x_dim == 0: if not (isinstance(start_axis, int)) or start_axis not in [0, -1]: raise ValueError( - "The start_axis should be int, and should be 0 or -1 when the input tensor is a 0D-Tensor" + "The start_axis should be int, and should be 0 or -1 when the input tensor is a 0-D Tensor" ) if not (isinstance(stop_axis, int)) or stop_axis not in [0, -1]: raise ValueError( - "The stop_axis should be int, and should be 0 or -1 when the input tensor is a 0D-Tensor" + "The stop_axis should be int, and should be 0 or -1 when the input tensor is a 0-D Tensor" ) else: if ( @@ -1913,8 +1913,8 @@ def split(x, num_or_sections, axis=0, name=None): If ``num_or_sections`` is a list or tuple, the length of it indicates the number of sub-Tensors and the elements in it indicate the sizes of sub-Tensors' dimension orderly. The length of the list must not be larger than the ``x`` 's size of specified ``axis``. - axis (int|Tensor, optional): The axis along which to split, it can be a scalar with type - ``int`` or a ``Tensor`` with shape [1] and data type ``int32`` or ``int64``. + axis (int|Tensor, optional): The axis along which to split, it can be an integer or a ``0-D Tensor`` + with shape [] and data type ``int32`` or ``int64``. If :math::`axis < 0`, the axis to split along is :math:`rank(x) + axis`. Default is 0. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
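Beyond the printed outputs, the ``concat``/``split``/``slice`` signatures above now accept a 0-D integer Tensor wherever a shape-``[1]`` Tensor used to be required for ``axis``, ``starts`` or ``ends``. A small sketch of the ``axis`` case, assuming the patched build (the shapes are arbitrary):

.. code-block:: python

    import paddle

    x = paddle.rand([3, 4])
    axis = paddle.to_tensor(1)            # 0-D int64 Tensor, shape []
    print(axis.shape)                     # []
    halves = paddle.split(x, num_or_sections=2, axis=axis)
    print([h.shape for h in halves])      # [[3, 2], [3, 2]]
    y = paddle.concat(halves, axis=axis)  # the same 0-D axis works for concat
    print(y.shape)                        # [3, 4]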
@@ -2557,7 +2557,7 @@ def unsqueeze(x, axis, name=None): Args: x (Tensor): The input Tensor to be unsqueezed. Supported data type: bfloat16, float16, float32, float64, bool, int8, int32, int64. axis (int|list|tuple|Tensor): Indicates the dimensions to be inserted. The data type is ``int32`` . - If ``axis`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. + If ``axis`` is a list or tuple, each element of it should be an integer or a 0-D Tensor with shape []. If ``axis`` is a Tensor, it should be an 1-D Tensor . If ``axis`` is negative, ``axis = axis + ndim(x) + 1``. name (str|None): Name for this layer. Please refer to :ref:`api_guide_Name`, Default None. @@ -3083,8 +3083,8 @@ def chunk(x, chunks, axis=0, name=None): Args: x (Tensor): A N-D Tensor. The data type is bool, float16, float32, float64, int32 or int64. chunks(int): The number of tensor to be split along the certain axis. - axis (int|Tensor, optional): The axis along which to split, it can be a scalar with type - ``int`` or a ``Tensor`` with shape [1] and data type ``int32`` or ``int64``. + axis (int|Tensor, optional): The axis along which to split, it can be an integer or a ``0-D Tensor`` + with shape [] and data type ``int32`` or ``int64``. If :math::`axis < 0`, the axis to split along is :math:`rank(x) + axis`. Default is 0. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . @@ -3523,7 +3523,7 @@ def reshape(x, shape, name=None): Args: x (Tensor): An N-D Tensor. The data type is ``float32``, ``float64``, ``int32``, ``int64`` or ``bool`` shape (list|tuple|Tensor): Define the target shape. At most one dimension of the target shape can be -1. - The data type is ``int32`` . If ``shape`` is a list or tuple, the elements of it should be integers or Tensors with shape []. + The data type is ``int32`` . If ``shape`` is a list or tuple, each element of it should be an integer or a Tensor with shape []. If ``shape`` is an Tensor, it should be an 1-D Tensor . name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -3843,11 +3843,15 @@ def strided_slice(x, axes, starts, ends, strides, name=None): x (Tensor): An N-D ``Tensor``. The data type is ``bool``, ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``. axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to. It's optional. If it is not provides, it will be treated as :math:`[0,1,...,len(starts)-1]`. - starts (list|tuple|Tensor): The data type is ``int32`` . If ``starts`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``starts`` is an Tensor, it should be an 1-D Tensor. It represents starting indices of corresponding axis in ``axes``. - ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, the elements of - it should be integers or Tensors with shape [1]. If ``ends`` is an Tensor, it should be an 1-D Tensor . It represents ending indices of corresponding axis in ``axes``. - strides (list|tuple|Tensor): The data type is ``int32`` . If ``strides`` is a list or tuple, the elements of - it should be integers or Tensors with shape [1]. If ``strides`` is an Tensor, it should be an 1-D Tensor . It represents slice step of corresponding axis in ``axes``. + starts (list|tuple|Tensor): The data type is ``int32`` .
If ``starts`` is a list or tuple, the elements of it should be + integers or Tensors with shape []. If ``starts`` is a Tensor, it should be a 1-D Tensor. + It represents starting indices of corresponding axis in ``axes``. + ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, the elements of it should be + integers or Tensors with shape []. If ``ends`` is a Tensor, it should be a 1-D Tensor. + It represents ending indices of corresponding axis in ``axes``. + strides (list|tuple|Tensor): The data type is ``int32`` . If ``strides`` is a list or tuple, the elements of it should be + integers or Tensors with shape []. If ``strides`` is a Tensor, it should be a 1-D Tensor. + It represents slice step of corresponding axis in ``axes``. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . @@ -4074,7 +4078,7 @@ def tensordot(x, y, axes=2, name=None): y = paddle.arange(10, dtype=data_type) z1 = paddle.tensordot(x, y, axes=1) z2 = paddle.dot(x, y) - # z1 = z2 = [285.] + # z1 = z2 = 285. # For two 2-d tensor x and y, the case axes=1 is equivalent to matrix multiplication. diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index cbe0474960de219815cfdd75f674fdaa249c53c9..debf74e87ac45bb2aeef50fd8367a0e17da3e3cc 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -187,7 +187,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): Args: x (Tensor): Input N-D Tensor of scale operator. Data type can be float32, float64, int8, int16, int32, int64, uint8. - scale (float|Tensor): The scale factor of the input, it should be a float number or a Tensor with shape [1] and data type as float32. + scale (float|Tensor): The scale factor of the input, it should be a float number or a 0-D Tensor with shape [] and data type as float32. bias (float): The bias to be put on the input. bias_after_scale (bool): Apply bias addition after or before scaling. It is useful for numeric stability in some circumstances. act (str, optional): Activation applied to the output such as tanh, softmax, sigmoid, relu. @@ -1337,9 +1337,9 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None): # Each example is followed by the corresponding output tensor. x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, 0.7]]) - out1 = paddle.sum(x) # [3.5] + out1 = paddle.sum(x) # 3.5 out2 = paddle.sum(x, axis=0) # [0.3, 0.5, 1.1, 1.6] - out3 = paddle.sum(x, axis=-1) # [1.9, 1.6] + out3 = paddle.sum(x, axis=-1) # [1.9, 1.6] out4 = paddle.sum(x, axis=1, keepdim=True) # [[1.9], [1.6]] # y is a Tensor with shape [2, 2, 2] and elements as below: @@ -1357,7 +1357,7 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None): # Each example is followed by the corresponding output tensor. x = paddle.to_tensor([[True, True, True, True], [False, False, False, False]]) - out7 = paddle.sum(x) # [4] + out7 = paddle.sum(x) # 4 out8 = paddle.sum(x, axis=0) # [1, 1, 1, 1] out9 = paddle.sum(x, axis=1) # [4, 0] """ @@ -1493,9 +1493,9 @@ def nansum(x, axis=None, dtype=None, keepdim=False, name=None): # Each example is followed by the corresponding output tensor.
x = paddle.to_tensor([[float('nan'), 0.3, 0.5, 0.9], [0.1, 0.2, float('-nan'), 0.7]],dtype="float32") - out1 = paddle.nansum(x) # [2.7] + out1 = paddle.nansum(x) # 2.7 out2 = paddle.nansum(x, axis=0) # [0.1, 0.5, 0.5, 1.6] - out3 = paddle.nansum(x, axis=-1) # [1.7, 1.0] + out3 = paddle.nansum(x, axis=-1) # [1.7, 1.0] out4 = paddle.nansum(x, axis=1, keepdim=True) # [[1.7], [1.0]] # y is a Tensor with shape [2, 2, 2] and elements as below: @@ -1553,7 +1553,7 @@ def nanmean(x, axis=None, keepdim=False, name=None): x = paddle.to_tensor([[float('nan'), 0.3, 0.5, 0.9], [0.1, 0.2, float('-nan'), 0.7]]) out1 = paddle.nanmean(x) - # [0.44999996] + # 0.44999996 out2 = paddle.nanmean(x, axis=0) # [0.1, 0.25, 0.5, 0.79999995] out3 = paddle.nanmean(x, axis=0, keepdim=True) @@ -2263,7 +2263,7 @@ def logsumexp(x, axis=None, keepdim=False, name=None): import paddle x = paddle.to_tensor([[-1.5, 0., 2.], [3., 1.2, -2.4]]) - out1 = paddle.logsumexp(x) # [3.4691226] + out1 = paddle.logsumexp(x) # 3.4691226 out2 = paddle.logsumexp(x, 1) # [2.15317821, 3.15684602] """ @@ -2375,7 +2375,7 @@ def max(x, axis=None, keepdim=False, name=None): result1 = paddle.max(x) result1.backward() print(result1, x.grad) - #[0.9], [[0., 0., 0., 1.], [0., 0., 0., 0.]] + # 0.9, [[0., 0., 0., 1.], [0., 0., 0., 0.]] x.clear_grad() result2 = paddle.max(x, axis=0) @@ -2476,7 +2476,7 @@ def min(x, axis=None, keepdim=False, name=None): result1 = paddle.min(x) result1.backward() print(result1, x.grad) - #[0.1], [[0., 0., 0., 0.], [1., 0., 0., 0.]] + # 0.1, [[0., 0., 0., 0.], [1., 0., 0., 0.]] x.clear_grad() result2 = paddle.min(x, axis=0) @@ -2580,13 +2580,13 @@ def amax(x, axis=None, keepdim=False, name=None): result1 = paddle.amax(x) result1.backward() print(result1, x.grad) - #[0.9], [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]] + # 0.9, [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]] x.clear_grad() result1_max = paddle.max(x) result1_max.backward() print(result1_max, x.grad) - #[0.9], [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]] + # 0.9, [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]] ############################### @@ -2690,13 +2690,13 @@ def amin(x, axis=None, keepdim=False, name=None): result1 = paddle.amin(x) result1.backward() print(result1, x.grad) - #[0.1], [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]] + # 0.1, [[0., 0.2, 0.2, 0.2], [0.2, 0.2, 0., 0.]] x.clear_grad() result1_min = paddle.min(x) result1_min.backward() print(result1_min, x.grad) - #[0.1], [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]] + # 0.1, [[0., 1.0, 1.0, 1.0], [1.0, 1.0, 0., 0.]] ############################### @@ -2907,10 +2907,10 @@ def clip(x, min=None, max=None, name=None): Args: x (Tensor): An N-D Tensor with data type float16, float32, float64, int32 or int64. - min (float|int|Tensor, optional): The lower bound with type ``float`` , ``int`` or a ``Tensor`` - with shape [1] and type ``int32``, ``float16``, ``float32``, ``float64``. - max (float|int|Tensor, optional): The upper bound with type ``float``, ``int`` or a ``Tensor`` - with shape [1] and type ``int32``, ``float16``, ``float32``, ``float64``. + min (float|int|Tensor, optional): The lower bound with type ``float`` , ``int`` or a ``0-D Tensor`` + with shape [] and type ``int32``, ``float16``, ``float32``, ``float64``. + max (float|int|Tensor, optional): The upper bound with type ``float``, ``int`` or a ``0-D Tensor`` + with shape [] and type ``int32``, ``float16``, ``float32``, ``float64``. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. 
Returns: @@ -3064,7 +3064,7 @@ def trace(x, offset=0, axis1=0, axis2=1, name=None): case1 = paddle.randn([2, 3]) case2 = paddle.randn([3, 10, 10]) case3 = paddle.randn([3, 10, 5, 10]) - data1 = paddle.trace(case1) # data1.shape = [1] + data1 = paddle.trace(case1) # data1.shape = [] data2 = paddle.trace(case2, offset=1, axis1=1, axis2=2) # data2.shape = [3] data3 = paddle.trace(case3, offset=-3, axis1=1, axis2=-1) # data2.shape = [3, 5] """ @@ -3692,7 +3692,7 @@ def prod(x, axis=None, keepdim=False, dtype=None, name=None): x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, 0.7]]) out1 = paddle.prod(x) - # [0.0002268] + # 0.0002268 out2 = paddle.prod(x, -1) # [0.027 0.0084] @@ -3898,8 +3898,8 @@ def all(x, axis=None, keepdim=False, name=None): print(x) x = paddle.cast(x, 'bool') - # out1 should be [False] - out1 = paddle.all(x) # [False] + # out1 should be False + out1 = paddle.all(x) # False print(out1) # out2 should be [True, False] @@ -3907,7 +3907,7 @@ def all(x, axis=None, keepdim=False, name=None): print(out2) # keepdim=False, out3 should be [False, True], out.shape should be (2,) - out3 = paddle.all(x, axis=-1) # [False, True] + out3 = paddle.all(x, axis=-1) # [False, True] print(out3) # keepdim=True, out4 should be [[False], [True]], out.shape should be (2,1) @@ -3972,12 +3972,12 @@ def any(x, axis=None, keepdim=False, name=None): # [[True, False] # [True, True]] - # out1 should be [True] - out1 = paddle.any(x) # [True] + # out1 should be True + out1 = paddle.any(x) # True print(out1) # out2 should be [True, True] - out2 = paddle.any(x, axis=0) # [True, True] + out2 = paddle.any(x, axis=0) # [True, True] print(out2) # keepdim=False, out3 should be [True, True], out.shape should be (2,) @@ -4481,8 +4481,8 @@ def rad2deg(x, name=None): x2 = paddle.to_tensor(math.pi/2) result2 = paddle.rad2deg(x2) print(result2) - # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [90.]) + # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True, + # 90.) x3 = paddle.to_tensor(1) result3 = paddle.rad2deg(x3) @@ -5382,27 +5382,27 @@ def trapezoid(y, x=None, dx=None, axis=-1, name=None): y = paddle.to_tensor([4, 5, 6], dtype='float32') print(paddle.trapezoid(y)) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [10.]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 10.) print(paddle.trapezoid(y, dx=2.)) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [20.]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 20.) y = paddle.to_tensor([4, 5, 6], dtype='float32') x = paddle.to_tensor([1, 2, 3], dtype='float32') print(paddle.trapezoid(y, x)) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [10.]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 10.) y = paddle.to_tensor([1, 2, 3], dtype='float64') x = paddle.to_tensor([8, 6, 4], dtype='float64') print(paddle.trapezoid(y, x)) - # Tensor(shape=[1], dtype=float64, place=Place(cpu), stop_gradient=True, - # [-8.]) + # Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True, + # -8.) 
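The reduction hunks above (``sum``, ``nansum``, ``nanmean``, ``logsumexp``, ``max``/``min``, ``amax``/``amin``, ``trace``, ``prod``, ``all``/``any``, ``trapezoid``) all follow one rule: a full reduction now yields a 0-D Tensor, while a per-axis reduction keeps its dimensions. A compact sketch of that distinction, assuming the patched build (values are illustrative):

.. code-block:: python

    import paddle

    x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, 0.7]])
    print(paddle.sum(x).shape)            # []   full reduction is 0-D
    print(paddle.max(x).shape)            # []
    print(paddle.all(x > 0.5).shape)      # []
    print(paddle.sum(x, axis=0).shape)    # [4]  per-axis reduction keeps a dim
    print(paddle.sum(x, axis=1, keepdim=True).shape)  # [2, 1]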
y = paddle.arange(6).reshape((2, 3)).astype('float32') print(paddle.trapezoid(y, axis=0)) diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index 7677f71bcac299ae8ca24e799b581ac2c1bafd42..c2332da9ea7d99b2cc3ac3dcbba004dd0f15ade3 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -65,7 +65,7 @@ def mean(x, axis=None, keepdim=False, name=None): [17., 18., 19., 20.], [21., 22., 23., 24.]]]) out1 = paddle.mean(x) - # [12.5] + # 12.5 out2 = paddle.mean(x, axis=-1) # [[ 2.5 6.5 10.5] # [14.5 18.5 22.5]] @@ -140,7 +140,7 @@ def var(x, axis=None, unbiased=True, keepdim=False, name=None): x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) out1 = paddle.var(x) - # [2.66666667] + # 2.66666667 out2 = paddle.var(x, axis=1) # [1. 4.33333333] """ @@ -205,9 +205,9 @@ def std(x, axis=None, unbiased=True, keepdim=False, name=None): x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) out1 = paddle.std(x) - # [1.63299316] + # 1.63299316 out2 = paddle.std(x, unbiased=False) - # [1.49071205] + # 1.49071205 out3 = paddle.std(x, axis=1) # [1. 2.081666] @@ -222,8 +222,7 @@ def std(x, axis=None, unbiased=True, keepdim=False, name=None): def numel(x, name=None): """ - Returns the number of elements for a tensor, which is a int64 Tensor with shape [1] in static graph mode - or a scalar value in imperative mode. + Returns the number of elements for a tensor, which is a 0-D int64 Tensor with shape []. Args: x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64. @@ -231,7 +230,7 @@ def numel(x, name=None): For more information, please refer to :ref:`api_guide_Name`. Returns: - Tensor: The number of elements for the input Tensor. + Tensor: The number of elements for the input Tensor, whose shape is []. Examples: .. code-block:: python @@ -387,8 +386,8 @@ def median(x, axis=None, keepdim=False, name=None): # [8 , 9 , 10, 11]]) y1 = paddle.median(x) - # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - # [5.50000000]) + # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + # 5.50000000) y2 = paddle.median(x, axis=0) # Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, @@ -416,7 +415,7 @@ def median(x, axis=None, keepdim=False, name=None): -1, 0, None, - ], 'when input 0D, axis can only be [-1, 0] or default None' + ], 'when input 0-D, axis can only be [-1, 0] or default None' is_flatten = True if axis is None: diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index 897735333ace8191ad5e2f6fc3f14b508ea72100..0d8cd9a6b819692b1507035506ecf47c4c380fa0 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -181,8 +181,7 @@ def _format_tensor(var, summary, indent=0, max_width=0, signed=False): linewidth = DEFAULT_PRINT_OPTIONS.linewidth if len(var.shape) == 0: - # currently, shape = [], i.e., scaler tensor is not supported. - # If it is supported, it should be formatted like this. + # 0-D Tensor, whose shape = [], should be formatted like this. 
return _format_item(var, max_width, signed) elif len(var.shape) == 1: item_length = max_width + 2 @@ -291,7 +290,7 @@ def _format_dense_tensor(tensor, indent): if tensor.dtype == core.VarDesc.VarType.BF16: tensor = tensor.astype('float32') - # TODO(zhouwei): will remove 0D Tensor.numpy() hack + # TODO(zhouwei): will remove 0-D Tensor.numpy() hack np_tensor = tensor.numpy(False) if len(tensor.shape) == 0: diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index 7da4d1f1ba1228df396a92f0f024c702b4c02f79..f5fbcc35d46de4fd59a283823739aa57b30befe4 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -562,8 +562,6 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): extra_compile_args[compiler] = [] if IS_WINDOWS: - # TODO(zhouwei): may append compile flags in future - pass # append link flags extra_link_args = kwargs.get('extra_link_args', []) extra_link_args.extend(MSVC_LINK_FLAGS)
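Taken together with the ``stat.py`` and ``to_string.py`` hunks, scalar statistics are now 0-D everywhere and print without the surrounding brackets. A final sketch, assuming the patched build (the 2x3 input is illustrative and the printed ``place`` depends on the device):

.. code-block:: python

    import paddle

    x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
    for out in (paddle.mean(x), paddle.std(x), paddle.median(x), paddle.numel(x)):
        print(out.shape)                  # [] for every one of them
    n = paddle.numel(x)
    print(int(n))                         # 6, a 0-D int64 Tensor converts cleanly
    print(n)
    # roughly: Tensor(shape=[], dtype=int64, place=Place(cpu), stop_gradient=True,
    #          6)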