Unverified commit 6737226f authored by zhouweiwei2014, committed by GitHub

[Zero-Dim] correct some code to adapt to 0D Tensor (#51562)

Parent bb9eb20f
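The recurring change in this diff replaces tensor.numpy()[0] with float(tensor) or tensor.numpy().item(): a 0D Tensor's ndarray has no axis to index, so [0] raises an IndexError, while float() and item() extract the scalar from both 0D and shape-[1] Tensors. A minimal sketch of the pattern (not taken from the patch), assuming a Paddle build where reductions such as paddle.mean already return 0D Tensors:

import paddle

# Assumed setup for illustration: paddle.mean on a 1-D input yields a 0D Tensor
# (shape []) under the zero-dim behavior this commit adapts to.
loss = paddle.mean(paddle.rand([8]))

# loss.numpy()[0]           # would raise IndexError: a 0-d ndarray cannot be indexed
print(float(loss))          # scalar extraction that works for 0D and shape-[1] Tensors
print(loss.numpy().item())  # equivalent form, as in the while_loop change below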
......@@ -92,12 +92,10 @@ class LearningRateDecay:
continue
value = self.__dict__[key]
if isinstance(value, Variable):
assert value.shape == [
1
], "shape of Variable in state_dict must be [1] {}".format(
value.shape
)
value = value.numpy()[0]
assert (
value.size == 1
), "size of Variable in state_dict must be 1"
value = float(value)
state_dict[key] = value
return state_dict
......@@ -857,7 +855,7 @@ class ReduceLROnPlateau(LearningRateDecay):
# adjust learning rate according to avg_loss
reduce_lr.step(avg_loss)
lr = adam.current_step_lr()
print("current avg_loss is %s, current lr is %s" % (avg_loss.numpy()[0], lr))
print("current avg_loss is %s, current lr is %s" % (float(avg_loss), lr))
"""
......@@ -979,14 +977,11 @@ class ReduceLROnPlateau(LearningRateDecay):
)
if self.learning_rate - new_lr > self.eps:
if self.verbose:
old_lr = (
self.learning_rate.numpy()[0]
if isinstance(self.learning_rate, Variable)
else self.learning_rate
)
print(
'Epoch {}: reducing learning rate from {} to {}.'.format(
self.epoch_num, old_lr, new_lr.numpy()[0]
self.epoch_num,
float(self.learning_rate),
float(new_lr),
)
)
self.learning_rate = new_lr
......
......@@ -1150,7 +1150,7 @@ def while_loop(cond, body, loop_vars, is_test=False, name=None):
)
if in_dygraph_mode():
now_cond = pre_cond.numpy()[0]
now_cond = pre_cond.numpy().item()
while now_cond:
output_vars = body(*loop_vars)
if not isinstance(output_vars, (list, tuple)):
......@@ -1160,7 +1160,7 @@ def while_loop(cond, body, loop_vars, is_test=False, name=None):
"body in while_loop should return the same arity "
"(length and structure) and types as loop_vars"
)
now_cond = cond(*output_vars).numpy()[0]
now_cond = cond(*output_vars).numpy().item()
map_structure(assign_skip_lod_tensor_array, output_vars, loop_vars)
return loop_vars
else:
......
......@@ -596,19 +596,19 @@ class Optimizer:
"""
current_lr = self._global_learning_rate()
if isinstance(current_lr, framework.Variable):
return self._global_learning_rate().numpy()[0]
return float(current_lr)
if isinstance(self._learning_rate, float):
return self._learning_rate
elif isinstance(self._learning_rate, _LearningRateEpochDecay):
step_lr = self._learning_rate()
return step_lr.numpy()[0]
return float(step_lr)
else:
step_lr = self._learning_rate.step()
if isinstance(step_lr, (float, int)):
return step_lr
else:
return step_lr.numpy()[0]
return float(step_lr)
def _global_learning_rate(self, program=None):
"""
......
......@@ -41,7 +41,7 @@ def dyfunc_empty_nonlocal(x):
def dyfunc_with_if_else(x_v, label=None):
if paddle.mean(x_v).numpy()[0] > 5:
if paddle.mean(x_v).numpy() > 5:
x_v = x_v - 1
else:
x_v = x_v + 1
......@@ -61,7 +61,7 @@ def dyfunc_with_if_else2(x, col=100):
# `x` is Tensor, `col` is not Tensor, and `col` is the return value of `true_fn` after transformed.
# col = -1
col = fluid.layers.fill_constant(shape=[1], value=-1, dtype="int64")
if paddle.mean(x).numpy()[0] > x.numpy()[row][col]:
if paddle.mean(x).numpy() > x.numpy()[row][col]:
y = paddle.nn.functional.relu(x)
else:
x_pow = paddle.pow(x, 2)
......@@ -89,14 +89,14 @@ def dyfunc_with_if_else3(x):
m = x + 2
n = x + 3
return q, x, y, z
q, x, y, z = paddle.static.nn.cond(paddle.mean(x)[0] < 5, lambda :
q, x, y, z = paddle.static.nn.cond(paddle.mean(x) < 5, lambda :
paddle.jit.dy2static.convert_call(true_fn_0)(q, x, y),
lambda : paddle.jit.dy2static.convert_call(false_fn_0)(q,
x, y))
"""
y = x + 1
# NOTE: x_v[0] < 5 is True
if paddle.mean(x).numpy()[0] < 5:
if paddle.mean(x).numpy() < 5:
x = x + 1
z = x + 2
q = x + 3
......@@ -164,7 +164,7 @@ def nested_if_else(x_v):
if y.numpy()[0] < 10:
tmp = y * w
y = paddle.nn.functional.relu(tmp)
if paddle.mean(y).numpy()[0] < batch_size:
if paddle.mean(y).numpy() < batch_size:
y = paddle.abs(y)
else:
tmp = fluid.layers.fill_constant(
......@@ -264,7 +264,7 @@ class NetWithControlFlowIf(fluid.dygraph.Layer):
)
# Control flow `if` statement
fc_out = self.fc(input)
if paddle.mean(fc_out).numpy()[0] < 0:
if paddle.mean(fc_out).numpy() < 0:
y = fc_out + self.constant_vars['bias']
self.constant_vars['w'] = fluid.layers.fill_constant(
[5], dtype='float32', value=10
......@@ -297,7 +297,7 @@ def if_with_and_or(x_v, label=None):
batch_size = paddle.shape(x_v)
if (
x_v is not None
and (paddle.mean(x_v).numpy()[0] > 0 or label is not None)
and (paddle.mean(x_v).numpy() > 0 or label is not None)
and batch_size[0] > 1
and True
):
......@@ -338,10 +338,10 @@ def if_with_and_or_3(x, y=None):
x is not None
and batch_size[0] > 1
and y is not None
and mean_res.numpy()[0] > 0
and mean_res.numpy() > 0
):
x = x + 1
if mean_res.numpy()[0] > 0 and (x is not None and batch_size[0] > 1) and y:
if mean_res.numpy() > 0 and (x is not None and batch_size[0] > 1) and y:
x = x - 1
return x
......@@ -350,11 +350,11 @@ def if_with_and_or_4(x, y=None):
batch_size = paddle.shape(x)
mean_res = paddle.mean(x)
if (x is not None and batch_size[0] > 1) or (
y is not None and mean_res.numpy()[0] > 0
y is not None and mean_res.numpy() > 0
):
x = x + 1
if (x is not None or batch_size[0] > 1) and (
y is not None or mean_res.numpy()[0] > 0
y is not None or mean_res.numpy() > 0
):
x = x - 1
return x
......
......@@ -620,19 +620,19 @@ def val_bmn(model, args):
avg_loss = paddle.mean(loss)
loss_data += [
avg_loss.numpy()[0],
tem_loss.numpy()[0],
pem_reg_loss.numpy()[0],
pem_cls_loss.numpy()[0],
float(avg_loss),
float(tem_loss),
float(pem_reg_loss),
float(pem_cls_loss),
]
print(
'[VALID] iter {} '.format(batch_id)
+ '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.format(
'%f' % avg_loss.numpy()[0],
'%f' % tem_loss.numpy()[0],
'%f' % pem_reg_loss.numpy()[0],
'%f' % pem_cls_loss.numpy()[0],
'%f' % float(avg_loss),
'%f' % float(tem_loss),
'%f' % float(pem_reg_loss),
'%f' % float(pem_cls_loss),
)
)
......@@ -716,10 +716,10 @@ class TestTrain(unittest.TestCase):
bmn.clear_gradients()
# log loss data to verify correctness
loss_data += [
avg_loss.numpy()[0],
tem_loss.numpy()[0],
pem_reg_loss.numpy()[0],
pem_cls_loss.numpy()[0],
float(avg_loss),
float(tem_loss),
float(pem_reg_loss),
float(pem_cls_loss),
]
if args.log_interval > 0 and (
......@@ -728,10 +728,10 @@ class TestTrain(unittest.TestCase):
print(
'[TRAIN] Epoch {}, iter {} '.format(epoch, batch_id)
+ '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.format(
'%f' % avg_loss.numpy()[0],
'%f' % tem_loss.numpy()[0],
'%f' % pem_reg_loss.numpy()[0],
'%f' % pem_cls_loss.numpy()[0],
'%f' % float(avg_loss),
'%f' % float(tem_loss),
'%f' % float(pem_reg_loss),
'%f' % float(pem_cls_loss),
)
)
......
......@@ -32,7 +32,7 @@ np.random.seed(SEED)
# Use a decorator to test exception
@paddle.jit.to_static
def dyfunc_with_if(x_v):
if paddle.mean(x_v).numpy()[0] > 5:
if paddle.mean(x_v).numpy() > 5:
x_v = x_v - 1
else:
x_v = x_v + 1
......@@ -53,7 +53,7 @@ def nested_func(x_v):
@paddle.jit.to_static
def dyfunc_with_third_library_logging(x_v):
logging.info('test dyfunc_with_third_library_logging')
if paddle.mean(x_v).numpy()[0] > 5:
if paddle.mean(x_v).numpy() > 5:
x_v = x_v - 1
else:
x_v = x_v + 1
......
......@@ -669,7 +669,7 @@ def train(args, to_static):
cyc_B_loss,
idt_loss_B,
]
cur_batch_loss = [x.numpy()[0] for x in cur_batch_loss]
cur_batch_loss = [float(x) for x in cur_batch_loss]
batch_time = time.time() - s_time
t_time += batch_time
......
......@@ -75,12 +75,12 @@ class TestNestLayerHook(unittest.TestCase):
if to_static:
paddle.jit.save(net, self.path)
return out.numpy()[0]
return float(out)
def load_train(self):
net = paddle.jit.load(self.path)
out = net(self.x)
return out.numpy()[0]
return float(out)
def test_hook(self):
dy_out = self.train_net(to_static=False)
......
......@@ -219,7 +219,7 @@ class TestMNISTWithToStatic(TestMNIST):
avg_loss.backward()
adam.minimize(avg_loss)
loss_data.append(avg_loss.numpy()[0])
loss_data.append(float(avg_loss))
# save checkpoint
mnist.clear_gradients()
if batch_id % 10 == 0:
......@@ -236,7 +236,7 @@ class TestMNISTWithToStatic(TestMNIST):
if batch_id == 50:
mnist.eval()
prediction, acc, avg_loss = mnist(img, label)
loss_data.append(avg_loss.numpy()[0])
loss_data.append(float(avg_loss))
# new save load check
self.check_jit_save_load(
mnist, [dy_x_data], [img], to_static, prediction
......
......@@ -86,7 +86,7 @@ class TestAMP(TestMNIST):
scaled.backward()
scaler.minimize(adam, scaled)
loss_data.append(avg_loss.numpy()[0])
loss_data.append(float(avg_loss))
# save checkpoint
mnist.clear_gradients()
if batch_id % 10 == 0:
......
......@@ -346,25 +346,25 @@ def train(args, fake_data_reader, to_static):
optimizer.minimize(avg_loss)
video_model.clear_gradients()
total_loss += avg_loss.numpy()[0]
total_acc1 += acc_top1.numpy()[0]
total_acc5 += acc_top5.numpy()[0]
total_loss += float(avg_loss)
total_acc1 += float(acc_top1)
total_acc5 += float(acc_top5)
total_sample += 1
print(
'TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.format(
epoch,
batch_id,
avg_loss.numpy()[0],
acc_top1.numpy()[0],
acc_top5.numpy()[0],
float(avg_loss),
float(acc_top1),
float(acc_top5),
)
)
ret.extend(
[
avg_loss.numpy()[0],
acc_top1.numpy()[0],
acc_top5.numpy()[0],
float(avg_loss),
float(acc_top1),
float(acc_top5),
]
)
......
......@@ -25,10 +25,7 @@ from paddle.fluid.backward import _append_grad_suffix_, _as_list
def _product(t):
if isinstance(t, int):
return t
else:
return np.product(t)
return int(np.product(t))
def dtype_to_np_dtype(dtype):
......
......@@ -1546,7 +1546,19 @@ class TestSundryAPI(unittest.TestCase):
self.assertEqual(x2.grad.numpy(), 0)
def test_lerp(self):
# 0D + 0D
# 0D + 0D, weight is float scalar
x = paddle.rand([])
y = paddle.rand([])
x.stop_gradient = False
y.stop_gradient = False
out = paddle.lerp(x, y, 0.5)
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(x.grad.shape, [])
self.assertEqual(y.grad.shape, [])
# 0D + 0D, weight is 0D
x0 = paddle.rand([])
y0 = paddle.rand([])
w0 = paddle.rand([])
......@@ -2896,11 +2908,15 @@ class TestSundryAPIStatic(unittest.TestCase):
[(), (), (), ()],
[(), (64, 64), (), (64, 64)],
[(64, 64), (), (), (64, 64)],
[(64, 64), (), 0.5, (64, 64)],
]
for shape in shapes:
x = paddle.rand(shape[0])
y = paddle.rand(shape[1])
w = paddle.rand(shape[2])
if isinstance(shape[2], float):
w = shape[2]
else:
w = paddle.rand(shape[2])
x.stop_gradient = False
y.stop_gradient = False
......
......@@ -706,7 +706,7 @@ def _unpool_output_size(x, kernel_size, stride, padding, output_size):
else:
for i, var in enumerate(output_size):
if isinstance(var, Variable):
output_size[i] = var.numpy()[0]
output_size[i] = var.numpy().item()
if len(output_size) == len(kernel_size) + 2:
output_size = output_size[2:]
......
......@@ -156,12 +156,10 @@ class LRScheduler:
continue
value = self.__dict__[key]
if isinstance(value, Tensor):
assert value.shape == [
1
], "shape of Tensor in state_dict must be [1] {}".format(
value.shape
)
value = value.numpy()[0]
assert (
value.size == 1
), "numel of Tensor in state_dict must be 1"
value = float(value)
state_dict[key] = value
return state_dict
......@@ -1236,7 +1234,7 @@ class ReduceOnPlateau(LRScheduler):
Reduce learning rate when ``metrics`` has stopped descending. Models often benefit from reducing the learning rate
by 2 to 10 times once model performance has no longer improvement.
The ``metrics`` is the one which has been pass into ``step`` , it must be 1-D Tensor with shape [1]. When ``metrics``
The ``metrics`` is the one which has been passed into ``step``, and its shape must be [] or [1]. When ``metrics``
stop descending for a ``patience`` number of epochs, the learning rate will be reduced to ``learning_rate * factor`` .
(Specially, ``mode`` can also be set to ``'max`` , in this case, when ``metrics`` stop ascending for a ``patience``
number of epochs, the learning rate will be reduced.)
......@@ -1390,7 +1388,7 @@ class ReduceOnPlateau(LRScheduler):
Args:
metrics (Tensor|numpy.ndarray|float): Which will be monitored to determine whether the learning rate will reduce.
If it stop descending for a ``patience`` number of epochs, the learning rate will reduce. If it's 'Tensor' or
'numpy.ndarray', its shape must be [1].
'numpy.ndarray', its numel must be 1.
epoch (int, None): specify current epoch. Default: None. Auto-increment from last_epoch=-1.
Returns:
......@@ -1404,13 +1402,12 @@ class ReduceOnPlateau(LRScheduler):
else:
self.last_epoch = epoch
# loss must be float, numpy.ndarray or 1-D Tensor with shape [1]
# loss must be float, numpy.ndarray or Tensor with numel 1
if isinstance(metrics, (core.eager.Tensor, numpy.ndarray)):
assert len(metrics.shape) == 1 and metrics.shape[0] == 1, (
"the metrics.shape "
"should be (1L,), but the current metrics.shape is {}. Maybe that "
assert metrics.size == 1, (
"the size of metrics must be 1, but the current metrics.size is {}. Maybe that "
"you should call paddle.mean to process it first.".format(
metrics.shape
metrics.size
)
)
elif not isinstance(
......
......@@ -437,7 +437,7 @@ class Optimizer:
self._learning_rate._var_name = lr_name
lr_var = self.helper.create_global_variable(
name=lr_name,
shape=[1],
shape=[],
persistable=True,
stop_gradient=True,
dtype=_lr_dtype,
......@@ -465,7 +465,7 @@ class Optimizer:
framework.default_main_program()
] = paddle.static.create_global_var(
name=unique_name.generate("learning_rate"),
shape=[1],
shape=[],
value=float(self._learning_rate),
dtype=_lr_dtype,
persistable=True,
......
......@@ -4200,15 +4200,12 @@ def lerp(x, y, weight, name=None):
# out: [5.5, 6., 6.5, 7.]
"""
if in_dygraph_mode():
if isinstance(weight, float):
weight = paddle.to_tensor(weight, dtype=x.dtype)
if isinstance(weight, float):
weight = paddle.full(shape=[], fill_value=weight, dtype=x.dtype)
if in_dygraph_mode():
return _C_ops.lerp(x, y, weight)
else:
if isinstance(weight, float):
weight = paddle.full(shape=[1], fill_value=weight, dtype=x.dtype)
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'lerp')
check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'lerp')
check_variable_and_dtype(
......
......@@ -457,7 +457,7 @@ def rotate(
nh = npos[0][1]
if paddle.in_dynamic_mode():
ow, oh = int(nw.numpy()[0]), int(nh.numpy()[0])
ow, oh = int(nw), int(nh)
else:
ow, oh = nw.astype("int32"), nh.astype("int32")
......