Commit 56414c7d authored by songyouwei, committed by hong

move private weight fields to public ones (#21982)

* move private weight properties to public ones
test=develop

* revert changes to FC
test=develop

* fix unittest
test=develop

* fix unittest
test=develop

* fix coverage
test=develop

* fix merged dev
test=develop

* bug fix
test=develop
Parent b852ef73
This diff is collapsed.
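The renamed attributes make dygraph parameters part of the public API: where user code previously reached into a layer through the private _w (or _bias_param) field, it now uses the weight (or bias) property, and the test hunks below are the corresponding call-site updates. A minimal before/after sketch, assuming the name-scope-style fluid.dygraph constructors used elsewhere in these tests; the Embedding arguments are illustrative placeholders, not taken from the patch:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Illustrative embedding layer: size = [vocab_size, embedding_dim].
    emb = fluid.dygraph.Embedding("emb", size=[100, 32])
    ids = fluid.dygraph.to_variable(np.array([[1], [3], [7]], dtype="int64"))
    loss = fluid.layers.mean(emb(ids))
    loss.backward()

    # Before this commit the parameter was only reachable through a
    # private field:           emb._w.gradient()
    # After it, the public property is used instead:
    grad = emb.weight.gradient()

    # Other layers follow the same pattern, e.g. a Conv2D built with
    # bias_attr=False now reports conv2d.bias is None rather than
    # conv2d._bias_param is None (see the TestLayer hunk at the end).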
@@ -153,8 +153,8 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v2 = fluid.dygraph.to_variable(value2)
             loss = case1(v1, v2)
             loss.backward()
-            self.assertTrue(case1.fc2._w._grad_ivar() is not None)
-            self.assertTrue(case1.fc1._w._grad_ivar() is not None)
+            self.assertTrue(case1.fc2.weight._grad_ivar() is not None)
+            self.assertTrue(case1.fc1.weight._grad_ivar() is not None)
     def test_auto_prune2(self):
         with fluid.dygraph.guard():
@@ -166,8 +166,8 @@ class TestImperativeAutoPrune(unittest.TestCase):
             loss = case2(v1, v2)
             loss.backward()
-            self.assertTrue(case2.fc2._w._grad_ivar() is None)
-            self.assertTrue(case2.fc1._w._grad_ivar() is not None)
+            self.assertTrue(case2.fc2.weight._grad_ivar() is None)
+            self.assertTrue(case2.fc1.weight._grad_ivar() is not None)
     def test_auto_prune3(self):
         with fluid.dygraph.guard():
@@ -178,7 +178,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v2 = fluid.dygraph.to_variable(value2)
             loss, part2 = case3(v1, v2, 1)
             loss.backward()
-            self.assertTrue(case3.fc._w._grad_ivar() is not None)
+            self.assertTrue(case3.fc.weight._grad_ivar() is not None)
             self.assertTrue((part2.gradient() == 0).all())
     def test_auto_prune4(self):
@@ -190,7 +190,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v2 = fluid.dygraph.to_variable(value2)
             loss, part2 = case4(v1, v2, 1)
             part2.backward()
-            self.assertTrue(case4.fc._w._grad_ivar() is not None)
+            self.assertTrue(case4.fc.weight._grad_ivar() is not None)
             self.assertTrue((part2.gradient() == 1).all())
     def test_auto_prune5(self):
@@ -202,7 +202,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
             v2 = fluid.dygraph.to_variable(value2)
             loss, part1, part2 = case4(v1, v2, 2)
             part1.backward()
-            self.assertTrue(case4.fc._w._grad_ivar() is not None)
+            self.assertTrue(case4.fc.weight._grad_ivar() is not None)
             self.assertTrue((part2.gradient() == 0).all())
     def test_auto_prune6(self):
@@ -220,7 +220,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
             out1.stop_gradient = True
             out = fluid.layers.concat(input=[out1, out2, c], axis=1)
             out.backward()
-            self.assertTrue((fc._w.gradient() == 0).all())
+            self.assertTrue((fc.weight.gradient() == 0).all())
             self.assertTrue((out1.gradient() == 0).all())
     def test_auto_prune7(self):
@@ -239,7 +239,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
             out = fluid.layers.concat(input=[out1, out2, c], axis=1)
             backward_strategy = fluid.dygraph.BackwardStrategy()
             out.backward(backward_strategy)
-            self.assertTrue((fc._w.gradient() == 0).all())
+            self.assertTrue((fc.weight.gradient() == 0).all())
             self.assertTrue((out1.gradient() == 0).all())
     def test_auto_prune8(self):
@@ -253,17 +253,17 @@ class TestImperativeAutoPrune(unittest.TestCase):
             b = fluid.dygraph.to_variable(value1)
             c = fluid.dygraph.to_variable(value2)
             out1 = fc(a)
-            fc_origin = fc._w.numpy()
+            fc_origin = fc.weight.numpy()
             out2 = fc2(out1)
-            fc2_origin = fc2._w.numpy()
-            fc2._w.stop_gradient = True
+            fc2_origin = fc2.weight.numpy()
+            fc2.weight.stop_gradient = True
             out2.backward()
             optimizer = fluid.optimizer.SGD(
                 learning_rate=0.003,
                 parameter_list=(fc.parameters() + fc2.parameters()))
             optimizer.minimize(out2)
-            self.assertTrue(np.array_equal(fc2_origin, fc2._w.numpy()))
-            self.assertFalse(np.array_equal(fc_origin, fc._w.numpy()))
+            self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy()))
+            self.assertFalse(np.array_equal(fc_origin, fc.weight.numpy()))
     def test_auto_prune9(self):
         with fluid.dygraph.guard():
@@ -276,19 +276,19 @@ class TestImperativeAutoPrune(unittest.TestCase):
             b = fluid.dygraph.to_variable(value1)
             c = fluid.dygraph.to_variable(value2)
             out1 = fc(a)
-            fc_origin = fc._w.numpy()
+            fc_origin = fc.weight.numpy()
             out2 = fc2(out1)
-            fc2_origin = fc2._w.numpy()
+            fc2_origin = fc2.weight.numpy()
             out2.stop_gradient = True
             out2.backward()
             optimizer = fluid.optimizer.SGD(
                 learning_rate=0.003,
                 parameter_list=(fc.parameters() + fc2.parameters()))
             optimizer.minimize(out2)
-            self.assertTrue(np.array_equal(fc2_origin, fc2._w.numpy()))
-            self.assertTrue(np.array_equal(fc_origin, fc._w.numpy()))
+            self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy()))
+            self.assertTrue(np.array_equal(fc_origin, fc.weight.numpy()))
             try:
-                fc2._w.gradient()
+                fc2.weight.gradient()
             except ValueError as e:
                 assert type(e) == ValueError
@@ -309,7 +309,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
             backward_strategy = fluid.dygraph.BackwardStrategy()
             backward_strategy.sort_sum_gradient = True
             out.backward(backward_strategy)
-            self.assertTrue((fc._w.gradient() == 0).all())
+            self.assertTrue((fc.weight.gradient() == 0).all())
             self.assertTrue((out1.gradient() == 0).all())
     def test_auto_prune_with_optimizer(self):
@@ -336,10 +336,10 @@ class TestImperativeAutoPrune(unittest.TestCase):
             loss.backward()
             _, params_grads = optimizer.minimize(loss, grad_clip=grad_clip)
             for items in params_grads:
-                assert items[0].name is not model.embed1._w.name
-                assert items[0].name is not model.fc1._w.name
-            assert model.embed1._w._grad_ivar() is None
-            assert model.fc1._w._grad_ivar() is None
+                assert items[0].name is not model.embed1.weight.name
+                assert items[0].name is not model.fc1.weight.name
+            assert model.embed1.weight._grad_ivar() is None
+            assert model.fc1.weight._grad_ivar() is None
         with fluid.dygraph.guard(place):
             model = MyLayer2("mylayer", vocab_size, size)
@@ -355,10 +355,10 @@ class TestImperativeAutoPrune(unittest.TestCase):
             loss.backward()
             optimizer.minimize(loss, grad_clip=grad_clip)
             for items in params_grads:
-                assert items[0].name is not model.embed1._w.name
-                assert items[0].name is not model.fc1._w.name
-            assert model.embed1._w._grad_ivar() is None
-            assert model.fc1._w._grad_ivar() is None
+                assert items[0].name is not model.embed1.weight.name
+                assert items[0].name is not model.fc1.weight.name
+            assert model.embed1.weight._grad_ivar() is None
+            assert model.fc1.weight._grad_ivar() is None
     def test_case2_prune_no_grad_branch(self):
         with fluid.dygraph.guard():
@@ -369,8 +369,8 @@ class TestImperativeAutoPrune(unittest.TestCase):
             case3 = AutoPruneLayer2("l2")
             loss = case3(v1, v2)
             loss.backward()
-            self.assertTrue(case3.fc2._w._grad_ivar() is None)
-            self.assertTrue(case3.fc._w._grad_ivar() is not None)
+            self.assertTrue(case3.fc2.weight._grad_ivar() is None)
+            self.assertTrue(case3.fc.weight._grad_ivar() is not None)
     def test_case2_prune_no_grad_branch(self):
         with fluid.dygraph.guard():
@@ -381,8 +381,8 @@ class TestImperativeAutoPrune(unittest.TestCase):
             case3 = AutoPruneLayer2("l2")
             loss = case3(v1, v2)
             loss.backward()
-            self.assertTrue(case3.fc2._w._grad_ivar() is None)
-            self.assertTrue(case3.fc._w._grad_ivar() is not None)
+            self.assertTrue(case3.fc2.weight._grad_ivar() is None)
+            self.assertTrue(case3.fc.weight._grad_ivar() is not None)
     def test_case3_prune_no_grad_branch2(self):
         with fluid.dygraph.guard():
@@ -395,7 +395,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
             out = fluid.layers.one_hot(input=label, depth=100)
             loss = fluid.layers.mean(out)
             loss.backward()
-            self.assertTrue(fc._w._grad_ivar() is None)
+            self.assertTrue(fc.weight._grad_ivar() is None)
     def test_case4_with_no_grad_op_maker(self):
         with fluid.dygraph.guard():
......
@@ -342,7 +342,7 @@ class TestImperative(unittest.TestCase):
             out = mlp(var_inp)
             dy_out = out.numpy()
             out.backward()
-            dy_grad = mlp._fc1._w.gradient()
+            dy_grad = mlp._fc1.weight.gradient()
         with fluid.dygraph.guard():
             var_inp2 = fluid.dygraph.base.to_variable(np_inp)
@@ -352,7 +352,7 @@ class TestImperative(unittest.TestCase):
             backward_strategy = fluid.dygraph.BackwardStrategy()
             backward_strategy.sort_sum_gradient = True
             out2.backward(backward_strategy)
-            dy_grad2 = mlp2._fc1._w.gradient()
+            dy_grad2 = mlp2._fc1.weight.gradient()
         with new_program_scope():
             inp = fluid.layers.data(
@@ -360,7 +360,7 @@ class TestImperative(unittest.TestCase):
             mlp = MLP("mlp")
             out = mlp(inp)
             param_grads = fluid.backward.append_backward(
-                out, parameter_list=[mlp._fc1._w.name])[0]
+                out, parameter_list=[mlp._fc1.weight.name])[0]
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
             exe.run(fluid.default_startup_program())
......
@@ -59,7 +59,7 @@ class SimpleNet(fluid.Layer):
         x_emb = self.embedding(input)
         projection = fluid.layers.matmul(
             x_emb, fluid.layers.transpose(
-                self.embedding._w, perm=[1, 0]))
+                self.embedding.weight, perm=[1, 0]))
         projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
         projection = fluid.layers.reshape(
             projection, shape=[-1, self.vocab_size])
......
@@ -61,7 +61,7 @@ class TestSimpleNet(unittest.TestCase):
                 input_emb, emb = simplenet(input)
                 try:
-                    emb._w.gradient()
+                    emb.weight.gradient()
                 except ValueError as e:
                     assert "has no grad, Please set Variable.stop_gradient=False, or check if this is the first and only variable need grad, if so, please set its pre-Variable's stop_gradient=False, to make sure it has gradient" in str(
                         e)
@@ -73,11 +73,11 @@ class TestSimpleNet(unittest.TestCase):
                 input_emb.backward(backward_strategy)
                 adam.minimize(input_emb)  # grad_clip=grad_clip
-                emb._w.gradient()
+                emb.weight.gradient()
                 emb.clear_gradients()
                 try:
-                    emb._w.gradient()
+                    emb.weight.gradient()
                 except ValueError as e:
                     assert "has no grad, Please set Variable.stop_gradient=False, or check if this is the first and only variable need grad, if so, please set its pre-Variable's stop_gradient=False, to make sure it has gradient" in str(
                         e)
@@ -108,7 +108,7 @@ class TestSimpleNet(unittest.TestCase):
                 input_emb, emb = simplenet(input)
                 try:
-                    emb._w.gradient()
+                    emb.weight.gradient()
                 except ValueError as e:
                     assert "has no grad, Please set Variable.stop_gradient=False, or check if this is the first and only variable need grad, if so, please set its pre-Variable's stop_gradient=False, to make sure it has gradient" in str(
                         e)
@@ -120,11 +120,11 @@ class TestSimpleNet(unittest.TestCase):
                 input_emb.backward(backward_strategy)
                 adam.minimize(input_emb, grad_clip=grad_clip)
-                emb._w.gradient()
+                emb.weight.gradient()
                 emb.clear_gradients()
                 try:
-                    emb._w.gradient()
+                    emb.weight.gradient()
                 except ValueError as e:
                     assert "has no grad, Please set Variable.stop_gradient=False, or check if this is the first and only variable need grad, if so, please set its pre-Variable's stop_gradient=False, to make sure it has gradient" in str(
                         e)
......
@@ -66,7 +66,7 @@ class SimpleNet(fluid.Layer):
         fc = fluid.layers.elementwise_add(fc, self.softmax_bias)
         projection = fluid.layers.matmul(
             fc, fluid.layers.transpose(
-                self.embedding._w, perm=[1, 0]))
+                self.embedding.weight, perm=[1, 0]))
         projection = fluid.layers.reshape(
             projection, shape=[-1, self.vocab_size])
         loss = fluid.layers.softmax_with_cross_entropy(
......
@@ -843,7 +843,7 @@ class WrapDecoderLayer(Layer):
         if self._weight_sharing:
             predict = fluid.layers.matmul(
                 x=dec_output_reshape,
-                y=self._prepare_decoder_layer._input_emb._w,
+                y=self._prepare_decoder_layer._input_emb.weight,
                 transpose_y=True)
         else:
             predict = self._fc(dec_output_reshape)
@@ -917,7 +917,7 @@ class TransFormer(Layer):
             is_sparse=is_sparse)
         if weight_sharing:
-            self._wrap_decoder_layer._prepare_decoder_layer._input_emb._w = self._wrap_encoder_layer._prepare_encoder_layer._input_emb._w
+            self._wrap_decoder_layer._prepare_decoder_layer._input_emb.weight = self._wrap_encoder_layer._prepare_encoder_layer._input_emb.weight
     def forward(self, enc_inputs, dec_inputs, label, weights):
         enc_output = self._wrap_encoder_layer(enc_inputs)
......
@@ -368,7 +368,7 @@ class TestLayer(LayerTest):
                 filter_size=[2, 2],
                 bias_attr=False)
             dy_ret = conv2d(base.to_variable(images))
-            self.assertTrue(conv2d._bias_param is None)
+            self.assertTrue(conv2d.bias is None)
         self.assertTrue(np.allclose(static_ret, dy_ret_value))
         self.assertTrue(np.allclose(static_ret, static_ret2))
......