From 9f91ebc4fd24bb7dd5710a3362d5009c6da813b2 Mon Sep 17 00:00:00 2001
From: songyouwei
Date: Tue, 7 Jan 2020 16:07:23 +0800
Subject: [PATCH] delete name_scope related docs (#1717)

* del left name_scope docs
test=develop

* minor fix
test=develop
---
 doc/fluid/api_cn/dygraph_cn/GRUUnit_cn.rst |   3 +-
 doc/fluid/api_cn/fluid_cn/Variable_cn.rst  |   3 +-
 .../user_guides/howto/dygraph/DyGraph.md   | 106 +++++++++---------
 3 files changed, 58 insertions(+), 54 deletions(-)

diff --git a/doc/fluid/api_cn/dygraph_cn/GRUUnit_cn.rst b/doc/fluid/api_cn/dygraph_cn/GRUUnit_cn.rst
index 6d9a6d727..e0b44fa34 100644
--- a/doc/fluid/api_cn/dygraph_cn/GRUUnit_cn.rst
+++ b/doc/fluid/api_cn/dygraph_cn/GRUUnit_cn.rst
@@ -33,7 +33,6 @@ GRUUnit
 :math:`W_{uh}, b_u`, :math:`W_{rh}, b_r`, and :math:`W_{ch}, b_c` are the weight matrices and biases used to compute the update gate, the reset gate, and the candidate hidden state, respectively. In the implementation, the three weight matrices are merged and stored as a single Tensor of shape :math:`[D, D \times 3]`.
 
 Parameters:
-    - **name_scope** (str) – The name of this class.
     - **size** (int) – The dimension of the input data.
     - **param_attr** (ParamAttr, optional) – The object specifying the attributes of the weight parameters. Defaults to None, which means the default weight parameter attributes are used. See :ref:`cn_api_fluid_ParamAttr` for details.
 
 **Note**
@@ -64,7 +63,7 @@ GRUUnit
     hidden_input = numpy.random.rand(T, D).astype('float32')
     with fluid.dygraph.guard():
         x = numpy.random.random((3, 32, 32)).astype('float32')
-        gru = fluid.dygraph.GRUUnit('gru', size=D * 3)
+        gru = fluid.dygraph.GRUUnit(size=D * 3)
         dy_ret = gru(
             base.to_variable(input), base.to_variable(hidden_input))

diff --git a/doc/fluid/api_cn/fluid_cn/Variable_cn.rst b/doc/fluid/api_cn/fluid_cn/Variable_cn.rst
index 70b1e0b62..530ad2de9 100644
--- a/doc/fluid/api_cn/fluid_cn/Variable_cn.rst
+++ b/doc/fluid/api_cn/fluid_cn/Variable_cn.rst
@@ -207,7 +207,6 @@ Variable
         # example2: return a tuple of ndarray
         with fluid.dygraph.guard():
             embedding = fluid.dygraph.Embedding(
-                name_scope='embedding',
                 size=[20, 32],
                 param_attr='emb.w',
                 is_sparse=True)
@@ -216,7 +215,7 @@ Variable
             x = fluid.dygraph.base.to_variable(x_data)
             out = embedding(x)
             out.backward()
-            print(embedding._w.gradient())
+            print(embedding.weight.gradient())
 
 .. py:method:: clear_gradient()
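The two API changes above — constructing dygraph layers without a `name_scope` argument, and reading parameter gradients from the public `weight` attribute instead of the private `_w` — can be exercised together. Below is a minimal, self-contained sketch (assuming PaddlePaddle 1.7's dygraph API; `is_sparse=False` is used so that `gradient()` returns a dense ndarray rather than the tuple returned in the sparse case; illustration only, not part of the patch):

```python
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Post-patch style: no name_scope argument in the constructor.
    embedding = fluid.dygraph.Embedding(
        size=[20, 32],       # vocabulary of 20 ids, embedding width 32
        param_attr='emb.w',
        is_sparse=False)     # dense gradients: gradient() is one ndarray

    # Embedding expects int64 ids with a trailing dimension of 1.
    x_data = np.arange(12).reshape(4, 3, 1).astype('int64')
    x = fluid.dygraph.to_variable(x_data)

    out = embedding(x)
    out.backward()

    # Post-patch style: the parameter is exposed as `weight`, not `_w`.
    print(embedding.weight.gradient().shape)  # (20, 32)
```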
diff --git a/doc/fluid/user_guides/howto/dygraph/DyGraph.md b/doc/fluid/user_guides/howto/dygraph/DyGraph.md
index 66690ee88..42ecf4348 100644
--- a/doc/fluid/user_guides/howto/dygraph/DyGraph.md
+++ b/doc/fluid/user_guides/howto/dygraph/DyGraph.md
@@ -70,18 +70,20 @@ print(loss.gradient())
 1. Write a piece of object-oriented model code to be executed by DyGraph. PaddlePaddle model code of this kind consists of the following **two parts**: **Note that if the layer you design contains parameters, the layer's behavior must be described by an object-oriented class that inherits from `fluid.dygraph.Layer`.**
 
-    1. Build an object-oriented network that can be executed in DyGraph mode. It needs to inherit from `fluid.dygraph.Layer`, call the base class's `__init__` method, and implement an `__init__` constructor with a `name_scope` parameter (used to identify the name of this layer). In the constructor we usually perform operations such as parameter initialization and sub-network initialization, which do not depend on the dynamic information of the input:
+    1. Build an object-oriented network that can be executed in DyGraph mode. It needs to inherit from `fluid.dygraph.Layer` and call the base class's `__init__` method. In the constructor we usually perform operations such as parameter initialization and sub-network initialization, which do not depend on the dynamic information of the input:
 
         ```python
         class MyLayer(fluid.dygraph.Layer):
-            def __init__(self, name_scope):
-                super(MyLayer, self).__init__(name_scope)
+            def __init__(self, input_size):
+                super(MyLayer, self).__init__()
+                self.linear = fluid.dygraph.nn.Linear(input_size, 12)
         ```
 
-    2. Implement a `forward(self, *inputs)` function that carries out the actual runtime execution logic of the network. It is called in every round of training/inference. Here we run a simple `relu` -> `elementwise add` -> `reduce sum`:
+    2. Implement a `forward(self, *inputs)` function that carries out the actual runtime execution logic of the network. It is called in every round of training/inference. Here we run a simple `linear` -> `relu` -> `elementwise mul` -> `reduce sum`:
 
         ```python
         def forward(self, inputs):
-            x = fluid.layers.relu(inputs)
+            x = self.linear(inputs)
+            x = fluid.layers.relu(x)
             self._x_for_debug = x
             x = fluid.layers.elementwise_mul(x, x)
@@ -102,7 +104,7 @@ print(loss.gradient())
     ```python
     with fluid.dygraph.guard():
         var_inp = fluid.dygraph.to_variable(np_inp)
-        my_layer = MyLayer("my_layer")
+        my_layer = MyLayer(np_inp.shape[-1])
         x = my_layer(var_inp)[0]
         dy_out = x.numpy()
     ```
@@ -122,12 +124,12 @@ import numpy as np
 
 
 class MyLayer(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(MyLayer, self).__init__(name_scope)
-        self.fc = fluid.dygraph.nn.FC(self.full_name(), size=12)
+    def __init__(self, input_size):
+        super(MyLayer, self).__init__()
+        self.linear = fluid.dygraph.nn.Linear(input_size, 12)
 
     def forward(self, inputs):
-        x = self.fc(inputs)
+        x = self.linear(inputs)
         x = fluid.layers.relu(x)
         self._x_for_debug = x
         x = fluid.layers.elementwise_mul(x, x)
@@ -139,7 +141,7 @@ if __name__ == '__main__':
     np_inp = np.array([[1.0, 2.0, -1.0]], dtype=np.float32)
     with fluid.dygraph.guard():
         var_inp = fluid.dygraph.to_variable(np_inp)
-        my_layer = MyLayer("my_layer")
+        my_layer = MyLayer(np_inp.shape[-1])
         x = my_layer(var_inp)[0]
         dy_out = x.numpy()
         x.backward()
@@ -187,8 +189,8 @@ with fluid.dygraph.guard():
     value0 = np.arange(26).reshape(2, 13).astype("float32")
     value1 = np.arange(6).reshape(2, 3).astype("float32")
     value2 = np.arange(10).reshape(2, 5).astype("float32")
-    fc = fluid.FC("fc1", size=5, dtype="float32")
-    fc2 = fluid.FC("fc2", size=3, dtype="float32")
+    fc = fluid.dygraph.Linear(13, 5, dtype="float32")
+    fc2 = fluid.dygraph.Linear(3, 3, dtype="float32")
     a = fluid.dygraph.to_variable(value0)
     b = fluid.dygraph.to_variable(value1)
     c = fluid.dygraph.to_variable(value2)
@@ -198,7 +200,7 @@ with fluid.dygraph.guard():
     out = fluid.layers.concat(input=[out1, out2, c], axis=1)
     out.backward()
     # you can see here that the gradients of fc's parameters are all 0
-    assert (fc._w.gradient() == 0).all()
+    assert (fc.weight.gradient() == 0).all()
     assert (out1.gradient() == 0).all()
 ```
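The FC-to-Linear migration in the hunk above also changes how dimensions are declared: `Linear` takes an explicit input dimension up front, whereas `FC` inferred it from the first forward pass and only took an output `size`. A minimal sketch of the new-style construction (assuming PaddlePaddle 1.7; illustration only, not part of the patch):

```python
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Linear(input_dim, output_dim): both dimensions are declared up front.
    fc = fluid.dygraph.Linear(13, 5, dtype="float32")

    a = fluid.dygraph.to_variable(
        np.arange(26).reshape(2, 13).astype("float32"))
    out = fc(a)

    print(out.shape)        # [2, 5]
    print(fc.weight.shape)  # [13, 5] -- parameters exposed as weight/bias
    print(fc.bias.shape)    # [5]
```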
@@ -220,7 +222,7 @@ with fluid.dygraph.guard():
     ```python
     class SimpleImgConvPool(fluid.dygraph.Layer):
         def __init__(self,
-                     name_scope,
+                     num_channels,
                      num_filters,
                      filter_size,
                      pool_size,
@@ -236,10 +238,10 @@ with fluid.dygraph.guard():
                      use_cudnn=False,
                      param_attr=None,
                      bias_attr=None):
-            super(SimpleImgConvPool, self).__init__(name_scope)
+            super(SimpleImgConvPool, self).__init__()
 
             self._conv2d = fluid.dygraph.Conv2D(
-                self.full_name(),
+                num_channels=num_channels,
                 num_filters=num_filters,
                 filter_size=filter_size,
                 stride=conv_stride,
@@ -252,7 +254,6 @@ with fluid.dygraph.guard():
                 use_cudnn=use_cudnn)
 
             self._pool2d = fluid.dygraph.Pool2D(
-                self.full_name(),
                 pool_size=pool_size,
                 pool_type=pool_type,
                 pool_stride=pool_stride,
@@ -272,28 +273,30 @@ with fluid.dygraph.guard():
     ```python
     class MNIST(fluid.dygraph.Layer):
-        def __init__(self, name_scope):
-            super(MNIST, self).__init__(name_scope)
+        def __init__(self):
+            super(MNIST, self).__init__()
 
             self._simple_img_conv_pool_1 = SimpleImgConvPool(
-                self.full_name(), 20, 5, 2, 2, act="relu")
+                1, 20, 5, 2, 2, act="relu")
 
             self._simple_img_conv_pool_2 = SimpleImgConvPool(
-                self.full_name(), 50, 5, 2, 2, act="relu")
+                20, 50, 5, 2, 2, act="relu")
 
-            pool_2_shape = 50 * 4 * 4
+            self.pool_2_shape = 50 * 4 * 4
             SIZE = 10
-            scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-            self._fc = fluid.dygraph.FC(self.full_name(),
-                                        10,
-                                        param_attr=fluid.param_attr.ParamAttr(
-                                            initializer=fluid.initializer.NormalInitializer(
-                                                loc=0.0, scale=scale)),
-                                        act="softmax")
+            scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
+            self._fc = fluid.dygraph.Linear(
+                self.pool_2_shape,
+                10,
+                param_attr=fluid.param_attr.ParamAttr(
+                    initializer=fluid.initializer.NormalInitializer(
+                        loc=0.0, scale=scale)),
+                act="softmax")
 
         def forward(self, inputs, label=None):
             x = self._simple_img_conv_pool_1(inputs)
             x = self._simple_img_conv_pool_2(x)
+            x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
             x = self._fc(x)
             if label is not None:
                 acc = fluid.layers.accuracy(input=x, label=label)
@@ -306,7 +309,9 @@ with fluid.dygraph.guard():
     ```python
     with fluid.dygraph.guard():
-        mnist = MNIST("mnist")
+        mnist = MNIST()
+        train_reader = paddle.batch(
+            paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
         id, data = list(enumerate(train_reader()))[0]
         dy_x_data = np.array(
             [x[0].reshape(1, 28, 28)
@@ -335,8 +340,8 @@ with fluid.dygraph.guard():
     BATCH_SIZE = 64
     train_reader = paddle.batch(
         paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
-    mnist = MNIST("mnist")
-    adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001)
+    mnist = MNIST()
+    adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
     for epoch in range(epoch_num):
         for batch_id, data in enumerate(train_reader()):
             dy_x_data = np.array([x[0].reshape(1, 28, 28)
@@ -370,8 +375,8 @@ with fluid.dygraph.guard():
 
     epoch_num = 5
     BATCH_SIZE = 64
-    mnist = MNIST("mnist")
-    adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001)
+    mnist = MNIST()
+    adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
     train_reader = paddle.batch(
         paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
@@ -452,16 +457,18 @@ with fluid.dygraph.guard():
     ```python
     place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
     with fluid.dygraph.guard(place):
         strategy = fluid.dygraph.parallel.prepare_context()
-        mnist = MNIST("mnist")
-        adam = AdamOptimizer(learning_rate=0.001)
+        epoch_num = 5
+        BATCH_SIZE = 64
+        mnist = MNIST()
+        adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
         mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)
 
         train_reader = paddle.batch(
             paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
         train_reader = fluid.contrib.reader.distributed_batch_reader(
-            train_reader)
+            train_reader)
@@ -470,8 +476,8 @@ with fluid.dygraph.guard(place):
                 y_data = np.array(
                     [x[1] for x in data]).astype('int64').reshape(-1, 1)
 
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
+                img = fluid.dygraph.to_variable(dy_x_data)
+                label = fluid.dygraph.to_variable(y_data)
                 label.stop_gradient = True
 
                 cost, acc = mnist(img, label)
 
                 loss = fluid.layers.cross_entropy(cost, label)
                 avg_loss = fluid.layers.mean(loss)
@@ -482,7 +488,7 @@ with fluid.dygraph.guard(place):
                 avg_loss = mnist.scale_loss(avg_loss)
                 avg_loss.backward()
                 mnist.apply_collective_grads()
-                
+
                 adam.minimize(avg_loss)
                 mnist.clear_gradients()
                 if batch_id % 100 == 0 and batch_id != 0:
@@ -500,8 +506,8 @@ with fluid.dygraph.guard(place):
 
     ```python
     strategy = fluid.dygraph.parallel.prepare_context()
-    mnist = MNIST("mnist")
-    adam = AdamOptimizer(learning_rate=0.001)
+    mnist = MNIST()
+    adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
     mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)
     ```
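Condensing the multi-card hunks above: dygraph data-parallel training wraps the model in `DataParallel` and brackets `backward()` with loss scaling and gradient aggregation, and the optimizer now takes `parameter_list=model.parameters()`. A skeleton of that loop (assuming PaddlePaddle 1.7's `fluid.dygraph.parallel` API and a multi-GPU job started with `python -m paddle.distributed.launch`; illustration only, not part of the patch):

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
with fluid.dygraph.guard(place):
    strategy = fluid.dygraph.parallel.prepare_context()
    model = fluid.dygraph.Linear(784, 10)  # stand-in for the MNIST model above
    adam = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001, parameter_list=model.parameters())
    model = fluid.dygraph.parallel.DataParallel(model, strategy)

    x = fluid.dygraph.to_variable(
        np.random.rand(4, 784).astype('float32'))
    loss = fluid.layers.reduce_mean(model(x))

    loss = model.scale_loss(loss)     # 1. scale the loss before backward
    loss.backward()
    model.apply_collective_grads()    # 2. all-reduce gradients across cards
    adam.minimize(loss)
    model.clear_gradients()
```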
@@ -619,8 +625,8 @@ with fluid.dygraph.guard():
 
     epoch_num = 5
     BATCH_SIZE = 64
-    mnist = MNIST("mnist")
-    adam = fluid.optimizer.Adam(learning_rate=0.001)
+    mnist = MNIST()
+    adam = fluid.optimizer.Adam(learning_rate=0.001, parameter_list=mnist.parameters())
     train_reader = paddle.batch(
         paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
@@ -715,7 +721,7 @@ def test_mnist(reader, model, batch_size):
 
 def inference_mnist():
     with fluid.dygraph.guard():
-        mnist_infer = MNIST("mnist")
+        mnist_infer = MNIST()
         # load checkpoint
         model_dict, _ = fluid.dygraph.load_dygraph("paddle_dy")
         mnist_infer.load_dict(model_dict)
@@ -741,8 +747,8 @@ def inference_mnist():
 with fluid.dygraph.guard():
     epoch_num = 1
     BATCH_SIZE = 64
-    mnist = MNIST("mnist")
-    adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001)
+    mnist = MNIST()
+    adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
     test_reader = paddle.batch(
         paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
@@ -818,8 +824,8 @@ epoch_num = 1
 BATCH_SIZE = 64
 exe = fluid.Executor(fluid.CPUPlace())
-mnist = MNIST("mnist")
-sgd = fluid.optimizer.SGDOptimizer(learning_rate=1e-3)
+mnist = MNIST()
+sgd = fluid.optimizer.SGDOptimizer(learning_rate=1e-3, parameter_list=mnist.parameters())
 train_reader = paddle.batch(
     paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
 img = fluid.layers.data(
-- 
GitLab