From 80b13bc80085462d10e7a2921e3774dd7b0ec512 Mon Sep 17 00:00:00 2001
From: Youwei Song
Date: Wed, 16 Oct 2019 10:52:13 +0800
Subject: [PATCH] add stop_gradient notes in Dygraph (#1511)

* add stop_gradient notes in Dygraph, test=develop
* add stop_gradient notes in Dygraph, test=develop
* polish code in Dygraph, test=develop
* polish code in Dygraph, test=develop
* polish code in Dygraph, test=develop
* add Automatic pruning in Dygraph, test=develop
---
 .../user_guides/howto/dygraph/DyGraph.md | 67 +++++++++++++++++--
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/doc/fluid/user_guides/howto/dygraph/DyGraph.md b/doc/fluid/user_guides/howto/dygraph/DyGraph.md
index 3c062f4e8..4cc260144 100644
--- a/doc/fluid/user_guides/howto/dygraph/DyGraph.md
+++ b/doc/fluid/user_guides/howto/dygraph/DyGraph.md
@@ -134,9 +134,11 @@ import numpy as np
 class MyLayer(fluid.dygraph.Layer):
     def __init__(self, name_scope):
         super(MyLayer, self).__init__(name_scope)
+        self.fc = fluid.dygraph.nn.FC(self.full_name(), size=12)
 
     def forward(self, inputs):
-        x = fluid.layers.relu(inputs)
+        x = self.fc(inputs)
+        x = fluid.layers.relu(x)
         self._x_for_debug = x
         x = fluid.layers.elementwise_mul(x, x)
         x = fluid.layers.reduce_sum(x)
@@ -144,10 +146,9 @@ class MyLayer(fluid.dygraph.Layer):
 
 
 if __name__ == '__main__':
-    np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
+    np_inp = np.array([[1.0, 2.0, -1.0]], dtype=np.float32)
     with fluid.dygraph.guard():
         var_inp = fluid.dygraph.to_variable(np_inp)
-        var_inp.stop_gradient = False
         my_layer = MyLayer("my_layer")
         x = my_layer(var_inp)[0]
         dy_out = x.numpy()
@@ -156,6 +157,61 @@ if __name__ == '__main__':
         my_layer.clear_gradients()  # zero the parameter gradients to keep the next training iteration correct
 ```
 
+### Automatic pruning
+
+Every ``Variable`` has a ``stop_gradient`` attribute that can be used to exclude parts of a subgraph from backward gradient computation at a fine granularity, improving efficiency.
+
+If any one input of an OP requires gradients, then the output of that OP also requires gradients.
+Conversely, the output of an OP does not require gradients only when none of the OP's inputs require gradients.
+In a subgraph where no ``Variable`` requires gradients, backward computation is not performed at all.
+
+In dygraph mode, the ``stop_gradient`` attribute of every ``Variable`` other than parameters defaults to ``True``, while for parameters it defaults to ``False``.
+This attribute drives automatic pruning, which avoids unnecessary backward computation.
+
+For example:
+
+```python
+import paddle.fluid as fluid
+import numpy as np
+
+with fluid.dygraph.guard():
+    x = fluid.dygraph.to_variable(np.random.randn(5, 5))  # stop_gradient=True by default
+    y = fluid.dygraph.to_variable(np.random.randn(5, 5))  # stop_gradient=True by default
+    z = fluid.dygraph.to_variable(np.random.randn(5, 5))
+    z.stop_gradient = False
+    a = x + y
+    a.stop_gradient  # True
+    b = a + z
+    b.stop_gradient  # False
+```
+
+This feature is very useful when you want to freeze part of your model, or when you know in advance that you will not need the gradients of certain parameters.
+
+For example:
+
+```python
+import paddle.fluid as fluid
+import numpy as np
+
+with fluid.dygraph.guard():
+    value0 = np.arange(26).reshape(2, 13).astype("float32")
+    value1 = np.arange(6).reshape(2, 3).astype("float32")
+    value2 = np.arange(10).reshape(2, 5).astype("float32")
+    fc = fluid.dygraph.nn.FC("fc1", size=5, dtype="float32")
+    fc2 = fluid.dygraph.nn.FC("fc2", size=3, dtype="float32")
+    a = fluid.dygraph.to_variable(value0)
+    b = fluid.dygraph.to_variable(value1)
+    c = fluid.dygraph.to_variable(value2)
+    out1 = fc(a)
+    out2 = fc2(b)
+    out1.stop_gradient = True  # backward will not be computed for the out1 subgraph
+    out = fluid.layers.concat(input=[out1, out2, c], axis=1)
+    out.backward()
+    # the gradients of fc's parameters are all zero here
+    assert (fc._w.gradient() == 0).all()
+    assert (out1.gradient() == 0).all()
+```
+
 ## Training a model with DyGraph
 
 Next we take the most basic model, handwritten digit recognition, as an example and show how to build and train a model in DyGraph mode:
@@ -201,8 +257,8 @@ if __name__ == '__main__':
             padding=conv_padding,
             dilation=conv_dilation,
             groups=conv_groups,
-            param_attr=None,
-            bias_attr=None,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
             act=act,
             use_cudnn=use_cudnn)
 
@@ -291,7 +347,6 @@ if __name__ == '__main__':
         train_reader = paddle.batch(
             paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
         mnist = MNIST("mnist")
-        id, data = list(enumerate(train_reader()))[0]
         adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001)
         for epoch in range(epoch_num):
             for batch_id, data in enumerate(train_reader()):
--
GitLab
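
For readers who want to try the updated first example without applying the whole patch, below is a minimal, self-contained sketch of how the `MyLayer` snippet reads after this change. The `x.backward()` and `dy_grad` lines fall outside the hunks shown above, so they are assumptions reconstructed from the surrounding DyGraph.md context, not part of this diff.

```python
# Sketch of the MyLayer example after this patch (PaddlePaddle 1.x dygraph API).
import paddle.fluid as fluid
import numpy as np


class MyLayer(fluid.dygraph.Layer):
    def __init__(self, name_scope):
        super(MyLayer, self).__init__(name_scope)
        # FC layer added by this patch; its parameters default to stop_gradient=False
        self.fc = fluid.dygraph.nn.FC(self.full_name(), size=12)

    def forward(self, inputs):
        x = self.fc(inputs)
        x = fluid.layers.relu(x)
        self._x_for_debug = x  # keep an intermediate Variable for later inspection
        x = fluid.layers.elementwise_mul(x, x)
        x = fluid.layers.reduce_sum(x)
        return [x]


if __name__ == '__main__':
    # The input is now 2-D (a batch of one row) so it can feed the FC layer.
    np_inp = np.array([[1.0, 2.0, -1.0]], dtype=np.float32)
    with fluid.dygraph.guard():
        var_inp = fluid.dygraph.to_variable(np_inp)
        my_layer = MyLayer("my_layer")
        x = my_layer(var_inp)[0]
        dy_out = x.numpy()
        x.backward()                                # assumed: run backward from the scalar output
        dy_grad = my_layer._x_for_debug.gradient()  # assumed: read the intermediate gradient
        my_layer.clear_gradients()                  # zero parameter gradients for the next iteration
```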