Problem with calling backward multiple times in dygraph
Created by: sserdoubleh
The code is intended to do roughly the following:

1. First forward pass: x -> fc1 -> hidden -> fc2 -> pred.
2. Stop gradients from propagating back through hidden and pred.
3. Add noise to hidden to obtain adv_hidden, then continue the forward pass: adv_hidden -> fc2 -> adv_pred.
4. Compute the first loss, diff = mse(pred, adv_pred), and call backward to get the gradient of adv_hidden.
5. Update adv_hidden using its gradient, which in turn updates adv_pred.
6. Re-enable gradient propagation through hidden and pred.
7. Compute the final loss, loss = mse(pred, y) + mse(pred, adv_pred), and run a second backward.

The relevant code:
```python
import random

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.dygraph as dygraph
from paddle.fluid.framework import Variable

random.seed(11)
np.random.seed(11)

place = fluid.CPUPlace()
x = np.random.randn(2, 2).astype("float32")
y = np.random.randn(2, 1).astype("float32")
opt = fluid.optimizer.Adam()
sz = 2 * 1

with dygraph.guard(place):
    fc1 = dygraph.FC("fc1", 2)
    fc2 = dygraph.FC("fc2", 1)
    for _ in range(10):
        x = dygraph.to_variable(x)
        y = dygraph.to_variable(y)

        # First forward pass: x -> fc1 -> hidden -> fc2 -> pred
        hidden = fc1(x)
        pred = fc2(hidden)

        # Stop gradients from flowing back through hidden and pred
        pred.stop_gradient = True
        hidden.stop_gradient = True

        # Perturb hidden with noise and forward the adversarial branch
        noise = np.random.normal(scale=1e-5, size=hidden.shape).astype("float32")
        noise = dygraph.to_variable(noise)
        adv_hidden = hidden + noise
        adv_hidden.stop_gradient = False
        adv_pred = fc2(adv_hidden)

        # First backward: get the gradient of adv_hidden w.r.t. diff
        diff = layers.mse_loss(layers.reshape(adv_pred, [sz]), layers.reshape(pred, [sz]))
        diff.backward()
        adv_hidden_grad = adv_hidden.gradient()
        adv_hidden_grad = dygraph.to_variable(adv_hidden_grad)

        # Update adv_hidden along its (max-normalized) gradient
        max_grad_value = layers.reduce_max(adv_hidden_grad)
        adv_hidden_grad = adv_hidden_grad / max_grad_value
        adv_hidden = adv_hidden + adv_hidden_grad * 1e-5
        # fc1.clear_gradients()
        fc2.clear_gradients()

        # Re-enable gradient propagation through hidden and pred,
        # then recompute adv_pred from the updated adv_hidden
        pred.stop_gradient = False
        hidden.stop_gradient = False
        adv_pred = fc2(adv_hidden)

        # Final loss and second backward
        loss = layers.mse_loss(layers.reshape(pred, [sz]), layers.reshape(y, [sz])) \
            + layers.mse_loss(layers.reshape(pred, [sz]), layers.reshape(adv_pred, [sz]))
        loss.backward()
        opt.minimize(loss)
        fc1.clear_gradients()
        fc2.clear_gradients()
```
Running it reports the following error:

```
PaddleCheckError: Expected param_dims == ctx->GetInputDim("Grad"), but received param_dims:2, 2 != ctx->GetInputDim("Grad"):0.
Param and Grad input of AdamOp should have same dimension at [/paddle/paddle/fluid/operators/optimizers/adam_op.cc:65]
```
This looks like the error raised when fc1's parameters have no gradient at all (the Grad input of AdamOp is empty, hence the dimension 0 in the message).
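In case it helps, here is a minimal sketch of one possible workaround, assuming the diagnosis above is right (fc1's parameters end up with an empty gradient because the pred -> fc1 part of the graph built before the first backward is no longer usable for the second one). The idea is to run the adversarial branch on detached numpy copies and to rebuild the main forward pass after the first backward, so the final backward always reaches fc1. The names x_var, hidden_fixed, pred_fixed, adv_hidden_np and adv_pred2 are just illustrative; this is not an official Paddle recipe.

```python
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.dygraph as dygraph

np.random.seed(11)
place = fluid.CPUPlace()
x = np.random.randn(2, 2).astype("float32")
y = np.random.randn(2, 1).astype("float32")
sz = 2 * 1

with dygraph.guard(place):
    fc1 = dygraph.FC("fc1", 2)
    fc2 = dygraph.FC("fc2", 1)
    opt = fluid.optimizer.Adam()
    for _ in range(10):
        x_var = dygraph.to_variable(x)
        y_var = dygraph.to_variable(y)

        # --- adversarial branch on detached copies --------------------------
        # Run a first forward only to obtain numerical values for hidden/pred,
        # then detach them by going through numpy, so the backward below does
        # not depend on the fc1 part of the graph.
        hidden_fixed = dygraph.to_variable(fc1(x_var).numpy())
        pred_fixed = dygraph.to_variable(fc2(hidden_fixed).numpy())

        noise = np.random.normal(scale=1e-5, size=hidden_fixed.shape).astype("float32")
        adv_hidden = hidden_fixed + dygraph.to_variable(noise)
        adv_hidden.stop_gradient = False
        adv_pred = fc2(adv_hidden)
        diff = layers.mse_loss(layers.reshape(adv_pred, [sz]),
                               layers.reshape(pred_fixed, [sz]))
        diff.backward()

        # Gradient of adv_hidden as a numpy array; normalize by its max value
        # as in the original code, then build the perturbed hidden from numpy.
        grad = adv_hidden.gradient()
        grad = grad / grad.max()
        adv_hidden_np = adv_hidden.numpy() + grad * 1e-5
        fc1.clear_gradients()
        fc2.clear_gradients()

        # --- main branch, traced after the first backward --------------------
        # Re-run the forward pass so the graph pred -> fc2 -> hidden -> fc1 is
        # built fresh; the second backward then reaches fc1's parameters.
        hidden = fc1(x_var)
        pred = fc2(hidden)
        adv_pred2 = fc2(dygraph.to_variable(adv_hidden_np))
        loss = layers.mse_loss(layers.reshape(pred, [sz]), layers.reshape(y_var, [sz])) \
            + layers.mse_loss(layers.reshape(pred, [sz]), layers.reshape(adv_pred2, [sz]))
        loss.backward()
        opt.minimize(loss)
        fc1.clear_gradients()
        fc2.clear_gradients()
```

The extra forward pass in the main branch costs a little compute, but it avoids toggling stop_gradient on variables that were already traced and keeps the graph used by the second backward independent of the first one.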