Error reporting that a gradient is null
Created by: TomorrowIsAnOtherDay
Paddle version: 1.5.1
Background: reproducing COMA, a multi-agent algorithm
Detailed error messages:
Error 1:
Code:
def learn(self, obs, actions, last_actions, q_vals, lr):
    """
    Args:
        obs: [4*env*batch, time, 84]
        actions: [4*env*batch, time, 1]
        last_actions: [4*env*batch, time, 1]
        q_vals: [env*batch, 4, time, 22]
        lr: float scalar of learning rate.
    """
    mac_out = []
    hidden_state = None
    pre_cell = None
    obs_batch = self._build_actor_inputs(obs, last_actions)  # [4*env*batch, time, 106]
    for t in range(obs_batch.shape[1]):
        obs_ = layers.slice(obs_batch, axes=[1], starts=[t], ends=[t + 1])  # [4*env*batch, 106]
        if hidden_state is None:
            hidden_state, pre_cell = self.model.init_hidden_state(obs_)  # [4*env*batch, 64]
        logits, hidden_state, pre_cell = self.model.policy(obs_, hidden_state, pre_cell)  # [4*env*batch, 22]
        mac_out.append(logits)  # list of length time, each [4*env*batch, 22]
    mac_out = layers.stack(mac_out, axis=1)  # [4*env*batch, time, 22]

    # Calculate the counterfactual baseline
    q_vals = layers.reshape(q_vals, [-1, self.action_dim])  # [4*env*batch*time, 22]
    pi = layers.reshape(mac_out, [-1, self.action_dim])  # [4*env*batch*time, 22]
    baseline = layers.reduce_sum(pi * q_vals, dim=-1, keep_dim=True)  # [4*env*batch*time, 1]

    # Calculate the policy gradient
    actions_for_one_hot = layers.reshape(actions, [-1, 1])  # [4*env*batch*time, 1]
    actions_one_hot = layers.one_hot(actions_for_one_hot, self.action_dim)  # [4*env*batch*time, 22]
    q_taken = layers.reduce_sum(actions_one_hot * q_vals, dim=-1, keep_dim=True)  # [4*env*batch*time, 1]
    pi_taken = layers.reduce_sum(actions_one_hot * pi, dim=-1, keep_dim=True)  # [4*env*batch*time, 1]
    log_pi_taken = layers.log(pi_taken)  # [4*env*batch*time, 1]
    advantages = q_taken - baseline
    coma_loss = layers.reduce_sum(advantages * log_pi_taken)  # [1]

    # Optimise agents
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=self.grad_norm_clip))
    optimizer = fluid.optimizer.RMSPropOptimizer(lr, rho=self.optim_alpha, epsilon=self.optim_eps)
    optimizer.minimize(coma_loss)  # <- the error below is raised here
    return coma_loss
Error:
  line 300, in learn
    optimizer.minimize(total_loss)
  File "", line 2, in minimize
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/wrapped_decorator.py", line 25, in impl
    return wrapped_func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/dygraph/base.py", line 87, in impl
    return func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/optimizer.py", line 594, in minimize
    no_grad_set=no_grad_set)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/optimizer.py", line 493, in backward
    no_grad_set, callbacks)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/backward.py", line 578, in append_backward
    _append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/backward.py", line 392, in _append_backward_vars_
    op_desc.infer_shape(block.desc)
paddle.fluid.core_avx.EnforceNotMet: Input(C@GRAD) should not be null at [/home/teamcity/work/ef54dc8a5b211854/paddle/fluid/operators/lstm_unit_op.cc:88]
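My reading of this first message (only a guess on my side) is that append_backward cannot find a gradient variable for the cell output C of an lstm_unit op, presumably because the final pre_cell returned by self.model.policy is never consumed by coma_loss. The stripped-down sketch below is not my project code; the layer sizes and names are invented, but on Paddle 1.5.1 it seems to exercise the same enforce in lstm_unit_op.cc when the cell output is left dangling:

import paddle.fluid as fluid

# Invented minimal sketch: the cell output of lstm_unit is never used by the loss,
# so no C@GRAD variable exists when append_backward builds the backward graph.
x = fluid.layers.data(name='x', shape=[16], dtype='float32')
h0 = fluid.layers.fill_constant_batch_size_like(x, shape=[-1, 32], dtype='float32', value=0.0)
c0 = fluid.layers.fill_constant_batch_size_like(x, shape=[-1, 32], dtype='float32', value=0.0)
hidden, cell = fluid.layers.lstm_unit(x_t=x, hidden_t_prev=h0, cell_t_prev=c0)

loss = fluid.layers.reduce_sum(hidden)  # only the hidden output reaches the loss; cell is dangling
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)  # expected to raise: Input(C@GRAD) should not be null

In my real code the last pre_cell of the unrolled policy is exactly such a dangling cell output, so I am not sure whether this is expected behaviour of append_backward in 1.5.1 or something I should work around.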
Error 2:
Code:
def _train_critic(self, obs, actions, last_actions, rewards, targets, lr_critic):
    """
    :param obs: [4*env*batch, time, 84]
    :param actions: [4*env*batch, time, 1]
    :param last_actions: [4*env*batch, time, 1]
    :param rewards: [env*batch, time]
    :param targets: [env*batch, 4, time]
    :return: q_vals [env*batch, 4, time, 22], critic_train_stats
    """
    # Build critic inputs and one-hot actions
    batch = self._build_critic_inputs(obs, actions, last_actions)  # [env*batch, time, 452]
    actions_one_hot = layers.one_hot(actions, self.action_dim)  # [4*env*batch, time, 22]
    actions_one_hot = layers.reshape(
        actions_one_hot, [-1, 4, batch.shape[-2], self.action_dim])  # [env*batch, 4, time, 22]

    # Optimise agents
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=self.grad_norm_clip))
    optimizer = fluid.optimizer.RMSPropOptimizer(lr_critic, rho=self.optim_alpha, epsilon=self.optim_eps)

    critic_train_stats = {
        "critic_loss": [],
        "td_error_abs": [],
        "target_mean": [],
        "q_taken_mean": []
    }
    q_vals_list = []
    for t in range(rewards.shape[1]):  # iterate over time
        obs_ = batch[:, t]  # [env*batch, 452]
        q_t = self.model.value(obs_)  # [env*batch, 22]
        q_t = layers.reshape(q_t, [q_t.shape[0], 1, q_t.shape[-1]])
        q_t = layers.expand(q_t, [1, 4, 1])  # [env*batch, 4, 22]
        q_taken = layers.reduce_sum(q_t * actions_one_hot[:, :, t, :], dim=-1)  # [env*batch, 4]
        q_t_taken = targets[:, :, t]  # [env*batch, 4]
        td_error = q_taken - q_t_taken  # [env*batch, 4]
        q_vals_list.append(q_t)  # [env*batch, 4, 22]
        loss = layers.reduce_sum(td_error ** 2)  # [1]
        optimizer.minimize(loss)  # <- the error below is raised here
        critic_train_stats["critic_loss"].append(loss)
        critic_train_stats["td_error_abs"].append(td_error)
        critic_train_stats["q_taken_mean"].append(q_taken)
        critic_train_stats["target_mean"].append(q_t_taken)

    q_vals = layers.stack(q_vals_list, axis=2)  # [env*batch, 4, time, 22]
    for key in critic_train_stats.keys():
        critic_train_stats[key] = layers.reduce_sum(layers.stack(critic_train_stats[key]))
    return q_vals, critic_train_stats
Error:
  File , line 150, in learn
    q_vals, critic_train_stats = self._train_critic(obs, actions, last_actions, rewards, targets, lr_critic)
  File , line 116, in _train_critic
    optimizer.minimize(loss)
  File "", line 2, in minimize
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/wrapped_decorator.py", line 25, in impl
    return wrapped_func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/dygraph/base.py", line 87, in impl
    return func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/optimizer.py", line 594, in minimize
    no_grad_set=no_grad_set)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/optimizer.py", line 493, in backward
    no_grad_set, callbacks)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/backward.py", line 571, in append_backward
    input_grad_names_set=input_grad_names_set)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/paddle/fluid/backward.py", line 310, in _append_backward_ops_
    op.desc, cpt.to_text(no_grad_dict[block.idx]), grad_sub_block_list)
paddle.fluid.core_avx.EnforceNotMet: grad_op_maker should not be null
Operator GradOpMaker has not been registered. at [/home/teamcity/work/ef54dc8a5b211854/paddle/fluid/framework/op_info.h:69]
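For this second message I have not pinned down which operator is missing its GradOpMaker. One thing that looks suspicious in my own code is that optimizer.minimize(loss) is called inside the time loop, so from the second timestep on, append_backward runs over a program that already contains the gradient-clip and RMSProp ops appended by the previous minimize call; I suspect, but have not verified, that one of those appended ops is the one without a registered grad op. The toy sketch below only illustrates the restructuring I intend to try: accumulate the per-step TD losses and call minimize once after the loop. The network, names, and shapes are invented, not my real critic:

import paddle.fluid as fluid
from paddle.fluid import layers

T = 5  # invented number of timesteps
obs = layers.data(name='obs', shape=[T, 10], dtype='float32')       # [batch, T, 10]
target = layers.data(name='target', shape=[T, 1], dtype='float32')  # [batch, T, 1]

step_losses = []
for t in range(T):
    obs_t = layers.squeeze(layers.slice(obs, axes=[1], starts=[t], ends=[t + 1]), axes=[1])     # [batch, 10]
    tgt_t = layers.squeeze(layers.slice(target, axes=[1], starts=[t], ends=[t + 1]), axes=[1])  # [batch, 1]
    q_t = layers.fc(input=obs_t, size=1,
                    param_attr=fluid.ParamAttr(name='critic_w'),
                    bias_attr=fluid.ParamAttr(name='critic_b'))  # one critic shared across steps
    step_losses.append(layers.reduce_sum(layers.square(q_t - tgt_t)))  # scalar TD loss per step

critic_loss = layers.reduce_sum(layers.stack(step_losses))  # one scalar over all timesteps
optimizer = fluid.optimizer.RMSPropOptimizer(learning_rate=1e-3)
optimizer.minimize(critic_loss)  # single backward/optimize pass; no minimize inside the loop

Is calling minimize once per timestep in the same program supposed to work, or is the single-minimize form above the intended usage?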