关于 mujoco_agent 中的 'CLIP' 部分
Created by: Ryan906k9
在 PARL/examples/PPO/mujoco_agent.py 这个文件的代码中,我只看到了 'KLPEN' 部分的实现(见下面的 policy_learn 方法),请问 'CLIP' 部分是在哪里实现的呢?
def policy_learn(self, obs, actions, advantages):
    """Run one round of policy updates and return the averaged statistics.

    Steps:
        1. Sync parameters of the policy model to the old policy model.
        2. Keep the old policy model fixed and update the policy model
           ``self.policy_learn_times`` times on the same batch.
        3. If the 'KLPEN' loss is used, adjust the KL penalty coefficient
           ``self.beta`` according to the KL value of the last update.

    Args:
        obs: forwarded unchanged to ``self._batch_policy_learn``.
        actions: forwarded unchanged to ``self._batch_policy_learn``.
        advantages: forwarded unchanged to ``self._batch_policy_learn``.

    Returns:
        tuple: ``(mean_loss, mean_kl)``, the ``np.mean`` of the loss and KL
        values collected over all updates. If ``self.policy_learn_times``
        is 0, no update runs, ``self.beta`` is left untouched and both
        means are NaN (``np.mean`` of an empty list).
    """
    self.alg.sync_old_policy()

    all_loss, all_kl = [], []
    for _ in range(self.policy_learn_times):
        loss, kl = self._batch_policy_learn(obs, actions, advantages)
        all_loss.append(loss)
        all_kl.append(kl)

    # Guard on `all_kl`: with zero updates there is no KL value to compare
    # against the target, and reading the loop variable would raise NameError.
    if self.loss_type == 'KLPEN' and all_kl:
        # Adaptive KL penalty coefficient, driven by the most recent KL.
        last_kl = all_kl[-1]
        if last_kl > self.kl_targ * 2:
            self.beta = 1.5 * self.beta
        elif last_kl < self.kl_targ / 2:
            self.beta = self.beta / 1.5

    return np.mean(all_loss), np.mean(all_kl)