diff --git a/labml_nn/rl/ppo/gae.py b/labml_nn/rl/ppo/gae.py index f9ffb140ff99b9738ad905e53dfde583442087f3..a99f9506e6b3cad9b149adb15ba48e2ec731deb9 100644 --- a/labml_nn/rl/ppo/gae.py +++ b/labml_nn/rl/ppo/gae.py @@ -33,7 +33,7 @@ class GAE: \hat{A_t^{(\infty)}} &= r_t + \gamma r_{t+1} +\gamma^2 r_{t+2} + ... - V(s) \end{align} - $\hat{A_t^{(1)}}$ is high bias, low variance whilst + $\hat{A_t^{(1)}}$ is high bias, low variance, whilst $\hat{A_t^{(\infty)}}$ is unbiased, high variance. We take a weighted average of $\hat{A_t^{(k)}}$ to balance bias and variance.