From 26834ab6a08213a31632c4dbd99faba9c19c2a3e Mon Sep 17 00:00:00 2001
From: xiaowei_xing <997427575@qq.com>
Date: Sun, 24 Nov 2019 22:22:05 +0900
Subject: [PATCH] test

---
 docs/10.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/10.md b/docs/10.md
index e5db421..509e414 100644
--- a/docs/10.md
+++ b/docs/10.md
@@ -200,9 +200,11 @@ $$
 $$
 
 $$
-= 2\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2 b] - 2\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2 r(tau)] = 0,
+= 2\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2 b] - 2\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2 r(\tau)] = 0,
 $$
 
 $$
-b = \frac{\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2 r(tau)]}{\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2]}。
-$$
\ No newline at end of file
+b = \frac{\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2 r(\tau)]}{\mathbb{E} [(\nabla_{\theta} \log \pi_{\theta}(\tau))^2]}。
+$$
+
+## 3. 离线策略策略梯度(Off Policy Policy Gradient)
\ No newline at end of file
--
GitLab
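
For reference, a minimal Python sketch of how the variance-minimizing baseline in the corrected formula, b = E[(∇_θ log π_θ(τ))² r(τ)] / E[(∇_θ log π_θ(τ))²], could be estimated from sampled trajectories. This is only an illustration under simplifying assumptions: a scalar policy parameter θ, and names such as `estimate_optimal_baseline`, `grad_log_probs`, and `returns` that are invented here and do not appear in docs/10.md; the sample numbers are made up.

```python
import numpy as np

def estimate_optimal_baseline(grad_log_probs, returns):
    """Monte Carlo estimate of b = E[(grad log pi(tau))^2 r(tau)] / E[(grad log pi(tau))^2].

    grad_log_probs: shape (N,), entry i is grad_theta log pi_theta(tau_i)
        for sampled trajectory tau_i (scalar theta assumed for simplicity).
    returns: shape (N,), entry i is the trajectory return r(tau_i).
    """
    g2 = grad_log_probs ** 2
    # Ratio of sample sums equals ratio of sample means of g^2 * r and g^2.
    return np.sum(g2 * returns) / np.sum(g2)

# Illustrative usage with made-up samples: baseline-adjusted REINFORCE-style estimate.
grad_log_probs = np.array([0.5, -1.2, 0.8, 0.3])
returns = np.array([1.0, 0.2, 1.5, 0.7])
b = estimate_optimal_baseline(grad_log_probs, returns)
grad_estimate = np.mean(grad_log_probs * (returns - b))
```

Subtracting this constant b leaves the policy gradient estimator unbiased while minimizing its variance over constant baselines, which is the point of the derivation that the patched lines conclude.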