fix(pu): fix dqfd compatibility (#161)

* polish(pu): polish r2d3 * polish(pu): first abs then sum each item in td-error * fix(pu): fix dqfd compatibility

fix(pu): fix dqfd compatibility (#161)
* polish(pu): polish r2d3 * polish(pu): first abs then sum each item in td-error * fix(pu): fix dqfd compatibility
1040c9fc · 蒲源 · GitHub · b532b4cd · 1040c9fc · 1040c9fc
隐藏空白更改
内联并排

Showing with 2 addition and 2 deletion

ding/policy/dqfd.py ding/policy/dqfd.py +1 -1

ding/rl_utils/tests/test_td.py ding/rl_utils/tests/test_td.py +1 -1

未找到文件。
--- a/ding/policy/dqfd.py
+++ b/ding/policy/dqfd.py
@@ -209,7 +209,7 @@ class DQFDPolicy(DQNPolicy):
            data['is_expert']  # set is_expert flag(expert 1, agent 0)
        )
        value_gamma = data.get('value_gamma')
-        loss, td_error_per_sample = dqfd_nstep_td_error(
+        loss, td_error_per_sample, loss_statistics = dqfd_nstep_td_error(
            data_n,
            self._gamma,
            self.lambda1,

--- a/ding/rl_utils/tests/test_td.py
+++ b/ding/rl_utils/tests/test_td.py
@@ -253,7 +253,7 @@ def test_dqfd_nstep_td():
        data = dqfd_nstep_td_data(
            q, next_q, action, next_action, reward, done, done_1, None, next_q_one_step, next_action_one_step, is_expert
        )
-        loss, td_error_per_sample = dqfd_nstep_td_error(
+        loss, td_error_per_sample, loss_statistics = dqfd_nstep_td_error(
            data, 0.95, lambda_n_step_td=1, lambda_supervised_loss=1, margin_function=0.8, nstep=nstep
        )
        assert td_error_per_sample.shape == (batch_size, )