fix(nyz): fix qmix double_q hidden state bug

835e3c4c · niuyazhe · e22e5e43 · 835e3c4c · 835e3c4c · 835e3c4c
3 changed file
--- a/ding/model/template/qmix.py
+++ b/ding/model/template/qmix.py
@@ -121,7 +121,7 @@ class QMix(nn.Module):
        self.mixer = mixer
        if self.mixer:
            self._mixer = Mixer(agent_num, global_obs_shape, embedding_size)
-            self._global_state_encoder = nn.Sequential()
+            self._global_state_encoder = nn.Identity()

    def forward(self, data: dict, single_step: bool = True) -> dict:
        """
@@ -428,7 +428,7 @@ class CollaQ(nn.Module):
        embedding_size = hidden_size_list[-1]
        if self.mixer:
            self._mixer = Mixer(agent_num, global_obs_shape, embedding_size)
-            self._global_state_encoder = nn.Sequential()
+            self._global_state_encoder = nn.Identity()

    def forward(self, data: dict, single_step: bool = True) -> dict:
        """

--- a/ding/policy/qmix.py
+++ b/ding/policy/qmix.py
@@ -197,6 +197,7 @@ class QMIXPolicy(Policy):

        if self._cfg.learn.double_q:
            next_inputs = {'obs': data['next_obs']}
+            self._learn_model.reset(state=data['prev_state'][1])
            logit_detach = self._learn_model.forward(next_inputs, single_step=False)['logit'].clone().detach()
            next_inputs = {'obs': data['next_obs'], 'action': logit_detach.argmax(dim=-1)}
        else:

--- a/dizoo/multiagent_particle/config/cooperative_navigation_qmix_config.py
+++ b/dizoo/multiagent_particle/config/cooperative_navigation_qmix_config.py
@@ -32,6 +32,7 @@ main_config = dict(
            learning_rate=0.0005,
            target_update_theta=0.001,
            discount_factor=0.99,
+            double_q=True,
        ),
        collect=dict(
            n_sample=600,