fix(wyh): add model test and policy/entry test and remove unused qacd(#92)

* fix(wyh): model test and policy/entry test * fix(wyh): delete qacd * fix(wyh): test serial entry onpolicy

fix(wyh): add model test and policy/entry test and remove unused qacd(#92)
* fix(wyh): model test and policy/entry test * fix(wyh): delete qacd * fix(wyh): test serial entry onpolicy
1568e53d · Weiyuhong-1998 · GitHub · e2ca8738 · 1568e53d · e2ca8738
3 changed files
--- a/ding/entry/tests/test_serial_entry_onpolicy.py
+++ b/ding/entry/tests/test_serial_entry_onpolicy.py
+import pytest
+import time
+import os
+from copy import deepcopy
+
+from ding.entry import serial_pipeline_onpolicy
+from dizoo.classic_control.cartpole.config.cartpole_ppo_config import cartpole_ppo_config, cartpole_ppo_create_config
+
+
+@pytest.mark.unittest
+def test_mappo():
+    """
+    Overview:
+        Smoke test: run ``serial_pipeline_onpolicy`` for one iteration with ``seed=0`` and fail with \
+        ``"pipeline fail"`` if the pipeline raises any ``Exception``.
+    """
+    # NOTE(review): despite the test name, the configs used here are the CartPole PPO ones.
+    # Deep copies keep the imported config objects untouched by the override below.
+    config = [deepcopy(cartpole_ppo_config), deepcopy(cartpole_ppo_create_config)]
+    config[0].policy.learn.epoch_per_collect = 1
+    try:
+        serial_pipeline_onpolicy(config, seed=0, max_iterations=1)
+    except Exception as e:
+        # Raise explicitly rather than ``assert False``: assert statements are removed under
+        # ``python -O``, which would let a broken pipeline pass. Chaining keeps the root cause visible.
+        raise AssertionError("pipeline fail") from e
--- a/ding/model/template/qacd.py
+++ b/ding/model/template/qacd.py
-from typing import Union, Dict, Optional
-import torch
-import torch.nn as nn
-
-from ding.utils import SequenceType, squeeze, MODEL_REGISTRY
-from ..common import ReparameterizationHead, RegressionHead, DiscreteHead, MultiHead, \
-    FCEncoder, ConvEncoder
-
-
-@MODEL_REGISTRY.register('qacd')
-class QACD(nn.Module):
-    r"""
-    Overview:
-        The QACD model.
-    Interfaces:
-        ``__init__``, ``forward``, ``compute_actor``, ``compute_critic``
-    """
-    mode = ['compute_actor', 'compute_critic']
-
-    def __init__(
-            self,
-            obs_shape: Union[int, SequenceType],
-            action_shape: Union[int, SequenceType],
-            encoder_hidden_size_list: SequenceType = [128, 128, 64],
-            actor_head_hidden_size: int = 64,
-            actor_head_layer_num: int = 1,
-            critic_head_hidden_size: int = 64,
-            critic_head_layer_num: int = 1,
-            activation: Optional[nn.Module] = nn.ReLU(),
-            norm_type: Optional[str] = None,
-    ) -> None:
-        r"""
-        Overview:
-            Init the QAC Model according to arguments.
-        Arguments:
-            - obs_shape (:obj:`Union[int, SequenceType]`): Observation's space.
-            - action_shape (:obj:`Union[int, SequenceType]`): Action's space.
-            - actor_head_type (:obj:`str`): Whether choose ``regression`` or ``reparameterization``.
-            - twin_critic (:obj:`bool`): Whether include twin critic.
-            - actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``.
-            - actor_head_layer_num (:obj:`int`):
-                The num of layers used in the network to compute Q value output for actor's nn.
-            - critic_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to critic-nn's ``Head``.
-            - critic_head_layer_num (:obj:`int`):
-                The num of layers used in the network to compute Q value output for critic's nn.
-            - activation (:obj:`Optional[nn.Module]`):
-                The type of activation function to use in ``MLP`` the after ``layer_fn``,
-                if ``None`` then default set to ``nn.ReLU()``
-            - norm_type (:obj:`Optional[str]`):
-                The type of normalization to use, see ``ding.torch_utils.fc_block`` for more details.
-        """
-        super(QACD, self).__init__()
-        obs_shape: int = squeeze(obs_shape)
-        action_shape: int = squeeze(action_shape)
-        if isinstance(obs_shape, int) or len(obs_shape) == 1:
-            encoder_cls = FCEncoder
-        elif len(obs_shape) == 3:
-            encoder_cls = ConvEncoder
-        else:
-            raise RuntimeError(
-                "not support obs_shape for pre-defined encoder: {}, please customize your own DQN".format(obs_shape)
-            )
-
-        self.actor_encoder = encoder_cls(
-            obs_shape, encoder_hidden_size_list, activation=activation, norm_type=norm_type
-        )
-        self.critic_encoder = encoder_cls(
-            obs_shape, encoder_hidden_size_list, activation=activation, norm_type=norm_type
-        )
-
-        self.critic_head = RegressionHead(
-            critic_head_hidden_size, action_shape, critic_head_layer_num, activation=activation, norm_type=norm_type
-        )
-        self.actor_head = DiscreteHead(
-            actor_head_hidden_size, action_shape, actor_head_layer_num, activation=activation, norm_type=norm_type
-        )
-        self.actor = [self.actor_encoder, self.actor_head]
-        self.critic = [self.critic_encoder, self.critic_head]
-        self.actor = nn.ModuleList(self.actor)
-        self.critic = nn.ModuleList(self.critic)
-
-    def forward(self, inputs: Union[torch.Tensor, Dict], mode: str) -> Dict:
-        r"""
-        Overview:
-            Use bbservation and action tensor to predict output.
-            Parameter updates with QAC's MLPs forward setup.
-        Arguments:
-            Forward with ``'compute_actor'``:
-                - inputs (:obj:`torch.Tensor`):
-                    The encoded embedding tensor, determined with given ``hidden_size``, i.e. ``(B, N=hidden_size)``.
-                    Whether ``actor_head_hidden_size`` or ``critic_head_hidden_size`` depend on ``mode``.
-
-            Forward with ``'compute_critic'``, inputs (`Dict`) Necessary Keys:
-                - ``obs``, ``action`` encoded tensors.
-
-            - mode (:obj:`str`): Name of the forward mode.
-        Returns:
-            - outputs (:obj:`Dict`): Outputs of network forward.
-
-                Forward with ``'compute_actor'``, Necessary Keys (either):
-                    - action (:obj:`torch.Tensor`): Action tensor with same size as input ``x``.
-                    - logit (:obj:`torch.Tensor`):
-                        Logit tensor encoding ``mu`` and ``sigma``, both with same size as input ``x``.
-
-                Forward with ``'compute_critic'``, Necessary Keys:
-                    - q_value (:obj:`torch.Tensor`): Q value tensor with same size as batch size.
-        Actor Shapes:
-            - inputs (:obj:`torch.Tensor`): :math:`(B, N0)`, B is batch size and N0 corresponds to ``hidden_size``
-            - action (:obj:`torch.Tensor`): :math:`(B, N0)`
-            - q_value (:obj:`torch.FloatTensor`): :math:`(B, )`, where B is batch size.
-
-        Critic Shapes:
-            - obs (:obj:`torch.Tensor`): :math:`(B, N1)`, where B is batch size and N1 is ``obs_shape``
-            - action (:obj:`torch.Tensor`): :math:`(B, N2)`, where B is batch size and N2 is``action_shape``
-            - logit (:obj:`torch.FloatTensor`): :math:`(B, N2)`, where B is batch size and N3 is ``action_shape``
-
-        Actor Examples:
-            >>> # Regression mode
-            >>> model = QAC(64, 64, 'regression')
-            >>> inputs = torch.randn(4, 64)
-            >>> actor_outputs = model(inputs,'compute_actor')
-            >>> assert actor_outputs['action'].shape == torch.Size([4, 64])
-            >>> # Reparameterization Mode
-            >>> model = QAC(64, 64, 'reparameterization')
-            >>> inputs = torch.randn(4, 64)
-            >>> actor_outputs = model(inputs,'compute_actor')
-            >>> actor_outputs['logit'][0].shape # mu
-            >>> torch.Size([4, 64])
-            >>> actor_outputs['logit'][1].shape # sigma
-            >>> torch.Size([4, 64])
-
-        Critic Examples:
-            >>> inputs = {'obs': torch.randn(4,N), 'action': torch.randn(4,1)}
-            >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression')
-            >>> model(inputs, mode='compute_critic')['q_value'] # q value
-            tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=<SqueezeBackward1>)
-
-        """
-        assert mode in self.mode, "not support forward mode: {}/{}".format(mode, self.mode)
-        return getattr(self, mode)(inputs)
-
-    def compute_actor(self, inputs: torch.Tensor) -> Dict:
-        r"""
-        Overview:
-            Use encoded embedding tensor to predict output.
-            Execute parameter updates with ``'compute_actor'`` mode
-            Use encoded embedding tensor to predict output.
-        Arguments:
-            - inputs (:obj:`torch.Tensor`):
-                The encoded embedding tensor, determined with given ``hidden_size``, i.e. ``(B, N=hidden_size)``.
-                ``hidden_size = actor_head_hidden_size``
-            - mode (:obj:`str`): Name of the forward mode.
-        Returns:
-            - outputs (:obj:`Dict`): Outputs of forward pass encoder and head.
-
-        ReturnsKeys (either):
-            - action (:obj:`torch.Tensor`): Continuous action tensor with same size as ``action_shape``.
-            - logit (:obj:`torch.Tensor`):
-                Logit tensor encoding ``mu`` and ``sigma``, both with same size as input ``x``.
-        Shapes:
-            - inputs (:obj:`torch.Tensor`): :math:`(B, N0)`, B is batch size and N0 corresponds to ``hidden_size``
-            - action (:obj:`torch.Tensor`): :math:`(B, N0)`
-            - logit (:obj:`list`): 2 elements, mu and sigma, each is the shape of :math:`(B, N0)`.
-            - q_value (:obj:`torch.FloatTensor`): :math:`(B, )`, B is batch size.
-        Examples:
-            >>> # Regression mode
-            >>> model = QAC(64, 64, 'regression')
-            >>> inputs = torch.randn(4, 64)
-            >>> actor_outputs = model(inputs,'compute_actor')
-            >>> assert actor_outputs['action'].shape == torch.Size([4, 64])
-            >>> # Reparameterization Mode
-            >>> model = QAC(64, 64, 'reparameterization')
-            >>> inputs = torch.randn(4, 64)
-            >>> actor_outputs = model(inputs,'compute_actor')
-            >>> actor_outputs['logit'][0].shape # mu
-            >>> torch.Size([4, 64])
-            >>> actor_outputs['logit'][1].shape # sigma
-            >>> torch.Size([4, 64])
-        """
-        x = self.actor(inputs)
-        if self.actor_head_type == 'regression':
-            return {'action': x['pred']}
-        elif self.actor_head_type == 'reparameterization':
-            return {'logit': [x['mu'], x['sigma']]}
-
-    def compute_critic(self, inputs: Dict) -> Dict:
-        r"""
-        Overview:
-            Execute parameter updates with ``'compute_critic'`` mode
-            Use encoded embedding tensor to predict output.
-        Arguments:
-            - ``obs``, ``action`` encoded tensors.
-            - mode (:obj:`str`): Name of the forward mode.
-        Returns:
-            - outputs (:obj:`Dict`): Q-value output.
-
-        ReturnKeys:
-            - q_value (:obj:`torch.Tensor`): Q value tensor with same size as batch size.
-        Shapes:
-            - obs (:obj:`torch.Tensor`): :math:`(B, N1)`, where B is batch size and N1 is ``obs_shape``
-            - action (:obj:`torch.Tensor`): :math:`(B, N2)`, where B is batch size and N2 is ``action_shape``
-            - q_value (:obj:`torch.FloatTensor`): :math:`(B, )`, where B is batch size.
-
-        Examples:
-            >>> inputs = {'obs': torch.randn(4, N), 'action': torch.randn(4, 1)}
-            >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression')
-            >>> model(inputs, mode='compute_critic')['q_value'] # q value
-            tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=<SqueezeBackward1>)
-
-        """
-
-        obs, action = inputs['obs'], inputs['action']
-        assert len(obs.shape) == 2
-        if len(action.shape) == 1:  # (B, ) -> (B, 1)
-            action = action.unsqueeze(1)
-        x = torch.cat([obs, action], dim=1)
-        if self.twin_critic:
-            x = [m(x)['pred'] for m in self.critic]
-        else:
-            x = self.critic(x)['pred']
-        return {'q_value': x}
--- a/ding/model/template/tests/test_mappo.py
+++ b/ding/model/template/tests/test_mappo.py
+import pytest
+import numpy as np
+import torch
+from itertools import product
+
+from ding.model import MAPPO
+from ding.torch_utils import is_differentiable
+
+B = 32  # leading (batch) dimension of every tensor built in the test below
+agent_obs_shape = [216, 265]  # candidate sizes for the last dim of 'agent_state'
+global_obs_shape = [264, 324]  # candidate sizes for the last dim of 'global_state'
+agent_num = 8  # second dimension of every tensor built in the test below
+action_shape = 14  # last dim of 'action_mask'; also passed to the MAPPO constructor
+args = list(product(*[agent_obs_shape, global_obs_shape]))  # every (agent_obs, global_obs) size pair
+
+
+@pytest.mark.unittest
+@pytest.mark.parametrize('agent_obs_shape, global_obs_shape', args)
+class TestVAC:
+    """
+    Overview:
+        Forward/backward smoke tests for ``MAPPO`` in the ``compute_actor_critic``, ``compute_actor`` \
+        and ``compute_critic`` modes, parametrized over every pair in ``args``.
+    """
+
+    def output_check(self, model, outputs, action_shape):
+        """
+        Overview:
+            Reduce ``outputs`` to a single loss tensor and hand it to ``is_differentiable`` together \
+            with ``model``.
+        Arguments:
+            - model: The module (or sub-module) forwarded to ``is_differentiable``.
+            - outputs: An iterable of tensors when ``action_shape`` is a tuple, otherwise one tensor.
+            - action_shape: Selects the reduction; must be a tuple or a scalar.
+        Raises:
+            - TypeError: If ``action_shape`` is neither a tuple nor a scalar.
+        """
+        if isinstance(action_shape, tuple):
+            loss = sum([t.sum() for t in outputs])
+        elif np.isscalar(action_shape):
+            loss = outputs.sum()
+        else:
+            # Previously this path left ``loss`` unbound and surfaced as an opaque UnboundLocalError.
+            raise TypeError("not support action_shape type: {}".format(type(action_shape)))
+        # presumably verifies that back-propagating ``loss`` reaches ``model``'s parameters
+        # -- confirm against ``ding.torch_utils.is_differentiable``
+        is_differentiable(loss, model)
+
+    def test_vac(self, agent_obs_shape, global_obs_shape):
+        """
+        Overview:
+            Build random inputs, then check each forward mode of ``MAPPO`` in turn, clearing the \
+            parameter gradients between checks.
+        Arguments:
+            - agent_obs_shape: Size of the last dim of ``agent_state`` (parametrized).
+            - global_obs_shape: Size of the last dim of ``global_state`` (parametrized).
+        """
+        data = {
+            'agent_state': torch.randn(B, agent_num, agent_obs_shape),
+            'global_state': torch.randn(B, agent_num, global_obs_shape),
+            'action_mask': torch.randint(0, 2, size=(B, agent_num, action_shape))
+        }
+        model = MAPPO(agent_obs_shape, global_obs_shape, action_shape, agent_num)
+
+        # One forward pass yields both heads; no need to run the network twice.
+        actor_critic_output = model(data, mode='compute_actor_critic')
+        logit = actor_critic_output['logit']
+        value = actor_critic_output['value']
+
+        outputs = value.sum() + logit.sum()
+        self.output_check(model, outputs, action_shape)
+
+        # Drop gradients left by the previous check before testing the actor alone.
+        for p in model.parameters():
+            p.grad = None
+        logit = model(data, mode='compute_actor')['logit']
+        self.output_check(model.actor, logit, model.action_shape)
+
+        # Same reset before testing the critic alone.
+        for p in model.parameters():
+            p.grad = None
+        value = model(data, mode='compute_critic')['value']
+        assert value.shape == (B, agent_num)
+        self.output_check(model.critic, value, action_shape)