diff --git a/ding/envs/env/base_env.py b/ding/envs/env/base_env.py
index 434d9f203c49a60013f75058248cda6dda62fb39..28564d7e3faf3d6829bce6232367a0f93f0fd3d4 100644
--- a/ding/envs/env/base_env.py
+++ b/ding/envs/env/base_env.py
@@ -1,6 +1,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, List, Tuple
 import gym
+import copy
 from easydict import EasyDict
 from namedlist import namedlist
 from collections import namedtuple
@@ -16,10 +17,15 @@ class BaseEnv(ABC, gym.Env):
         basic environment class, extended from ``gym.Env``
     Interface:
         ``__init__``, ``reset``, ``close``, ``step``, ``info``, ``create_collector_env_cfg``, \
-        ``create_evaluator_env_cfg``,
-        ``enable_save_replay``
+        ``create_evaluator_env_cfg``, ``enable_save_replay``, ``default_config``
     """
 
+    @classmethod
+    def default_config(cls: type) -> EasyDict:
+        cfg = EasyDict(copy.deepcopy(cls.config))
+        cfg.cfg_type = cls.__name__ + 'Dict'
+        return cfg
+
     @abstractmethod
     def __init__(self, cfg: dict) -> None:
         """
diff --git a/dizoo/minigrid/__init__.py b/dizoo/minigrid/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dizoo/minigrid/envs/__init__.py b/dizoo/minigrid/envs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f8da547ea9ccfd1eec5f9d4772695ba8cda36ba
--- /dev/null
+++ b/dizoo/minigrid/envs/__init__.py
@@ -0,0 +1 @@
+from .minigrid_env import MiniGridEnv
diff --git a/dizoo/minigrid/envs/minigrid_env.py b/dizoo/minigrid/envs/minigrid_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e3257cd00b36f39931748c6cb7eed0c0d0d55f6
--- /dev/null
+++ b/dizoo/minigrid/envs/minigrid_env.py
@@ -0,0 +1,103 @@
+from typing import Any, List, Union, Optional
+from collections import namedtuple
+import time
+import gym
+import numpy as np
+from gym_minigrid.wrappers import FlatObsWrapper, RGBImgPartialObsWrapper, ImgObsWrapper
+
+from ding.envs import BaseEnv, BaseEnvTimestep, BaseEnvInfo
+from ding.envs.common.env_element import EnvElement, EnvElementInfo
+from ding.torch_utils import to_tensor, to_ndarray, to_list
+from ding.utils import ENV_REGISTRY
+
+MINIGRID_INFO_DICT = {
+    'MiniGrid-Empty-8x8-v0': BaseEnvInfo(
+        agent_num=1,
+        obs_space=EnvElementInfo(shape=(2739, ), value={
+            'min': 0,
+            'max': 5,
+            'dtype': np.float32
+        }),
+        act_space=EnvElementInfo(shape=(1, ), value={
+            'min': 0,
+            'max': 7,
+            'dtype': np.int64,
+        }),
+        rew_space=EnvElementInfo(shape=(1, ), value={
+            'min': 0,
+            'max': 1,
+            'dtype': np.float32
+        }),
+        use_wrappers=None,
+    ),
+}
+
+
+@ENV_REGISTRY.register('minigrid')
+class MiniGridEnv(BaseEnv):
+    config = dict(
+        env_id='MiniGrid-Empty-8x8-v0',
+        flat_obs=True,
+    )
+
+    def __init__(self, cfg: dict) -> None:
+        self._cfg = cfg
+        self._init_flag = False
+        self._env_id = cfg.env_id
+        self._flat_obs = cfg.flat_obs
+
+    def reset(self) -> np.ndarray:
+        if not self._init_flag:
+            self._env = gym.make(self._env_id)
+            if self._flat_obs:
+                self._env = FlatObsWrapper(self._env)
+                # self._env = RGBImgPartialObsWrapper(self._env)
+                # self._env = ImgObsWrapper(self._env)
+            self._init_flag = True
+        if hasattr(self, '_seed') and hasattr(self, '_dynamic_seed') and self._dynamic_seed:
+            np_seed = 100 * np.random.randint(1, 1000)
+            self._env.seed(self._seed + np_seed)
+        elif hasattr(self, '_seed'):
+            self._env.seed(self._seed)
+        self._final_eval_reward = 0
+        obs = self._env.reset()
+        obs = to_ndarray(obs).astype(np.float32)
+        return obs
+
+    def close(self) -> None:
+        if self._init_flag:
+            self._env.close()
+        self._init_flag = False
+
+    def render(self) -> None:
+        self._env.render()
+
+    def seed(self, seed: int, dynamic_seed: bool = True) -> None:
+        self._seed = seed
+        self._dynamic_seed = dynamic_seed
+        np.random.seed(self._seed)
+
+    def step(self, action: np.ndarray) -> BaseEnvTimestep:
+        assert isinstance(action, np.ndarray), type(action)
+        if action.shape == (1, ):
+            action = action.squeeze()  # 0-dim array
+        obs, rew, done, info = self._env.step(action)
+        rew = float(rew)
+        self._final_eval_reward += rew
+        if done:
+            info['final_eval_reward'] = self._final_eval_reward
+        obs = to_ndarray(obs).astype(np.float32)
+        rew = to_ndarray([rew])  # wrapped to be transferred to a Tensor with shape (1,)
+        return BaseEnvTimestep(obs, rew, done, info)
+
+    def info(self) -> BaseEnvInfo:
+        return MINIGRID_INFO_DICT[self._env_id]
+
+    def __repr__(self) -> str:
+        return "DI-engine MiniGrid Env"
+
+    def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
+        if replay_path is None:
+            replay_path = './video'
+        self._replay_path = replay_path
+        raise NotImplementedError
diff --git a/dizoo/minigrid/envs/test_minigrid_env.py b/dizoo/minigrid/envs/test_minigrid_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..544f210fcfdbd1b90d6bc1b66b29b11e32efa9d8
--- /dev/null
+++ b/dizoo/minigrid/envs/test_minigrid_env.py
@@ -0,0 +1,27 @@
+import pytest
+import numpy as np
+from dizoo.minigrid.envs import MiniGridEnv
+
+
+@pytest.mark.unittest
+class TestMiniGridEnv:
+
+    def test_naive(self):
+        env = MiniGridEnv(MiniGridEnv.default_config())
+        env.seed(314)
+        assert env._seed == 314
+        obs = env.reset()
+        act_val = env.info().act_space.value
+        min_val, max_val = act_val['min'], act_val['max']
+        for i in range(10):
+            random_action = np.random.randint(min_val, max_val, size=(1, ))
+            timestep = env.step(random_action)
+            print(timestep)
+            assert isinstance(timestep.obs, np.ndarray)
+            assert isinstance(timestep.done, bool)
+            assert timestep.obs.shape == (2739, )
+            assert timestep.reward.shape == (1, )
+            assert timestep.reward >= env.info().rew_space.value['min']
+            assert timestep.reward <= env.info().rew_space.value['max']
+        print(env.info())
+        env.close()
diff --git a/setup.py b/setup.py
index 36734e0f822fa5777e5689c87cfb3082ba50b35c..75955a1c3c332e718995cdf3f7b35276d3bd5293 100755
--- a/setup.py
+++ b/setup.py
@@ -105,6 +105,9 @@ setup(
         'procgen_env': [
             'procgen',
         ],
+        'minigrid_env': [
+            'gym-minigrid',
+        ],
         'sc2_env': [
             'absl-py>=0.1.0',
             'future',
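
Usage sketch (not part of the diff): a minimal rollout with the new env, assuming DI-engine and gym-minigrid are installed; the seed and random action sampling are arbitrary choices for illustration.

# Illustrative only: exercises MiniGridEnv.default_config() and the step loop
# added in this diff; assumes `pip install gym-minigrid` (the new 'minigrid_env' extra).
import numpy as np
from dizoo.minigrid.envs import MiniGridEnv

cfg = MiniGridEnv.default_config()   # EasyDict copy of MiniGridEnv.config
env = MiniGridEnv(cfg)
env.seed(0)
obs = env.reset()                    # flat observation of shape (2739,)
act_value = env.info().act_space.value
while True:
    # sample a random discrete action in [min, max), shaped (1,) as step() expects
    action = np.random.randint(act_value['min'], act_value['max'], size=(1, ))
    timestep = env.step(action)
    if timestep.done:
        print('final_eval_reward:', timestep.info['final_eval_reward'])
        break
env.close()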