fix the compatibility issue in the A2C example. (#98)

* fix the compatibility issue
* fix the comment issue

fix the compatibility issue in the A2C example. (#98)
* fix the compatibility issue
* fix the comment issue
33516338 · Bo Zhou · GitHub · d18f19a9 · 33516338 · 33516338
Hide whitespace changes
Inline · Side-by-side

Showing 3 changed files with 7 additions and 25 deletions

examples/A2C/actor.py examples/A2C/actor.py +1 -6

examples/A2C/atari_agent.py examples/A2C/atari_agent.py +5 -13

examples/A2C/learner.py examples/A2C/learner.py +1 -6

No files found.
--- a/examples/A2C/actor.py
+++ b/examples/A2C/actor.py
@@ -48,12 +48,7 @@ class Actor(object):
        model = AtariModel(act_dim)
        algorithm = parl.algorithms.A3C(
            model, vf_loss_coeff=config['vf_loss_coeff'])
-        self.agent = AtariAgent(
-            algorithm,
-            obs_shape=self.config['obs_shape'],
-            lr_scheduler=self.config['lr_scheduler'],
-            entropy_coeff_scheduler=self.config['entropy_coeff_scheduler'],
-        )
+        self.agent = AtariAgent(algorithm, config)
    def sample(self):
        sample_data = defaultdict(list)

--- a/examples/A2C/atari_agent.py
+++ b/examples/A2C/atari_agent.py
@@ -21,30 +21,22 @@ from parl.utils.scheduler import PiecewiseScheduler, LinearDecayScheduler
 class AtariAgent(parl.Agent):
-    def __init__(self, algorithm, obs_shape, lr_scheduler,
-                 entropy_coeff_scheduler):
+    def __init__(self, algorithm, config):
        """
        Args:
-            algorithm (`parl.Algorithm`): a2c algorithm
+            algorithm (`parl.Algorithm`): algorithm to be used in this agent.
-            obs_shape (list/tuple): observation shape of atari environment
+            config (dict): config file describing the training hyper-parameters(see a2c_config.py)
-            lr_scheduler (list/tuple): learning rate adjustment schedule: (train_step, learning_rate)
-            entropy_coeff_scheduler (list/tuple): coefficient of policy entropy adjustment schedule: (train_step, coefficient)
        """
-        assert isinstance(obs_shape, (list, tuple))
-        assert isinstance(lr_scheduler, (list, tuple))
-        assert isinstance(entropy_coeff_scheduler, (list, tuple))
-        self.obs_shape = obs_shape
-        self.lr_scheduler = lr_scheduler
-        self.entropy_coeff_scheduler = entropy_coeff_scheduler
+        self.obs_shape = config['obs_shape']
        super(AtariAgent, self).__init__(algorithm)
        self.lr_scheduler = LinearDecayScheduler(config['start_lr'],
                                                 config['max_sample_steps'])
        self.entropy_coeff_scheduler = PiecewiseScheduler(
-            self.entropy_coeff_scheduler)
+            config['entropy_coeff_scheduler'])
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True

--- a/examples/A2C/learner.py
+++ b/examples/A2C/learner.py
@@ -47,12 +47,7 @@ class Learner(object):
        model = AtariModel(act_dim)
        algorithm = parl.algorithms.A3C(
            model, vf_loss_coeff=config['vf_loss_coeff'])
-        self.agent = AtariAgent(
-            algorithm,
-            obs_shape=self.config['obs_shape'],
-            lr_scheduler=self.config['lr_scheduler'],
-            entropy_coeff_scheduler=self.config['entropy_coeff_scheduler'],
-        )
+        self.agent = AtariAgent(algorithm, config)
        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\