From 8c6711c84ac74bf517f808d03d0c9d9943fbbe38 Mon Sep 17 00:00:00 2001
From: pkpk
Date: Tue, 27 Aug 2019 21:31:14 +0800
Subject: [PATCH] test=develop (#3205)

---
 .gitmodules                                   |  3 +++
 PaddleRL                                      |  1 +
 .../PaddleRL}/DeepQNetwork/DQN_agent.py       | 13 ++++++------
 .../PaddleRL}/DeepQNetwork/DoubleDQN_agent.py | 19 +++++++++---------
 .../DeepQNetwork/DuelingDQN_agent.py          | 13 ++++++------
 .../PaddleRL}/DeepQNetwork/README.md          |  0
 .../PaddleRL}/DeepQNetwork/README_cn.md       |  0
 .../PaddleRL}/DeepQNetwork/assets/dqn.png     | Bin
 .../PaddleRL}/DeepQNetwork/atari.py           |  0
 .../PaddleRL}/DeepQNetwork/atari_wrapper.py   |  0
 .../PaddleRL}/DeepQNetwork/expreplay.py       |  0
 .../PaddleRL}/DeepQNetwork/play.py            |  0
 .../PaddleRL}/DeepQNetwork/requirement.txt    |  0
 .../DeepQNetwork/rom_files/breakout.bin       | Bin
 .../PaddleRL}/DeepQNetwork/rom_files/pong.bin | Bin
 .../PaddleRL}/DeepQNetwork/train.py           |  0
 {PaddleRL => legacy/PaddleRL}/README.md       |  0
 .../PaddleRL}/policy_gradient/README.md       |  0
 .../PaddleRL}/policy_gradient/brain.py        |  0
 .../PaddleRL}/policy_gradient/env.py          |  0
 .../PaddleRL}/policy_gradient/images/PG_1.svg |  0
 .../PaddleRL}/policy_gradient/images/PG_2.svg |  0
 .../PaddleRL}/policy_gradient/images/PG_3.svg |  0
 .../PaddleRL}/policy_gradient/run.py          |  0
 24 files changed, 26 insertions(+), 23 deletions(-)
 create mode 160000 PaddleRL
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/DQN_agent.py (96%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/DoubleDQN_agent.py (93%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/DuelingDQN_agent.py (96%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/README.md (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/README_cn.md (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/assets/dqn.png (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/atari.py (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/atari_wrapper.py (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/expreplay.py (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/play.py (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/requirement.txt (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/rom_files/breakout.bin (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/rom_files/pong.bin (100%)
 rename {PaddleRL => legacy/PaddleRL}/DeepQNetwork/train.py (100%)
 rename {PaddleRL => legacy/PaddleRL}/README.md (100%)
 rename {PaddleRL => legacy/PaddleRL}/policy_gradient/README.md (100%)
 rename {PaddleRL => legacy/PaddleRL}/policy_gradient/brain.py (100%)
 rename {PaddleRL => legacy/PaddleRL}/policy_gradient/env.py (100%)
 rename {PaddleRL => legacy/PaddleRL}/policy_gradient/images/PG_1.svg (100%)
 rename {PaddleRL => legacy/PaddleRL}/policy_gradient/images/PG_2.svg (100%)
 rename {PaddleRL => legacy/PaddleRL}/policy_gradient/images/PG_3.svg (100%)
 rename {PaddleRL => legacy/PaddleRL}/policy_gradient/run.py (100%)

diff --git a/.gitmodules b/.gitmodules
index 3f07fecc..a440456e 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "PaddleNLP/language_representations_kit/ERNIE"]
 	path = PaddleNLP/language_representations_kit/ERNIE
 	url = https://github.com/PaddlePaddle/ERNIE
+[submodule "PaddleRL"]
+	path = PaddleRL
+	url = https://github.com/PaddlePaddle/PARL
diff --git a/PaddleRL b/PaddleRL
new file mode 160000
index 00000000..a8846355
--- /dev/null
+++ b/PaddleRL
@@ -0,0 +1 @@
+Subproject commit a884635519c529c69c34e1134ca6c9d99f2c0007
diff --git a/PaddleRL/DeepQNetwork/DQN_agent.py b/legacy/PaddleRL/DeepQNetwork/DQN_agent.py
similarity index 96%
rename from PaddleRL/DeepQNetwork/DQN_agent.py
rename to legacy/PaddleRL/DeepQNetwork/DQN_agent.py
index 1b27051a..60f86a59 100644
--- a/PaddleRL/DeepQNetwork/DQN_agent.py
+++ b/legacy/PaddleRL/DeepQNetwork/DQN_agent.py
@@ -71,17 +71,17 @@ class DQNModel(object):
         optimizer.minimize(cost)
 
         vars = list(self.train_program.list_vars())
-        target_vars = list(filter(
-            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
+        target_vars = list(
+            filter(lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
 
         policy_vars_name = [
-            x.name.replace('target', 'policy') for x in target_vars]
-        policy_vars = list(filter(
-            lambda x: x.name in policy_vars_name, vars))
+            x.name.replace('target', 'policy') for x in target_vars
+        ]
+        policy_vars = list(filter(lambda x: x.name in policy_vars_name, vars))
 
         policy_vars.sort(key=lambda x: x.name)
         target_vars.sort(key=lambda x: x.name)
-        
+
         with fluid.program_guard(self._sync_program):
             sync_ops = []
             for i, var in enumerate(policy_vars):
@@ -153,7 +153,6 @@ class DQNModel(object):
             bias_attr=ParamAttr(name='{}_fc1_b'.format(variable_field)))
         return out
 
-
     def act(self, state, train_or_test):
         sample = np.random.random()
         if train_or_test == 'train' and sample < self.exploration:
diff --git a/PaddleRL/DeepQNetwork/DoubleDQN_agent.py b/legacy/PaddleRL/DeepQNetwork/DoubleDQN_agent.py
similarity index 93%
rename from PaddleRL/DeepQNetwork/DoubleDQN_agent.py
rename to legacy/PaddleRL/DeepQNetwork/DoubleDQN_agent.py
index ecd94abd..87997cd8 100644
--- a/PaddleRL/DeepQNetwork/DoubleDQN_agent.py
+++ b/legacy/PaddleRL/DeepQNetwork/DoubleDQN_agent.py
@@ -64,9 +64,11 @@ class DoubleDQNModel(object):
             greedy_action = fluid.layers.argmax(next_s_predcit_value, axis=1)
             greedy_action = fluid.layers.unsqueeze(greedy_action, axes=[1])
 
-            predict_onehot = fluid.layers.one_hot(greedy_action, self.action_dim)
+            predict_onehot = fluid.layers.one_hot(greedy_action,
+                                                  self.action_dim)
             best_v = fluid.layers.reduce_sum(
-                fluid.layers.elementwise_mul(predict_onehot, targetQ_predict_value),
+                fluid.layers.elementwise_mul(predict_onehot,
+                                             targetQ_predict_value),
                 dim=1)
             best_v.stop_gradient = True
 
@@ -79,17 +81,17 @@ class DoubleDQNModel(object):
         optimizer.minimize(cost)
 
         vars = list(self.train_program.list_vars())
-        target_vars = list(filter(
-            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
+        target_vars = list(
+            filter(lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
 
         policy_vars_name = [
-            x.name.replace('target', 'policy') for x in target_vars]
-        policy_vars = list(filter(
-            lambda x: x.name in policy_vars_name, vars))
+            x.name.replace('target', 'policy') for x in target_vars
+        ]
+        policy_vars = list(filter(lambda x: x.name in policy_vars_name, vars))
 
         policy_vars.sort(key=lambda x: x.name)
         target_vars.sort(key=lambda x: x.name)
-        
+
         with fluid.program_guard(self._sync_program):
             sync_ops = []
             for i, var in enumerate(policy_vars):
@@ -161,7 +163,6 @@ class DoubleDQNModel(object):
             bias_attr=ParamAttr(name='{}_fc1_b'.format(variable_field)))
         return out
 
-
    def act(self, state, train_or_test):
         sample = np.random.random()
         if train_or_test == 'train' and sample < self.exploration:
diff --git a/PaddleRL/DeepQNetwork/DuelingDQN_agent.py b/legacy/PaddleRL/DeepQNetwork/DuelingDQN_agent.py
similarity index 96%
rename from PaddleRL/DeepQNetwork/DuelingDQN_agent.py
rename to legacy/PaddleRL/DeepQNetwork/DuelingDQN_agent.py
index 4c6dbbfb..5d63adc5 100644
--- a/PaddleRL/DeepQNetwork/DuelingDQN_agent.py
+++ b/legacy/PaddleRL/DeepQNetwork/DuelingDQN_agent.py
@@ -71,17 +71,17 @@ class DuelingDQNModel(object):
         optimizer.minimize(cost)
 
         vars = list(self.train_program.list_vars())
-        target_vars = list(filter(
-            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
+        target_vars = list(
+            filter(lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
 
         policy_vars_name = [
-            x.name.replace('target', 'policy') for x in target_vars]
-        policy_vars = list(filter(
-            lambda x: x.name in policy_vars_name, vars))
+            x.name.replace('target', 'policy') for x in target_vars
+        ]
+        policy_vars = list(filter(lambda x: x.name in policy_vars_name, vars))
 
         policy_vars.sort(key=lambda x: x.name)
         target_vars.sort(key=lambda x: x.name)
-        
+
         with fluid.program_guard(self._sync_program):
             sync_ops = []
             for i, var in enumerate(policy_vars):
@@ -163,7 +163,6 @@ class DuelingDQNModel(object):
             advantage, dim=1, keep_dim=True))
         return Q
 
-
     def act(self, state, train_or_test):
         sample = np.random.random()
         if train_or_test == 'train' and sample < self.exploration:
diff --git a/PaddleRL/DeepQNetwork/README.md b/legacy/PaddleRL/DeepQNetwork/README.md
similarity index 100%
rename from PaddleRL/DeepQNetwork/README.md
rename to legacy/PaddleRL/DeepQNetwork/README.md
diff --git a/PaddleRL/DeepQNetwork/README_cn.md b/legacy/PaddleRL/DeepQNetwork/README_cn.md
similarity index 100%
rename from PaddleRL/DeepQNetwork/README_cn.md
rename to legacy/PaddleRL/DeepQNetwork/README_cn.md
diff --git a/PaddleRL/DeepQNetwork/assets/dqn.png b/legacy/PaddleRL/DeepQNetwork/assets/dqn.png
similarity index 100%
rename from PaddleRL/DeepQNetwork/assets/dqn.png
rename to legacy/PaddleRL/DeepQNetwork/assets/dqn.png
diff --git a/PaddleRL/DeepQNetwork/atari.py b/legacy/PaddleRL/DeepQNetwork/atari.py
similarity index 100%
rename from PaddleRL/DeepQNetwork/atari.py
rename to legacy/PaddleRL/DeepQNetwork/atari.py
diff --git a/PaddleRL/DeepQNetwork/atari_wrapper.py b/legacy/PaddleRL/DeepQNetwork/atari_wrapper.py
similarity index 100%
rename from PaddleRL/DeepQNetwork/atari_wrapper.py
rename to legacy/PaddleRL/DeepQNetwork/atari_wrapper.py
diff --git a/PaddleRL/DeepQNetwork/expreplay.py b/legacy/PaddleRL/DeepQNetwork/expreplay.py
similarity index 100%
rename from PaddleRL/DeepQNetwork/expreplay.py
rename to legacy/PaddleRL/DeepQNetwork/expreplay.py
diff --git a/PaddleRL/DeepQNetwork/play.py b/legacy/PaddleRL/DeepQNetwork/play.py
similarity index 100%
rename from PaddleRL/DeepQNetwork/play.py
rename to legacy/PaddleRL/DeepQNetwork/play.py
diff --git a/PaddleRL/DeepQNetwork/requirement.txt b/legacy/PaddleRL/DeepQNetwork/requirement.txt
similarity index 100%
rename from PaddleRL/DeepQNetwork/requirement.txt
rename to legacy/PaddleRL/DeepQNetwork/requirement.txt
diff --git a/PaddleRL/DeepQNetwork/rom_files/breakout.bin b/legacy/PaddleRL/DeepQNetwork/rom_files/breakout.bin
similarity index 100%
rename from PaddleRL/DeepQNetwork/rom_files/breakout.bin
rename to legacy/PaddleRL/DeepQNetwork/rom_files/breakout.bin
diff --git a/PaddleRL/DeepQNetwork/rom_files/pong.bin b/legacy/PaddleRL/DeepQNetwork/rom_files/pong.bin
similarity index 100%
rename from PaddleRL/DeepQNetwork/rom_files/pong.bin
rename to legacy/PaddleRL/DeepQNetwork/rom_files/pong.bin
diff --git a/PaddleRL/DeepQNetwork/train.py b/legacy/PaddleRL/DeepQNetwork/train.py
similarity index 100%
rename from PaddleRL/DeepQNetwork/train.py
rename to legacy/PaddleRL/DeepQNetwork/train.py
diff --git a/PaddleRL/README.md b/legacy/PaddleRL/README.md
similarity index 100%
rename from PaddleRL/README.md
rename to legacy/PaddleRL/README.md
diff --git a/PaddleRL/policy_gradient/README.md b/legacy/PaddleRL/policy_gradient/README.md
similarity index 100%
rename from PaddleRL/policy_gradient/README.md
rename to legacy/PaddleRL/policy_gradient/README.md
diff --git a/PaddleRL/policy_gradient/brain.py b/legacy/PaddleRL/policy_gradient/brain.py
similarity index 100%
rename from PaddleRL/policy_gradient/brain.py
rename to legacy/PaddleRL/policy_gradient/brain.py
diff --git a/PaddleRL/policy_gradient/env.py b/legacy/PaddleRL/policy_gradient/env.py
similarity index 100%
rename from PaddleRL/policy_gradient/env.py
rename to legacy/PaddleRL/policy_gradient/env.py
diff --git a/PaddleRL/policy_gradient/images/PG_1.svg b/legacy/PaddleRL/policy_gradient/images/PG_1.svg
similarity index 100%
rename from PaddleRL/policy_gradient/images/PG_1.svg
rename to legacy/PaddleRL/policy_gradient/images/PG_1.svg
diff --git a/PaddleRL/policy_gradient/images/PG_2.svg b/legacy/PaddleRL/policy_gradient/images/PG_2.svg
similarity index 100%
rename from PaddleRL/policy_gradient/images/PG_2.svg
rename to legacy/PaddleRL/policy_gradient/images/PG_2.svg
diff --git a/PaddleRL/policy_gradient/images/PG_3.svg b/legacy/PaddleRL/policy_gradient/images/PG_3.svg
similarity index 100%
rename from PaddleRL/policy_gradient/images/PG_3.svg
rename to legacy/PaddleRL/policy_gradient/images/PG_3.svg
diff --git a/PaddleRL/policy_gradient/run.py b/legacy/PaddleRL/policy_gradient/run.py
similarity index 100%
rename from PaddleRL/policy_gradient/run.py
rename to legacy/PaddleRL/policy_gradient/run.py
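Note on the agent code touched above: the reindented blocks in DQN_agent.py, DoubleDQN_agent.py, and DuelingDQN_agent.py are all instances of the same target-network synchronization idiom: pair each target_* parameter with its policy_* counterpart by name, then build a dedicated program of assign ops that hard-copies the policy weights into the target network. Below is a minimal self-contained sketch of that idiom, assuming PaddlePaddle 1.x (paddle.fluid); the toy network and the parameter names policy_fc_w / target_fc_w are illustrative, not taken from the patch.

    # Sketch of the policy -> target parameter sync idiom used by the three
    # agents above. Assumes PaddlePaddle 1.x (paddle.fluid); names are
    # illustrative, not from the patch.
    import paddle.fluid as fluid

    train_program = fluid.Program()
    startup_program = fluid.Program()
    sync_program = fluid.Program()

    with fluid.program_guard(train_program, startup_program):
        state = fluid.layers.data(name='state', shape=[4], dtype='float32')
        # Two copies of the same head, distinguished only by parameter prefix.
        policy_q = fluid.layers.fc(
            input=state, size=2,
            param_attr=fluid.ParamAttr(name='policy_fc_w'),
            bias_attr=fluid.ParamAttr(name='policy_fc_b'))
        target_q = fluid.layers.fc(
            input=state, size=2,
            param_attr=fluid.ParamAttr(name='target_fc_w'),
            bias_attr=fluid.ParamAttr(name='target_fc_b'))

    # Pair target vars with their policy counterparts by name, skipping
    # gradient variables, exactly as in the diff above.
    vars = list(train_program.list_vars())
    target_vars = list(
        filter(lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
    policy_vars_name = [x.name.replace('target', 'policy') for x in target_vars]
    policy_vars = list(filter(lambda x: x.name in policy_vars_name, vars))
    policy_vars.sort(key=lambda x: x.name)
    target_vars.sort(key=lambda x: x.name)

    # A separate program whose only job is policy -> target assignment;
    # running it once performs a hard update of the target network.
    with fluid.program_guard(sync_program):
        for policy_var, target_var in zip(policy_vars, target_vars):
            fluid.layers.assign(policy_var, target_var)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)  # initialize both parameter sets
    exe.run(sync_program)     # target network now mirrors the policy network

The agents in this patch keep the sync program as self._sync_program and re-run it whenever the target network should be refreshed; separating the assign ops into their own program keeps the copy out of the training graph.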