# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # arguments of coma config = { # ========== Environment ========== 'difficulty': '3', # The difficulty of the game 'map': '3m', # The map of the game 'env_seed': None, # Environment random seed 'replay_dir': '', # Save the replay, not available in Ubuntu # ========== Learn ========== 'gamma': 0.99, 'grad_norm_clip': 10, # Prevent gradient explosion 'td_lambda': 0.8, # Lambda of td-lambda return 'actor_lr': 1e-4, 'critic_lr': 1e-3, 'target_update_cycle': 200, # How often to update the target_net # ========== Epsilon-greedy ========== 'epsilon': 0.5, 'anneal_epsilon': 0.00064, 'min_epsilon': 0.02, # 'epsilon_anneal_scale' : 'epoch', # ========== Other ========== 'n_epoch': 5000, # The number of the epoch to train the agent 'n_episodes': 5, # The number of the episodes in one epoch 'test_episode_n': 20, # The Number of the epochs to evaluate the agent 'threshold': 19, # The threshold to judge whether win 'test_cycle': 5, # How often to evaluate (every 'test_cycle' epcho) 'save_cycle': 1000, # How often to save the model 'model_dir': './model', # The model directory of the policy 'test': False, # Evaluate model and quit (no training) 'restore': False # restore model or not }