# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import parl from parl.utils import logger from env_wrapper import ObsProcessWrapper, ActionProcessWrapper, RewardWrapper from rlschool import LiftSim from lift_model import LiftModel from lift_agent import LiftAgent from a2c_config import config def evaluate_one_day(model_path): env = LiftSim() env = ActionProcessWrapper(env) env = ObsProcessWrapper(env) act_dim = env.act_dim obs_dim = env.obs_dim config['obs_dim'] = obs_dim model = LiftModel(act_dim) algorithm = parl.algorithms.A3C( model, vf_loss_coeff=config['vf_loss_coeff']) agent = LiftAgent(algorithm, config) agent.restore(model_path) reward_24h = 0 obs = env.reset() for i in range(24 * 3600 * 2): # 24h, 1step = 0.5s action, _ = agent.sample(obs) #print(action) obs, reward, done, info = env.step(action) reward_24h += reward if (i + 1) % (3600 * 2) == 0: logger.info('hour {}, total_reward: {}'.format( (i + 1) // (3600 * 2), reward_24h)) logger.info('model_path: {}, 24h reward: {}'.format( model_path, reward_24h)) if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument( '--model_path', type=str, help='path of the model to evaluate.') args = parser.parse_args() evaluate_one_day(args.model_path)