Commit c1646351 authored by LI Yunxiang, committed by Bo Zhou

add liftsim baseline (#120)

* add liftsim baseline

* yapf

* yapf...

* modify acc. comments

* yapf

* yapf..........

* yapf!

why is yapf on paddle different from that on my mac!!!!!
Parent 5f71cd4e
# LiftSim Baseline
## Introduction
A Deep Q-Network (DQN) baseline implemented with the PARL library, applied to [LiftSim][liftsim], the elevator-dispatching simulation environment in the [RLSchool][rlschool] library.
## Dependencies
- paddlepaddle >= 1.5.1
- parl >= 1.1.2
- rlschool >= 0.0.1

On Windows, only Python 3.5 and above is supported.
## How to Run
```shell
python demo.py
```
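`demo.py` also accepts an `--iterations` flag that sets the total number of training steps (default 100000000); for example, for a shorter run:
```shell
python demo.py --iterations 1000000
```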
## Benchmark
<img src="rl_10.png" width="400"/>

Accumulated Reward: the sum of rewards within every 3600 steps, which reflects the efficiency of elevator dispatching per unit of time (0.5 hours in the simulated environment).
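As a minimal sketch of how this metric is produced (it mirrors `run_episode` in `rl_benchmark/dispatcher.py`; an `env` wrapped as in `demo.py`, an `agent` built as in `rl_benchmark/dispatcher.py`, and a `max_steps` value are assumed):

```python
acc_reward = 0.0
step = 0
while step < max_steps:
    action = agent.sample(env.state)   # one action index per elevator
    _, reward, _, _ = env.step(action)
    acc_reward += reward
    step += 1
    if step % 3600 == 0:               # 3600 steps = 0.5 simulated hours
        print('Accumulated Reward:', acc_reward)
        acc_reward = 0.0
```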
[rlschool]: https://github.com/PaddlePaddle/RLSchool
[liftsim]: https://github.com/PaddlePaddle/RLSchool/tree/master/rlschool/liftsim
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from rlschool import LiftSim
from wrapper import Wrapper, ActionWrapper, ObservationWrapper
from rl_benchmark.dispatcher import RL_dispatcher
import sys
import argparse
# run main program with args
def run_main(args):
parser = argparse.ArgumentParser(description='demo configuration')
parser.add_argument(
'--iterations',
type=int,
default=100000000,
help='total number of iterations')
args = parser.parse_args(args)
print('iterations:', args.iterations)
mansion_env = LiftSim()
# mansion_env.seed(1988)
mansion_env = Wrapper(mansion_env)
mansion_env = ActionWrapper(mansion_env)
mansion_env = ObservationWrapper(mansion_env)
dispatcher = RL_dispatcher(mansion_env, args.iterations)
dispatcher.run_episode()
return 0
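# Note on the wrapper stack in run_main: Wrapper exposes the mansion attributes and
# observation/action dimensions, ActionWrapper converts the per-elevator action indices
# sampled by the agent into elevator commands (see action_idx_to_action in
# wrapper_utils.py), and ObservationWrapper converts the raw MansionState into one
# feature vector per elevator (see mansion_state_preprocessing in wrapper_utils.py).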
if __name__ == "__main__":
run_main(sys.argv[1:])
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import numpy.random as random
import paddle.fluid as fluid
from parl import layers
from parl import Agent
from parl.utils import get_gpu_count, machine_info
class ElevatorAgent(Agent):
def __init__(self, algorithm, obs_dim, action_dim):
self._action_dim = action_dim
self._obs_dim = obs_dim
self._update_target_steps = 1000
self._global_step = 0
self.exploration_ratio = 0.9
self.exploration_decre = 1e-7
self.exploration_min = 0.1
super(ElevatorAgent, self).__init__(algorithm)
use_cuda = machine_info.is_gpu_available()
if self.gpu_id >= 0:
assert get_gpu_count() == 1, 'Only support training in single GPU,\
Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
else:
os.environ['CPU_NUM'] = str(1)
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = 1
exec_strategy.num_iteration_per_drop_scope = 10
build_strategy = fluid.BuildStrategy()
build_strategy.remove_unnecessary_lock = False
self.learn_pe = fluid.ParallelExecutor(
use_cuda=use_cuda,
main_program=self.learn_program,
build_strategy=build_strategy,
exec_strategy=exec_strategy,
)
def build_program(self):
self.pred_program = fluid.Program()
self.learn_program = fluid.Program()
with fluid.program_guard(self.pred_program):
obs = layers.data(
name='obs', shape=[self._obs_dim], dtype='float32')
self._value = self.alg.define_predict(obs)
with fluid.program_guard(self.learn_program):
obs = layers.data(
name='obs', shape=[self._obs_dim], dtype='float32')
action = layers.data(name='act', shape=[1], dtype='int32')
reward = layers.data(name='reward', shape=[], dtype='float32')
next_obs = layers.data(
name='next_obs', shape=[self._obs_dim], dtype='float32')
terminal = layers.data(name='terminal', shape=[], dtype='bool')
self._cost = self.alg.define_learn(obs, action, reward, next_obs,
terminal)
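# The two programs built above serve different purposes: pred_program only runs the
# forward pass (define_predict) to obtain Q-values for action selection, while
# learn_program feeds (obs, act, reward, next_obs, terminal) into define_learn to
# compute the DQN loss and update the network.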
def sample(self, obs):
if self.exploration_ratio > self.exploration_min:
self.exploration_ratio -= self.exploration_decre
q_values = self.predict(obs)
ret_actions = list()
for i in range(len(q_values)): # number of elevators
if (random.random() < self.exploration_ratio):
action = random.randint(0, self._action_dim)
else:
action = np.argmax(q_values[i])
ret_actions.append(int(action))
return ret_actions
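# Exploration schedule used above: epsilon starts at exploration_ratio = 0.9 and is
# decreased by exploration_decre = 1e-7 on every call to sample() until it reaches
# exploration_min = 0.1, i.e. roughly (0.9 - 0.1) / 1e-7 = 8e6 calls to anneal fully.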
def predict(self, obs):
pred_Q = self.fluid_executor.run(
self.pred_program,
feed={'obs': obs.astype('float32')},
fetch_list=[self._value])
return pred_Q[0]
def learn(self, obs, act, reward, next_obs, terminal):
self._global_step += 1
if self._global_step % self._update_target_steps == 0:
self.alg.sync_target(self.gpu_id)
feed = {
'obs': obs.astype('float32'),
'act': act.astype('int32'),
'reward': reward,
'next_obs': next_obs.astype('float32'),
'terminal': terminal
}
cost = self.learn_pe.run(feed=feed, fetch_list=[self._cost.name])[0]
return cost
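# Target-network synchronisation: sync_target copies the current Q-network parameters
# into the DQN target network every _update_target_steps (1000) calls to learn(),
# the standard stabilisation trick for DQN.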
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import parl
import numpy as np
import numpy.random as random
from copy import deepcopy
from collections import deque
from rlschool import EPSILON, HUGE
from rl_benchmark.model import RLDispatcherModel
from rl_benchmark.agent import ElevatorAgent
from parl.algorithms import DQN
from parl.utils import ReplayMemory
MEMORY_SIZE = 1000000
BATCH_SIZE = 64
class RL_dispatcher():
"""
An RL benchmark dispatcher for the elevator system
"""
def __init__(self, env, max_episode):
self.env = env
self._obs_dim = env.observation_space
self._act_dim = env.action_space
self._global_step = 0
self.max_episode = max_episode
self._rpm = ReplayMemory(MEMORY_SIZE, self._obs_dim, 1)
self._model = RLDispatcherModel(self._act_dim)
hyperparas = {
'action_dim': self._act_dim,
'lr': 5.0e-4,
'gamma': 0.998
}
self._algorithm = DQN(self._model, hyperparas)
self._agent = ElevatorAgent(self._algorithm, self._obs_dim,
self._act_dim)
self._warm_up_size = 2000
self._statistic_freq = 1000
self._loss_queue = deque()
def run_episode(self):
self.env.reset()
acc_reward = 0.0
while self._global_step < self.max_episode:
# self.env.render()
state = self.env.state
action = self._agent.sample(state)
state_, reward, done, info = self.env.step(action)
output_info = self.learn_step(state, action, reward)
acc_reward += reward
if (isinstance(output_info, dict) and len(output_info) > 0):
self.env.log_notice("%s", output_info)
if (self._global_step % 3600 == 0):
self.env.log_notice(
"Accumulated Reward: %f, Mansion Status: %s", acc_reward,
self.env.statistics)
acc_reward = 0.0
self._agent.save('./model.ckpt')
def learn_step(self, state, action, r):
self._global_step += 1
if (self._global_step > self._warm_up_size):
for i in range(self.env.elevator_num):
self._rpm.append(self._last_observation_array[i],
self._last_action[i], self._last_reward,
deepcopy(state[i]), False)
self._last_observation_array = deepcopy(state)
self._last_action = deepcopy(action)
self._last_reward = r
ret_dict = {}
if self._rpm.size() > self._warm_up_size:
batch_obs, batch_action, batch_reward, batch_next_obs, batch_terminal = \
self._rpm.sample_batch(BATCH_SIZE)
cost = self._agent.learn(batch_obs, batch_action, batch_reward,
batch_next_obs, batch_terminal)
self._loss_queue.appendleft(cost)
if (len(self._loss_queue) > self._statistic_freq):
self._loss_queue.pop()
if (self._global_step % self._statistic_freq == 0):
ret_dict["Temporal Difference Error(Average)"] = \
float(sum(self._loss_queue)) / float(len(self._loss_queue))
return ret_dict
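# Replay flow in learn_step: once the warm-up of 2000 global steps has passed, each call
# stores one transition per elevator (last_obs, last_action, last_reward, current_obs)
# in the replay memory; once the memory itself holds more than 2000 entries, a batch of
# 64 transitions is sampled per step for a DQN update, and the average temporal-difference
# error over the last 1000 updates is reported every 1000 steps.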
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
from parl import layers
import numpy as np
import parl
class RLDispatcherModel(parl.Model):
def __init__(self, act_dim):
self._act_dim = act_dim
self._fc_1 = layers.fc(size=512, act='relu')
self._fc_2 = layers.fc(size=256, act='relu')
self._fc_3 = layers.fc(size=128, act='tanh')
self._output = layers.fc(size=act_dim)
def value(self, obs):
_h_1 = self._fc_1(obs)
_h_2 = self._fc_2(_h_1)
_h_3 = self._fc_3(_h_2)
self._pred = self._output(_h_3)
return self._pred
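# Network summary: the Q-network above maps an observation vector to act_dim Q-values
# through three fully connected hidden layers (512 relu -> 256 relu -> 128 tanh)
# followed by a linear output layer; value(obs) returns one Q-value per action.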
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# wrapper part modified from
# https://github.com/openai/gym/blob/master/gym/core.py
from rlschool import LiftSim
from wrapper_utils import obs_dim, act_dim, mansion_state_preprocessing
from wrapper_utils import action_idx_to_action
class Wrapper(LiftSim):
def __init__(self, env):
self.env = env
self._mansion = env._mansion
self.mansion_attr = self._mansion.attribute
self.elevator_num = self.mansion_attr.ElevatorNumber
self.observation_space = obs_dim(self.mansion_attr)
self.action_space = act_dim(self.mansion_attr)
self.viewer = env.viewer
def __getattr__(self, name):
if name.startswith('_'):
raise AttributeError(
"attempted to get missing private attribute '{}'".format(name))
return getattr(self.env, name)
def seed(self, seed=None):
return self.env.seed(seed)
def step(self, action):
return self.env.step(action)
def reset(self):
return self.env.reset()
def render(self):
return self.env.render()
def close(self):
return self.env.close()
class RewardWrapper(Wrapper):
pass
class ActionWrapper(Wrapper):
def reset(self):
return self.env.reset()
def step(self, action):
act = []
for a in action:
act.extend(self.action(a, self.action_space))
return self.env.step(act)
def action(self, action, action_space):
return action_idx_to_action(action, action_space)
class ObservationWrapper(Wrapper):
def reset(self):
self.env.reset()
return self.observation(self._mansion.state)
def step(self, action):
observation, reward, done, info = self.env.step(action)
return (self.observation(observation), reward, done, info)
def observation(self, observation):
return mansion_state_preprocessing(observation)
@property
def state(self):
return self.observation(self._mansion.state)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import random
import numpy as np
from rlschool import ElevatorState, ElevatorAction
from rlschool import MansionAttribute, MansionState
from rlschool import EPSILON, HUGE
from rlschool import MansionConfig
from rlschool import MansionManager
def discretize(value, n_dim, min_val, max_val):
"""
Discretize a value into an n_dim-dimensional one-hot vector,
with values below min_val mapped to [1, 0, 0, ..., 0]
and values above max_val mapped to [0, 0, ..., 0, 1].
Args:
value: the value to be discretized into one-hot format
n_dim: number of dimensions
min_val: lower bound of the discretization range
max_val: upper bound of the discretization range
Returns:
the discretized one-hot vector
"""
assert n_dim > 0
if (n_dim == 1):
return [1]
delta = (max_val - min_val) / float(n_dim - 1)
active_pos = int((value - min_val) / delta + 0.5)
active_pos = min(n_dim - 1, active_pos)
active_pos = max(0, active_pos)
ret_array = [0 for i in range(n_dim)]
ret_array[active_pos] = 1.0
return ret_array
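# Worked example: discretize(1.5, 3, 1.0, 2.0) uses delta = 0.5, so
# active_pos = int((1.5 - 1.0) / 0.5 + 0.5) = 1 and the result is [0, 1.0, 0].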
def linear_discretize(value, n_dim, min_val, max_val):
"""
Discretize a value into an n_dim-dimensional vector, linearly interpolating
between the two nearest anchor points,
with values below min_val mapped to [1, 0, 0, ..., 0]
and values above max_val mapped to [0, 0, ..., 0, 1].
e.g. if n_dim = 2, min_val = 1.0, max_val = 2.0,
then value = 1.5 returns [0.5, 0.5] and value = 1.8 returns [0.2, 0.8]
Args:
value: the value to be discretized
n_dim: number of dimensions
min_val: lower bound of the discretization range
max_val: upper bound of the discretization range
Returns:
the discretized vector
"""
assert n_dim > 0
if (n_dim == 1):
return [1]
delta = (max_val - min_val) / float(n_dim - 1)
active_pos = int((value - min_val) / delta + 0.5)
active_pos = min(n_dim - 2, active_pos)
active_pos = max(0, active_pos)
anchor_pt = active_pos * delta + min_val
if (anchor_pt > value and anchor_pt > min_val + 0.5 * delta):
anchor_pt -= delta
active_pos -= 1
weight = (value - anchor_pt) / delta
weight = min(1.0, max(0.0, weight))
ret_array = [0 for i in range(n_dim)]
ret_array[active_pos] = 1.0 - weight
ret_array[active_pos + 1] = weight
return ret_array
def ele_state_preprocessing(ele_state):
"""Process elevator state, make it usable for network
Args:
ele_state: ElevatorState, nametuple, defined in rlschool/liftsim/environment/mansion/utils.py
Returns:
ele_feature: list of elevator state
"""
ele_feature = []
# add floor information
ele_feature.extend(
linear_discretize(ele_state.Floor, ele_state.MaximumFloor, 1.0,
ele_state.MaximumFloor))
# add velocity information
ele_feature.extend(
linear_discretize(ele_state.Velocity, 21, -ele_state.MaximumSpeed,
ele_state.MaximumSpeed))
# add door information
ele_feature.append(ele_state.DoorState)
ele_feature.append(float(ele_state.DoorIsOpening))
ele_feature.append(float(ele_state.DoorIsClosing))
# add direction information
ele_feature.extend(discretize(ele_state.Direction, 3, -1, 1))
# add load weight information
ele_feature.extend(
linear_discretize(ele_state.LoadWeight / ele_state.MaximumLoad, 5, 0.0,
1.0))
# add other information
target_floor_binaries = [0.0 for i in range(ele_state.MaximumFloor)]
for target_floor in ele_state.ReservedTargetFloors:
target_floor_binaries[target_floor - 1] = 1.0
ele_feature.extend(target_floor_binaries)
dispatch_floor_binaries = [0.0 for i in range(ele_state.MaximumFloor + 1)]
dispatch_floor_binaries[ele_state.CurrentDispatchTarget] = 1.0
ele_feature.extend(dispatch_floor_binaries)
ele_feature.append(ele_state.DispatchTargetDirection)
return ele_feature
def obs_dim(mansion_attr):
"""Calculate the observation dimension
Args:
mansion_attr: MansionAttribute, attribute of mansion_manager
Returns:
observation dimension
"""
assert isinstance(mansion_attr, MansionAttribute)
ele_dim = mansion_attr.NumberOfFloor * 3 + 34
obs_dim = (ele_dim + 1) * mansion_attr.ElevatorNumber + \
mansion_attr.NumberOfFloor * 2
return obs_dim
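# Where the constants come from: each elevator's own feature vector (built in
# ele_state_preprocessing) has length F (floor one-hot) + 21 (velocity) + 3 (door)
# + 3 (direction) + 5 (load) + F (reserved targets) + F + 1 (dispatch target)
# + 1 (dispatch direction) = 3 * F + 34, with F = NumberOfFloor. The per-elevator
# observation assembled in mansion_state_preprocessing concatenates the features of
# all ElevatorNumber elevators, an elevator-id one-hot of length ElevatorNumber and
# the up/down request binaries of length 2 * F, giving
# (ele_dim + 1) * ElevatorNumber + 2 * NumberOfFloor as computed above.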
def act_dim(mansion_attr):
"""Calculate the action dimension, which is number of floor times 2 plus 2.
The additional two are for special cases: the elevator stops at once if the new dispatch_target is 0,
the original dispatch_target does not change if dispatch_target is -1. See implementation in
method action_idx_to_action below.
Args:
mansion_attr: MansionAttribute, attribute of mansion_manager
Returns:
action dimension
"""
assert isinstance(mansion_attr, MansionAttribute)
return mansion_attr.NumberOfFloor * 2 + 2
def mansion_state_preprocessing(mansion_state):
"""Process mansion_state to make it usable for networks, convert it into a numpy array
Args:
mansion_state: namedtuple of mansion state,
defined in rlschool/liftsim/environment/mansion/utils.py
Returns:
the converted numpy array
"""
ele_features = list()
for ele_state in mansion_state.ElevatorStates:
ele_features.append(ele_state_preprocessing(ele_state))
max_floor = ele_state.MaximumFloor
target_floor_binaries_up = [0.0 for i in range(max_floor)]
target_floor_binaries_down = [0.0 for i in range(max_floor)]
for floor in mansion_state.RequiringUpwardFloors:
target_floor_binaries_up[floor - 1] = 1.0
for floor in mansion_state.RequiringDownwardFloors:
target_floor_binaries_down[floor - 1] = 1.0
target_floor_binaries = target_floor_binaries_up + target_floor_binaries_down
idx = 0
man_features = list()
for idx in range(len(mansion_state.ElevatorStates)):
elevator_id_vec = discretize(idx + 1,
len(mansion_state.ElevatorStates), 1,
len(mansion_state.ElevatorStates))
idx_array = list(range(len(mansion_state.ElevatorStates)))
idx_array.remove(idx)
# random.shuffle(idx_array)
man_features.append(ele_features[idx])
for left_idx in idx_array:
man_features[idx] = man_features[idx] + ele_features[left_idx]
man_features[idx] = man_features[idx] + \
elevator_id_vec + target_floor_binaries
return np.asarray(man_features, dtype='float32')
def action_idx_to_action(action_idx, act_dim):
"""Convert action_inx to action
Args:
action_idx: the index needed to be converted
act_dim: action dimension
Returns:
the converted namedtuple
"""
assert isinstance(action_idx, int)
assert isinstance(act_dim, int)
realdim = act_dim - 2
if (action_idx == realdim):
return ElevatorAction(0, 1)
elif (action_idx == realdim + 1):
return ElevatorAction(-1, 1)
action = action_idx
if (action_idx < realdim / 2):
direction = 1
action += 1
else:
direction = -1
action -= int(realdim / 2)
action += 1
return [action, direction]
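# Worked example with act_dim = 22 (a 10-floor building): indices 0..9 map to
# [TargetFloor, DirectionIndicator] = [1..10, 1] (dispatch upward), indices 10..19 map
# to [1..10, -1] (dispatch downward), index 20 returns ElevatorAction(0, 1) (stop at
# once), and index 21 returns ElevatorAction(-1, 1) (keep the current dispatch target).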
def action_to_action_idx(action, act_dim):
"""Convert action to number according to act_dim.
Args:
action: namedtuple defined in rlschool/liftsim/environment/mansion/utils.py
act_dim: action dimension
Returns:
action_idx: the result index
"""
assert isinstance(action, ElevatorAction)
assert isinstance(act_dim, int)
realdim = act_dim - 2
if (action.TargetFloor == 0):
return realdim
elif (action.TargetFloor < 0):
return realdim + 1
action_idx = 0
if (action.DirectionIndicator < 0):
action_idx += int(realdim / 2)
action_idx += action.TargetFloor - 1
return action_idx