diff --git a/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/replay_memory.py b/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/replay_memory.py new file mode 100755 index 0000000000000000000000000000000000000000..a1a3b4a8ed2cb327b7a4ec34770ea09ccf917ad8 --- /dev/null +++ b/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/replay_memory.py @@ -0,0 +1,97 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from parl.utils import logger + +__all__ = ['ReplayMemory'] + + +class ReplayMemory(object): + def __init__(self, max_size, obs_dim, act_dim): + self.max_size = int(max_size) + self.obs_dim = obs_dim + self.act_dim = act_dim + + self.obs = np.zeros((max_size, obs_dim), dtype='float32') + self.action = np.zeros((max_size, act_dim), dtype='float32') + self.reward = np.zeros((max_size, ), dtype='float32') + self.terminal = np.zeros((max_size, ), dtype='bool') + self.next_obs = np.zeros((max_size, obs_dim), dtype='float32') + + self._curr_size = 0 + self._curr_pos = 0 + + def sample_batch(self, batch_size): + batch_idx = np.random.randint( + self._curr_size - 300 - 1, size=batch_size) + + obs = self.obs[batch_idx] + reward = self.reward[batch_idx] + action = self.action[batch_idx] + next_obs = self.next_obs[batch_idx] + terminal = self.terminal[batch_idx] + return obs, action, reward, next_obs, terminal + + def make_index(self, batch_size): + batch_idx = np.random.randint( + 
self._curr_size - 300 - 1, size=batch_size) + return batch_idx + + def sample_batch_by_index(self, batch_idx): + obs = self.obs[batch_idx] + reward = self.reward[batch_idx] + action = self.action[batch_idx] + next_obs = self.next_obs[batch_idx] + terminal = self.terminal[batch_idx] + return obs, action, reward, next_obs, terminal + + def append(self, obs, act, reward, next_obs, terminal): + if self._curr_size < self.max_size: + self._curr_size += 1 + self.obs[self._curr_pos] = obs + self.action[self._curr_pos] = act + self.reward[self._curr_pos] = reward + self.next_obs[self._curr_pos] = next_obs + self.terminal[self._curr_pos] = terminal + self._curr_pos = (self._curr_pos + 1) % self.max_size + + def size(self): + return self._curr_size + + def save(self, pathname): + other = np.array([self._curr_size, self._curr_pos], dtype=np.int32) + np.savez( + pathname, + obs=self.obs, + action=self.action, + reward=self.reward, + terminal=self.terminal, + next_obs=self.next_obs, + other=other) + + def load(self, pathname): + data = np.load(pathname) + other = data['other'] + if int(other[0]) > self.max_size: + logger.warn('loading from a bigger size rpm!') + self._curr_size = min(int(other[0]), self.max_size) + self._curr_pos = min(int(other[1]), self.max_size - 1) + + self.obs[:self._curr_size] = data['obs'][:self._curr_size] + self.action[:self._curr_size] = data['action'][:self._curr_size] + self.reward[:self._curr_size] = data['reward'][:self._curr_size] + self.terminal[:self._curr_size] = data['terminal'][:self._curr_size] + self.next_obs[:self._curr_size] = data['next_obs'][:self._curr_size] + logger.info("[load rpm] memory loaded from {}".format(pathname)) diff --git a/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/simulator_server.py b/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/simulator_server.py index f3b775e5d85ded5da637960b5f2d142e8c6659a6..a5e98d6fea540b8c869e2ea9f19701e4b4d858e0 100755 ---
a/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/simulator_server.py +++ b/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/simulator_server.py @@ -28,7 +28,8 @@ from concurrent import futures from multi_head_ddpg import MultiHeadDDPG from opensim_agent import OpenSimAgent from opensim_model import OpenSimModel -from parl.utils import logger, ReplayMemory +from parl.utils import logger +from replay_memory import ReplayMemory from utils import calc_indicators, ScalarsManager, TransitionExperience ACT_DIM = 19 diff --git a/examples/NeurIPS2019-Learn-to-Move-Challenge/evaluate.py b/examples/NeurIPS2019-Learn-to-Move-Challenge/evaluate.py index e3a8066d79128ed9e969bb7d4c1c8cce3bee3775..98bd276f26730c3bcd1a1b2547dd9949fbc429b0 100755 --- a/examples/NeurIPS2019-Learn-to-Move-Challenge/evaluate.py +++ b/examples/NeurIPS2019-Learn-to-Move-Challenge/evaluate.py @@ -22,7 +22,7 @@ import numpy as np from actor import Actor from opensim_model import OpenSimModel from opensim_agent import OpenSimAgent -from parl.utils import logger, ReplayMemory, summary, get_gpu_count +from parl.utils import logger, summary, get_gpu_count from parl.utils.window_stat import WindowStat from parl.remote.client import get_global_client from parl.utils import machine_info diff --git a/examples/NeurIPS2019-Learn-to-Move-Challenge/replay_memory.py b/examples/NeurIPS2019-Learn-to-Move-Challenge/replay_memory.py new file mode 120000 index 0000000000000000000000000000000000000000..7b008d6094e6aa2a7434f7c007d29efc641bb708 --- /dev/null +++ b/examples/NeurIPS2019-Learn-to-Move-Challenge/replay_memory.py @@ -0,0 +1 @@ +../NeurIPS2018-AI-for-Prosthetics-Challenge/replay_memory.py \ No newline at end of file diff --git a/examples/NeurIPS2019-Learn-to-Move-Challenge/train.py b/examples/NeurIPS2019-Learn-to-Move-Challenge/train.py index cf14f1e0306c69c8f134cf6c81c279ac982b52d0..ae624d840f921aba7c29bdad4c5494a2917af34a 100755 --- a/examples/NeurIPS2019-Learn-to-Move-Challenge/train.py +++ 
b/examples/NeurIPS2019-Learn-to-Move-Challenge/train.py @@ -22,7 +22,8 @@ import numpy as np from actor import Actor from opensim_model import OpenSimModel from opensim_agent import OpenSimAgent -from parl.utils import logger, ReplayMemory, summary, get_gpu_count +from parl.utils import logger, summary, get_gpu_count +from replay_memory import ReplayMemory from parl.utils.window_stat import WindowStat from parl.remote.client import get_global_client from parl.utils import machine_info diff --git a/parl/utils/replay_memory.py b/parl/utils/replay_memory.py index a1a3b4a8ed2cb327b7a4ec34770ea09ccf917ad8..8a5377a2d6f593cba763969809d3dab483284685 100755 --- a/parl/utils/replay_memory.py +++ b/parl/utils/replay_memory.py @@ -34,8 +34,7 @@ class ReplayMemory(object): self._curr_pos = 0 def sample_batch(self, batch_size): - batch_idx = np.random.randint( - self._curr_size - 300 - 1, size=batch_size) + batch_idx = np.random.randint(self._curr_size, size=batch_size) obs = self.obs[batch_idx] reward = self.reward[batch_idx] @@ -45,8 +44,7 @@ class ReplayMemory(object): return obs, action, reward, next_obs, terminal def make_index(self, batch_size): - batch_idx = np.random.randint( - self._curr_size - 300 - 1, size=batch_size) + batch_idx = np.random.randint(self._curr_size, size=batch_size) return batch_idx def sample_batch_by_index(self, batch_idx):