提交 46f59906 编写于 作者: F fuyw 提交者: Bo Zhou

fix unittest bugs (#136)

* add favicon

* Sleep longer for unittest.

* fix unittest bugs

* increase sleep seconds after job_memory error
上级 eca90f14
include parl/remote/static/logo.png include parl/remote/static/logo.png
include parl/remote/static/favicon.ico
recursive-include parl/remote/templates *.html recursive-include parl/remote/templates *.html
recursive-include parl/remote/static/css *.css recursive-include parl/remote/static/css *.css
recursive-include parl/remote/static/js *.js recursive-include parl/remote/static/js *.js
...@@ -59,6 +59,9 @@ class Job(object): ...@@ -59,6 +59,9 @@ class Job(object):
self.lock = threading.Lock() self.lock = threading.Lock()
self._create_sockets() self._create_sockets()
process = psutil.Process(self.pid)
self.init_memory = float(process.memory_info()[0]) / (1024**2)
def _create_sockets(self): def _create_sockets(self):
"""Create three sockets for each job. """Create three sockets for each job.
...@@ -134,7 +137,7 @@ class Job(object): ...@@ -134,7 +137,7 @@ class Job(object):
if self.max_memory is not None: if self.max_memory is not None:
process = psutil.Process(self.pid) process = psutil.Process(self.pid)
used_memory = float(process.memory_info()[0]) / (1024**2) used_memory = float(process.memory_info()[0]) / (1024**2)
if used_memory > self.max_memory: if used_memory > self.max_memory + self.init_memory:
stop_job = True stop_job = True
return stop_job return stop_job
...@@ -174,6 +177,10 @@ class Job(object): ...@@ -174,6 +177,10 @@ class Job(object):
to_byte(self.job_address) to_byte(self.job_address)
]) ])
if stop_job == True: if stop_job == True:
logger.error(
"Memory used by this job exceeds {}. This job will exist."
.format(self.max_memory))
time.sleep(3)
socket.close(0) socket.close(0)
os._exit(1) os._exit(1)
except zmq.error.Again as e: except zmq.error.Again as e:
......
...@@ -133,7 +133,7 @@ def start_master(port, cpu_num, monitor_port): ...@@ -133,7 +133,7 @@ def start_master(port, cpu_num, monitor_port):
## If you want to check cluster status, please view: ## If you want to check cluster status, please view:
http://{}:{}. http://{}:{}
or call: or call:
...@@ -196,8 +196,7 @@ def status(): ...@@ -196,8 +196,7 @@ def status():
status = [] status = []
for monitor in monitors: for monitor in monitors:
monitor_port, _, master_address = monitor.split(' ') monitor_port, _, master_address = monitor.split(' ')
master_ip = master_address.split(':')[0] monitor_address = "{}:{}".format(get_ip_address(), monitor_port)
monitor_address = "{}:{}".format(master_ip, monitor_port)
socket = ctx.socket(zmq.REQ) socket = ctx.socket(zmq.REQ)
socket.connect('tcp://{}'.format(master_address)) socket.connect('tcp://{}'.format(master_address))
socket.send_multipart([STATUS_TAG]) socket.send_multipart([STATUS_TAG])
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<title>Parl Cluster</title> <title>Parl Cluster</title>
<link rel="shortcut icon" href="../static/favicon.ico">
<script type="text/javascript" src="../static/js/jquery.min.js"></script> <script type="text/javascript" src="../static/js/jquery.min.js"></script>
<script src="../static/js/echarts.min.js"></script> <script src="../static/js/echarts.min.js"></script>
<script src="../static/js/parl.js"></script> <script src="../static/js/parl.js"></script>
...@@ -39,7 +40,7 @@ ...@@ -39,7 +40,7 @@
<tr> <tr>
<th scope="col">#</th> <th scope="col">#</th>
<th scope="col">Path</th> <th scope="col">Path</th>
<th scope="col">Client ID</th> <th scope="col">Hostname</th>
<th scope="col">Actor Num</th> <th scope="col">Actor Num</th>
<th scope="col">Time (min)</th> <th scope="col">Time (min)</th>
</tr> </tr>
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<title>Parl Cluster</title> <title>Parl Cluster</title>
<link rel="shortcut icon" href="../static/favicon.ico">
<script type="text/javascript" src="../static/js/jquery.min.js"></script> <script type="text/javascript" src="../static/js/jquery.min.js"></script>
<script src="../static/js/echarts.min.js"></script> <script src="../static/js/echarts.min.js"></script>
<script src="../static/js/parl.js"></script> <script src="../static/js/parl.js"></script>
......
...@@ -23,41 +23,54 @@ from parl.remote.worker import Worker ...@@ -23,41 +23,54 @@ from parl.remote.worker import Worker
from parl.remote.client import disconnect from parl.remote.client import disconnect
from parl.remote.monitor import ClusterMonitor from parl.remote.monitor import ClusterMonitor
from multiprocessing import Process
@parl.remote_class(max_memory=200)
@parl.remote_class(max_memory=300)
class Actor(object): class Actor(object):
def __init__(self, x=10): def __init__(self, x=10):
self.x = x self.x = x
self.data = [] self.data = []
def add_100mb(self): def add_500mb(self):
self.data.append(os.urandom(100 * 1024**2)) self.data.append(os.urandom(500 * 1024**2))
self.x += 1 self.x += 1
return self.x return self.x
from parl.utils import logger
class TestMaxMemory(unittest.TestCase): class TestMaxMemory(unittest.TestCase):
def tearDown(self): def tearDown(self):
disconnect() disconnect()
def actor(self):
actor1 = Actor()
time.sleep(10)
actor1.add_500mb()
def test_max_memory(self): def test_max_memory(self):
port = 3001 port = 3001
master = Master(port=port) master = Master(port=port)
th = threading.Thread(target=master.run) th = threading.Thread(target=master.run)
th.start() th.start()
time.sleep(1) time.sleep(5)
worker = Worker('localhost:{}'.format(port), 1) worker = Worker('localhost:{}'.format(port), 1)
cluster_monitor = ClusterMonitor('localhost:{}'.format(port)) cluster_monitor = ClusterMonitor('localhost:{}'.format(port))
time.sleep(1) time.sleep(5)
parl.connect('localhost:{}'.format(port)) parl.connect('localhost:{}'.format(port))
actor = Actor() actor = Actor()
time.sleep(30) time.sleep(20)
self.assertEqual(1, cluster_monitor.data['clients'][0]['actor_num']) self.assertEqual(1, cluster_monitor.data['clients'][0]['actor_num'])
actor.add_100mb()
time.sleep(50)
self.assertEqual(0, cluster_monitor.data['clients'][0]['actor_num'])
actor.job_socket.close(0)
del actor del actor
time.sleep(5)
p = Process(target=self.actor)
p.start()
time.sleep(30)
self.assertEqual(0, cluster_monitor.data['clients'][0]['actor_num'])
p.terminate()
worker.exit() worker.exit()
master.exit() master.exit()
......
...@@ -24,7 +24,7 @@ from parl.remote.client import disconnect ...@@ -24,7 +24,7 @@ from parl.remote.client import disconnect
from parl.remote.monitor import ClusterMonitor from parl.remote.monitor import ClusterMonitor
@parl.remote_class(max_memory=200) @parl.remote_class(max_memory=300)
class Actor(object): class Actor(object):
def __init__(self, x=10): def __init__(self, x=10):
self.x = x self.x = x
...@@ -45,14 +45,14 @@ class TestClusterStatus(unittest.TestCase): ...@@ -45,14 +45,14 @@ class TestClusterStatus(unittest.TestCase):
master = Master(port=port) master = Master(port=port)
th = threading.Thread(target=master.run) th = threading.Thread(target=master.run)
th.start() th.start()
time.sleep(1) time.sleep(5)
worker = Worker('localhost:{}'.format(port), 1) worker = Worker('localhost:{}'.format(port), 1)
time.sleep(5) time.sleep(5)
status_info = master.cluster_monitor.get_status_info() status_info = master.cluster_monitor.get_status_info()
self.assertEqual(status_info, 'has 0 used cpus, 1 vacant cpus.') self.assertEqual(status_info, 'has 0 used cpus, 1 vacant cpus.')
parl.connect('localhost:{}'.format(port)) parl.connect('localhost:{}'.format(port))
actor = Actor() actor = Actor()
time.sleep(30) time.sleep(50)
status_info = master.cluster_monitor.get_status_info() status_info = master.cluster_monitor.get_status_info()
self.assertEqual(status_info, 'has 1 used cpus, 0 vacant cpus.') self.assertEqual(status_info, 'has 1 used cpus, 0 vacant cpus.')
worker.exit() worker.exit()
......
...@@ -307,7 +307,7 @@ class Worker(object): ...@@ -307,7 +307,7 @@ class Worker(object):
total_memory = round(virtual_memory[0] / (1024**3), 2) total_memory = round(virtual_memory[0] / (1024**3), 2)
used_memory = round(virtual_memory[3] / (1024**3), 2) used_memory = round(virtual_memory[3] / (1024**3), 2)
vacant_memory = round(total_memory - used_memory, 2) vacant_memory = round(total_memory - used_memory, 2)
load_average = round(psutil.getloadavg()[0], 2) load_average = round(os.getloadavg()[0], 2)
return (vacant_memory, used_memory, now, load_average) return (vacant_memory, used_memory, now, load_average)
def _reply_heartbeat(self, target): def _reply_heartbeat(self, target):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册