未验证 提交 53ff3d73 编写于 作者: W Wu Tao 提交者: GitHub

scripts: check health status for all clusters (#244)

上级 4b33f36c
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.
"""
Basic usage:
> vim ~/.bashrc
export PYTHONPATH=$PYTHONPATH:$HOME/.local/lib/python2.7/site-packages/
export PEGASUS_CONFIG_PATH=$HOME/work/conf_pegasus
export PEGASUS_SHELL_PATH=$HOME/work/pegasus
> pip install --user click
> ./pegasus_check_clusters.py --env c3srv
"""
import os
import click
from py_utils import *
# Command-line entry point: for every cluster whose config is found under
# PEGASUS_CONFIG_PATH (optionally filtered by --env), print imbalanced nodes
# and unhealthy partitions. The first RuntimeError aborts the whole run.
# (Kept as comments on purpose: click would surface a docstring in --help.)
@click.command()
@click.option(
    "--env", default="", help="Env of pegasus cluster, eg. c3srv or c4tst")
@click.option('-v', '--verbose', count=True)
def main(env, verbose):
    config_dir = os.getenv("PEGASUS_CONFIG_PATH")
    if config_dir is None:
        echo(
            "Please configure environment variable PEGASUS_CONFIG_PATH in your bashrc or zshrc",
            "red")
        exit(1)

    if env != "":
        echo("env = " + env)
    set_global_verbose(verbose)

    for cluster in list_pegasus_clusters(config_dir, env):
        # Opening marker for this cluster's section of the report.
        echo("=== " + cluster.name())
        try:
            cluster.print_imbalance_nodes()
            cluster.print_unhealthy_partitions()
        except RuntimeError as err:
            # A failed shell invocation stops the whole check.
            echo(str(err), "red")
            return
        # Closing marker for this cluster's section.
        echo("===")


if __name__ == "__main__":
    main()
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.
"""
Basic usage:
> vim ~/.bashrc
export PYTHONPATH=$PYTHONPATH:$HOME/.local/lib/python2.7/site-packages/
export PEGASUS_CONFIG_PATH=$HOME/work/conf_pegasus
export PEGASUS_SHELL_PATH=$HOME/work/pegasus
> pip install --user click
> ./pegasus_check_ports.py --env c3srv
"""
import os
import click
from py_utils import *
@click.command()
@click.option(
    "--env", default="", help="Env of pegasus cluster, eg. c3srv or c4tst")
def main(env):
    """List the meta port of every cluster and report port conflicts per host."""
    # NOTE: `--env` now defaults to "" (as in the cluster health script).
    # Without a default click passes None, and the "pegasus-" + env prefix
    # built by list_pegasus_clusters raises TypeError.
    pegasus_config_path = os.getenv("PEGASUS_CONFIG_PATH")
    if pegasus_config_path is None:
        echo(
            "Please configure environment variable PEGASUS_CONFIG_PATH in your bashrc or zshrc",
            "red")
        exit(1)

    clusters = list_pegasus_clusters(pegasus_config_path, env)
    # Maps meta host -> set of meta ports already claimed on that host.
    host_to_ports = {}
    for cluster in clusters:
        try:
            p = cluster.get_meta_port()
            h = cluster.get_meta_host()
            if p is None or h is None:
                # The getters return None when the key is absent from the
                # config; skip so the max(...) + 1000 below cannot fail.
                echo(
                    "cannot find base_port or host.0 in config of cluster {0}".
                    format(cluster.name()), "red")
                continue
            ports = host_to_ports.setdefault(h, set())
            if p in ports:
                echo(
                    "port number conflicted: {0} {1} [{2}]".format(
                        p, cluster.name(), h), "red")
                continue
            ports.add(p)
            echo("cluster {0}: {1} [{2}]".format(cluster.name(), p, h))
        except RuntimeError as e:
            echo(str(e), "red")
            return

    echo("")
    for h in host_to_ports:
        # Suggest the next free port: 1000 above the highest one in use.
        echo("recommended port number for [{0}] is: {1}".format(
            h, str(max(host_to_ports[h]) + 1000)))
        echo("host [{0}] has in total {1} clusters on it".format(
            h, len(host_to_ports[h])))

    echo("")


if __name__ == "__main__":
    main()
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.
from .lib import set_global_verbose, echo, list_pegasus_clusters, PegasusCluster
# Public names of this package, all re-exported from .lib; `__all__` is the
# list a star-import of the package exposes.
__all__ = [
'set_global_verbose', 'echo', 'list_pegasus_clusters', 'PegasusCluster'
]
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.
import click
import commands
import os
# Module-wide verbosity flag; when truthy, shell commands are echoed before
# they are executed.
_global_verbose = False


def set_global_verbose(val):
    """Set the module-wide verbosity flag.

    :param val: new value; any truthy value enables verbose output
    """
    # Without this declaration the assignment would only bind a local name
    # and the module-level flag would never change.
    global _global_verbose
    _global_verbose = val
def echo(message, color=None):
    """Print ``message`` through click, optionally in a foreground color.

    :param message: text to print
    :param color: foreground color name handed to ``click.style`` (or None)
    """
    styled = click.style(message, fg=color)
    click.echo(styled)
class PegasusCluster(object):
    """A Pegasus cluster described by one ``pegasus-<name>.cfg`` file.

    Cluster state is queried by piping a command into ``./run.sh shell``
    inside the directory named by the ``PEGASUS_SHELL_PATH`` environment
    variable.
    """

    # Nodes are reported when min/max primary count drops below this ratio.
    _BALANCE_RATIO_THRESHOLD = 0.8

    def __init__(self, cfg_file_name):
        """
        :param cfg_file_name: path of the cluster config file; the cluster
            name is its basename with "pegasus-" and ".cfg" removed

        Exits the process with status 1 when PEGASUS_SHELL_PATH is not set.
        """
        self._cluster_name = os.path.basename(cfg_file_name).replace(
            "pegasus-", "").replace(".cfg", "")
        self._shell_path = os.getenv("PEGASUS_SHELL_PATH")
        self._cfg_file_name = cfg_file_name
        if self._shell_path is None:
            echo(
                "Please configure environment variable PEGASUS_SHELL_PATH in your bashrc or zshrc",
                "red")
            exit(1)

    @staticmethod
    def _parse_count(text, key):
        """Return the integer after ':' on the first line starting with key.

        :raise RuntimeError: if no such line exists or its value is not an
            integer (RuntimeError is what the calling scripts handle)
        """
        for line in text.splitlines():
            if not line.startswith(key):
                continue
            try:
                return int(line.split(":")[1])
            except (IndexError, ValueError):
                raise RuntimeError(
                    "malformed line in shell output: \"{0}\"".format(line))
        raise RuntimeError(
            "\"{0}\" not found in shell output".format(key))

    @staticmethod
    def _extract_result(output):
        """Return the lines of raw shell output that form the command result.

        The result is everything after the line starting with
        "The cluster meta list is:" and before the line starting with
        "dsn exit with code"; each kept line is newline-terminated.
        """
        kept = []
        result_begin = False
        for line in output.splitlines():
            if line.startswith("The cluster meta list is:"):
                result_begin = True
                continue
            if line.startswith("dsn exit with code"):
                break
            if result_begin:
                kept.append(line + "\n")
        return "".join(kept)

    def print_unhealthy_partitions(self):
        """Report whether the cluster has write- or read-unhealthy apps.

        Runs "ls -d" and reads the read/write unhealthy app counters; a
        write-unhealthy cluster is reported in preference to a
        read-unhealthy one.

        :raise RuntimeError: if the shell fails or a counter is missing
        """
        list_detail = self._run_shell("ls -d").strip()
        read_unhealthy_app_count = self._parse_count(
            list_detail, "read_unhealthy_app_count")
        write_unhealthy_app_count = self._parse_count(
            list_detail, "write_unhealthy_app_count")

        if write_unhealthy_app_count > 0:
            echo("cluster is write unhealthy, write_unhealthy_app_count = " +
                 str(write_unhealthy_app_count))
            return
        if read_unhealthy_app_count > 0:
            echo("cluster is read unhealthy, read_unhealthy_app_count = " +
                 str(read_unhealthy_app_count))
            return

    def print_imbalance_nodes(self):
        """Print the "nodes -d" output when primaries are unevenly spread.

        The fourth column of each node row is taken as its primary count;
        the table is printed when the smallest count is below 80% of the
        largest. Nothing is printed when no node row can be parsed or when
        no node holds any primary.

        :raise RuntimeError: if the shell fails
        """
        nodes_detail = self._run_shell("nodes -d").strip()

        primaries_per_node = []
        # The first line is skipped (treated as the table header).
        for line in nodes_detail.splitlines()[1:]:
            columns = line.strip().split()
            if len(columns) < 5 or not columns[4].isdigit():
                continue
            if not columns[3].isdigit():
                # Guard the column that is actually parsed below.
                continue
            primaries_per_node.append(int(columns[3]))

        if not primaries_per_node:
            return
        least = min(primaries_per_node)
        most = max(primaries_per_node)
        if most == 0:
            # No primaries anywhere: nothing to compare, and avoids 0/0.
            return
        if float(least) / float(most) < self._BALANCE_RATIO_THRESHOLD:
            # Parenthesized single-argument form behaves identically under
            # the Python 2 print statement and the Python 3 function.
            print(nodes_detail)

    def get_meta_port(self):
        """Return the int value of the first "base_port" line in the config.

        :return: the port, or None when the config has no such line
        """
        with open(self._cfg_file_name) as cfg:
            for line in cfg:
                if line.strip().startswith("base_port"):
                    return int(line.split("=")[1])
        return None

    def get_meta_host(self):
        """Return the stripped value of the first "host.0" line in the config.

        :return: the host string, or None when the config has no such line
        """
        with open(self._cfg_file_name) as cfg:
            for line in cfg:
                if line.strip().startswith("host.0"):
                    return line.split("=")[1].strip()
        return None

    def _run_shell(self, args):
        """
        :param args: arguments passed to ./run.sh shell (type `string`)
        :return: shell output
        :raise RuntimeError: if the command exits with a non-zero status
        """
        global _global_verbose

        cmd = "cd {1}; echo {0} | ./run.sh shell -n {2}".format(
            args, self._shell_path, self._cluster_name)
        if _global_verbose:
            echo("executing command: \"{0}\"".format(cmd))

        status, output = commands.getstatusoutput(cmd)
        if status != 0:
            raise RuntimeError("failed to execute \"{0}\": {1}".format(
                cmd, output))
        return self._extract_result(output)

    def name(self):
        """Return the cluster name derived from the config file name."""
        return self._cluster_name
def list_pegasus_clusters(config_path, env):
    """Build a PegasusCluster for every matching config file in a directory.

    A file matches when it is a regular file named ``pegasus-<env>*.cfg``
    and does not end with ``proxy.cfg``.

    :param config_path: directory holding the cluster config files
    :param env: name prefix to filter on (e.g. "c3srv"); "" or None
        selects every cluster
    :return: list of PegasusCluster, in ``os.listdir`` order
    """
    # None (e.g. an omitted command-line option) means "no filter" instead
    # of failing on str + None.
    prefix = "pegasus-" + (env or "")

    clusters = []
    for fname in os.listdir(config_path):
        full_path = os.path.join(config_path, fname)
        if not os.path.isfile(full_path):
            continue
        if not fname.startswith(prefix):
            continue
        if not fname.endswith(".cfg") or fname.endswith("proxy.cfg"):
            continue
        clusters.append(PegasusCluster(full_path))
    return clusters
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册