未验证 提交 d67bc43a 编写于 作者: S Shuo 提交者: GitHub

improvement(lb): add add_node_list.sh to add nodes with copy_pri after all copy_sec done (#528)

Add add_node_list.sh, which adds replica nodes so that copying primaries (copy_pri) only starts after all copy-secondary (copy_sec) operations are done.
上级 3b980f6a
......@@ -123,3 +123,21 @@ function minos_restart()
fi
cd $pwd
}
# usage: minos_bootstrap <cluster_name> <job_name> [task_id]
#
# Runs "./deploy bootstrap pegasus <cluster_name> --job <job_name> [--task <task_id>]"
# from inside $minos_client_dir and then changes back to the directory the
# caller was in.
#
# Globals:   minos_client_dir (read) - directory that contains ./deploy
# Arguments: $1 - cluster name
#            $2 - job name
#            $3 - optional task id
# Outputs:   echoes the deploy command line, followed by deploy's own output
# Exits:     terminates the calling shell with status 1 when the client
#            directory is unset/unreachable or when deploy fails
function minos_bootstrap()
{
    local saved_dir
    saved_dir=$(pwd)

    local options="--job $2"
    if [ -n "$3" ]; then
        options="$options --task $3"
    fi

    # An empty value would make a bare "cd" jump to $HOME, and an unquoted
    # path breaks on spaces; both cases used to run ./deploy from the wrong
    # directory, so fail loudly instead.
    if [ -z "$minos_client_dir" ] || ! cd "$minos_client_dir"; then
        echo "ERROR: cannot enter minos client dir \"$minos_client_dir\""
        exit 1
    fi

    echo "./deploy bootstrap pegasus $1 $options"
    # $1 and $options are left unquoted on purpose: $options carries several
    # words that must be split, and the command must match the echoed line.
    if ! ./deploy bootstrap pegasus $1 $options; then
        echo "ERROR: minos bootstrap failed"
        exit 1
    fi

    cd "$saved_dir"
}
#!/bin/bash
#
# Add replica servers using minos.
#
# Flow:
#   1. Source scripts/pegasus_check_arguments.sh to validate the arguments.
#      That script sources scripts/minos_common.sh and sets $pmeta,
#      $all_start_time and $task_id_list, all of which are used below.
#   2. Set the meta level to steady and bootstrap every task in $task_id_list.
#   3. Set meta.lb.only_move_primary to true, switch the meta level to lively
#      and wait until the balance_operation_count reported by "cluster_info"
#      stays at 0.
#   4. Set the meta level back to steady and meta.lb.only_move_primary to false.
#
# Temp files /tmp/$UID.$PID.pegasus.* are removed only on the success path;
# the error exits leave them in place.
#
PID=$$

if [ $# -le 2 ]; then
    echo "USAGE: $0 <cluster-name> <cluster-meta-list> <replica-task-id-list>"
    echo
    echo "For example:"
    echo " $0 onebox 127.0.0.1:34601,127.0.0.1:34602 1,2,3"
    echo
    exit 1
fi

cluster=$1
meta_list=$2
replica_task_id_list=$3

pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd "$pwd/.." && pwd )"
if ! cd "$shell_dir"; then
    echo "ERROR: cannot enter shell dir \"$shell_dir\""
    exit 1
fi

# usage: add_node_list_set_meta_level <level>
# Sends "set_meta_level <level>" through the pegasus shell and exits the
# script with status 1 unless exactly one confirmation line is found.
function add_node_list_set_meta_level()
{
    local level=$1
    local result_file="/tmp/$UID.$PID.pegasus.add_node_list.set_meta_level"
    local set_ok

    echo "Set meta level to $level..."
    echo "set_meta_level $level" | ./run.sh shell --cluster "$meta_list" &>"$result_file"
    set_ok=$(grep 'control meta level ok' "$result_file" | wc -l)
    if [ "$set_ok" -ne 1 ]; then
        echo "ERROR: set meta level to $level failed"
        exit 1
    fi
}

# usage: add_node_list_set_only_move_primary <true|false>
# Sends the meta.lb.only_move_primary remote command to $pmeta and exits the
# script with status 1 unless exactly one "OK" line is found in the reply.
function add_node_list_set_only_move_primary()
{
    local value=$1
    local result_file="/tmp/$UID.$PID.pegasus.add_node_list.only_move_primary"
    local set_ok

    echo "remote_command -l $pmeta meta.lb.only_move_primary $value" \
        | ./run.sh shell --cluster "$meta_list" &>"$result_file"
    set_ok=$(grep OK "$result_file" | wc -l)
    if [ "$set_ok" -ne 1 ]; then
        echo "ERROR: meta.lb.only_move_primary $value"
        exit 1
    fi
}

# Prints the "total=" number of balance_operation_count taken from
# "cluster_info"; prints nothing when that line cannot be extracted.
function add_node_list_balance_op_count()
{
    echo "cluster_info" | ./run.sh shell --cluster "$meta_list" \
        | grep balance_operation_count \
        | grep -o 'total=[0-9][0-9]*' \
        | cut -d= -f2
}

echo "Check the argument..."
# Arguments stay unquoted so that an empty one is dropped and trips the
# usage check of the sourced script instead of being accepted as "".
source ./scripts/pegasus_check_arguments.sh add_node_list $cluster $meta_list $replica_task_id_list
if [ $? -ne 0 ]; then
    echo "ERROR: the argument check failed"
    exit 1
fi

add_node_list_set_meta_level steady

# $task_id_list is a space separated list, so it must be word-split here.
for id in $task_id_list
do
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    minos_bootstrap $cluster replica $id
    if [ $? -ne 0 ]; then
        echo "ERROR: online replica task $id failed"
        exit 1
    fi
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
done

echo "Set meta.lb.only_move_primary true"
echo "This remote-command tells the meta-server to ignore copying primaries during rebalancing."
echo "So the following steps only include move_primary and copy_secondary."
add_node_list_set_only_move_primary true
echo

add_node_list_set_meta_level lively

echo "Wait cluster to become balanced..."
echo "Wait for 3 minutes to do load balance..."
sleep 180
while true; do
    op_count=$(add_node_list_balance_op_count)
    # The original test was [ -z "op_count" ] (a literal string, never empty),
    # so an unavailable count fell through to a broken numeric comparison and
    # the loop never terminated.
    if [ -z "$op_count" ]; then
        echo "WARN: cannot get balance operation count, stop waiting"
        break
    fi

    if [ "$op_count" -eq 0 ]; then
        echo "Cluster may be balanced, try wait 30 seconds..."
        sleep 30
        # Re-check after a pause: the count has to stay at 0 to be trusted.
        op_count=$(add_node_list_balance_op_count)
        if [ -n "$op_count" ] && [ "$op_count" -eq 0 ]; then
            echo "Cluster becomes balanced."
            break
        fi
    else
        echo "Still $op_count balance operations to do..."
        sleep 1
    fi
done
echo

add_node_list_set_meta_level steady

echo "Set meta.lb.only_move_primary false"
echo "This remote-command tells the meta-server to rebalance with copying primaries."
add_node_list_set_only_move_primary false
echo

echo "Finish time: $(date)"
all_finish_time=$(( $(date +%s) ))
echo "add node list done, elasped time is $((all_finish_time - all_start_time)) seconds."

rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
#!/bin/bash
#
# Check offline_node_list.sh and add_node_list.sh arguments.
#
# This file is pulled in with "source" by the node-list scripts, so every
# variable assigned here stays visible to them and every "exit" below ends
# the sourcing script as well. On success it leaves behind, among others:
#   pmeta          - address extracted from the primary_meta_server line
#   rs_port        - port taken from the first address line of "nodes"
#   id_list        - space separated, de-duplicated replica task ids
#   address_list   - comma separated <ip>:<rs_port> of those tasks
#   task_id_list   - exported copy of id_list
#   all_start_time - epoch seconds at which the check started
#
PID=$$

if [ $# -le 3 ]; then
    echo "USAGE: $0 <check_type> <cluster-name> <cluster-meta-list> <replica-task-id-list>"
    echo
    echo "check_type includes: add_node_list, offline_node_list, for example:"
    echo " $0 add_node_list onebox 127.0.0.1:34601,127.0.0.1:34602 1,2,3"
    echo
    exit 1
fi

check_type=$1
cluster=$2
meta_list=$3
replica_task_id_list=$4

# Only the two known check types are accepted.
case "$check_type" in
    add_node_list|offline_node_list)
        ;;
    *)
        echo "ERROR: $check_type is invalid, only support \"add_node_list\" and \"offline_node_list\""
        exit 1
        ;;
esac

source ./scripts/minos_common.sh

if ! find_cluster $cluster; then
    echo "ERROR: cluster \"$cluster\" not found"
    exit 1
fi

echo "UID=$UID"
echo "PID=$PID"
echo "Start time: $(date)"
all_start_time=$(( $(date +%s) ))
echo

# Dump the replica task list via minos; an empty dump is treated as an error.
id_list_file="/tmp/$UID.$PID.pegasus.$check_type.id_list"
echo "Generating $id_list_file..."
minos_show_replica $cluster $id_list_file
replica_server_count=$(cat $id_list_file | wc -l)
if [ $replica_server_count -eq 0 ]; then
    echo "ERROR: replica server count is 0 by minos show"
    exit 1
fi

# The last path component of zookeeper_root has to equal the cluster name.
echo "Generating /tmp/$UID.$PID.pegasus.$check_type.cluster_info..."
echo cluster_info \
    | ./run.sh shell --cluster $meta_list 2>&1 \
    | sed 's/ *$//' >/tmp/$UID.$PID.pegasus.$check_type.cluster_info
cname=$(grep zookeeper_root /tmp/$UID.$PID.pegasus.$check_type.cluster_info \
    | grep -o '/[^/]*$' \
    | grep -o '[^/]*$')
if [ "$cname" != "$cluster" ]; then
    echo "ERROR: cluster name and meta list not matched"
    exit 1
fi

pmeta=$(grep primary_meta_server /tmp/$UID.$PID.pegasus.$check_type.cluster_info \
    | grep -o '[0-9.:]*$')
if [ -z "$pmeta" ]; then
    echo "ERROR: extract primary_meta_server by shell failed"
    exit 1
fi

# The port is taken from the first line of "nodes" that starts with an address.
echo "Generating /tmp/$UID.$PID.pegasus.$check_type.nodes_list..."
echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.$check_type.nodes_list
rs_port=$(grep '^[0-9.]*:' /tmp/$UID.$PID.pegasus.$check_type.nodes_list \
    | head -n 1 \
    | grep -o ':[0-9]*' \
    | grep -o '[0-9]*')
if [ -z "$rs_port" ]; then
    echo "ERROR: extract replica server port by shell failed"
    exit 1
fi

echo "Checking replica task id list..."
address_list=""
id_list=""
for id in $(echo $replica_task_id_list | sed 's/,/ /g'); do
    # Reject an id that was already collected (whole-word match).
    if [ -n "$id_list" ] && echo "$id_list" | grep -q "\<$id\>"; then
        echo "ERROR: duplicate replica task id $id"
        exit 1
    fi

    # Each line of the id list file is looked up by its leading "<id> " column.
    pair=$(grep "^$id " $id_list_file)
    if [ -z "$pair" ]; then
        echo "ERROR: replica task id $id not found, refer to $id_list_file"
        exit 1
    fi

    node_str=$(echo $pair | awk '{print $2}')
    node_ip=$(getent hosts $node_str | awk '{print $1}')
    node=${node_ip}:${rs_port}

    # A separator is only inserted once id_list already holds an entry, so
    # address_list has to be extended before id_list is.
    address_list="${id_list:+$address_list,}$node"
    id_list="${id_list:+$id_list }$id"
done

export task_id_list=$id_list
......@@ -22,77 +22,14 @@ pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd $pwd/.. && pwd )"
cd $shell_dir
source ./scripts/minos_common.sh
find_cluster $cluster
if [ $? -ne 0 ]; then
echo "ERROR: cluster \"$cluster\" not found"
exit 1
fi
echo "UID=$UID"
echo "PID=$PID"
echo "Start time: `date`"
all_start_time=$((`date +%s`))
echo
rs_list_file="/tmp/$UID.$PID.pegasus.rolling_update.rs.list"
echo "Generating $rs_list_file..."
minos_show_replica $cluster $rs_list_file
replica_server_count=`cat $rs_list_file | wc -l`
if [ $replica_server_count -eq 0 ]; then
echo "ERROR: replica server count is 0 by minos show"
exit 1
fi
echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.cluster_info..."
echo cluster_info | ./run.sh shell --cluster $meta_list 2>&1 | sed 's/ *$//' >/tmp/$UID.$PID.pegasus.offline_node_list.cluster_info
cname=`grep zookeeper_root /tmp/$UID.$PID.pegasus.offline_node_list.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
if [ "$cname" != "$cluster" ]; then
echo "ERROR: cluster name and meta list not matched"
exit 1
fi
pmeta=`grep primary_meta_server /tmp/$UID.$PID.pegasus.offline_node_list.cluster_info | grep -o '[0-9.:]*$'`
if [ "$pmeta" == "" ]; then
echo "ERROR: extract primary_meta_server by shell failed"
exit 1
fi
echo "Check the argument..."
source ./scripts/pegasus_check_arguments.sh offline_node_list $cluster $meta_list $replica_task_id_list
echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.nodes..."
echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node_list.nodes
rs_port=`grep '^[0-9.]*:' /tmp/$UID.$PID.pegasus.offline_node_list.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
if [ "$rs_port" == "" ]; then
echo "ERROR: extract replica server port by shell failed"
exit 1
if [ $? -ne 0 ]; then
echo "ERROR: the argument check failed"
exit 1
fi
echo "Checking replica task id list..."
address_list=""
id_list=""
for id in `echo $replica_task_id_list | sed 's/,/ /g'` ; do
if [ "$id_list" != "" ]; then
if echo "$id_list" | grep -q "\<$id\>" ; then
echo "ERROR: duplicate replica task id $id"
exit 1;
fi
fi
pair=`grep "^$id " $rs_list_file`
if [ "$pair" == "" ]; then
echo "ERROR: replica task id $id not found, refer to $rs_list_file"
exit 1;
fi
node_str=`echo $pair | awk '{print $2}'`
node_ip=`getent hosts $node_str | awk '{print $1}'`
node=${node_ip}:${rs_port}
if [ "$id_list" != "" ]; then
id_list="$id_list $id"
address_list="$address_list,$node"
else
id_list="$id"
address_list="$node"
fi
done
echo "Set lb.assign_secondary_black_list..."
echo "remote_command -l $pmeta meta.lb.assign_secondary_black_list $address_list" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list
set_ok=`grep "set ok" /tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list | wc -l`
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册