diff --git a/scripts/pegasus_rolling_update.sh b/scripts/pegasus_rolling_update.sh index 24b365f31f31bfc32addc38d6503c780bb9cb335..2ca46bb92bae467d6ddeac178c803d549e15e5f7 100755 --- a/scripts/pegasus_rolling_update.sh +++ b/scripts/pegasus_rolling_update.sh @@ -124,58 +124,70 @@ do echo "ERROR: set lb.add_secondary_max_count_for_one_node to 0 failed" exit 1 fi + echo echo "Migrating primary replicas out of node..." - ./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.rolling_update.migrate_node - echo "Wait [$node] to migrate done..." - echo "Refer to /tmp/$UID.$PID.pegasus.rolling_update.migrate_node for details" + sleeped=0 while true do + if [ $((sleeped%10)) -eq 0 ]; then + ./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.rolling_update.migrate_node + echo "Send migrate propose, refer to /tmp/$UID.$PID.pegasus.rolling_update.migrate_node for details" + fi pri_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $4}'` if [ $pri_count -eq 0 ]; then echo "Migrate done." break + elif [ $sleeped -gt 28 ]; then + echo "Migrate timeout." + break else echo "Still $pri_count primary replicas left on $node" sleep 1 + sleeped=$((sleeped+1)) fi done echo sleep 1 echo "Downgrading replicas on node..." - ./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.rolling_update.downgrade_node - echo "Wait [$node] to downgrade done..." - echo "Refer to /tmp/$UID.$PID.pegasus.rolling_update.downgrade_node for details" + sleeped=0 while true do + if [ $((sleeped%10)) -eq 0 ]; then + ./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.rolling_update.downgrade_node + echo "Send downgrade propose, refer to /tmp/$UID.$PID.pegasus.rolling_update.downgrade_node for details" + fi rep_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $3}'` if [ $rep_count -eq 0 ]; then echo "Downgrade done." 
break + elif [ $sleeped -gt 28 ]; then + echo "Downgrade timeout." + break else echo "Still $rep_count replicas left on $node" sleep 1 + sleeped=$((sleeped+1)) fi done echo sleep 1 - echo "Send kill_partition commands to node..." - grep '^propose ' /tmp/$UID.$PID.pegasus.rolling_update.downgrade_node >/tmp/$UID.$PID.pegasus.rolling_update.downgrade_node.propose - while read line2 - do - gpid=`echo $line2 | awk '{print $3}' | sed 's/\./ /'` - echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.kill_partition - done /tmp/$UID.$PID.pegasus.rolling_update.downgrade_node.propose + while read line2 + do + gpid=`echo $line2 | awk '{print $3}' | sed 's/\./ /'` + echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.kill_partition + done /tmp/$UID.$PID.pegasus.rolling_update.replica_count_perf_counters serving_count=`grep -o 'replica_stub.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.$PID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'` opening_count=`grep -o 'replica_stub.opening.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.$PID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'` @@ -188,7 +200,7 @@ do if [ $rep_count -eq 0 ]; then echo "Close done." break - elif [ $sleeped -gt 20 ]; then + elif [ $sleeped -gt 28 ]; then echo "Close timeout." 
break else diff --git a/src/shell/commands.h b/src/shell/commands.h index 2de5de617a95249255461dc9179dfe266c7a2df9..ef3f2afe5d6e2b189b55283527a7c3a4d233b2c6 100644 --- a/src/shell/commands.h +++ b/src/shell/commands.h @@ -3654,10 +3654,9 @@ inline bool app_stat(command_executor *e, shell_context *sc, arguments args) << std::setw(w) << std::right << "INCR" << std::setw(w) << std::right << "CAS" << std::setw(w) << std::right << "CAM" << std::setw(w) << std::right << "SCAN"; if (!only_qps) { - out << std::setw(w) << std::right << "storage_mb" << std::setw(w) << std::right - << "file_count" << std::setw(w) << std::right << "expired" << std::setw(w) << std::right - << "filtered" << std::setw(w) << std::right << "abnormal" << std::setw(w) << std::right - << "file_mb" << std::setw(w) << std::right << "file_num"; + out << std::setw(w) << std::right << "expired" << std::setw(w) << std::right << "filtered" + << std::setw(w) << std::right << "abnormal" << std::setw(w) << std::right << "file_mb" + << std::setw(w) << std::right << "file_num"; } out << std::endl; rows.resize(rows.size() + 1);