# clusterSetup.py (7.5 KB)
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-

from fabric import Connection
import random
import time
import logging

class Node:
    """A single remote TDengine host controlled over SSH.

    Every operation runs a shell command on the remote machine through the
    fabric ``Connection`` stored in ``self.conn``. Most methods are
    best-effort: a failing remote command is printed and logged instead of
    raised, so a long-running test loop keeps going.
    """

    def __init__(self, index, username, hostIP, hostName, password, homeDir):
        """Record the host details and create the SSH connection object.

        Args:
            index: Numeric id of this node; used in error messages.
            username: SSH login user.
            hostIP: IP address of the host (stored only; the connection is
                addressed by ``hostName``).
            hostName: Host name the SSH connection targets.
            password: SSH password for ``username``.
            homeDir: Remote directory that install packages are uploaded to.
        """
        self.index = index
        self.username = username
        self.hostIP = hostIP
        self.hostName = hostName
        self.homeDir = homeDir
        self.corePath = '/coredump'
        self.conn = Connection("{}@{}".format(username, hostName), connect_kwargs={"password": "{}".format(password)})

    def _runBestEffort(self, command, errorMessage):
        """Run ``command`` remotely; on failure print and log, never raise.

        Args:
            command: Shell command line to execute on the remote host.
            errorMessage: ``%``-format string with one ``%d`` slot that is
                filled with this node's index when the command fails.

        Returns:
            The result of ``self.conn.run`` on success, ``None`` on failure.
        """
        try:
            return self.conn.run(command)
        except Exception as e:
            print(errorMessage % self.index)
            logging.exception(e)
            return None

    def buildTaosd(self):
        """Update both source trees to ``develop`` and build/install taosd.

        Failures are printed and logged; nothing is raised.
        """
        try:
            # ``cd`` is a context manager: it only affects commands that run
            # inside the ``with`` block.
            with self.conn.cd("/root/TDinternal/community"):
                self.conn.run("git checkout develop")
                self.conn.run("git pull")
            with self.conn.cd("/root/TDinternal"):
                self.conn.run("git checkout develop")
                self.conn.run("git pull")
            with self.conn.cd("/root/TDinternal/debug"):
                self.conn.run("cmake ..")
                self.conn.run("make")
                self.conn.run("make install")
        except Exception as e:
            print("Build Taosd error for node %d " % self.index)
            logging.exception(e)

    def startTaosd(self):
        """Start the taosd systemd service (best-effort)."""
        self._runBestEffort("sudo systemctl start taosd", "Start Taosd error for node %d ")

    def stopTaosd(self):
        """Stop the taosd systemd service (best-effort)."""
        self._runBestEffort("sudo systemctl stop taosd", "Stop Taosd error for node %d ")

    def restartTaosd(self):
        """Restart the taosd systemd service (best-effort)."""
        self._runBestEffort("sudo systemctl restart taosd", "Restart Taosd error for node %d ")

    def removeTaosd(self):
        """Uninstall TDengine by running ``rmtaos`` (best-effort)."""
        self._runBestEffort("rmtaos", "remove taosd error for node %d ")

    def forceStopOneTaosd(self):
        """Send SIGKILL to every process whose command line mentions taosd.

        ``grep -v grep`` drops the grep process and the invoking shell (whose
        command line contains this pipeline) from the list of PIDs to kill.
        """
        self._runBestEffort(
            "kill -9 $(ps -ax|grep taosd|grep -v grep|awk '{print $1}')",
            "kill taosd error on node%d ")

    def startOneTaosd(self):
        """Launch taosd directly in the background, detached via nohup."""
        self._runBestEffort(
            "nohup taosd -c /etc/taos/ > /dev/null 2>&1 &",
            "start taosd error on node%d ")

    def installTaosd(self, packagePath):
        """Upload a server package, unpack it and run its installer.

        Args:
            packagePath: Local path of the ``.tar.gz`` package to upload.

        Raises:
            Whatever the upload or a remote command raises; this method does
            not swallow errors.
        """
        self.conn.put(packagePath, self.homeDir)
        with self.conn.cd(self.homeDir):
            self.conn.run("tar -zxf $(basename '%s')" % packagePath)
            with self.conn.cd("TDengine-enterprise-server"):
                self.conn.run("yes|./install.sh")

    def configTaosd(self, taosConfigKey, taosConfigValue):
        """Append ``<key> <value>`` to /etc/taos/taos.cfg.

        The line is piped through ``sudo tee -a`` so that the write itself is
        privileged; with ``sudo echo ... >> file`` only ``echo`` is elevated.
        """
        self.conn.run("echo '%s %s' | sudo tee -a %s > /dev/null" % (taosConfigKey, taosConfigValue, "/etc/taos/taos.cfg"))

    def removeTaosConfig(self, taosConfigKey, taosConfigValue):
        """Delete every ``<key> <value>`` line from /etc/taos/taos.cfg.

        Uses ``sed -i``; the former ``-in-place`` spelling is read by sed as
        ``-i`` with backup suffix ``n-place`` and left a stray backup file.
        """
        self.conn.run("sudo sed -i -e '/%s %s/d' %s" % (taosConfigKey, taosConfigValue, "/etc/taos/taos.cfg"))

    def configHosts(self, ip, name):
        """Append an ``<ip> <name>`` entry to /etc/hosts (no sudo is used)."""
        self.conn.run("echo '%s %s' >> %s" % (ip, name, '/etc/hosts'))

    def removeData(self):
        """Delete everything under /var/lib/taos (best-effort)."""
        self._runBestEffort("sudo rm -rf /var/lib/taos/*", "remove taosd data error for node %d ")

    def removeLog(self):
        """Delete everything under /var/log/taos (best-effort)."""
        self._runBestEffort("sudo rm -rf /var/log/taos/*", "remove taosd log error for node %d ")

    def removeDataForMnode(self):
        """Delete everything under /var/lib/taos (best-effort).

        NOTE(review): this removes the same path as ``removeData``; confirm
        whether an mnode-specific sub-directory was intended.
        """
        self._runBestEffort("sudo rm -rf /var/lib/taos/*", "remove mnode data error for node %d ")

    def removeDataForVnode(self, id):
        """Delete the ``*.data`` files of one vnode directory (best-effort).

        Args:
            id: Integer vnode number; selects ``/var/lib/taos/vnode<id>``.
        """
        self._runBestEffort("sudo rm -rf /var/lib/taos/vnode%d/*.data" % id, "remove vnode data error for node %d ")

    def detectCoredumpFile(self):
        """Search ``self.corePath`` for files named ``core_*``.

        Returns:
            The stdout of the remote ``find`` (one path per line, empty when
            nothing matched), or ``None`` if the command failed.
        """
        try:
            result = self.conn.run("find %s -name 'core_*' " % self.corePath, hide=True)
            output = result.stdout
            print("output: %s" % output)
            return output
        except Exception as e:
            print("find coredump file error on node %d " % self.index)
            logging.exception(e)
            return None
        

class Nodes:
    """The set of ``Node`` objects that make up the cluster under test.

    Methods either address one node by its position in ``self.tdnodes`` or
    apply the same operation to every node in list order.
    """

    def __init__(self):
        """Build the node list; only the first host is currently enabled."""
        self.tdnodes = [
            Node(0, 'root', '192.168.17.194', 'taosdata', 'r', '/root/'),
        ]
        # Additional cluster members, currently disabled:
        # self.tdnodes.append(Node(1, 'root', '52.250.48.222', 'node2', 'a', '/root/'))
        # self.tdnodes.append(Node(2, 'root', '51.141.167.23', 'node3', 'a', '/root/'))
        # self.tdnodes.append(Node(3, 'root', '52.247.207.173', 'node4', 'a', '/root/'))
        # self.tdnodes.append(Node(4, 'root', '51.141.166.100', 'node5', 'a', '/root/'))

    def stopOneNode(self, index):
        """Stop the service on node ``index``, then force-kill what is left."""
        target = self.tdnodes[index]
        target.stopTaosd()
        target.forceStopOneTaosd()

    def startOneNode(self, index):
        """Launch taosd directly on node ``index``."""
        target = self.tdnodes[index]
        target.startOneTaosd()

    def detectCoredumpFile(self, index):
        """Return whatever node ``index`` reports from its core-file search."""
        target = self.tdnodes[index]
        return target.detectCoredumpFile()

    def stopAllTaosd(self):
        """Stop the taosd service on every node."""
        for member in self.tdnodes:
            member.stopTaosd()

    def startAllTaosd(self):
        """Start the taosd service on every node."""
        for member in self.tdnodes:
            member.startTaosd()

    def restartAllTaosd(self):
        """Restart the taosd service on every node."""
        for member in self.tdnodes:
            member.restartTaosd()

    def addConfigs(self, configKey, configValue):
        """Add the ``configKey configValue`` setting on every node."""
        for member in self.tdnodes:
            member.configTaosd(configKey, configValue)

    def removeConfigs(self, configKey, configValue):
        """Remove the ``configKey configValue`` setting on every node."""
        for member in self.tdnodes:
            member.removeTaosConfig(configKey, configValue)

    def removeAllDataFiles(self):
        """Delete the taosd data files on every node."""
        for member in self.tdnodes:
            member.removeData()

class Test:
    """Long-running chaos/monitoring loops driven against the cluster."""

    def __init__(self):
        """Create the cluster wrapper the loops operate on."""
        self.nodes = Nodes()

    def randomlyKillDnode(self):
        """Endlessly stop a random node, wait 60 s, restart it, wait 600 s.

        Never returns. The victim is drawn from the nodes that are actually
        registered, so the loop works for any non-empty cluster size.

        Raises:
            ValueError: If no node is registered.
        """
        loop = 0
        while True:
            # Pick among the registered nodes instead of assuming five hosts.
            index = random.randrange(len(self.nodes.tdnodes))
            print("loop: %d, kill taosd on node%d" %(loop, index))
            self.nodes.stopOneNode(index)
            time.sleep(60)
            self.nodes.startOneNode(index)
            time.sleep(600)
            loop = loop + 1

    def detectCoredump(self):
        """Poll every node for core files every 10 minutes; never returns.

        When a node reports a non-blank search result, taosd is stopped on
        all nodes. Polling continues afterwards.
        """
        while True:
            for i in range(len(self.nodes.tdnodes)):
                result = self.nodes.detectCoredumpFile(i)
                print("core file path is %s" % result)
                # ``result`` is None when the search failed and empty or
                # whitespace-only when no core file was found.
                if result and not result.isspace():
                    self.nodes.stopAllTaosd()
            print("sleep for 10 mins")
            time.sleep(600)

# Start the core-dump monitor only when executed as a script, so importing
# this module does not block forever in the polling loop.
if __name__ == "__main__":
    test = Test()
    test.detectCoredump()