clusterSetup.py 7.5 KB
Newer Older
P
Ping Xiao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-

from fabric import Connection
import random
P
Ping Xiao 已提交
16
import time
P
Ping Xiao 已提交
17 18 19 20 21 22 23 24 25
import logging

class Node:
    def __init__(self, index, username, hostIP, hostName, password, homeDir):
        """Record the node's identity and create the fabric connection to it.

        index is the node's number used in log messages; username/password are
        the SSH credentials; homeDir is the remote directory packages are
        uploaded to.  The connection is addressed by hostName; hostIP is only
        stored on the instance.
        """
        self.index, self.username = index, username
        self.hostIP, self.hostName = hostIP, hostName
        self.homeDir = homeDir
        # Location where core dump files are expected on the node.
        self.corePath = '/coredump'
        target = "{user}@{host}".format(user=username, host=hostName)
        credentials = {"password": str(password)}
        self.conn = Connection(target, connect_kwargs=credentials)
P
Ping Xiao 已提交
28
    
P
Ping Xiao 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
    def buildTaosd(self):
        try:
            self.conn.cd("/root/TDinternal/community")
            self.conn.run("git checkout develop")
            self.conn.run("git pull")
            self.conn.cd("/root/TDinternal")
            self.conn.run("git checkout develop")
            self.conn.run("git pull")
            self.conn.cd("/root/TDinternal/debug")
            self.conn.run("cmake ..")
            self.conn.run("make")
            self.conn.run("make install")
        except Exception as e:
            print("Build Taosd error for node %d " % self.index)
            logging.exception(e)
            pass

P
Ping Xiao 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
    def startTaosd(self):
        try:
            self.conn.run("sudo systemctl start taosd")
        except Exception as e:
            print("Start Taosd error for node %d " % self.index)
            logging.exception(e)
        
    def stopTaosd(self):
        try:
            self.conn.run("sudo systemctl stop taosd")
        except Exception as e:
            print("Stop Taosd error for node %d " % self.index)
            logging.exception(e)
    
    def restartTaosd(self):
        try:
            self.conn.run("sudo systemctl restart taosd")
        except Exception as e:
            print("Stop Taosd error for node %d " % self.index)
P
Ping Xiao 已提交
65
            logging.exception(e)    
P
Ping Xiao 已提交
66 67 68 69 70 71 72 73

    def removeTaosd(self):
        try:
            self.conn.run("rmtaos")
        except Exception as e:
            print("remove taosd error for node %d " % self.index)
            logging.exception(e)
    
P
Ping Xiao 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86
    def forceStopOneTaosd(self):
        try:
            self.conn.run("kill -9 $(ps -ax|grep taosd|awk '{print $1}')")
        except Exception as e:
            print("kill taosd error on node%d " % self.index)            
    
    def startOneTaosd(self):
        try:
            self.conn.run("nohup taosd -c /etc/taos/ > /dev/null 2>&1 &")
        except Exception as e:
            print("start taosd error on node%d " % self.index)
            logging.exception(e)    
    
P
Ping Xiao 已提交
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
    def installTaosd(self, packagePath):
        self.conn.put(packagePath, self.homeDir)
        self.conn.cd(self.homeDir)
        self.conn.run("tar -zxf $(basename '%s')" % packagePath)
        with self.conn.cd("TDengine-enterprise-server"):
            self.conn.run("yes|./install.sh")

    def configTaosd(self, taosConfigKey, taosConfigValue):
        self.conn.run("sudo echo '%s %s' >> %s" % (taosConfigKey, taosConfigValue, "/etc/taos/taos.cfg"))

    def removeTaosConfig(self, taosConfigKey, taosConfigValue): 
        self.conn.run("sudo sed -in-place -e '/%s %s/d' %s" % (taosConfigKey, taosConfigValue, "/etc/taos/taos.cfg"))
    
    def configHosts(self, ip, name):
        self.conn.run("echo '%s %s' >> %s" % (ip, name, '/etc/hosts'))

    def removeData(self):
        try:
            self.conn.run("sudo rm -rf /var/lib/taos/*")
        except Exception as e:
            print("remove taosd data error for node %d " % self.index)
            logging.exception(e)
    
    def removeLog(self):
        try:
            self.conn.run("sudo rm -rf /var/log/taos/*")
        except Exception as e:
            print("remove taosd error for node %d " % self.index)
            logging.exception(e)

    def removeDataForMnode(self):
        try:
            self.conn.run("sudo rm -rf /var/lib/taos/*")
        except Exception as e:
            print("remove taosd error for node %d " % self.index)
            logging.exception(e)

    def removeDataForVnode(self, id):
        try:
            self.conn.run("sudo rm -rf /var/lib/taos/vnode%d/*.data" % id)
        except Exception as e:
            print("remove taosd error for node %d " % self.index)
            logging.exception(e)
P
Ping Xiao 已提交
130 131 132 133 134 135 136 137 138 139 140 141
    

    def detectCoredumpFile(self):
        try:
            result = self.conn.run("find /coredump -name 'core_*' ", hide=True)
            output = result.stdout
            print("output: %s" % output)
            return output
        except Exception as e:
            print("find coredump file error on node %d " % self.index)
            logging.exception(e)
        
P
Ping Xiao 已提交
142 143 144

class Nodes:
    def __init__(self):
        """Create the Node objects that make up the cluster."""
        # Only node 0 is enabled; the commented entries are kept as templates
        # for the other cluster members.
        self.tdnodes = [
            Node(0, 'root', '192.168.17.194', 'taosdata', 'r', '/root/'),
            # Node(1, 'root', '52.250.48.222', 'node2', 'a', '/root/'),
            # Node(2, 'root', '51.141.167.23', 'node3', 'a', '/root/'),
            # Node(3, 'root', '52.247.207.173', 'node4', 'a', '/root/'),
            # Node(4, 'root', '51.141.166.100', 'node5', 'a', '/root/'),
        ]
P
Ping Xiao 已提交
151 152

    def stopOneNode(self, index):
P
Ping Xiao 已提交
153
        self.tdnodes[index].stopTaosd()
P
Ping Xiao 已提交
154 155 156 157
        self.tdnodes[index].forceStopOneTaosd()
    
    def startOneNode(self, index):
        self.tdnodes[index].startOneTaosd()
P
Ping Xiao 已提交
158 159 160
    
    def detectCoredumpFile(self, index):
        return self.tdnodes[index].detectCoredumpFile()
P
Ping Xiao 已提交
161 162

    def stopAllTaosd(self):
P
Ping Xiao 已提交
163 164 165
        for i in range(len(self.tdnodes)):
            self.tdnodes[i].stopTaosd()

P
Ping Xiao 已提交
166
    def startAllTaosd(self):
P
Ping Xiao 已提交
167 168
        for i in range(len(self.tdnodes)):
            self.tdnodes[i].startTaosd()                    
P
Ping Xiao 已提交
169 170
    
    def restartAllTaosd(self):
P
Ping Xiao 已提交
171 172
        for i in range(len(self.tdnodes)):
            self.tdnodes[i].restartTaosd()       
P
Ping Xiao 已提交
173 174
    
    def addConfigs(self, configKey, configValue):          
P
Ping Xiao 已提交
175 176
        for i in range(len(self.tdnodes)):
            self.tdnodes[i].configTaosd(configKey, configValue)        
P
Ping Xiao 已提交
177
    
P
Ping Xiao 已提交
178 179 180
    def removeConfigs(self, configKey, configValue): 
        for i in range(len(self.tdnodes)):
            self.tdnodes[i].removeTaosConfig(configKey, configValue)  
P
Ping Xiao 已提交
181 182
    
    def removeAllDataFiles(self):
P
Ping Xiao 已提交
183 184 185
        for i in range(len(self.tdnodes)):
            self.tdnodes[i].removeData()

P
Ping Xiao 已提交
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
class Test:
    """Long-running cluster scenarios driven through a Nodes instance."""

    def __init__(self):
        self.nodes = Nodes()

    # kill taosd randomly every 10 mins
    def randomlyKillDnode(self):
        """Repeatedly stop a random node, wait 60 s, start it, wait 600 s.

        Loops forever; it only ends if one of the calls raises.
        """
        loop = 0
        while True:
            # Pick among the nodes that are actually configured instead of
            # assuming a fixed five-node cluster.
            index = random.randrange(len(self.nodes.tdnodes))
            print("loop: %d, kill taosd on node%d" %(loop, index))
            self.nodes.stopOneNode(index)
            time.sleep(60)
            self.nodes.startOneNode(index)
            time.sleep(600)
            loop = loop + 1

    def detectCoredump(self):
        """Poll every node for core dump files every 600 s, forever.

        Whenever a node reports a non-blank result, taosd is stopped on all
        nodes.  Polling continues afterwards.
        """
        while True:
            for i in range(len(self.nodes.tdnodes)):
                result = self.nodes.detectCoredumpFile(i)
                print("core file path is %s" % result)
                # result is None on lookup failure and whitespace-only when
                # find matched nothing; neither counts as a core dump.
                if result and not result.isspace():
                    self.nodes.stopAllTaosd()
            print("sleep for 10 mins")
            time.sleep(600)

# Script entry point: build the test harness and start the core dump monitor.
# NOTE(review): there is no __main__ guard, so this also runs on import, and
# detectCoredump() loops forever, so nothing after this line executes.
test = Test()
test.detectCoredump()