提交 5b411d09 编写于 作者: C Ciju John

Restart previously killed eosd instances with various sync strategies....

Restart previously killed eosd instances with various sync strategies. SpreadFunds() will skip over dead nodes. Add test calls to driver script.
上级 877d520c
......@@ -19,7 +19,7 @@ import datetime
# Test for different test scenarios.
# Nodes can be producing or non-producing.
# -p <producing nodes count>
# -n <total nodes>
# -c <chain strategy[replay|resync|none]>
# -s <topology>
# -d <delay between nodes startup>
###############################################################
......@@ -30,25 +30,32 @@ ADMIN_ACCOUNT="inita"
INITA_PRV_KEY="5KQwrPbwdL6PhXujxW37FSSQZ1JiwsST4cqQzDeyXtP79zkvFD3"
START_PORT=8888
SYSTEM_BALANCE=0
KILL_PERCENT=.5
KILL_PERCENT=50
AccountInfo=namedtuple("AccountInfo", "name ownerPrivate ownerPublic activePrivate activePublic balance")
EosInstanceInfo=namedtuple("EosInstanceInfo", "port pid cmd alive")
chainStrategy=None
SyncStrategy=namedtuple("ChainSyncStrategy", "name id arg")
chainSyncStrategies={}
chainSyncStrategy=None
ChainArgs={
"replay":"--replay-blockchain",
"resync":"--resync-blockchain"
}
def initializeChainStrategies():
chainSyncStrategy=SyncStrategy("none", 0, "")
chainSyncStrategies[chainSyncStrategy.name]=chainSyncStrategy
chainSyncStrategy=SyncStrategy("replay", 1, "--replay-blockchain")
chainSyncStrategies[chainSyncStrategy.name]=chainSyncStrategy
chainSyncStrategy=SyncStrategy("resync", 2, "--resync-blockchain")
chainSyncStrategies[chainSyncStrategy.name]=chainSyncStrategy
def killall(eosInstanceInfos):
if 0 != subprocess.call(["programs/launcher/launcher", "-k", "15"]):
if 0 != subprocess.call(["programs/launcher/launcher", "-k", "15"], stdout=FNULL):
print("ERROR: Launcher failed to shut down eos cluster.")
for eosInstanceInfo in eosInstanceInfos:
try:
os.kill(eosInstanceInfo.pid, signal.SIGKILL)
except Exception as e:
except Exception as ex:
pass
def cleanup():
......@@ -59,39 +66,21 @@ def errorExit(msg="", errorCode=1):
print("ERROR:", msg)
exit(errorCode)
# def waitForCluserStability (eosInstanceInfos=[], timeout=60):
# DEBUG and print("Waiting for cluster to stabilize.")
# blockNum=1
# return waitOnBlockSync(blockNum, eosInstanceInfos, timeout)
# def waitOnBlockSync (blockNum=1, eosInstanceInfos=[], timeout=60):
# startTime=time.time()
# while time.time()-startTime < timeout:
# stable=True
# for eosInstanceInfo in eosInstanceInfos:
# port=eosInstanceInfo.port
# DEBUG and print("Request block %d from node on port %d" % (blockNum, port))
# if 0 != subprocess.call(["programs/eosc/eosc", "--port", str(port), "get", "block", str(blockNum)],
# stdout=FNULL, stderr=FNULL):
# DEBUG and print("ERROR: Block %d request failed: " % (blockNum))
# stable=False
# break
# if stable is True:
# return True
# time.sleep(5)
# print("Waiting on nodes discovery to finish.")
# return False
def waitOnClusterSync(eosInstanceInfos, timeout=60):
startTime=time.time()
if eosInstanceInfos[0].alive is False:
print("ERROR: Root node is down.")
return False;
targetHeadBlockNum=getHeadBlockNum(eosInstanceInfos[0]) #get root nodes head block num
DEBUG and print("Head block number on root node: %d" % (targetHeadBlockNum))
if targetHeadBlockNum == -1:
return False
currentTimeout=timeout-(time.time()-startTime)
return waitOnClusterBlockNumSync(eosInstanceInfos, targetHeadBlockNum, currentTimeout)
def waitOnClusterBlockNumSync(eosInstanceInfos, targetHeadBlockNum, timeout=60):
startTime=time.time()
while time.time()-startTime < timeout:
synced=True
for eosInstanceInfo in eosInstanceInfos:
......@@ -100,14 +89,14 @@ def waitOnClusterSync(eosInstanceInfos, timeout=60):
DEBUG and print("Request block %d from node on port %d" % (targetHeadBlockNum, port))
if 0 != subprocess.call(["programs/eosc/eosc", "--port", str(port), "get", "block", str(targetHeadBlockNum)],
stdout=FNULL, stderr=FNULL):
DEBUG and print("ERROR: Block %d request failed: " % (targetHeadBlockNum))
DEBUG and print("Block %d request failed: " % (targetHeadBlockNum))
synced=False
break
if synced is True:
return True
time.sleep(5)
print("Waiting on nodes to synchronize.")
#DEBUG and print("Brief pause to allow nodes to catch up.")
time.sleep(3)
return False
......@@ -137,8 +126,8 @@ def createAccountInfos(count):
name=''.join(random.choice(string.ascii_lowercase) for _ in range(5))
accountInfos.append(AccountInfo(name, ownerPrivate, ownerPublic, activePrivate, activePublic, 0))
except Exception as e:
print("ERROR: Exception during key creation:", e)
except Exception as ex:
print("ERROR: Exception during key creation:", ex)
break
if count != len(accountInfos):
......@@ -173,7 +162,6 @@ def createWallet(INITA_PRV_KEY, accountInfos):
return True
def getHeadBlockNum(eosInstanceInfo):
port=eosInstanceInfo.port
num=-1
......@@ -189,8 +177,8 @@ def getHeadBlockNum(eosInstanceInfo):
s=m.group(1)
num=int(s)
except Exception as e:
print("ERROR: Exception during block number retrieval.", e)
except Exception as ex:
print("ERROR: Exception during block number retrieval.", ex)
return -1
return num
......@@ -202,8 +190,8 @@ def waitForNextBlock(eosInstanceInfo):
time.sleep(0.25)
nextNum=getHeadBlockNum(eosInstanceInfo)
def transferFunds(source, destination, amount, port=START_PORT):
def transferFunds(source, destination, amount, eosInstanceInfo):
port=eosInstanceInfo.port
if 0 != subprocess.call(["programs/eosc/eosc", "--port", str(port), "transfer", source, destination,
str(amount), "memo"], stdout=FNULL):
print("ERROR: Failed to transfer funds", amount, "from account "+ source+ " to "+ destination)
......@@ -221,41 +209,66 @@ def spreadFunds(adminAccount, accountInfos, eosInstanceInfos, amount=1, waitNext
count=len(accountInfos)
transferAmount=(count*amount)+amount
port=eosInstanceInfos[0].port
eosInstanceInfo=eosInstanceInfos[0]
port=eosInstanceInfo.port
fromm=adminAccount
to=accountInfos[0].name
print("Transfering %d units from account %s to %s on eos server port %d" % (transferAmount, fromm, to, port))
if False == transferFunds(fromm, to, transferAmount, port):
if False == transferFunds(fromm, to, transferAmount, eosInstanceInfo):
return False
newBalance = accountInfos[0].balance + transferAmount
accountInfos[0] = accountInfos[0]._replace(balance=newBalance);
nextEosIdx=-1
for i in range(0, count):
port=eosInstanceInfos[i].port
accountInfo=accountInfos[i]
nextInstanceFound=False
for n in range(0, count):
#print("nextEosIdx: %d, n: %d" % (nextEosIdx, n))
nextEosIdx=(nextEosIdx + 1)%count
if eosInstanceInfos[nextEosIdx].alive:
#print("nextEosIdx: %d" % (nextEosIdx))
nextInstanceFound=True
break
if nextInstanceFound is False:
print("ERROR: No active nodes found.")
return False
#print("nextEosIdx: %d, count: %d" % (nextEosIdx, count))
eosInstanceInfo=eosInstanceInfos[nextEosIdx]
port=eosInstanceInfo.port
if waitNextBlock:
print("Waiting on next block on eos server port", port)
waitForNextBlock(eosInstanceInfos[i])
waitForNextBlock(eosInstanceInfo)
transferAmount -= amount
fromm=accountInfos[i].name
fromm=accountInfo.name
to=accountInfos[i+1].name if i < (count-1) else adminAccount
print("Transfering %d units from account %s to %s on eos server port %d." % (transferAmount, fromm, to, port))
if False == transferFunds(fromm, to, transferAmount, port):
if False == transferFunds(fromm, to, transferAmount, eosInstanceInfo):
return False
newBalance = accountInfos[i].balance - transferAmount
accountInfos[i] = accountInfos[i]._replace(balance=newBalance);
newBalance = accountInfo.balance - transferAmount
accountInfos[i] = accountInfo._replace(balance=newBalance);
if i < (count-1):
newBalance = accountInfos[i+1].balance + transferAmount
accountInfos[i+1] = accountInfos[i+1]._replace(balance=newBalance);
return True
def validateSpreadFunds(adminAccount, accountInfos, expectedTotal):
actualTotal=getAccountBalance(adminAccount)
def validateSpreadFunds(adminAccount, accountInfos, eosInstanceInfos, expectedTotal):
for eosInstanceInfo in eosInstanceInfos:
if eosInstanceInfo.alive and validateSpreadFundsOnNode(adminAccount, accountInfos, eosInstanceInfo, expectedTotal) is False:
print("ERROR: Failed to validate funds on eos node port: %d" % (eosInstanceInfo.port))
return False
return True
def validateSpreadFundsOnNode(adminAccount, accountInfos, eosInstanceInfo, expectedTotal):
actualTotal=getAccountBalance(adminAccount, eosInstanceInfo.port)
for accountInfo in accountInfos:
fund = getAccountBalance(accountInfo.name)
fund = getAccountBalance(accountInfo.name, eosInstanceInfo.port)
if fund != accountInfo.balance:
print("ERROR: validateSpreadFunds> Expected: %d, actual: %d for account %s" % (accountInfo.balance, fund, accountInfo.name))
return False
......@@ -267,16 +280,16 @@ def validateSpreadFunds(adminAccount, accountInfos, expectedTotal):
return True
def getSystemBalance(adminAccount, accountInfos):
balance=getAccountBalance(adminAccount)
def getSystemBalance(adminAccount, accountInfos, port=START_PORT):
balance=getAccountBalance(adminAccount, port)
for accountInfo in accountInfos:
balance += getAccountBalance(accountInfo.name)
balance += getAccountBalance(accountInfo.name, port)
return balance
def spreadFundsAndValidate(adminAccount, accountInfos, eosInstanceInfos, amount=1):
DEBUG and print("Spread Funds and validate")
initialFunds=getSystemBalance(adminAccount, accountInfos)
initialFunds=getSystemBalance(adminAccount, accountInfos, eosInstanceInfos[0].port)
DEBUG and print("Initial system balance: %d" % (initialFunds))
if False == spreadFunds(adminAccount, accountInfos, eosInstanceInfos, amount):
......@@ -286,15 +299,13 @@ def spreadFundsAndValidate(adminAccount, accountInfos, eosInstanceInfos, amount=
waitForNextBlock(eosInstanceInfos[0])
if False == validateSpreadFunds(adminAccount, accountInfos, initialFunds):
if False == validateSpreadFunds(adminAccount, accountInfos, eosInstanceInfos, initialFunds):
errorExit("Failed to validate funds transfer across nodes.")
print("Funds spread validated")
def getAccountBalance(name):
def getAccountBalance(name, port=START_PORT):
try:
ret=subprocess.check_output(["programs/eosc/eosc", "get", "account", name]).decode("utf-8")
ret=subprocess.check_output(["programs/eosc/eosc", "--port", str(port), "get", "account", name]).decode("utf-8")
#print ("getAccountBalance>", ret)
p=re.compile('\n\s+\"eos_balance\"\:\s+\"(\d+.\d+)\s+EOS\",\n', re.MULTILINE)
m=p.search(ret)
......@@ -342,7 +353,7 @@ def getEosInstanceInfos(totalNodes):
try:
psOut=subprocess.check_output(["pgrep", "-a", "eosd"]).decode("utf-8")
print("psOut: <%s>" % psOut)
#print("psOut: <%s>" % psOut)
for i in range(0, totalNodes):
pattern="[\n]?(\d+) (.* --data-dir tn_data_%02d)\n" % (i)
......@@ -351,11 +362,11 @@ def getEosInstanceInfos(totalNodes):
print("ERROR: Failed to find eosd pid. Pattern %s" % (pattern))
break
instance=EosInstanceInfo(START_PORT + i, int(m.group(1)), m.group(2), True)
print("EosInstanceInfo:", instance)
DEBUG and print("EosInstanceInfo:", instance)
eosInstanceInfos.append(instance)
except Exception as ex:
print("ERROR: Exception during EosInstanceInfos creation:", e)
print("ERROR: Exception during EosInstanceInfos creation:", ex)
return eosInstanceInfos
......@@ -374,14 +385,14 @@ def updateEosInstanceInfos(eosInstanceInfos):
return eosInstanceInfos
# Kills a percentange of Eos instances starting from the tail and update eosInstanceInfos state
def killSomeEosInstances(eosInstanceInfos, percent):
killCount=int(percent*len(eosInstanceInfos))
DEBUG and print("Killing %d Eosd instances." % (killCount))
def killSomeEosInstances(eosInstanceInfos, percent, killSignal=signal.SIGTERM):
killCount=int((percent/100.0)*len(eosInstanceInfos))
DEBUG and print("Killing %d Eosd instances with signal %s." % (killCount, killSignal))
killedCount=0
for eosInstanceInfo in reversed(eosInstanceInfos):
try:
os.kill(eosInstanceInfo.pid, signal.SIGTERM)
os.kill(eosInstanceInfo.pid, killSignal)
except Exception as ex:
print("ERROR: Failed to kill process pid %d." % (eosInstanceInfo.pid), ex)
return False
......@@ -394,7 +405,7 @@ def killSomeEosInstances(eosInstanceInfos, percent):
def relaunchEosInstances(eosInstanceInfos):
chainArg=ChainArgs.get(chainStrategy)
chainArg=chainSyncStrategy.arg
for i in range(0, len(eosInstanceInfos)):
eosInstanceInfo=eosInstanceInfos[i]
......@@ -440,30 +451,24 @@ def createAccounts(accountInfos, eosInstanceInfo):
return True
def killRelaunchAndResync(eosInstanceInfos, KILL_PERCENT):
DEBUG and print("Killing %.2f%% cluster node instances." % (KILL_PERCENT))
if killSomeEosInstances(eosInstanceInfos, KILL_PERCENT) is False:
print("ERROR: Failed to kill Eos instances")
return False
DEBUG and print("Eosd instances killed.")
return True
initializeChainStrategies()
parser = argparse.ArgumentParser()
parser.add_argument("-p", type=int, help="producing nodes count", default=1)
parser.add_argument("-p", type=int, help="producing nodes count", default=2)
parser.add_argument("-d", type=int, help="delay between nodes startup", default=1)
parser.add_argument("-s", type=str, help="topology", default="star")
parser.add_argument("-s", type=str, help="topology", default="mesh")
parser.add_argument("-c", type=str, help="chain strategy[replay|resync|none]", default="none")
args = parser.parse_args()
pnodes=args.p
topo=args.s
delay=args.d
chainStrategy=args.c
chainSyncStrategyStr=args.c
total_nodes = pnodes
print ("producing nodes: %d , topology: %s , delay between nodes launch(seconds): %d" % (pnodes, topo, delay))
print ("producing nodes: %d, topology: %s, delay between nodes launch(seconds): %d, chain sync strategy: %s" % (pnodes, topo, delay, chainSyncStrategyStr))
chainSyncStrategy=chainSyncStrategies.get(chainSyncStrategyStr, chainSyncStrategies["none"])
cleanup()
random.seed(1) # Use a fixed seed for repeatability.
......@@ -477,7 +482,8 @@ try:
if total_nodes != len(eosInstanceInfos):
errorExit("ERROR: Unable to validate eosd instances, expected: %d, actual: %d" % (total_nodes, len(eosInstanceInfos)))
time.sleep(3) #Give time for system to produce some clusters
#if not waitForCluserStability(eosInstanceInfos):
if not waitOnClusterSync(eosInstanceInfos):
errorExit("Cluster never stabilized")
......@@ -489,7 +495,7 @@ try:
errorExit("Account keys creation failed.")
print ("Account keys created.")
SYSTEM_BALANCE=getAccountBalance(ADMIN_ACCOUNT)
SYSTEM_BALANCE=getAccountBalance(ADMIN_ACCOUNT, eosInstanceInfos[0].port)
if not createWallet(INITA_PRV_KEY, accountInfos):
errorExit("Wallet creation failed.")
......@@ -499,16 +505,21 @@ try:
errorExit("Accounts creation failed.")
print("Accounts created.")
spreadFundsAndValidate(ADMIN_ACCOUNT, accountInfos, eosInstanceInfos, 10)
print("Funds spread validated")
print("Funds spread and validated")
print("Killing %.2f%% cluster node instances." % (KILL_PERCENT))
if killSomeEosInstances(eosInstanceInfos, KILL_PERCENT) is False:
killSignal=signal.SIGTERM
if chainSyncStrategy.id is 0:
killSignal=signal.SIGKILL
print("Killing %d%% cluster node instances." % (KILL_PERCENT))
if killSomeEosInstances(eosInstanceInfos, KILL_PERCENT, killSignal) is False:
errorExit("Failed to kill Eos instances")
print("Eosd instances killed.")
# spreadFundsAndValidate(ADMIN_ACCOUNT, accountInfos, eosInstanceInfos, 10)
# print("Funds spread validated")
spreadFundsAndValidate(ADMIN_ACCOUNT, accountInfos, eosInstanceInfos, 10)
print("Funds spread and validated")
print ("Relaunching dead cluster nodes instances.")
if relaunchEosInstances(eosInstanceInfos) is False:
......@@ -518,12 +529,13 @@ try:
print ("Resyncing cluster nodes.")
if not waitOnClusterSync(eosInstanceInfos):
errorExit("Cluster never synchronized")
print ("Cluster resynched")
print ("Cluster synched")
spreadFundsAndValidate(ADMIN_ACCOUNT, accountInfos, eosInstanceInfos, 10)
print("Funds spread validated")
print("Funds spread and validated")
finally:
print("Shut down the cluster and cleanup.")
killall(eosInstanceInfos)
cleanup()
pass
......
......@@ -8,6 +8,15 @@ scriptNames[0]='trans_sync_across_mixed_cluster_test.sh -p 1 -n 2'
# Test to transfer funds across multiple accounts in cluster and validate
scriptNames[1]='distributed-transactions-test.py -p 1 -n 4'
# Test to kill node with SIGTERM and restart with argument "--resync-blockchain"
scriptNames[2]='restart-scenarios-test.py -c resync -p3'
# Test to kill node with SIGTERM and restart with argument "--replay-blockchain"
scriptNames[3]='restart-scenarios-test.py -c replay -p3'
# Test to kill node with SIGKILL and restart without a chain sync strategy argument
scriptNames[4]='restart-scenarios-test.py -c none -p3'
failingTestsCount=0
for scriptName in "${scriptNames[@]}"; do
echo "********* Running script: $scriptName"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册