未验证 提交 0d7579b9 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #2627 from taosdata/feature/sangshuduo/random-file-fail

add --random-file-fail-factor argument to cover more scenarios.
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "tglobal.h" #include "tglobal.h"
#include "dnodeInt.h" #include "dnodeInt.h"
#include "dnodeMain.h" #include "dnodeMain.h"
#include "tfile.h"
static void signal_handler(int32_t signum, siginfo_t *sigInfo, void *context); static void signal_handler(int32_t signum, siginfo_t *sigInfo, void *context);
static sem_t exitSem; static sem_t exitSem;
...@@ -67,6 +68,18 @@ int32_t main(int32_t argc, char *argv[]) { ...@@ -67,6 +68,18 @@ int32_t main(int32_t argc, char *argv[]) {
taosSetAllocMode(TAOS_ALLOC_MODE_DETECT_LEAK, NULL, true); taosSetAllocMode(TAOS_ALLOC_MODE_DETECT_LEAK, NULL, true);
} }
} }
#endif
#ifdef TAOS_RANDOM_FILE_FAIL
else if (strcmp(argv[i], "--random-file-fail-factor") == 0) {
if ( (i+1) < argc ) {
int factor = atoi(argv[i+1]);
printf("The factor of random failure is %d\n", factor);
taosSetRandomFileFailFactor(factor);
} else {
printf("Please specify a number for random failure factor!");
exit(EXIT_FAILURE);
}
}
#endif #endif
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#ifdef TAOS_RANDOM_FILE_FAIL #ifdef TAOS_RANDOM_FILE_FAIL
void taosSetRandomFileFailFactor(int factor);
ssize_t taos_tread(int fd, void *buf, size_t count); ssize_t taos_tread(int fd, void *buf, size_t count);
ssize_t taos_twrite(int fd, void *buf, size_t count); ssize_t taos_twrite(int fd, void *buf, size_t count);
off_t taos_lseek(int fd, off_t offset, int whence); off_t taos_lseek(int fd, off_t offset, int whence);
......
...@@ -26,40 +26,51 @@ ...@@ -26,40 +26,51 @@
#include "os.h" #include "os.h"
#define RANDOM_FILE_FAIL_FACTOR 5 #ifdef TAOS_RANDOM_FILE_FAIL
static int random_file_fail_factor = 20;
void taosSetRandomFileFailFactor(int factor)
{
random_file_fail_factor = factor;
}
#endif
ssize_t taos_tread(int fd, void *buf, size_t count) ssize_t taos_tread(int fd, void *buf, size_t count)
{ {
#ifdef TAOS_RANDOM_FILE_FAIL #ifdef TAOS_RANDOM_FILE_FAIL
if (rand() % RANDOM_FILE_FAIL_FACTOR == 0) { if (random_file_fail_factor > 0) {
if (rand() % random_file_fail_factor == 0) {
errno = EIO; errno = EIO;
return -1; return -1;
} }
}
#endif #endif
return tread(fd, buf, count); return tread(fd, buf, count);
} }
ssize_t taos_twrite(int fd, void *buf, size_t count) ssize_t taos_twrite(int fd, void *buf, size_t count)
{ {
#ifdef TAOS_RANDOM_FILE_FAIL #ifdef TAOS_RANDOM_FILE_FAIL
if (rand() % RANDOM_FILE_FAIL_FACTOR == 0) { if (random_file_fail_factor > 0) {
if (rand() % random_file_fail_factor == 0) {
errno = EIO; errno = EIO;
return -1; return -1;
} }
}
#endif #endif
return twrite(fd, buf, count); return twrite(fd, buf, count);
} }
off_t taos_lseek(int fd, off_t offset, int whence) off_t taos_lseek(int fd, off_t offset, int whence)
{ {
#ifdef TAOS_RANDOM_FILE_FAIL #ifdef TAOS_RANDOM_FILE_FAIL
if (rand() % RANDOM_FILE_FAIL_FACTOR == 0) { if (random_file_fail_factor > 0) {
if (rand() % random_file_fail_factor == 0) {
errno = EIO; errno = EIO;
return -1; return -1;
} }
}
#endif #endif
return lseek(fd, offset, whence); return lseek(fd, offset, whence);
} }
###################################################################
# Copyright (c) 2016 by TAOS Technologies, Inc.
# All rights reserved.
#
# This file is proprietary and confidential to TAOS Technologies.
# No part of this file may be reproduced, stored, transmitted,
# disclosed or used in any form or by any means other than as
# expressly provided by the written permission from Jianhui Tao
#
###################################################################
# -*- coding: utf-8 -*-
import sys
import os
import os.path
import subprocess
from util.log import *
class TDSimClient:
def __init__(self):
self.testCluster = False
self.cfgDict = {
"numOfLogLines": "100000000",
"numOfThreadsPerCore": "2.0",
"locale": "en_US.UTF-8",
"charset": "UTF-8",
"asyncLog": "0",
"anyIp": "0",
"sdbDebugFlag": "135",
"rpcDebugFlag": "135",
"tmrDebugFlag": "131",
"cDebugFlag": "135",
"udebugFlag": "135",
"jnidebugFlag": "135",
"qdebugFlag": "135",
}
def init(self, path):
self.__init__()
self.path = path
def getLogDir(self):
self.logDir = "%s/sim/psim/log" % (self.path)
return self.logDir
def getCfgDir(self):
self.cfgDir = "%s/sim/psim/cfg" % (self.path)
return self.cfgDir
def setTestCluster(self, value):
self.testCluster = value
def addExtraCfg(self, option, value):
self.cfgDict.update({option: value})
def cfg(self, option, value):
cmd = "echo '%s %s' >> %s" % (option, value, self.cfgPath)
if os.system(cmd) != 0:
tdLog.exit(cmd)
def deploy(self):
self.logDir = "%s/sim/psim/log" % (self.path)
self.cfgDir = "%s/sim/psim/cfg" % (self.path)
self.cfgPath = "%s/sim/psim/cfg/taos.cfg" % (self.path)
cmd = "rm -rf " + self.logDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "mkdir -p " + self.logDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "rm -rf " + self.cfgDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "mkdir -p " + self.cfgDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "touch " + self.cfgPath
if os.system(cmd) != 0:
tdLog.exit(cmd)
if self.testCluster:
self.cfg("masterIp", "192.168.0.1")
self.cfg("secondIp", "192.168.0.2")
self.cfg("logDir", self.logDir)
for key, value in self.cfgDict.items():
self.cfg(key, value)
tdLog.debug("psim is deployed and configured by %s" % (self.cfgPath))
class TDDnode:
def __init__(self, index):
self.index = index
self.running = 0
self.deployed = 0
self.testCluster = False
self.valgrind = 0
def init(self, path):
self.path = path
def setTestCluster(self, value):
self.testCluster = value
def setValgrind(self, value):
self.valgrind = value
def getDataSize(self):
totalSize = 0
if (self.deployed == 1):
for dirpath, dirnames, filenames in os.walk(self.dataDir):
for f in filenames:
fp = os.path.join(dirpath, f)
if not os.path.islink(fp):
totalSize = totalSize + os.path.getsize(fp)
return totalSize
def deploy(self):
self.logDir = "%s/sim/dnode%d/log" % (self.path, self.index)
self.dataDir = "%s/sim/dnode%d/data" % (self.path, self.index)
self.cfgDir = "%s/sim/dnode%d/cfg" % (self.path, self.index)
self.cfgPath = "%s/sim/dnode%d/cfg/taos.cfg" % (
self.path, self.index)
cmd = "rm -rf " + self.dataDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "rm -rf " + self.logDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "rm -rf " + self.cfgDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "mkdir -p " + self.dataDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "mkdir -p " + self.logDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "mkdir -p " + self.cfgDir
if os.system(cmd) != 0:
tdLog.exit(cmd)
cmd = "touch " + self.cfgPath
if os.system(cmd) != 0:
tdLog.exit(cmd)
if self.testCluster:
self.startIP()
if self.testCluster:
self.cfg("masterIp", "192.168.0.1")
self.cfg("secondIp", "192.168.0.2")
self.cfg("publicIp", "192.168.0.%d" % (self.index))
self.cfg("internalIp", "192.168.0.%d" % (self.index))
self.cfg("privateIp", "192.168.0.%d" % (self.index))
self.cfg("dataDir", self.dataDir)
self.cfg("logDir", self.logDir)
self.cfg("numOfLogLines", "100000000")
self.cfg("mnodeEqualVnodeNum", "0")
self.cfg("walLevel", "1")
self.cfg("statusInterval", "1")
self.cfg("numOfTotalVnodes", "64")
self.cfg("numOfMnodes", "3")
self.cfg("numOfThreadsPerCore", "2.0")
self.cfg("monitor", "0")
self.cfg("maxVnodeConnections", "30000")
self.cfg("maxMgmtConnections", "30000")
self.cfg("maxMeterConnections", "30000")
self.cfg("maxShellConns", "30000")
self.cfg("locale", "en_US.UTF-8")
self.cfg("charset", "UTF-8")
self.cfg("asyncLog", "0")
self.cfg("anyIp", "0")
self.cfg("dDebugFlag", "135")
self.cfg("mDebugFlag", "135")
self.cfg("sdbDebugFlag", "135")
self.cfg("rpcDebugFlag", "135")
self.cfg("tmrDebugFlag", "131")
self.cfg("cDebugFlag", "135")
self.cfg("httpDebugFlag", "135")
self.cfg("monitorDebugFlag", "135")
self.cfg("udebugFlag", "135")
self.cfg("jnidebugFlag", "135")
self.cfg("qdebugFlag", "135")
self.deployed = 1
tdLog.debug(
"dnode:%d is deployed and configured by %s" %
(self.index, self.cfgPath))
def getBuildPath(self):
selfPath = os.path.dirname(os.path.realpath(__file__))
if ("community" in selfPath):
projPath = selfPath[:selfPath.find("community")]
else:
projPath = selfPath[:selfPath.find("tests")]
for root, dirs, files in os.walk(projPath):
if ("taosd" in files):
rootRealPath = os.path.dirname(os.path.realpath(root))
if ("packaging" not in rootRealPath):
buildPath = root[:len(root)-len("/build/bin")]
break
return buildPath
def start(self):
buildPath = self.getBuildPath()
if (buildPath == ""):
tdLog.exit("taosd not found!")
else:
tdLog.info("taosd found in %s" % buildPath)
binPath = buildPath + "/build/bin/taosd"
if self.deployed == 0:
tdLog.exit("dnode:%d is not deployed" % (self.index))
if self.valgrind == 0:
cmd = "nohup %s -c %s > /dev/null 2>&1 & " % (
binPath, self.cfgDir)
else:
valgrindCmdline = "valgrind --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes"
cmd = "nohup %s %s -c %s --random-file-fail-factor 5 2>&1 & " % (
valgrindCmdline, binPath, self.cfgDir)
print(cmd)
if os.system(cmd) != 0:
tdLog.exit(cmd)
self.running = 1
tdLog.debug("dnode:%d is running with %s " % (self.index, cmd))
tdLog.debug("wait 5 seconds for the dnode:%d to start." % (self.index))
time.sleep(5)
def stop(self):
if self.valgrind == 0:
toBeKilled = "taosd"
else:
toBeKilled = "valgrind.bin"
if self.running != 0:
psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
while(processID):
killCmd = "kill -INT %s > /dev/null 2>&1" % processID
os.system(killCmd)
time.sleep(1)
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
for port in range(6030, 6041):
fuserCmd = "fuser -k -n tcp %d" % port
os.system(fuserCmd)
if self.valgrind:
time.sleep(2)
self.running = 0
tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index))
def forcestop(self):
if self.valgrind == 0:
toBeKilled = "taosd"
else:
toBeKilled = "valgrind.bin"
if self.running != 0:
psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
while(processID):
killCmd = "kill -KILL %s > /dev/null 2>&1" % processID
os.system(killCmd)
time.sleep(1)
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
for port in range(6030, 6041):
fuserCmd = "fuser -k -n tcp %d" % port
os.system(fuserCmd)
if self.valgrind:
time.sleep(2)
self.running = 0
tdLog.debug("dnode:%d is stopped by kill -KILL" % (self.index))
def startIP(self):
cmd = "sudo ifconfig lo:%d 192.168.0.%d up" % (self.index, self.index)
if os.system(cmd) != 0:
tdLog.exit(cmd)
def stopIP(self):
cmd = "sudo ifconfig lo:%d 192.168.0.%d down" % (
self.index, self.index)
if os.system(cmd) != 0:
tdLog.exit(cmd)
def cfg(self, option, value):
cmd = "echo '%s %s' >> %s" % (option, value, self.cfgPath)
if os.system(cmd) != 0:
tdLog.exit(cmd)
def getDnodeRootDir(self, index):
dnodeRootDir = "%s/sim/psim/dnode%d" % (self.path, index)
return dnodeRootDir
def getDnodesRootDir(self):
dnodesRootDir = "%s/sim/psim" % (self.path)
return dnodesRootDir
class TDDnodes:
def __init__(self):
self.dnodes = []
self.dnodes.append(TDDnode(1))
self.dnodes.append(TDDnode(2))
self.dnodes.append(TDDnode(3))
self.dnodes.append(TDDnode(4))
self.dnodes.append(TDDnode(5))
self.dnodes.append(TDDnode(6))
self.dnodes.append(TDDnode(7))
self.dnodes.append(TDDnode(8))
self.dnodes.append(TDDnode(9))
self.dnodes.append(TDDnode(10))
self.simDeployed = False
def init(self, path):
psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'"
processID = subprocess.check_output(psCmd, shell=True).decode("utf-8")
while(processID):
killCmd = "kill -KILL %s > /dev/null 2>&1" % processID
os.system(killCmd)
time.sleep(1)
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'"
processID = subprocess.check_output(psCmd, shell=True).decode("utf-8")
while(processID):
killCmd = "kill -KILL %s > /dev/null 2>&1" % processID
os.system(killCmd)
time.sleep(1)
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
binPath = os.path.dirname(os.path.realpath(__file__))
binPath = binPath + "/../../../debug/"
tdLog.debug("binPath %s" % (binPath))
binPath = os.path.realpath(binPath)
tdLog.debug("binPath real path %s" % (binPath))
# cmd = "sudo cp %s/build/lib/libtaos.so /usr/local/lib/taos/" % (binPath)
# tdLog.debug(cmd)
# os.system(cmd)
# cmd = "sudo cp %s/build/bin/taos /usr/local/bin/taos/" % (binPath)
# if os.system(cmd) != 0 :
# tdLog.exit(cmd)
# tdLog.debug("execute %s" % (cmd))
# cmd = "sudo cp %s/build/bin/taosd /usr/local/bin/taos/" % (binPath)
# if os.system(cmd) != 0 :
# tdLog.exit(cmd)
# tdLog.debug("execute %s" % (cmd))
if path == "":
# self.path = os.path.expanduser('~')
self.path = os.path.abspath(binPath + "../../")
else:
self.path = os.path.realpath(path)
for i in range(len(self.dnodes)):
self.dnodes[i].init(self.path)
self.sim = TDSimClient()
self.sim.init(self.path)
def setTestCluster(self, value):
self.testCluster = value
def setValgrind(self, value):
self.valgrind = value
def deploy(self, index):
self.sim.setTestCluster(self.testCluster)
if (self.simDeployed == False):
self.sim.deploy()
self.simDeployed = True
self.check(index)
self.dnodes[index - 1].setTestCluster(self.testCluster)
self.dnodes[index - 1].setValgrind(self.valgrind)
self.dnodes[index - 1].deploy()
def cfg(self, index, option, value):
self.check(index)
self.dnodes[index - 1].cfg(option, value)
def start(self, index):
self.check(index)
self.dnodes[index - 1].start()
def stop(self, index):
self.check(index)
self.dnodes[index - 1].stop()
def getDataSize(self, index):
self.check(index)
return self.dnodes[index - 1].getDataSize()
def forcestop(self, index):
self.check(index)
self.dnodes[index - 1].forcestop()
def startIP(self, index):
self.check(index)
if self.testCluster:
self.dnodes[index - 1].startIP()
def stopIP(self, index):
self.check(index)
if self.dnodes[index - 1].testCluster:
self.dnodes[index - 1].stopIP()
def check(self, index):
if index < 1 or index > 10:
tdLog.exit("index:%d should on a scale of [1, 10]" % (index))
def stopAll(self):
tdLog.info("stop all dnodes")
for i in range(len(self.dnodes)):
self.dnodes[i].stop()
psCmd = "ps -ef | grep -w taosd | grep 'root' | grep -v grep | awk '{print $2}'"
processID = subprocess.check_output(psCmd, shell=True).decode("utf-8")
if processID:
cmd = "sudo systemctl stop taosd"
os.system(cmd)
# if os.system(cmd) != 0 :
# tdLog.exit(cmd)
psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'"
processID = subprocess.check_output(psCmd, shell=True).decode("utf-8")
while(processID):
killCmd = "kill -KILL %s > /dev/null 2>&1" % processID
os.system(killCmd)
time.sleep(1)
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'"
processID = subprocess.check_output(psCmd, shell=True).decode("utf-8")
while(processID):
killCmd = "kill -KILL %s > /dev/null 2>&1" % processID
os.system(killCmd)
time.sleep(1)
processID = subprocess.check_output(
psCmd, shell=True).decode("utf-8")
# if os.system(cmd) != 0 :
# tdLog.exit(cmd)
def getDnodesRootDir(self):
dnodesRootDir = "%s/sim" % (self.path)
return dnodesRootDir
def getSimCfgPath(self):
return self.sim.getCfgDir()
def getSimLogPath(self):
return self.sim.getLogDir()
def addSimExtraCfg(self, option, value):
self.sim.addExtraCfg(option, value)
tdDnodes = TDDnodes()
#!/bin/bash
# if [ $# != 4 || $# != 5 ]; then
# echo "argument list need input : "
# echo " -n nodeName"
# echo " -s start/stop"
# echo " -c clear"
# exit 1
# fi
NODE_NAME=
EXEC_OPTON=
CLEAR_OPTION="false"
while getopts "n:s:u:x:ct" arg
do
case $arg in
n)
NODE_NAME=$OPTARG
;;
s)
EXEC_OPTON=$OPTARG
;;
c)
CLEAR_OPTION="clear"
;;
t)
SHELL_OPTION="true"
;;
u)
USERS=$OPTARG
;;
x)
SIGNAL=$OPTARG
;;
?)
echo "unkown argument"
;;
esac
done
SCRIPT_DIR=`dirname $0`
cd $SCRIPT_DIR/../
SCRIPT_DIR=`pwd`
IN_TDINTERNAL="community"
if [[ "$SCRIPT_DIR" == *"$IN_TDINTERNAL"* ]]; then
cd ../../..
else
cd ../../
fi
TAOS_DIR=`pwd`
TAOSD_DIR=`find . -name "taosd"|grep bin|head -n1`
if [[ "$TAOSD_DIR" == *"$IN_TDINTERNAL"* ]]; then
BIN_DIR=`find . -name "taosd"|grep bin|head -n1|cut -d '/' --fields=2,3`
else
BIN_DIR=`find . -name "taosd"|grep bin|head -n1|cut -d '/' --fields=2`
fi
BUILD_DIR=$TAOS_DIR/$BIN_DIR/build
SIM_DIR=$TAOS_DIR/sim
NODE_DIR=$SIM_DIR/$NODE_NAME
EXE_DIR=$BUILD_DIR/bin
CFG_DIR=$NODE_DIR/cfg
LOG_DIR=$NODE_DIR/log
DATA_DIR=$NODE_DIR/data
MGMT_DIR=$NODE_DIR/data/mgmt
TSDB_DIR=$NODE_DIR/data/tsdb
TAOS_CFG=$NODE_DIR/cfg/taos.cfg
echo ------------ $EXEC_OPTON $NODE_NAME
TAOS_FLAG=$SIM_DIR/tsim/flag
if [ -f "$TAOS_FLAG" ]; then
EXE_DIR=/usr/local/bin/taos
fi
if [ "$CLEAR_OPTION" = "clear" ]; then
echo rm -rf $MGMT_DIR $TSDB_DIR
rm -rf $TSDB_DIR
rm -rf $MGMT_DIR
fi
if [ "$EXEC_OPTON" = "start" ]; then
echo "ExcuteCmd:" $EXE_DIR/taosd -c $CFG_DIR
if [ "$SHELL_OPTION" = "true" ]; then
nohup valgrind --log-file=${LOG_DIR}/valgrind.log --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes $EXE_DIR/taosd -c $CFG_DIR > /dev/null 2>&1 &
else
nohup $EXE_DIR/taosd -c $CFG_DIR --random-file-fail-factor 5 > /dev/null 2>&1 &
fi
else
#relative path
RCFG_DIR=sim/$NODE_NAME/cfg
PID=`ps -ef|grep taosd | grep $RCFG_DIR | grep -v grep | awk '{print $2}'`
while [ -n "$PID" ]
do
if [ "$SIGNAL" = "SIGINT" ]; then
echo try to kill by signal SIGINT
kill -SIGINT $PID
else
echo try to kill by signal SIGKILL
kill -9 $PID
fi
sleep 1
PID=`ps -ef|grep taosd | grep $RCFG_DIR | grep -v grep | awk '{print $2}'`
done
fi
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册