未验证 提交 09dd9aab 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #2861 from taosdata/feature/crash_gen

Now able to create a service-side core dump problem
......@@ -238,7 +238,7 @@ class WorkerThread:
class ThreadCoordinator:
WORKER_THREAD_TIMEOUT = 30
WORKER_THREAD_TIMEOUT = 60 # one minute
def __init__(self, pool: ThreadPool, dbManager):
self._curStep = -1 # first step is 0
......@@ -388,7 +388,9 @@ class ThreadCoordinator:
except taos.error.ProgrammingError as err:
transitionFailed = True
errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno # correct error scheme
logger.info("Transition failed: errno=0x{:X}, msg: {}".format(errno2, err))
errMsg = "Transition failed: errno=0x{:X}, msg: {}".format(errno2, err)
logger.info(errMsg)
self._execStats.registerFailure(errMsg)
# Then we move on to the next step
self._releaseAllWorkerThreads(transitionFailed)
......@@ -812,7 +814,7 @@ class DbConnNative(DbConn):
buildPath = root[:len(root) - len("/build/bin")]
break
if buildPath == None:
raise RuntimeError("Failed to determine buildPath, selfPath={}".format(self_path))
raise RuntimeError("Failed to determine buildPath, selfPath={}".format(selfPath))
return buildPath
......@@ -2292,6 +2294,12 @@ class ServiceManagerThread:
self._thread.daemon = True # thread dies with the program
self._thread.start()
self._thread2 = threading.Thread(
target=self.svcErrorReader,
args=(self._tdeSubProcess.getStdErr(), self._ipcQueue))
self._thread2.daemon = True # thread dies with the program
self._thread2.start()
# wait for service to start
for i in range(0, 10):
time.sleep(1.0)
......@@ -2320,12 +2328,12 @@ class ServiceManagerThread:
raise RuntimeError("sub process object missing")
self._status = MainExec.STATUS_STOPPING
self._tdeSubProcess.stop()
retCode = self._tdeSubProcess.stop()
print("Attempted to stop sub process, got return code: {}".format(retCode))
if self._tdeSubProcess.isRunning(): # still running
print(
"FAILED to stop sub process, it is still running... pid = {}".format(
self.subProcess.pid))
print("FAILED to stop sub process, it is still running... pid = {}".format(
self._tdeSubProcess.getPid()))
else:
self._tdeSubProcess = None # not running any more
self.join() # stop the thread, change the status, etc.
......@@ -2341,6 +2349,9 @@ class ServiceManagerThread:
self._thread.join()
self._thread = None
self._status = MainExec.STATUS_STOPPED
# STD ERR thread
self._thread2.join()
self._thread2 = None
else:
print("Joining empty thread, doing nothing")
......@@ -2421,6 +2432,10 @@ class ServiceManagerThread:
print("\nNo more output from IO thread managing TDengine service")
out.close()
def svcErrorReader(self, err: IO, queue):
for line in iter(err.readline, b''):
print("\nTD Svc STDERR: {}".format(line))
class TdeSubProcess:
def __init__(self):
......@@ -2429,9 +2444,15 @@ class TdeSubProcess:
def getStdOut(self):
return self.subProcess.stdout
def getStdErr(self):
return self.subProcess.stderr
def isRunning(self):
return self.subProcess is not None
def getPid(self):
return self.subProcess.pid
def getBuildPath(self):
selfPath = os.path.dirname(os.path.realpath(__file__))
if ("community" in selfPath):
......@@ -2467,24 +2488,28 @@ class TdeSubProcess:
os.rename(logPath, logPathSaved)
# os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms
svcCmd = [taosdPath, '-c', cfgPath]
# svcCmdSingle = "{} -c {}".format(taosdPath, cfgPath)
# svcCmd = ['vmstat', '1']
if self.subProcess: # already there
raise RuntimeError("Corrupt process state")
# print("Starting service: {}".format(svcCmd))
self.subProcess = subprocess.Popen(
svcCmd,
svcCmd, shell=False,
# svcCmdSingle, shell=True, # capture core dump?
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
# bufsize=1, # not supported in binary mode
close_fds=ON_POSIX) # had text=True, which interferred with reading EOF
close_fds=ON_POSIX
) # had text=True, which interferred with reading EOF
def stop(self):
if not self.subProcess:
print("Sub process already stopped")
return
return -1
retCode = self.subProcess.poll()
retCode = self.subProcess.poll() # contains real sub process return code
if retCode: # valid return code, process ended
self.subProcess = None
else: # process still alive, let's interrupt it
......@@ -2495,11 +2520,15 @@ class TdeSubProcess:
self.subProcess.send_signal(signal.SIGINT)
try:
self.subProcess.wait(10)
retCode = self.subProcess.returncode
except subprocess.TimeoutExpired as err:
print("Time out waiting for TDengine service process to exit")
retCode = -3
else:
print("TDengine service process terminated successfully from SIG_INT")
retCode = -4
self.subProcess = None
return retCode
class ThreadStacks: # stack info for all threads
def __init__(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册