diff --git a/README_CN.md b/README_CN.md
index a96ddb0275fa58814184dccb0b2617b627dee443..2d749bc95292ae8a9f547ab4d51c03ccbf57550c 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -23,7 +23,7 @@ MindInsight为MindSpore提供了简单易用的调优调试能力。在训练过
 请从[MindSpore下载页面](https://www.mindspore.cn/versions)下载并安装whl包。
 
 ```
-pip install mindinsight-{version}-cp37-cp37m-linux_{arch}.whl
+pip install -U mindinsight-{version}-cp37-cp37m-linux_{arch}.whl
 ```
 
 更多MindInsight的安装方法,请点击[安装教程](https://www.mindspore.cn/install/)中的MindInsight章节进行查看。
diff --git a/mindinsight/backend/config/gunicorn_conf.py b/mindinsight/backend/config/gunicorn_conf.py
index 37221a4ef59081f70a6907c9f8b41c8b1004920d..0993a0b2692be4ccf343f18b10d8457fa703eb05 100644
--- a/mindinsight/backend/config/gunicorn_conf.py
+++ b/mindinsight/backend/config/gunicorn_conf.py
@@ -15,9 +15,13 @@
 """Config file for gunicorn."""
 
 import os
+import multiprocessing
+import signal
 import threading
+import time
 from importlib import import_module
 
+import psutil
 import gunicorn
 
 
@@ -43,3 +47,44 @@ def on_starting(server):
     hook_module = import_module('mindinsight.utils.hook')
     for hook in hook_module.HookUtils.instance().hooks():
         threading.Thread(target=hook.on_startup, args=(server.log,)).start()
+
+
+def post_fork(server, worker):
+    """
+    Launch a watcher process for the worker right after gunicorn forks it.
+
+    Child processes of a gunicorn worker should be killed once the worker itself has been killed,
+    e.g. when the gunicorn master murders the worker for some reason such as a worker timeout.
+
+    Args:
+        server (Arbiter): gunicorn server instance.
+        worker (ThreadWorker): worker instance.
+    """
+    def murder_worker_children_processes():
+        processes_to_kill = []
+        # Sleep 3 seconds so that the worker's child processes have (presumably) all been launched.
+        time.sleep(3)
+        process = psutil.Process(worker.pid)
+        for child in process.children(recursive=True):
+            if child.pid != os.getpid():
+                processes_to_kill.append(child)
+        while True:
+            if os.getppid() != worker.pid:
+                current_worker_pid = os.getppid()
+                for proc in processes_to_kill:
+                    server.log.info("Original worker pid: %d, current worker pid: %d, stop process %d",
+                                    worker.pid, current_worker_pid, proc.pid)
+                    try:
+                        proc.send_signal(signal.SIGKILL)
+                    except psutil.NoSuchProcess:
+                        continue
+                    except psutil.Error as ex:
+                        server.log.error("Stop process %d failed. Detail: %s.", proc.pid, str(ex))
+                server.log.info("%d processes have been killed.", len(processes_to_kill))
+                break
+            time.sleep(1)
+
+    listen_process = multiprocessing.Process(target=murder_worker_children_processes,
+                                             name="murder_worker_children_processes")
+    listen_process.start()
+    server.log.info("Server pid: %d, start to listening.", server.pid)
diff --git a/mindinsight/datavisual/data_transform/tensor_container.py b/mindinsight/datavisual/data_transform/tensor_container.py
index 0f865800e95cc6e47bbe63fe6937fc040c6b97e0..0bdc94d8f6475dc42f051357688a5c6ce14d8e9d 100644
--- a/mindinsight/datavisual/data_transform/tensor_container.py
+++ b/mindinsight/datavisual/data_transform/tensor_container.py
@@ -193,8 +193,10 @@ class TensorContainer:
         self._stats = get_statistics_from_tensor(self._np_array)
         original_buckets = calc_original_buckets(self._np_array, self._stats)
         self._count = sum(bucket.count for bucket in original_buckets)
-        self._max = self._stats.max
-        self._min = self._stats.min
+        # Convert the max and min values to np.float64 so that they cannot overflow
+        # when calculating the width of the histogram.
+        self._max = np.float64(self._stats.max)
+        self._min = np.float64(self._stats.min)
         self._histogram = Histogram(tuple(original_buckets), self._max, self._min, self._count)
 
     @property