diff --git a/dygraph/benchmark/deeplabv3p.py b/dygraph/benchmark/deeplabv3p.py index 9c1bc66c36feba4b8660941e81e7a70c9eef4050..0ce683fe7aca51832be3e1aef521e99c4137cabe 100644 --- a/dygraph/benchmark/deeplabv3p.py +++ b/dygraph/benchmark/deeplabv3p.py @@ -21,6 +21,7 @@ from dygraph.datasets import DATASETS import dygraph.transforms as T from dygraph.models import MODELS from dygraph.utils import get_environ_info +from dygraph.utils import logger from dygraph.core import train @@ -129,8 +130,12 @@ def parse_args(): def main(args): env_info = get_environ_info() + info = ['{}: {}'.format(k, v) for k, v in env_info.items()] + info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info + + ['-' * 48]) + logger.info(info) places = fluid.CUDAPlace(ParallelEnv().dev_id) \ - if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \ + if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \ else fluid.CPUPlace() if args.dataset not in DATASETS: diff --git a/dygraph/benchmark/hrnet.py b/dygraph/benchmark/hrnet.py index 3f64aa79739d2caca601f3a0574fd987ba49fba3..793bb8ae9060a9f8f9247f82535daacfa8100fc2 100644 --- a/dygraph/benchmark/hrnet.py +++ b/dygraph/benchmark/hrnet.py @@ -21,6 +21,7 @@ from dygraph.datasets import DATASETS import dygraph.transforms as T from dygraph.models import MODELS from dygraph.utils import get_environ_info +from dygraph.utils import logger from dygraph.core import train @@ -129,8 +130,12 @@ def parse_args(): def main(args): env_info = get_environ_info() + info = ['{}: {}'.format(k, v) for k, v in env_info.items()] + info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info + + ['-' * 48]) + logger.info(info) places = fluid.CUDAPlace(ParallelEnv().dev_id) \ - if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \ + if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \ else fluid.CPUPlace() if args.dataset not in DATASETS: diff --git a/dygraph/core/infer.py 
b/dygraph/core/infer.py index 23890e8e1b7c869c36f37aa53d481b22317c1a2c..f86823bc24933c61df9d7830a77c329fbe0fc4c9 100644 --- a/dygraph/core/infer.py +++ b/dygraph/core/infer.py @@ -21,7 +21,7 @@ import cv2 import tqdm from dygraph import utils -import dygraph.utils.logging as logging +import dygraph.utils.logger as logger def mkdir(path): @@ -39,7 +39,7 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'): added_saved_dir = os.path.join(save_dir, 'added') pred_saved_dir = os.path.join(save_dir, 'prediction') - logging.info("Start to predict...") + logger.info("Start to predict...") for im, im_info, im_path in tqdm.tqdm(test_dataset): im = to_variable(im) pred, _ = model(im) diff --git a/dygraph/core/train.py b/dygraph/core/train.py index a823265f316951b0f43e4449212f709301e87574..94a3ee4e7300888ca4ce0a1779f5b9a32fb825f0 100644 --- a/dygraph/core/train.py +++ b/dygraph/core/train.py @@ -19,7 +19,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.io import DataLoader from paddle.incubate.hapi.distributed import DistributedBatchSampler -import dygraph.utils.logging as logging +import dygraph.utils.logger as logger from dygraph.utils import load_pretrained_model from dygraph.utils import resume from dygraph.utils import Timer, calculate_eta @@ -111,7 +111,7 @@ def train(model, train_batch_cost = 0.0 remain_steps = total_steps - num_steps eta = calculate_eta(remain_steps, avg_train_batch_cost) - logging.info( + logger.info( "[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}" .format(epoch + 1, num_epochs, step + 1, steps_per_epoch, avg_loss * nranks, lr, avg_train_batch_cost, @@ -152,7 +152,7 @@ def train(model, best_model_dir = os.path.join(save_dir, "best_model") fluid.save_dygraph(model.state_dict(), os.path.join(best_model_dir, 'model')) - logging.info( + logger.info( 'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}' .format(best_model_epoch, 
best_mean_iou)) diff --git a/dygraph/core/val.py b/dygraph/core/val.py index 0623b61772e221da8ccdf73aebad2217cbbd06de..a35f0709a96191eb3baef3cbc29997361c62da18 100644 --- a/dygraph/core/val.py +++ b/dygraph/core/val.py @@ -20,7 +20,7 @@ import cv2 from paddle.fluid.dygraph.base import to_variable import paddle.fluid as fluid -import dygraph.utils.logging as logging +import dygraph.utils.logger as logger from dygraph.utils import ConfusionMatrix from dygraph.utils import Timer, calculate_eta @@ -39,7 +39,7 @@ def evaluate(model, total_steps = len(eval_dataset) conf_mat = ConfusionMatrix(num_classes, streaming=True) - logging.info( + logger.info( "Start to evaluating(total_samples={}, total_steps={})...".format( len(eval_dataset), total_steps)) timer = Timer() @@ -69,7 +69,7 @@ def evaluate(model, time_step = timer.elapsed_time() remain_step = total_steps - step - 1 - logging.debug( + logger.debug( "[EVAL] Epoch={}, Step={}/{}, iou={:4f}, sec/step={:.4f} | ETA {}". format(epoch_id, step + 1, total_steps, iou, time_step, calculate_eta(remain_step, time_step))) @@ -77,9 +77,9 @@ def evaluate(model, category_iou, miou = conf_mat.mean_iou() category_acc, macc = conf_mat.accuracy() - logging.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format( + logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format( len(eval_dataset), macc, miou)) - logging.info("[EVAL] Category IoU: " + str(category_iou)) - logging.info("[EVAL] Category Acc: " + str(category_acc)) - logging.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa())) + logger.info("[EVAL] Category IoU: " + str(category_iou)) + logger.info("[EVAL] Category Acc: " + str(category_acc)) + logger.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa())) return miou, macc diff --git a/dygraph/infer.py b/dygraph/infer.py index 76cdee7cacf33307d630f71e5b47737d37e9363c..9d05571ba2115e10bc70301fc83c66c7f9bab313 100644 --- a/dygraph/infer.py +++ b/dygraph/infer.py @@ -84,7 +84,7 @@ def parse_args(): def main(args): env_info = 
get_environ_info() places = fluid.CUDAPlace(ParallelEnv().dev_id) \ - if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \ + if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \ else fluid.CPUPlace() if args.dataset not in DATASETS: diff --git a/dygraph/train.py b/dygraph/train.py index 073e90a3baeb4e61262ea2be767fd173a49d4872..d835578e2ceb7c6bdd8e64152d7036c207ada776 100644 --- a/dygraph/train.py +++ b/dygraph/train.py @@ -22,6 +22,7 @@ import dygraph.transforms as T #from dygraph.models import MODELS from dygraph.cvlibs import manager from dygraph.utils import get_environ_info +from dygraph.utils import logger from dygraph.core import train @@ -130,8 +131,13 @@ def parse_args(): def main(args): env_info = get_environ_info() + info = ['{}: {}'.format(k, v) for k, v in env_info.items()] + info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info + + ['-' * 48]) + logger.info(info) + places = fluid.CUDAPlace(ParallelEnv().dev_id) \ - if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \ + if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \ else fluid.CPUPlace() if args.dataset not in DATASETS: diff --git a/dygraph/utils/__init__.py b/dygraph/utils/__init__.py index 68a8136a647f50dac8ab122530c71c82cca53f79..e1e92959a70f240f6c59d999e1e135004d5b0de2 100644 --- a/dygraph/utils/__init__.py +++ b/dygraph/utils/__init__.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import logging +from . import logger from . 
import download from .metrics import ConfusionMatrix from .utils import * from .timer import Timer, calculate_eta +from .get_environ_info import get_environ_info
diff --git a/dygraph/utils/get_environ_info.py b/dygraph/utils/get_environ_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..944c1f9595d5a4733df85f0b24b380bd00f8a726
--- /dev/null
+++ b/dygraph/utils/get_environ_info.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+from collections import OrderedDict
+import subprocess
+import glob
+
+import paddle
+import paddle.fluid as fluid
+import cv2
+
+IS_WINDOWS = sys.platform == 'win32'
+_NOT_AVAILABLE = 'Not Available'
+
+
+def _run_command(cmd):
+    '''Runs `cmd` and returns its stripped stdout, or None on failure.
+
+    Args:
+        cmd (list): The program and its arguments. No shell is involved, so
+            paths containing spaces need no quoting.
+
+    Returns:
+        str|None: The decoded output, or None when the program cannot be
+            started or exits with a non-zero status.
+    '''
+    try:
+        output = subprocess.check_output(cmd)
+    except (OSError, subprocess.SubprocessError):
+        return None
+    # Replace undecodable bytes instead of raising: this report is
+    # best-effort and must never abort the caller.
+    return output.decode(errors='replace').strip()
+
+
+def _find_cuda_home():
+    '''Finds the CUDA install path. It refers to the implementation of
+    pytorch.
+
+    Returns:
+        str|None: CUDA_HOME/CUDA_PATH if set, else the directory derived
+            from the nvcc on PATH, else a default location that exists,
+            else None.
+    '''
+    # Guess #1
+    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
+    if cuda_home is not None:
+        return cuda_home
+
+    # Guess #2: nvcc lives in <cuda_home>/bin, so go two levels up.
+    nvcc = _run_command(['where' if IS_WINDOWS else 'which', 'nvcc'])
+    if nvcc:
+        # `where` may print several matches, one per line; keep the first.
+        return os.path.dirname(os.path.dirname(nvcc.splitlines()[0]))
+
+    # Guess #3
+    if IS_WINDOWS:
+        cuda_homes = glob.glob(
+            'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
+        cuda_home = cuda_homes[0] if cuda_homes else ''
+    else:
+        cuda_home = '/usr/local/cuda'
+    return cuda_home if os.path.exists(cuda_home) else None
+
+
+def _get_nvcc_info(cuda_home):
+    '''Returns the last line printed by `nvcc -V`.
+
+    Args:
+        cuda_home (str|None): The CUDA install path, as returned by
+            _find_cuda_home().
+
+    Returns:
+        str: The last output line of nvcc, or "Not Available" when
+            cuda_home is not a directory or nvcc cannot be run.
+    '''
+    # Returning early also covers cuda_home=None, which previously left the
+    # result unassigned and raised UnboundLocalError.
+    if cuda_home is None or not os.path.isdir(cuda_home):
+        return _NOT_AVAILABLE
+    output = _run_command([os.path.join(cuda_home, 'bin', 'nvcc'), '-V'])
+    if not output:
+        return _NOT_AVAILABLE
+    return output.splitlines()[-1]
+
+
+def _get_gpu_info():
+    '''Lists the devices reported by `nvidia-smi -L`.
+
+    Returns:
+        list|str: One string per output line, cut to its first four
+            space-separated fields, or a message string when nvidia-smi
+            cannot be run.
+    '''
+    output = _run_command(['nvidia-smi', '-L'])
+    if output is None:
+        return ' Can not get GPU information. Please make sure CUDA have been installed successfully.'
+    return [' '.join(line.split(' ')[:4]) for line in output.split('\n')]
+
+
+def get_environ_info():
+    """collect environment information
+
+    Returns:
+        OrderedDict: Entries in the order they should be displayed. The
+            'NVCC', 'GPUs used', 'CUDA_VISIBLE_DEVICES' and 'GPU' keys exist
+            only when Paddle is compiled with CUDA, and 'LSB' only on Linux.
+    """
+    env_info = OrderedDict()
+    env_info['System Platform'] = sys.platform
+    if env_info['System Platform'] == 'linux':
+        # lsb_release may not be installed; record that instead of raising.
+        lsb = []
+        for flag in ('-v', '-d'):
+            output = _run_command(['lsb_release', flag])
+            lsb.append(_NOT_AVAILABLE
+                       if output is None else output.replace('\t', ' '))
+        env_info['LSB'] = lsb
+
+    env_info['Python'] = sys.version.replace('\n', '')
+
+    compiled_with_cuda = fluid.is_compiled_with_cuda()
+    env_info['Paddle compiled with cuda'] = compiled_with_cuda
+
+    if compiled_with_cuda:
+        cuda_home = _find_cuda_home()
+        env_info['NVCC'] = _get_nvcc_info(cuda_home)
+        gpu_nums = fluid.core.get_cuda_device_count()
+        env_info['GPUs used'] = gpu_nums
+        env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get(
+            'CUDA_VISIBLE_DEVICES')
+        env_info['GPU'] = _get_gpu_info()
+
+    # A missing compiler must not prevent the report from being built.
+    gcc = _run_command(['gcc', '--version'])
+    env_info['GCC'] = gcc.split('\n')[0] if gcc else _NOT_AVAILABLE
+
+    env_info['PaddlePaddle'] = paddle.__version__
+    env_info['OpenCV'] = cv2.__version__
+
+    return env_info
diff --git a/dygraph/utils/logging.py b/dygraph/utils/logger.py
similarity index 100%
rename from dygraph/utils/logging.py
rename to dygraph/utils/logger.py
diff --git a/dygraph/utils/utils.py b/dygraph/utils/utils.py index 3d4fc62e63a511c129118e42784a6275e6e08856..e002f1457ee4259c0711cd5f36c2632f944d5b0f 100644 --- a/dygraph/utils/utils.py +++ b/dygraph/utils/utils.py @@ -18,7 +18,7 @@ import math import cv2 import paddle.fluid as fluid -from . import logging +from . 
import logger def seconds_to_hms(seconds): @@ -29,27 +29,9 @@ def seconds_to_hms(seconds): return hms_str -def get_environ_info(): - info = dict() - info['place'] = 'cpu' - info['num'] = int(os.environ.get('CPU_NUM', 1)) - if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "": - if hasattr(fluid.core, 'get_cuda_device_count'): - gpu_num = 0 - try: - gpu_num = fluid.core.get_cuda_device_count() - except: - os.environ['CUDA_VISIBLE_DEVICES'] = '' - pass - if gpu_num > 0: - info['place'] = 'cuda' - info['num'] = fluid.core.get_cuda_device_count() - return info - - def load_pretrained_model(model, pretrained_model): if pretrained_model is not None: - logging.info('Load pretrained model from {}'.format(pretrained_model)) + logger.info('Load pretrained model from {}'.format(pretrained_model)) if os.path.exists(pretrained_model): ckpt_path = os.path.join(pretrained_model, 'model') try: @@ -62,10 +44,10 @@ def load_pretrained_model(model, pretrained_model): num_params_loaded = 0 for k in keys: if k not in para_state_dict: - logging.warning("{} is not in pretrained model".format(k)) + logger.warning("{} is not in pretrained model".format(k)) elif list(para_state_dict[k].shape) != list( model_state_dict[k].shape): - logging.warning( + logger.warning( "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})" .format(k, para_state_dict[k].shape, model_state_dict[k].shape)) @@ -73,7 +55,7 @@ def load_pretrained_model(model, pretrained_model): model_state_dict[k] = para_state_dict[k] num_params_loaded += 1 model.set_dict(model_state_dict) - logging.info("There are {}/{} varaibles are loaded.".format( + logger.info("There are {}/{} varaibles are loaded.".format( num_params_loaded, len(model_state_dict))) else: @@ -81,12 +63,12 @@ def load_pretrained_model(model, pretrained_model): 'The pretrained model directory is not Found: {}'.format( pretrained_model)) else: - logging.info('No pretrained model to load, train from scratch') + logger.info('No pretrained 
model to load, train from scratch') def resume(model, optimizer, resume_model): if resume_model is not None: - logging.info('Resume model from {}'.format(resume_model)) + logger.info('Resume model from {}'.format(resume_model)) if os.path.exists(resume_model): resume_model = os.path.normpath(resume_model) ckpt_path = os.path.join(resume_model, 'model') @@ -102,7 +84,7 @@ def resume(model, optimizer, resume_model): 'The resume model directory is not Found: {}'.format( resume_model)) else: - logging.info('No model need to resume') + logger.info('No model need to resume') def visualize(image, result, save_dir=None, weight=0.6): diff --git a/dygraph/val.py b/dygraph/val.py index 9550cc837f871d80e9a0eb0ca47c96ba1703b99e..c4ea97d6af5ef266af4c0f28b0402aaeb2bad26f 100644 --- a/dygraph/val.py +++ b/dygraph/val.py @@ -72,7 +72,7 @@ def parse_args(): def main(args): env_info = get_environ_info() places = fluid.CUDAPlace(ParallelEnv().dev_id) \ - if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \ + if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \ else fluid.CPUPlace() if args.dataset not in DATASETS: