From 46b18b101fec367c14307978d29d5197cf4c73d0 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 22 Mar 2019 19:53:31 +0800 Subject: [PATCH] Ngraph (#1916) * Added README for the ngraph engine * Fix --model_save_dir * Uncomment naive tests * Update run.sh script * update to use ngraph * update readme * Numactl -l was removed * update instruction * download pretrained model for inference only * fix typo * update --- .../image_classification/README_ngraph.md | 44 +++++++++++++++++++ fluid/PaddleCV/image_classification/run.sh | 23 ++++++++++ fluid/PaddleCV/image_classification/train.py | 39 +++++++++++----- 3 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 fluid/PaddleCV/image_classification/README_ngraph.md mode change 100644 => 100755 fluid/PaddleCV/image_classification/run.sh diff --git a/fluid/PaddleCV/image_classification/README_ngraph.md b/fluid/PaddleCV/image_classification/README_ngraph.md new file mode 100644 index 00000000..17455bd6 --- /dev/null +++ b/fluid/PaddleCV/image_classification/README_ngraph.md @@ -0,0 +1,44 @@ + +# PaddlePaddle inference and training script +This directory contains the configuration and instructions to run PaddlePaddle + nGraph for local training and inference. + +# How to build the PaddlePaddle framework with the nGraph engine +In order to build the PaddlePaddle + nGraph engine and run the proper script, follow these steps: +1. Install PaddlePaddle project +2. Set env exports for nGraph and OpenMP +3. Run the inference/training script + +Currently supported models: +* ResNet50 (inference and training). + +Only the Adam optimizer is supported so far. + +Short description of the aforementioned steps: + +## 1. Install PaddlePaddle +Follow the PaddlePaddle [installation instructions](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/image_classification#installation) to install PaddlePaddle. If you build PaddlePaddle yourself, please use the following cmake arguments and make sure to set `-DWITH_NGRAPH=ON`. +``` +cmake .. 
-DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=ON -DWITH_MKLDNN=ON -DWITH_NGRAPH=ON +``` +Note: MKLDNN and MKL are required. + +## 2. Set env exports for nGraph and OMP +Set the following exports needed for running nGraph: +``` +export FLAGS_use_ngraph=true +export OMP_NUM_THREADS=<num_cpu_cores> +``` + +Optional exports for better performance: +``` +export KMP_AFFINITY=granularity=fine,compact,1,0 +export KMP_BLOCKTIME=1 +``` + +## 3. How to run the benchmark script +If everything built successfully, you can run the command from the ResNet50 nGraph section of the script [run.sh](https://github.com/PaddlePaddle/models/blob/develop/fluid/PaddleCV/image_classification/run.sh) to start the benchmark job locally. You will need to uncomment the `#ResNet50 nGraph` part of the script. + +The above is the training job using nGraph. To run the inference job using nGraph: + +Please download the pre-trained ResNet50 model from [supported models](https://github.com/PaddlePaddle/models/tree/72dcc7c1a8d5de9d19fbd65b4143bd0d661eee2c/fluid/PaddleCV/image_classification#supported-models-and-performances) for the inference script. 
+ diff --git a/fluid/PaddleCV/image_classification/run.sh b/fluid/PaddleCV/image_classification/run.sh old mode 100644 new mode 100755 index b0cc2255..dc318753 --- a/fluid/PaddleCV/image_classification/run.sh +++ b/fluid/PaddleCV/image_classification/run.sh @@ -192,3 +192,26 @@ python train.py \ # --model_category=models_name \ # --model_save_dir=output/ \ # --l2_decay=3e-4 + +#ResNet50 nGraph: +# Training: +#OMP_NUM_THREADS=`nproc` FLAGS_use_ngraph=true python train.py \ +# --model=ResNet50 \ +# --batch_size=128 \ +# --total_images=1281167 \ +# --class_dim=1000 \ +# --image_shape=3,224,224 \ +# --lr_strategy=none \ +# --lr=0.001 \ +# --num_epochs=120 \ +# --with_mem_opt=False \ +# --model_category=models_name \ +# --model_save_dir=output/ \ +# --lr_strategy=adam \ +# --use_gpu=False +# Inference: +#OMP_NUM_THREADS=`nproc` FLAGS_use_ngraph=true python infer.py \ +# --use_gpu=false \ +# --model=ResNet50 \ +# --pretrained_model=ResNet50_pretrained + diff --git a/fluid/PaddleCV/image_classification/train.py b/fluid/PaddleCV/image_classification/train.py index 145b2886..adf7febc 100644 --- a/fluid/PaddleCV/image_classification/train.py +++ b/fluid/PaddleCV/image_classification/train.py @@ -116,6 +116,9 @@ def optimizer_setting(params): learning_rate=lr, momentum=momentum_rate, regularization=fluid.regularizer.L2Decay(l2_decay)) + elif ls["name"] == "adam": + lr = params["lr"] + optimizer = fluid.optimizer.Adam(learning_rate=lr) else: lr = params["lr"] l2_decay = params["l2_decay"] @@ -264,14 +267,17 @@ def train(args): fluid.io.load_vars( exe, pretrained_model, main_program=train_prog, predicate=if_exist) - visible_device = os.getenv('CUDA_VISIBLE_DEVICES') - if visible_device: - device_num = len(visible_device.split(',')) + if args.use_gpu: + visible_device = os.getenv('CUDA_VISIBLE_DEVICES') + if visible_device: + device_num = len(visible_device.split(',')) + else: + device_num = subprocess.check_output( + ['nvidia-smi', '-L']).decode().count('\n') else: - device_num 
= subprocess.check_output( - ['nvidia-smi', '-L']).decode().count('\n') - + device_num = 1 train_batch_size = args.batch_size / device_num + test_batch_size = 16 if not args.enable_ce: train_reader = paddle.batch( @@ -292,10 +298,15 @@ def train(args): train_py_reader.decorate_paddle_reader(train_reader) test_py_reader.decorate_paddle_reader(test_reader) - train_exe = fluid.ParallelExecutor( - main_program=train_prog, - use_cuda=bool(args.use_gpu), - loss_name=train_cost.name) + + use_ngraph = os.getenv('FLAGS_use_ngraph') + if not use_ngraph: + train_exe = fluid.ParallelExecutor( + main_program=train_prog, + use_cuda=bool(args.use_gpu), + loss_name=train_cost.name) + else: + train_exe = exe train_fetch_list = [ train_cost.name, train_acc1.name, train_acc5.name, global_lr.name @@ -314,9 +325,13 @@ def train(args): try: while True: t1 = time.time() - loss, acc1, acc5, lr = train_exe.run( - fetch_list=train_fetch_list) + if use_ngraph: + loss, acc1, acc5, lr = train_exe.run( + train_prog, fetch_list=train_fetch_list) + else: + loss, acc1, acc5, lr = train_exe.run( + fetch_list=train_fetch_list) t2 = time.time() period = t2 - t1 loss = np.mean(np.array(loss)) -- GitLab