提交 40a8659c 编写于 作者: S sneaxiy

remove gc flags, test=develop

上级 6aaf0192
......@@ -18,19 +18,6 @@ from __future__ import print_function
import os
def set_paddle_flags(**kwargs):
    """Export flag defaults as environment variables.

    Each keyword argument names an environment variable; its value is
    converted with ``str`` before being stored. A variable that is already
    present in ``os.environ`` is left untouched, so a value exported by the
    caller's shell takes precedence over the default passed here.
    """
    for key, value in kwargs.items():
        # setdefault only writes when the variable is absent.
        os.environ.setdefault(key, str(value))
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, they will not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle.fluid as fluid
from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results
......
......@@ -22,19 +22,6 @@ import glob
import numpy as np
from PIL import Image
def set_paddle_flags(**kwargs):
    """Export flag defaults as environment variables.

    Each keyword argument names an environment variable; its value is
    converted with ``str`` before being stored. A variable that is already
    present in ``os.environ`` is left untouched, so a value exported by the
    caller's shell takes precedence over the default passed here.
    """
    for key, value in kwargs.items():
        # setdefault only writes when the variable is absent.
        os.environ.setdefault(key, str(value))
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, they will not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
from ppdet.utils.cli import print_total_cfg
......
......@@ -22,19 +22,6 @@ import numpy as np
import datetime
from collections import deque
def set_paddle_flags(**kwargs):
    """Export flag defaults as environment variables.

    Each keyword argument names an environment variable; its value is
    converted with ``str`` before being stored. A variable that is already
    present in ``os.environ`` is left untouched, so a value exported by the
    caller's shell takes precedence over the default passed here.
    """
    for key, value in kwargs.items():
        # setdefault only writes when the variable is absent.
        os.environ.setdefault(key, str(value))
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, they will not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
from ppdet.experimental import mixed_precision_context
......
......@@ -35,13 +35,7 @@ else:
# not take any effect.
set_paddle_flags({
'FLAGS_cudnn_exhaustive_search': use_cudnn_exhaustive_search,
'FLAGS_conv_workspace_size_limit': 256,
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
# You can omit the following settings, because the default
# value of FLAGS_memory_fraction_of_eager_deletion is 1,
# and default value of FLAGS_fast_eager_deletion_mode is 1
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fast_eager_deletion_mode': 1
'FLAGS_conv_workspace_size_limit': 256
})
import random
......
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_fraction_of_gpu_memory_to_use=0.01
CUDA_VISIBLE_DEVICES=0 python train.py --model_net SPADE --dataset cityscapes --train_list train_list --test_list val_list --crop_type Random --batch_size 1 --epoch 200 --load_height 612 --load_width 1124 --crop_height 512 --crop_width 1024 --label_nc 36
......@@ -29,9 +29,7 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,数据下载及准备
数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
export FLAGS_fast_eager_deletion_mode=1
python train.py --model_name=BMN \
--config=./configs/bmn.yaml \
--log_interval=10 \
......
......@@ -30,9 +30,7 @@ TEM模块以snippet-level的特征序列作为输入,预测每一个时序位
数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
export FLAGS_fast_eager_deletion_mode=1
python train.py --model_name=BsnTem \
--config=./configs/bsn_tem.yaml \
--log_interval=10 \
......@@ -60,9 +58,7 @@ PEM模块以PGM模块输出的BSP特征作为输入,输出proposal包含动作
数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
export FLAGS_fast_eager_deletion_mode=1
python train.py --model_name=BsnPem \
--config=./configs/bsn_pem.yaml \
--log_interval=10 \
......
......@@ -25,8 +25,6 @@ C-TCN的训练数据采用ActivityNet1.3提供的数据集,数据下载及准
数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py --model_name=CTCN \
--config=./configs/ctcn.yaml \
......
......@@ -35,8 +35,6 @@ TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。
数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py --model_name=TSM \
--config=./configs/tsm.yaml \
......
......@@ -26,8 +26,6 @@ TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。
数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py --model_name=TSN \
--config=./configs/tsn.yaml \
......
......@@ -25,8 +25,6 @@ weights="" #set the path of weights to enable eval and predicut, just ignore thi
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
#export CUDA_VISIBLE_DEVICES=0,1,2,3
#export CUDA_VISIBLE_DEVICES=0
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "train"x ]; then
......
#!/bin/bash
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
export CUDA_VISIBLE_DEVICES=0
......
......@@ -27,12 +27,6 @@ def set_paddle_flags(flags):
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags({
'FLAGS_eager_delete_tensor_gb': 0, # enable GC
# You can omit the following settings, because the default
# value of FLAGS_memory_fraction_of_eager_deletion is 1,
# and default value of FLAGS_fast_eager_deletion_mode is 1
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fast_eager_deletion_mode': 1,
# Setting the default used gpu memory
'FLAGS_fraction_of_gpu_memory_to_use': 0.98
})
......
......@@ -21,21 +21,6 @@ import numpy as np
import time
import argparse
import functools
def set_paddle_flags(**kwargs):
    """Export flag defaults as environment variables.

    Each keyword argument names an environment variable; its value is
    converted with ``str`` before being stored. A variable that is already
    present in ``os.environ`` is left untouched, so a value exported by the
    caller's shell takes precedence over the default passed here.
    """
    for key, value in kwargs.items():
        # setdefault only writes when the variable is absent.
        os.environ.setdefault(key, str(value))
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they will
# not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle
import paddle.fluid as fluid
from pyramidbox import PyramidBox
......
#Training details
#GPU: NVIDIA® Tesla® P40 8cards 120epochs 55h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#AlexNet:
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.96
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 132h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#GoogLeNet:
......
#Training details
#GPU: NVIDIA® Tesla® V100 8cards 200epochs 367h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#InceptionV4
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 55h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 240epochs 135h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
#Training details
#Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_32x4d
......
#Training details
#Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_64x4d
......
#Training details
#Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_vd_32x4d
......
#Training details
#Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_vd_64x4d
......
#Training details
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt152_64x4d
......
##Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 100h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet101:
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 182h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet101_vd
......
##Training details
#GPU: NVIDIA® Tesla® P40 8cards 120epochs 200h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet152:
......
##Training details
#GPU: NVIDIA® Tesla® P40 8cards 200epochs 346h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \
......
##Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet18:
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \
......
#Training details
#Machine: Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet200_vd
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 73h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet34:
python train.py \
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \
......
##Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet50:
......
##Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 141h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet50_vc
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 120h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \
......
#Training details
#GPU: NVIDIA® Tesla® P40 8cards 200epochs 916h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#SE_154
......
##Training details
#GPU: NVIDIA® Tesla® P40 8cards 120epochs 566h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#SE_ResNeXt101_32x4d:
......
#Training details
#Machine:V100 4cards 200epochs 282h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
##Training details
#GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \
......
##Training details
#GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \
......
#Training details
#GPU: NVIDIA® Tesla® P40 8cards 90epochs 52h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG11:
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 90epochs 58h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG13:
......
#Training details
#GPU: NVIDIA® Tesla® P40 8cards 90epochs 72h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG16:
......
#Training details
#GPU: NVIDIA® Tesla® V100 4cards 150epochs 173h
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG19:
......
......@@ -31,7 +31,6 @@ def set_paddle_flags(flags):
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags({
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
'FLAGS_fraction_of_gpu_memory_to_use': 0.98
})
......
......@@ -26,8 +26,6 @@ def set_paddle_flags(flags):
set_paddle_flags({
'FLAGS_conv_workspace_size_limit': 500,
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fraction_of_gpu_memory_to_use': 0.98
})
......
......@@ -20,20 +20,6 @@ import shutil
import math
import multiprocessing
def set_paddle_flags(**kwargs):
    """Export flag defaults as environment variables.

    Each keyword argument names an environment variable; its value is
    converted with ``str`` before being stored. A variable that is already
    present in ``os.environ`` is left untouched, so a value exported by the
    caller's shell takes precedence over the default passed here.
    """
    for key, value in kwargs.items():
        # setdefault only writes when the variable is absent.
        os.environ.setdefault(key, str(value))
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they will
# not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle
import paddle.fluid as fluid
import reader
......
......@@ -25,8 +25,6 @@ def set_paddle_flags(flags):
set_paddle_flags({
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fraction_of_gpu_memory_to_use': 0.98
})
......
export CUDA_VISIBLE_DEVICES=3
export FLAGS_eager_delete_tensor_gb=0.0
#train on ubuntu
python -u main.py \
......
export CPU_NUM=1
export FLAGS_eager_delete_tensor_gb=0.0
#train on ubuntu
python -u main.py \
......
......@@ -18,7 +18,6 @@
set -xe
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
# set CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0
......
......@@ -18,7 +18,6 @@
set -xe
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
# set CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0
......
#!/bin/bash
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
if [ ! "$CUDA_VISIBLE_DEVICES" ]
......
......@@ -2,7 +2,6 @@
# for gpu memory optimization
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
......
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export FLAGS_fraction_of_gpu_memory_to_use=0.1
python serve.py ./infer_model_800_bs128 5001 &
#!/bin/bash
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0
export CUDA_VISIBLE_DEVICES=0
......
......@@ -156,7 +156,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
```
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练
#export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练
......@@ -222,7 +221,6 @@ task_type: train、predict、evaluate、inference, 选择4个参数选项中任
```
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练
#export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练
......@@ -298,7 +296,6 @@ export CUDA_VISIBLE_DEVICES=0 #用户可自行指定空闲的卡
```
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #单卡预测
#export CUDA_VISIBLE_DEVICES= #CPU预测
......@@ -346,7 +343,6 @@ task_type: train、predict、evaluate、inference, 选择4个参数选项中任
```
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #单卡预测
#export CUDA_VISIBLE_DEVICES= #CPU预测
......
#!/bin/bash
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0
export CUDA_VISIBLE_DEVICES=0
......
......@@ -52,12 +52,6 @@ train_mrda(){
--enable_ce=store_true
}
# FIXME(zjl): this model would fail when GC is enabled,
# but it seems that this error is from the model itself.
# See issue here: https://github.com/PaddlePaddle/Paddle/issues/18994#event-2532039900
# To fix ce, disable gc in this model temporarily.
export FLAGS_eager_delete_tensor_gb=1
cudaid=${multi:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
train_atis_slot | python _ce.py
......
......@@ -177,7 +177,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
```
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练
#export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练
......@@ -259,7 +258,6 @@ export CUDA_VISIBLE_DEVICES=0
```
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #单卡预测
#export CUDA_VISIBLE_DEVICES= #CPU预测
......
#!/bin/bash
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0
if [ ! "$CUDA_VISIBLE_DEVICES" ]
......
......@@ -147,7 +147,6 @@ export current_endpoint=192.168.0.17:9185
```shell
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
BERT_BASE_PATH="chinese_L-12_H-768_A-12"
......@@ -209,7 +208,6 @@ SQuAD v1.1
```shell
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3
BERT_BASE_PATH="uncased_L-12_H-768_A-12"
......@@ -255,7 +253,6 @@ python ${SQUAD_PATH}/evaluate-v1.1.py ${SQUAD_PATH}/dev-v1.1.json ${CHECKPOINT_P
```shell
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3
BERT_BASE_PATH="uncased_L-12_H-768_A-12"
CHECKPOINT_PATH=/path/to/save/checkpoints/
......
export FLAGS_fraction_of_gpu_memory_to_use=0.5
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export CUDA_VISIBLE_DEVICES=0
python train.py \
......
#!/bin/bash
export FLAGS_fraction_of_gpu_memory_to_use=0.5
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
train()
......
#!/bin/bash
export FLAGS_fraction_of_gpu_memory_to_use=0.02
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export CUDA_VISIBLE_DEVICES=0,1,2,3 # which GPU to use
function run_train() {
......
#set -eux
export FLAGS_fraction_of_gpu_memory_to_use=0.02
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_sync_nccl_allreduce=1
# export NCCL_DEBUG=INFO
# export NCCL_IB_GID_INDEX=3
......
......@@ -68,8 +68,6 @@
以提供的英德翻译数据为例,可以执行以下命令进行模型训练:
```sh
# enable garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for training
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
......@@ -86,8 +84,6 @@ python -u main.py \
以上命令中传入了执行训练(`do_train`)、训练轮数(`epoch`)和训练数据文件路径(注意请正确设置,支持通配符)等参数,更多参数的使用以及支持的模型超参数可以参见 `transformer.yaml` 配置文件,其中默认提供了 Transformer base model 的配置,如需调整可以在配置文件中更改或通过命令行传入(命令行传入内容将覆盖配置文件中的设置)。可以通过以下命令来训练 Transformer 论文中的 big model:
```sh
# enable garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for training
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
......@@ -124,8 +120,6 @@ python -u main.py \
以英德翻译数据为例,模型训练完成后可以执行以下命令对指定文件中的文本进行翻译:
```sh
# enable garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for prediction
export CUDA_VISIBLE_DEVICES=0
......@@ -145,8 +139,6 @@ python -u main.py \
`predict_file` 指定的文件中文本的翻译结果会输出到 `output_file` 指定的文件。执行预测时需要设置 `init_from_params` 来给出模型所在目录,更多参数的使用可以在 `transformer.yaml` 文件中查阅注释说明并进行更改设置。注意若在执行预测时设置了模型超参数,应与模型训练时的设置一致,如若训练时使用 big model 的参数设置,则预测时对应类似如下命令:
```sh
# enable garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for prediction
export CUDA_VISIBLE_DEVICES=0
......
......@@ -32,8 +32,6 @@ import dist_utils
import reader
from transformer import create_net, position_encoding_init
if os.environ.get('FLAGS_eager_delete_tensor_gb', None) is None:
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
# num_trainers is used for multi-process gpu training
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
......
......@@ -3,7 +3,6 @@
export CE_MODE_X=1
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
if [ ! -e data_small.pkl ]; then
wget -c http://dam-data.bj.bcebos.com/data_small.pkl
fi
......
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--ext_eval \
......
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--word_emb_init ./data/word_embedding.pkl \
......
......@@ -98,8 +98,6 @@ python train.py --help
```sh
# 显存使用的比例,显存不足可适当增大,最大为1
export FLAGS_fraction_of_gpu_memory_to_use=1.0
# 显存清理的阈值,显存不足可适当减小,最小为0,为负数时不启用
export FLAGS_eager_delete_tensor_gb=0.8
python -u train.py \
--src_vocab_fpath gen_data/wmt16_ende_data_bpe/vocab_all.bpe.32000 \
--trg_vocab_fpath gen_data/wmt16_ende_data_bpe/vocab_all.bpe.32000 \
......
#!/bin/bash
# This file is only used for continuous evaluation.
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=3
if [ ! -d 'pretrain' ]; then
......
......@@ -18,10 +18,6 @@ fi
cd -
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
# for distillation
#-----------------
export CUDA_VISIBLE_DEVICES=0,1,2,3
......
......@@ -30,10 +30,6 @@ fi
cd -
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0
## for quantization for mobilenet_v1
......
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0,1,2,3
python search.py
......@@ -24,10 +24,6 @@ fi
cd -
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
# for distillation
#-----------------
export CUDA_VISIBLE_DEVICES=0,1,2,3
......
......@@ -21,20 +21,6 @@ import math
import multiprocessing
from paddle.fluid.contrib.slim import Compressor
def set_paddle_flags(**kwargs):
    """Export flag defaults as environment variables.

    Each keyword argument names an environment variable; its value is
    converted with ``str`` before being stored. A variable that is already
    present in ``os.environ`` is left untouched, so a value exported by the
    caller's shell takes precedence over the default passed here.
    """
    for key, value in kwargs.items():
        # setdefault only writes when the variable is absent.
        os.environ.setdefault(key, str(value))
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they will
# not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle
import paddle.fluid as fluid
import reader
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册