提交 40a8659c 编写于 作者: S sneaxiy

remove gc flags, test=develop

上级 6aaf0192
...@@ -18,19 +18,6 @@ from __future__ import print_function ...@@ -18,19 +18,6 @@ from __future__ import print_function
import os import os
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, it would not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results
......
...@@ -22,19 +22,6 @@ import glob ...@@ -22,19 +22,6 @@ import glob
import numpy as np import numpy as np
from PIL import Image from PIL import Image
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, it would not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid from paddle import fluid
from ppdet.utils.cli import print_total_cfg from ppdet.utils.cli import print_total_cfg
......
...@@ -22,19 +22,6 @@ import numpy as np ...@@ -22,19 +22,6 @@ import numpy as np
import datetime import datetime
from collections import deque from collections import deque
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, it would not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid from paddle import fluid
from ppdet.experimental import mixed_precision_context from ppdet.experimental import mixed_precision_context
......
...@@ -35,13 +35,7 @@ else: ...@@ -35,13 +35,7 @@ else:
# not take any effect. # not take any effect.
set_paddle_flags({ set_paddle_flags({
'FLAGS_cudnn_exhaustive_search': use_cudnn_exhaustive_search, 'FLAGS_cudnn_exhaustive_search': use_cudnn_exhaustive_search,
'FLAGS_conv_workspace_size_limit': 256, 'FLAGS_conv_workspace_size_limit': 256
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
# You can omit the following settings, because the default
# value of FLAGS_memory_fraction_of_eager_deletion is 1,
# and default value of FLAGS_fast_eager_deletion_mode is 1
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fast_eager_deletion_mode': 1
}) })
import random import random
......
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_fraction_of_gpu_memory_to_use=0.01 export FLAGS_fraction_of_gpu_memory_to_use=0.01
CUDA_VISIBLE_DEVICES=0 python train.py --model_net SPADE --dataset cityscapes --train_list train_list --test_list val_list --crop_type Random --batch_size 1 --epoch 200 --load_height 612 --load_width 1124 --crop_height 512 --crop_width 1024 --label_nc 36 CUDA_VISIBLE_DEVICES=0 python train.py --model_net SPADE --dataset cityscapes --train_list train_list --test_list val_list --crop_type Random --batch_size 1 --epoch 200 --load_height 612 --load_width 1124 --crop_height 512 --crop_width 1024 --label_nc 36
...@@ -29,9 +29,7 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,数据下载及准备 ...@@ -29,9 +29,7 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,数据下载及准备
数据准备完毕后,可以通过如下两种方式启动训练: 数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
export FLAGS_fast_eager_deletion_mode=1
python train.py --model_name=BMN \ python train.py --model_name=BMN \
--config=./configs/bmn.yaml \ --config=./configs/bmn.yaml \
--log_interval=10 \ --log_interval=10 \
......
...@@ -30,9 +30,7 @@ TEM模块以snippet-level的特征序列作为输入,预测每一个时序位 ...@@ -30,9 +30,7 @@ TEM模块以snippet-level的特征序列作为输入,预测每一个时序位
数据准备完毕后,可以通过如下两种方式启动训练: 数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
export FLAGS_fast_eager_deletion_mode=1
python train.py --model_name=BsnTem \ python train.py --model_name=BsnTem \
--config=./configs/bsn_tem.yaml \ --config=./configs/bsn_tem.yaml \
--log_interval=10 \ --log_interval=10 \
...@@ -60,9 +58,7 @@ PEM模块以PGM模块输出的BSP特征作为输入,输出proposal包含动作 ...@@ -60,9 +58,7 @@ PEM模块以PGM模块输出的BSP特征作为输入,输出proposal包含动作
数据准备完毕后,可以通过如下两种方式启动训练: 数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
export FLAGS_fast_eager_deletion_mode=1
python train.py --model_name=BsnPem \ python train.py --model_name=BsnPem \
--config=./configs/bsn_pem.yaml \ --config=./configs/bsn_pem.yaml \
--log_interval=10 \ --log_interval=10 \
......
...@@ -25,8 +25,6 @@ C-TCN的训练数据采用ActivityNet1.3提供的数据集,数据下载及准 ...@@ -25,8 +25,6 @@ C-TCN的训练数据采用ActivityNet1.3提供的数据集,数据下载及准
数据准备完毕后,可以通过如下两种方式启动训练: 数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py --model_name=CTCN \ python train.py --model_name=CTCN \
--config=./configs/ctcn.yaml \ --config=./configs/ctcn.yaml \
......
...@@ -35,8 +35,6 @@ TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。 ...@@ -35,8 +35,6 @@ TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。
数据准备完毕后,可以通过如下两种方式启动训练: 数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py --model_name=TSM \ python train.py --model_name=TSM \
--config=./configs/tsm.yaml \ --config=./configs/tsm.yaml \
......
...@@ -26,8 +26,6 @@ TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。 ...@@ -26,8 +26,6 @@ TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。
数据准备完毕后,可以通过如下两种方式启动训练: 数据准备完毕后,可以通过如下两种方式启动训练:
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py --model_name=TSN \ python train.py --model_name=TSN \
--config=./configs/tsn.yaml \ --config=./configs/tsn.yaml \
......
...@@ -25,8 +25,6 @@ weights="" #set the path of weights to enable eval and predicut, just ignore thi ...@@ -25,8 +25,6 @@ weights="" #set the path of weights to enable eval and predicut, just ignore thi
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
#export CUDA_VISIBLE_DEVICES=0,1,2,3 #export CUDA_VISIBLE_DEVICES=0,1,2,3
#export CUDA_VISIBLE_DEVICES=0 #export CUDA_VISIBLE_DEVICES=0
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "train"x ]; then if [ "$mode"x == "train"x ]; then
......
#!/bin/bash #!/bin/bash
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
......
...@@ -27,12 +27,6 @@ def set_paddle_flags(flags): ...@@ -27,12 +27,6 @@ def set_paddle_flags(flags):
# set before `import paddle`. Otherwise, it would # set before `import paddle`. Otherwise, it would
# not take any effect. # not take any effect.
set_paddle_flags({ set_paddle_flags({
'FLAGS_eager_delete_tensor_gb': 0, # enable GC
# You can omit the following settings, because the default
# value of FLAGS_memory_fraction_of_eager_deletion is 1,
# and default value of FLAGS_fast_eager_deletion_mode is 1
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fast_eager_deletion_mode': 1,
# Setting the default used gpu memory # Setting the default used gpu memory
'FLAGS_fraction_of_gpu_memory_to_use': 0.98 'FLAGS_fraction_of_gpu_memory_to_use': 0.98
}) })
......
...@@ -21,21 +21,6 @@ import numpy as np ...@@ -21,21 +21,6 @@ import numpy as np
import time import time
import argparse import argparse
import functools import functools
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from pyramidbox import PyramidBox from pyramidbox import PyramidBox
......
#Training details #Training details
#GPU: NVIDIA® Tesla® P40 8cards 120epochs 55h #GPU: NVIDIA® Tesla® P40 8cards 120epochs 55h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#AlexNet: #AlexNet:
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.96 export FLAGS_fraction_of_gpu_memory_to_use=0.96
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 132h #GPU: NVIDIA® Tesla® V100 4cards 200epochs 132h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#GoogLeNet: #GoogLeNet:
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 8cards 200epochs 367h #GPU: NVIDIA® Tesla® V100 8cards 200epochs 367h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#InceptionV4 #InceptionV4
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 55h #GPU: NVIDIA® Tesla® V100 4cards 120epochs 55h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 240epochs 135h #GPU: NVIDIA® Tesla® V100 4cards 240epochs 135h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
#Training details #Training details
#Missed #Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_32x4d #ResNeXt101_32x4d
......
#Training details #Training details
#Missed #Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_64x4d #ResNeXt101_64x4d
......
#Training details #Training details
#Missed #Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_vd_32x4d #ResNeXt101_vd_32x4d
......
#Training details #Training details
#Missed #Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt101_vd_64x4d #ResNeXt101_vd_64x4d
......
#Training details #Training details
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNeXt152_64x4d #ResNeXt152_64x4d
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 100h #GPU: NVIDIA® Tesla® V100 4cards 120epochs 100h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet101: #ResNet101:
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 182h #GPU: NVIDIA® Tesla® V100 4cards 200epochs 182h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet101_vd #ResNet101_vd
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® P40 8cards 120epochs 200h #GPU: NVIDIA® Tesla® P40 8cards 120epochs 200h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet152: #ResNet152:
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® P40 8cards 200epochs 346h #GPU: NVIDIA® Tesla® P40 8cards 200epochs 346h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \ python train.py \
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h #GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet18: #ResNet18:
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \ python train.py \
......
#Training details #Training details
#Machine: Missed #Machine: Missed
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet200_vd #ResNet200_vd
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 73h #GPU: NVIDIA® Tesla® V100 4cards 120epochs 73h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet34: #ResNet34:
python train.py \ python train.py \
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \ python train.py \
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h #GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet50: #ResNet50:
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 141h #GPU: NVIDIA® Tesla® V100 4cards 200epochs 141h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#ResNet50_vc #ResNet50_vc
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 200epochs 120h #GPU: NVIDIA® Tesla® V100 4cards 200epochs 120h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \ python train.py \
......
#Training details #Training details
#GPU: NVIDIA® Tesla® P40 8cards 200epochs 916h #GPU: NVIDIA® Tesla® P40 8cards 200epochs 916h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#SE_154 #SE_154
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® P40 8cards 120epochs 566h #GPU: NVIDIA® Tesla® P40 8cards 120epochs 566h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#SE_ResNeXt101_32x4d: #SE_ResNeXt101_32x4d:
......
#Training details #Training details
#Machine:V100 4cards 200epochs 282h #Machine:V100 4cards 200epochs 282h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h #GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \ python train.py \
......
##Training details ##Training details
#GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h #GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py \ python train.py \
......
#Training details #Training details
#GPU: NVIDIA® Tesla® P40 8cards 90epochs 52h #GPU: NVIDIA® Tesla® P40 8cards 90epochs 52h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG11: #VGG11:
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 90epochs 58h #GPU: NVIDIA® Tesla® V100 4cards 90epochs 58h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG13: #VGG13:
......
#Training details #Training details
#GPU: NVIDIA® Tesla® P40 8cards 90epochs 72h #GPU: NVIDIA® Tesla® P40 8cards 90epochs 72h
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG16: #VGG16:
......
#Training details #Training details
#GPU: NVIDIA® Tesla® V100 4cards 150epochs 173h #GPU: NVIDIA® Tesla® V100 4cards 150epochs 173h
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 export FLAGS_fraction_of_gpu_memory_to_use=0.98
#VGG19: #VGG19:
......
...@@ -31,7 +31,6 @@ def set_paddle_flags(flags): ...@@ -31,7 +31,6 @@ def set_paddle_flags(flags):
# set before `import paddle`. Otherwise, it would # set before `import paddle`. Otherwise, it would
# not take any effect. # not take any effect.
set_paddle_flags({ set_paddle_flags({
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
'FLAGS_fraction_of_gpu_memory_to_use': 0.98 'FLAGS_fraction_of_gpu_memory_to_use': 0.98
}) })
......
...@@ -26,8 +26,6 @@ def set_paddle_flags(flags): ...@@ -26,8 +26,6 @@ def set_paddle_flags(flags):
set_paddle_flags({ set_paddle_flags({
'FLAGS_conv_workspace_size_limit': 500, 'FLAGS_conv_workspace_size_limit': 500,
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fraction_of_gpu_memory_to_use': 0.98 'FLAGS_fraction_of_gpu_memory_to_use': 0.98
}) })
......
...@@ -20,20 +20,6 @@ import shutil ...@@ -20,20 +20,6 @@ import shutil
import math import math
import multiprocessing import multiprocessing
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import reader import reader
......
...@@ -25,8 +25,6 @@ def set_paddle_flags(flags): ...@@ -25,8 +25,6 @@ def set_paddle_flags(flags):
set_paddle_flags({ set_paddle_flags({
'FLAGS_eager_delete_tensor_gb': 0, # enable gc
'FLAGS_memory_fraction_of_eager_deletion': 1,
'FLAGS_fraction_of_gpu_memory_to_use': 0.98 'FLAGS_fraction_of_gpu_memory_to_use': 0.98
}) })
......
export CUDA_VISIBLE_DEVICES=3 export CUDA_VISIBLE_DEVICES=3
export FLAGS_eager_delete_tensor_gb=0.0
#train on ubuntu #train on ubuntu
python -u main.py \ python -u main.py \
......
export CPU_NUM=1 export CPU_NUM=1
export FLAGS_eager_delete_tensor_gb=0.0
#train on ubuntu #train on ubuntu
python -u main.py \ python -u main.py \
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
set -xe set -xe
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
# set CUDA_VISIBLE_DEVICES # set CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
set -xe set -xe
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
# set CUDA_VISIBLE_DEVICES # set CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
......
#!/bin/bash #!/bin/bash
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
if [ ! "$CUDA_VISIBLE_DEVICES" ] if [ ! "$CUDA_VISIBLE_DEVICES" ]
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
# for gpu memory optimization # for gpu memory optimization
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
......
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export FLAGS_fraction_of_gpu_memory_to_use=0.1 export FLAGS_fraction_of_gpu_memory_to_use=0.1
python serve.py ./infer_model_800_bs128 5001 & python serve.py ./infer_model_800_bs128 5001 &
#!/bin/bash #!/bin/bash
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
......
...@@ -156,7 +156,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3 ...@@ -156,7 +156,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
``` ```
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练 export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练
#export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练 #export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练
...@@ -222,7 +221,6 @@ task_type: train、predict、evaluate、inference, 选择4个参数选项中任 ...@@ -222,7 +221,6 @@ task_type: train、predict、evaluate、inference, 选择4个参数选项中任
``` ```
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练 export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练
#export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练 #export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练
...@@ -298,7 +296,6 @@ export CUDA_VISIBLE_DEVICES=0 #用户可自行指定空闲的卡 ...@@ -298,7 +296,6 @@ export CUDA_VISIBLE_DEVICES=0 #用户可自行指定空闲的卡
``` ```
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #单卡预测 export CUDA_VISIBLE_DEVICES=0 #单卡预测
#export CUDA_VISIBLE_DEVICES= #CPU预测 #export CUDA_VISIBLE_DEVICES= #CPU预测
...@@ -346,7 +343,6 @@ task_type: train、predict、evaluate、inference, 选择4个参数选项中任 ...@@ -346,7 +343,6 @@ task_type: train、predict、evaluate、inference, 选择4个参数选项中任
``` ```
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #单卡预测 export CUDA_VISIBLE_DEVICES=0 #单卡预测
#export CUDA_VISIBLE_DEVICES= #CPU预测 #export CUDA_VISIBLE_DEVICES= #CPU预测
......
#!/bin/bash #!/bin/bash
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
......
...@@ -52,12 +52,6 @@ train_mrda(){ ...@@ -52,12 +52,6 @@ train_mrda(){
--enable_ce=store_true --enable_ce=store_true
} }
# FIXME(zjl): this model would fail when GC is enabled,
# but it seems that this error is from the model itself.
# See issue here: https://github.com/PaddlePaddle/Paddle/issues/18994#event-2532039900
# To fix ce, disable gc in this model temporarily.
export FLAGS_eager_delete_tensor_gb=1
cudaid=${multi:=0,1,2,3} cudaid=${multi:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid export CUDA_VISIBLE_DEVICES=$cudaid
train_atis_slot | python _ce.py train_atis_slot | python _ce.py
......
...@@ -177,7 +177,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3 ...@@ -177,7 +177,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
``` ```
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练 export CUDA_VISIBLE_DEVICES=0 #GPU单卡训练
#export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练 #export CUDA_VISIBLE_DEVICES=0,1,2,3 #GPU多卡训练
...@@ -259,7 +258,6 @@ export CUDA_VISIBLE_DEVICES=0 ...@@ -259,7 +258,6 @@ export CUDA_VISIBLE_DEVICES=0
``` ```
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 #开启显存优化
export CUDA_VISIBLE_DEVICES=0 #单卡预测 export CUDA_VISIBLE_DEVICES=0 #单卡预测
#export CUDA_VISIBLE_DEVICES= #CPU预测 #export CUDA_VISIBLE_DEVICES= #CPU预测
......
#!/bin/bash #!/bin/bash
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
if [ ! "$CUDA_VISIBLE_DEVICES" ] if [ ! "$CUDA_VISIBLE_DEVICES" ]
......
...@@ -147,7 +147,6 @@ export current_endpoint=192.168.0.17:9185 ...@@ -147,7 +147,6 @@ export current_endpoint=192.168.0.17:9185
```shell ```shell
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
BERT_BASE_PATH="chinese_L-12_H-768_A-12" BERT_BASE_PATH="chinese_L-12_H-768_A-12"
...@@ -209,7 +208,6 @@ SQuAD v1.1 ...@@ -209,7 +208,6 @@ SQuAD v1.1
```shell ```shell
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
BERT_BASE_PATH="uncased_L-12_H-768_A-12" BERT_BASE_PATH="uncased_L-12_H-768_A-12"
...@@ -255,7 +253,6 @@ python ${SQUAD_PATH}/evaluate-v1.1.py ${SQUAD_PATH}/dev-v1.1.json ${CHECKPOINT_P ...@@ -255,7 +253,6 @@ python ${SQUAD_PATH}/evaluate-v1.1.py ${SQUAD_PATH}/dev-v1.1.json ${CHECKPOINT_P
```shell ```shell
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
BERT_BASE_PATH="uncased_L-12_H-768_A-12" BERT_BASE_PATH="uncased_L-12_H-768_A-12"
CHECKPOINT_PATH=/path/to/save/checkpoints/ CHECKPOINT_PATH=/path/to/save/checkpoints/
......
export FLAGS_fraction_of_gpu_memory_to_use=0.5 export FLAGS_fraction_of_gpu_memory_to_use=0.5
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
python train.py \ python train.py \
......
#!/bin/bash #!/bin/bash
export FLAGS_fraction_of_gpu_memory_to_use=0.5 export FLAGS_fraction_of_gpu_memory_to_use=0.5
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
train() train()
......
#!/bin/bash #!/bin/bash
export FLAGS_fraction_of_gpu_memory_to_use=0.02 export FLAGS_fraction_of_gpu_memory_to_use=0.02
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export CUDA_VISIBLE_DEVICES=0,1,2,3 # which GPU to use export CUDA_VISIBLE_DEVICES=0,1,2,3 # which GPU to use
function run_train() { function run_train() {
......
#set -eux #set -eux
export FLAGS_fraction_of_gpu_memory_to_use=0.02 export FLAGS_fraction_of_gpu_memory_to_use=0.02
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_sync_nccl_allreduce=1 # export FLAGS_sync_nccl_allreduce=1
# export NCCL_DEBUG=INFO # export NCCL_DEBUG=INFO
# export NCCL_IB_GID_INDEX=3 # export NCCL_IB_GID_INDEX=3
......
...@@ -68,8 +68,6 @@ ...@@ -68,8 +68,6 @@
以提供的英德翻译数据为例,可以执行以下命令进行模型训练: 以提供的英德翻译数据为例,可以执行以下命令进行模型训练:
```sh ```sh
# open garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for training # setting visible devices for training
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
...@@ -86,8 +84,6 @@ python -u main.py \ ...@@ -86,8 +84,6 @@ python -u main.py \
以上命令中传入了执行训练(`do_train`)、训练轮数(`epoch`)和训练数据文件路径(注意请正确设置,支持通配符)等参数,更多参数的使用以及支持的模型超参数可以参见 `transformer.yaml` 配置文件,其中默认提供了 Transformer base model 的配置,如需调整可以在配置文件中更改或通过命令行传入(命令行传入内容将覆盖配置文件中的设置)。可以通过以下命令来训练 Transformer 论文中的 big model: 以上命令中传入了执行训练(`do_train`)、训练轮数(`epoch`)和训练数据文件路径(注意请正确设置,支持通配符)等参数,更多参数的使用以及支持的模型超参数可以参见 `transformer.yaml` 配置文件,其中默认提供了 Transformer base model 的配置,如需调整可以在配置文件中更改或通过命令行传入(命令行传入内容将覆盖配置文件中的设置)。可以通过以下命令来训练 Transformer 论文中的 big model:
```sh ```sh
# open garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for training # setting visible devices for training
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
...@@ -124,8 +120,6 @@ python -u main.py \ ...@@ -124,8 +120,6 @@ python -u main.py \
以英德翻译数据为例,模型训练完成后可以执行以下命令对指定文件中的文本进行翻译: 以英德翻译数据为例,模型训练完成后可以执行以下命令对指定文件中的文本进行翻译:
```sh ```sh
# open garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for prediction # setting visible devices for prediction
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
...@@ -145,8 +139,6 @@ python -u main.py \ ...@@ -145,8 +139,6 @@ python -u main.py \
`predict_file` 指定的文件中文本的翻译结果会输出到 `output_file` 指定的文件。执行预测时需要设置 `init_from_params` 来给出模型所在目录,更多参数的使用可以在 `transformer.yaml` 文件中查阅注释说明并进行更改设置。注意若在执行预测时设置了模型超参数,应与模型训练时的设置一致,如若训练时使用 big model 的参数设置,则预测时对应类似如下命令: `predict_file` 指定的文件中文本的翻译结果会输出到 `output_file` 指定的文件。执行预测时需要设置 `init_from_params` 来给出模型所在目录,更多参数的使用可以在 `transformer.yaml` 文件中查阅注释说明并进行更改设置。注意若在执行预测时设置了模型超参数,应与模型训练时的设置一致,如若训练时使用 big model 的参数设置,则预测时对应类似如下命令:
```sh ```sh
# open garbage collection to save memory
export FLAGS_eager_delete_tensor_gb=0.0
# setting visible devices for prediction # setting visible devices for prediction
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
......
...@@ -32,8 +32,6 @@ import dist_utils ...@@ -32,8 +32,6 @@ import dist_utils
import reader import reader
from transformer import create_net, position_encoding_init from transformer import create_net, position_encoding_init
if os.environ.get('FLAGS_eager_delete_tensor_gb', None) is None:
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
# num_trainers is used for multi-process gpu training # num_trainers is used for multi-process gpu training
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
export CE_MODE_X=1 export CE_MODE_X=1
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
if [ ! -e data_small.pkl ]; then if [ ! -e data_small.pkl ]; then
wget -c http://dam-data.bj.bcebos.com/data_small.pkl wget -c http://dam-data.bj.bcebos.com/data_small.pkl
fi fi
......
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \ python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \ --data_path ./data/data.pkl \
--ext_eval \ --ext_eval \
......
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \ python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \ --data_path ./data/data.pkl \
--word_emb_init ./data/word_embedding.pkl \ --word_emb_init ./data/word_embedding.pkl \
......
...@@ -98,8 +98,6 @@ python train.py --help ...@@ -98,8 +98,6 @@ python train.py --help
```sh ```sh
# 显存使用的比例,显存不足可适当增大,最大为1 # 显存使用的比例,显存不足可适当增大,最大为1
export FLAGS_fraction_of_gpu_memory_to_use=1.0 export FLAGS_fraction_of_gpu_memory_to_use=1.0
# 显存清理的阈值,显存不足可适当减小,最小为0,为负数时不启用
export FLAGS_eager_delete_tensor_gb=0.8
python -u train.py \ python -u train.py \
--src_vocab_fpath gen_data/wmt16_ende_data_bpe/vocab_all.bpe.32000 \ --src_vocab_fpath gen_data/wmt16_ende_data_bpe/vocab_all.bpe.32000 \
--trg_vocab_fpath gen_data/wmt16_ende_data_bpe/vocab_all.bpe.32000 \ --trg_vocab_fpath gen_data/wmt16_ende_data_bpe/vocab_all.bpe.32000 \
......
#!/bin/bash #!/bin/bash
# This file is only used for continuous evaluation. # This file is only used for continuous evaluation.
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=3 export CUDA_VISIBLE_DEVICES=3
if [ ! -d 'pretrain' ]; then if [ ! -d 'pretrain' ]; then
......
...@@ -18,10 +18,6 @@ fi ...@@ -18,10 +18,6 @@ fi
cd - cd -
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
# for distillation # for distillation
#----------------- #-----------------
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
......
...@@ -30,10 +30,6 @@ fi ...@@ -30,10 +30,6 @@ fi
cd - cd -
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
## for quantization for mobilenet_v1 ## for quantization for mobilenet_v1
......
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
python search.py python search.py
...@@ -24,10 +24,6 @@ fi ...@@ -24,10 +24,6 @@ fi
cd - cd -
# enable GC strategy
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
# for distillation # for distillation
#----------------- #-----------------
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
......
...@@ -21,20 +21,6 @@ import math ...@@ -21,20 +21,6 @@ import math
import multiprocessing import multiprocessing
from paddle.fluid.contrib.slim import Compressor from paddle.fluid.contrib.slim import Compressor
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import reader import reader
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册