High memory usage during model training
Created by: JingChunzhen
- Version and environment info: 1) PaddlePaddle version: 1.5.0 2) CPU: 3) GPU: Tesla V100
- Training info: 1) single machine, single GPU 2) memory info: the model is a two-tower model, with ResNet-50 as the left tower, a BOW encoder as the right tower, and two FC layers on top (a rough sketch of this two-tower structure is given at the end of this report). The current situation is that memory usage during training is high, around 84 GB of host memory. Training uses io.PyReader, and the memory strategy is set up as follows:
import paddle.fluid as fluid

places = fluid.cuda_places()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)

exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = fluid.core.get_cuda_device_count()
exec_strategy.num_iteration_per_drop_scope = 100

build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = True

# loss comes from the model definition (not shown)
train_exe = fluid.ParallelExecutor(
    use_cuda=True,
    main_program=fluid.default_main_program(),
    loss_name=loss.name,
    build_strategy=build_strategy,
    exec_strategy=exec_strategy)

# feed_list comes from the model definition (not shown)
train_reader = fluid.io.PyReader(
    feed_list=feed_list,
    capacity=5,
    use_double_buffer=True,
    iterable=True)
train_reader.decorate_batch_generator(train_batch_gen, places=places)
# train_batch_gen is a multi-process data reader that uses linecache
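For reference, a minimal sketch of what a linecache-based multi-process batch generator like train_batch_gen might look like; the data path, dataset size, batch size, number of processes, and line format here are assumptions, not the actual reader. Note that linecache.getline keeps the whole file cached in memory inside each worker process.

import linecache
from multiprocessing import Pool

DATA_FILE = "train.txt"   # assumed path; the real reader differs
NUM_SAMPLES = 1000000     # assumed dataset size
BATCH_SIZE = 32           # assumed batch size

def _parse_line(line_no):
    # linecache caches the entire file in this worker's memory
    line = linecache.getline(DATA_FILE, line_no)
    img_path, token_ids, label = line.rstrip("\n").split("\t")
    return img_path, token_ids, int(label)

def train_batch_gen():
    pool = Pool(processes=4)  # assumed number of reader processes
    batch = []
    for sample in pool.imap(_parse_line, range(1, NUM_SAMPLES + 1)):
        batch.append(sample)
        if len(batch) == BATCH_SIZE:
            # conversion of the batch into the numpy arrays / LoDTensors
            # expected by feed_list is omitted in this sketch
            yield batch
            batch = []
    pool.close()
    pool.join()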
The training job is launched with the following environment flags:

export CUDA_VISIBLE_DEVICES=4                  # run on GPU 4 only
export FLAGS_sync_nccl_allreduce=1             # synchronous NCCL allreduce
export FLAGS_fraction_of_gpu_memory_to_use=0   # allocate GPU memory on demand instead of pre-allocating
export FLAGS_eager_delete_tensor_gb=0.0        # eagerly free intermediate tensors once unused
export FLAGS_fast_eager_deletion_mode=1        # use the fast garbage-collection strategy
python XXX.py
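For context, here is a rough sketch of the two-tower structure described above (ResNet-50 image tower, BOW text tower, two FC layers on top). The resnet_50 helper, vocabulary size, embedding width, FC sizes, and output size are all assumptions, not the actual model definition.

import paddle.fluid as fluid

def two_tower_net(image, word_ids, label):
    # left tower: ResNet-50 image features; resnet_50 is a placeholder
    # for the actual backbone definition, which is not shown here
    img_feat = resnet_50(image)

    # right tower: bag-of-words text features
    emb = fluid.layers.embedding(
        input=word_ids, size=[100000, 128], is_sparse=True)  # assumed vocab/embedding size
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')

    # top: concatenate the two towers and apply two FC layers
    feat = fluid.layers.concat([img_feat, bow], axis=1)
    fc1 = fluid.layers.fc(input=feat, size=512, act='relu')  # assumed width
    logits = fluid.layers.fc(input=fc1, size=2, act=None)    # assumed output size

    loss = fluid.layers.softmax_with_cross_entropy(logits=logits, label=label)
    return fluid.layers.mean(loss)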