diff --git a/PaddleCV/image_classification/models/se_resnext.py b/PaddleCV/image_classification/models/se_resnext.py
index 697c551e147bed481c0aa88682b0866716d59f93..a6d0d635ac46893f41858b7be9c2d1e5a903724b 100644
--- a/PaddleCV/image_classification/models/se_resnext.py
+++ b/PaddleCV/image_classification/models/se_resnext.py
@@ -69,7 +69,8 @@ class SE_ResNeXt():
                 pool_size=3,
                 pool_stride=2,
                 pool_padding=1,
-                pool_type='max')
+                pool_type='max',
+                use_cudnn=False)
         elif layers == 101:
             cardinality = 32
             reduction_ratio = 16
@@ -88,7 +89,8 @@ class SE_ResNeXt():
                 pool_size=3,
                 pool_stride=2,
                 pool_padding=1,
-                pool_type='max')
+                pool_type='max',
+                use_cudnn=False)
         elif layers == 152:
             cardinality = 64
             reduction_ratio = 16
@@ -118,7 +120,7 @@ class SE_ResNeXt():
                 name='conv3')
             conv = fluid.layers.pool2d(
                 input=conv, pool_size=3, pool_stride=2, pool_padding=1, \
-                pool_type='max')
+                pool_type='max', use_cudnn=False)
         n = 1 if layers == 50 or layers == 101 else 3
         for block in range(len(depth)):
             n += 1
@@ -132,7 +134,11 @@ class SE_ResNeXt():
                     name=str(n) + '_' + str(i + 1))
 
         pool = fluid.layers.pool2d(
-            input=conv, pool_size=7, pool_type='avg', global_pooling=True)
+            input=conv,
+            pool_size=7,
+            pool_type='avg',
+            global_pooling=True,
+            use_cudnn=False)
         drop = fluid.layers.dropout(
             x=pool, dropout_prob=0.5, seed=self.params['dropout_seed'])
         stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
@@ -224,7 +230,11 @@ class SE_ResNeXt():
                           reduction_ratio,
                           name=None):
         pool = fluid.layers.pool2d(
-            input=input, pool_size=0, pool_type='avg', global_pooling=True)
+            input=input,
+            pool_size=0,
+            pool_type='avg',
+            global_pooling=True,
+            use_cudnn=False)
         stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
         squeeze = fluid.layers.fc(
             input=pool,
diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py
index cb33fd72c0fa4194132bfaed5cedc159e3963850..3c929e5fa70e9124d137528d7a60b3a92e0cd196 100644
--- a/PaddleCV/image_classification/train.py
+++ b/PaddleCV/image_classification/train.py
@@ -22,10 +22,23 @@ import time
 import sys
 import functools
 import math
+
+def set_paddle_flags(flags):
+    for key, value in flags.items():
+        if os.environ.get(key, None) is None:
+            os.environ[key] = str(value)
+
+
+# NOTE(paddle-dev): All of these flags should be
+# set before `import paddle`. Otherwise, they
+# will have no effect.
+set_paddle_flags({
+    'FLAGS_eager_delete_tensor_gb': 0,  # enable gc
+    'FLAGS_fraction_of_gpu_memory_to_use': 0.98
+})
 import argparse
 import functools
 import subprocess
-
 import paddle
 import paddle.fluid as fluid
 import paddle.dataset.flowers as flowers
@@ -50,6 +63,7 @@ add_arg('class_dim', int, 1000, "Class number.")
 add_arg('image_shape', str, "3,224,224", "input image size")
 add_arg('model_save_dir', str, "output", "model save directory")
 add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
+add_arg('with_inplace', bool, True, "Whether to use inplace memory optimization.")
 add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
 add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
 add_arg('lr', float, 0.1, "set learning rate.")
@@ -412,10 +426,20 @@ def train(args):
     # use_ngraph is for CPU only, please refer to README_ngraph.md for details
     use_ngraph = os.getenv('FLAGS_use_ngraph')
     if not use_ngraph:
+        build_strategy = fluid.BuildStrategy()
+        build_strategy.memory_optimize = args.with_mem_opt
+        build_strategy.enable_inplace = args.with_inplace
+        build_strategy.fuse_all_reduce_ops = True
+
+        exec_strategy = fluid.ExecutionStrategy()
+        exec_strategy.num_iteration_per_drop_scope = 10
+
         train_exe = fluid.ParallelExecutor(
             main_program=train_prog,
             use_cuda=bool(args.use_gpu),
-            loss_name=train_cost.name)
+            loss_name=train_cost.name,
+            build_strategy=build_strategy,
+            exec_strategy=exec_strategy)
     else:
         train_exe = exe
 
@@ -429,6 +453,7 @@ def train(args):
     test_info = [[], [], []]
     train_time = []
     batch_id = 0
+    time_record = []
     try:
         while True:
             t1 = time.time()
@@ -450,6 +475,7 @@ def train(args):
 
             t2 = time.time()
             period = t2 - t1
+            time_record.append(period)
             loss = np.mean(np.array(loss))
             train_info[0].append(loss)
 
@@ -457,6 +483,8 @@ def train(args):
             train_time.append(period)
 
             if batch_id % 10 == 0:
+                period = np.mean(time_record)
+                time_record = []
                 if use_mixup:
                     print("Pass {0}, trainbatch {1}, loss {2}, lr {3}, time {4}"
                           .format(pass_id, batch_id, "%.5f"%loss, "%.5f" %lr, "%2.2f sec" % period))
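
For readers porting this change to another Fluid-era training script, the sketch below condenses the two patterns the patch combines: exporting `FLAGS_*` environment variables before `paddle` is imported, and passing a `BuildStrategy`/`ExecutionStrategy` pair into `fluid.ParallelExecutor`. It is a minimal illustration against the 1.x `paddle.fluid` API used in this repo; the helper `build_parallel_executor` and its parameters are illustrative names, not part of the patch.

```python
import os


def set_paddle_flags(flags):
    # Export each FLAGS_* variable unless the user already set it,
    # so values supplied in the environment still take precedence.
    for key, value in flags.items():
        if os.environ.get(key, None) is None:
            os.environ[key] = str(value)


# The flags are read when the paddle core is first loaded, so they
# must be in the environment before `import paddle` runs.
set_paddle_flags({
    'FLAGS_eager_delete_tensor_gb': 0,           # enable garbage collection
    'FLAGS_fraction_of_gpu_memory_to_use': 0.98  # pre-allocate most GPU memory
})

import paddle.fluid as fluid


def build_parallel_executor(train_prog, loss, use_gpu=True,
                            with_mem_opt=True, with_inplace=True):
    # Same strategy wiring as the patch: the legacy memory-optimize pass
    # and in-place operator reuse are both toggled through BuildStrategy.
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = with_mem_opt
    build_strategy.enable_inplace = with_inplace
    build_strategy.fuse_all_reduce_ops = True

    exec_strategy = fluid.ExecutionStrategy()
    # Drop local execution scopes every 10 iterations instead of every
    # iteration, trading a little peak memory for less cleanup overhead.
    exec_strategy.num_iteration_per_drop_scope = 10

    return fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=use_gpu,
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
```

The defaults mirror the patch's new `with_inplace` and existing `with_mem_opt` arguments. Note that with `num_iteration_per_drop_scope = 10`, the iteration that drops scopes runs slower than its neighbors, which is presumably why the patch logs the mean of `time_record` every 10 batches rather than a single batch's time.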