diff --git a/LRC/model.py b/LRC/model.py
index ca79be01513ded39d5965f3a00360af5dd9000c5..8aa9fe48bda6fe233043170084a84f01863fb8c2 100644
--- a/LRC/model.py
+++ b/LRC/model.py
@@ -98,9 +98,7 @@ class Cell():
 
 
 def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
-    relu_a = fluid.layers.relu(input, inplace=True)
-    #relu_a.persistable = True
-    #print(relu_a)
+    relu_a = fluid.layers.relu(input)
     pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3)
     conv2d_a = fluid.layers.conv2d(
         pool_a,
@@ -144,8 +142,6 @@ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
             initializer=Constant(0.), name=bn_b_name + '.bias'),
         moving_mean_name=bn_b_name + '.running_mean',
         moving_variance_name=bn_b_name + '.running_var')
-    #bn_b.persistable = True
-    #print(bn_b)
     fc_name = aux_name + '.classifier'
     fc = fluid.layers.fc(bn_b,
                          num_classes,
@@ -236,25 +232,14 @@ class NetworkCIFAR(object):
         self.logits_aux = None
         num_channel = init_channel * 3
         s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
-        #s0.persistable = True
-        #print(s0)
-        print(s0)
         for i, cell in enumerate(self.cells):
-            #s1.persistable = True
-            #print(s1)
             name = 'cells.' + str(i) + '.'
             s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
                                       name)
             if i == int(2 * self._layers // 3):
                 if self._auxiliary and self.training:
-                    #s1.persistable = True
-                    #print(s1)
                     self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
-        #self.logits_aux.persistable = True
-        #print(self.logits_aux)
         out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
-        #out.persistable = True
-        #print(out)
         self.logits = fluid.layers.fc(out,
                                       size=self.class_num,
                                       param_attr=ParamAttr(
@@ -263,9 +248,6 @@ class NetworkCIFAR(object):
                                       bias_attr=ParamAttr(
                                           initializer=Constant(0,),
                                           name='classifier.bias'))
-        #self.logits.persistable = True
-        #print(self.logits)
-        #print(self.logits_aux)
         return self.logits, self.logits_aux
 
     def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda):
@@ -302,7 +284,6 @@ class NetworkCIFAR(object):
             loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux)
             loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam
                                                      ) * loss_b_aux_mean
-            #print(loss_aux)
             return loss + auxiliary_weight * loss_aux
 
     def lrc_loss(self):
@@ -338,10 +319,8 @@ class NetworkCIFAR(object):
         return lrc_loss_mean
 
 def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'):
-    relu_a = fluid.layers.relu(input, inplace=False)
-    #relu_a.persistable = True
-    #print(relu_a)
-    pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2)
+    relu_a = fluid.layers.relu(input)
+    pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=3)
     conv2d_a = fluid.layers.conv2d(
         pool_a,
         128,
@@ -374,8 +353,6 @@ def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'):
                 uniform=False, fan_in=0),
             name=aux_name + '.features.5.weight'),
         bias_attr=False)
-    #bn_b.persistable = True
-    #print(bn_b)
     fc_name = aux_name + '.classifier'
     fc = fluid.layers.fc(conv2d_b,
                          num_classes,
@@ -413,6 +390,7 @@ def Stem0Conv(input, C_out):
         bn_a,
         C_out,
         3,
+        stride=2,
         padding=1,
         param_attr=ParamAttr(
             initializer=Xavier(
@@ -430,7 +408,7 @@ def Stem0Conv(input, C_out):
     return bn_b
 
 def Stem1Conv(input, C_out):
-    relu_a = fluid.layers.relu(input,inplace=False)
+    relu_a = fluid.layers.relu(input)
     conv_a = fluid.layers.conv2d(
         relu_a,
         C_out,
@@ -452,10 +430,9 @@ def Stem1Conv(input, C_out):
     return bn_a
 
 class NetworkImageNet(object):
-    def __init__(self, C, class_num, layers, auxiliary, genotype):
+    def __init__(self, C, class_num, layers, genotype):
         self.class_num = class_num
         self._layers = layers
-        self._auxiliary = auxiliary
         self.drop_path_prob = 0
 
 
@@ -499,28 +476,19 @@ class NetworkImageNet(object):
         return py_reader
 
 
-    def forward(self, init_channel, is_train):
+    def forward(self, is_train):
         self.training = is_train
         self.logits_aux = None
-        num_channel = init_channel * 3
         s0 = self.stem0(self.image)
         s1 = self.stem1(s0)
         for i, cell in enumerate(self.cells):
-            #s1.persistable = True
-            #print(s1)
             name = 'cells.' + str(i) + '.'
             s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
                                       name)
             if i == int(2 * self._layers // 3):
-                if self._auxiliary and self.training:
-                    #s1.persistable = True
-                    #print(s1)
+                if self.training:
                     self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num)
-        #self.logits_aux.persistable = True
-        #print(self.logits_aux)
-        out = fluid.layers.pool2d(s1, 7, "avg")
-        #out.persistable = True
-        #print(out)
+        out = fluid.layers.pool2d(s1, 7, "avg", pool_stride=7)
         self.logits = fluid.layers.fc(out,
                                       size=self.class_num,
                                       param_attr=ParamAttr(
@@ -529,12 +497,9 @@ class NetworkImageNet(object):
                                       bias_attr=ParamAttr(
                                           initializer=Constant(0,),
                                           name='classifier.bias'))
-        #self.logits.persistable = True
-        #print(self.logits)
-        #print(self.logits_aux)
         return self.logits, self.logits_aux
 
-    def calc_loss(self, auxiliary, auxiliary_weight):
+    def calc_loss(self, auxiliary_weight):
         prob = fluid.layers.softmax(self.logits, use_cudnn=False)
         loss = fluid.layers.cross_entropy(prob, self.label)
 
@@ -548,15 +513,15 @@ class NetworkImageNet(object):
         loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
         return loss_mean + auxiliary_weight * loss_aux_mean
 
-    def train_model(self, py_reader, init_channels, aux, aux_w):
+    def train_model(self, py_reader, aux_w):
         self.image, self.label = fluid.layers.read_file(py_reader)
-        self.logits, self.logits_aux = self.forward(init_channels, True)
-        self.loss = self.calc_loss(aux, aux_w)
+        self.logits, self.logits_aux = self.forward(True)
+        self.loss = self.calc_loss(aux_w)
         return self.loss
 
-    def test_model(self, py_reader, init_channels):
+    def test_model(self, py_reader):
         self.image, self.label = fluid.layers.read_file(py_reader)
-        self.logits, _ = self.forward(init_channels, False)
+        self.logits, _ = self.forward(False)
         prob = fluid.layers.softmax(self.logits, use_cudnn=False)
         loss = fluid.layers.cross_entropy(prob, self.label)
         acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1)
diff --git a/LRC/reader_imagenet.py b/LRC/reader_imagenet.py
index b8d28aafd140a96935905604a2106bb2ac1db9fc..0701952a4c526489d01270c9502d026642406809 100644
--- a/LRC/reader_imagenet.py
+++ b/LRC/reader_imagenet.py
@@ -22,7 +22,6 @@
 from PIL import Image
 from PIL import ImageOps
 import numpy as np
-
 try:
     import cPickle as pickle
 except:
@@ -35,38 +34,64 @@
 import os
 import functools
 import paddle.reader
 import math
+import cv2
 
 __all__ = ['train10', 'test10']
 
-train_image_size = 224
+train_image_size = 320
 test_image_size = 256
 CIFAR_MEAN = [0.485, 0.456, 0.406]
 CIFAR_STD = [0.229, 0.224, 0.225]
 
+def _parse_kv(r):
+    """ parse kv data from sequence file for imagenet
+    """
+    import cPickle
+    k, v = r
+    obj = cPickle.loads(v)
+    return obj['image'], obj['label']
+
 def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
     aspect_ratio = math.sqrt(np.random.uniform(*ratio))
     w = 1. * aspect_ratio
     h = 1. / aspect_ratio
-    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
-                (float(img.size[1]) / img.size[0]) / (h**2))
+    # PIL
+    #bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+    #            (float(img.size[1]) / img.size[0]) / (h**2))
+    # cv2
+    bound = min((float(img.shape[1]) / img.shape[0]) / (w**2),
+                (float(img.shape[0]) / img.shape[1]) / (h**2))
     scale_max = min(scale[1], bound)
     scale_min = min(scale[0], bound)
 
-    target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
-                                                                scale_max)
+    # PIL
+    #target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
+    #                                                            scale_max)
+    #cv2
+    target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min,
+                                                                  scale_max)
+
     target_size = math.sqrt(target_area)
     w = int(target_size * w)
     h = int(target_size * h)
-    i = np.random.randint(0, img.size[0] - w + 1)
-    j = np.random.randint(0, img.size[1] - h + 1)
+    # PIL
+    #i = np.random.randint(0, img.size[0] - w + 1)
+    #j = np.random.randint(0, img.size[1] - h + 1)
 
-    img = img.crop((i, j, i + w, j + h))
-    img = img.resize((size, size), Image.BILINEAR)
+    #img = img.crop((i, j, i + w, j + h))
+    #img = img.resize((size, size), Image.BILINEAR)
+    # cv2
+    i = np.random.randint(0, img.shape[0] - h + 1)
+    j = np.random.randint(0, img.shape[1] - w + 1)
+    img = img[i:i+h, j:j+w,:]
+    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)
 
     return img
 
 
+# PIL
+"""
 def crop_image(img, target_size, center=True):
     width, height = img.size
     size = target_size
@@ -80,7 +105,24 @@ def crop_image(img, target_size, center=True):
     h_end = h_start + size
     img = img.crop((w_start, h_start, w_end, h_end))
     return img
-
+"""
+# cv2
+def crop_image(img, target_size, center=True):
+    height, width = img.shape[:2]
+    size = target_size
+    if center == True:
+        w_start = (width - size) / 2
+        h_start = (height - size) / 2
+    else:
+        w_start = np.random.randint(0, width - size + 1)
+        h_start = np.random.randint(0, height - size + 1)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img[h_start:h_end, w_start:w_end,:]
+    return img
+
+# PIL
+"""
 def preprocess(img_path, is_training):
 
     img = Image.open(img_path)
@@ -108,14 +150,34 @@ def preprocess(img_path, is_training):
     img = np.transpose(img, (2, 0, 1))
 
     return img
+"""
+# cv2
+def preprocess(img_path, is_training):
+
+    img = cv2.imread(img_path)
+    if is_training:
+        # random resize crop
+        img = random_crop(img, train_image_size)
+        # random horizontal flip
+        if np.random.randint(2):
+            img = cv2.flip(img, 1)
+    else:
+        # resize
+        img = cv2.resize(img, (test_image_size, test_image_size), interpolation=cv2.INTER_LINEAR)
+        # center crop
+        img = crop_image(img, train_image_size)
+    img_float = img[:,:,::-1].astype('float32') / 255
+    img = (img_float - CIFAR_MEAN) / CIFAR_STD
+    img = np.transpose(img, (2, 0, 1))
+
+    return img
 
 
 def reader_creator_filepath(data_dir, sub_name, is_training):
 
     file_list = os.path.join(data_dir, sub_name)
     image_file = 'train' if is_training else 'val'
     dataset_path = os.path.join(data_dir, image_file)
-    print(dataset_path)
     def reader():
         with open(file_list) as flist:
             lines = [line.strip() for line in flist]
@@ -130,6 +192,49 @@ def reader_creator_filepath(data_dir, sub_name, is_training):
     return reader
 
 
+def _reader_creator(data_dir, file_list,is_training):
+    def multiprocess_reader():
+        full_lines = [line.strip() for line in file_list]
+        # NOTE:maybe do not need shuffle here!
+        if is_training:
+            np.random.shuffle(full_lines)
+        for line in full_lines:
+            img_path, label = line.split()
+            img_path = os.path.join(data_dir, img_path)
+            img = preprocess(img_path,is_training)
+            yield img, int(label)
+# multiprocess_reader()
+    return multiprocess_reader
+
+def mul_reader_creator_filepath(data_dir, sub_name, is_training):
+
+    file_list = os.path.join(data_dir, sub_name)
+    image_file = 'train' if is_training else 'val'
+    dataset_path = os.path.join(data_dir, image_file)
+
+    with open(file_list,'r') as f_dir:
+        lines = f_dir.readlines()
+
+    num_workers = 16
+
+    n = int(math.ceil(len(lines)/float(num_workers)))
+
+# global shuffle without image classification " pass seed " strategy
+    if is_training:
+        np.random.shuffle(lines)
+    split_lists = [lines[i:i+n] for i in range(0,len(lines),n)]
+    readers = []
+    for item in split_lists:
+        readers.append(
+            _reader_creator(
+                dataset_path,
+                item,
+                is_training
+            )
+        )
+    return paddle.reader.multiprocess_reader(readers,False)
+
+
 def train(args):
     """
 
@@ -140,7 +245,10 @@
     :return: Training reader creator.
    :rtype: callable
     """
-    return reader_creator_filepath(args.data, 'debug.txt', True)
+    # return reader_creator_filepath(args.data, 'train.txt', True)
+    return mul_reader_creator_filepath('./dataset/imagenet', 'train.txt', True)
+
+
 
 def test(args):
     """
@@ -151,4 +259,5 @@ def test(args):
     :return: Test reader creator.
     :rtype: callable
     """
-    return reader_creator_filepath(args.data, 'val.txt', False)
+    return mul_reader_creator_filepath('./dataset/imagenet', 'val.txt', False)
+    # return reader_creator_filepath(args.data, 'val.txt', False)
diff --git a/LRC/train_imagenet.py b/LRC/train_imagenet.py
index 17e548637796d90a689355bfb2666d5f23ed649e..273b1f2c2b1f8e3492080c1ea6cb1264799ea8eb 100644
--- a/LRC/train_imagenet.py
+++ b/LRC/train_imagenet.py
@@ -66,11 +66,6 @@ parser.add_argument(
     type=str,
     default='save_models',
     help='path to save the model')
-parser.add_argument(
-    '--auxiliary',
-    action='store_true',
-    default=False,
-    help='use auxiliary tower')
 parser.add_argument(
     '--auxiliary_weight',
     type=float,
@@ -94,7 +89,7 @@
 args = parser.parse_args()
 ImageNet_CLASSES = 1000
 dataset_train_size = 1281167
-image_size = 224
+image_size = 320
 genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]
 
 def main():
@@ -104,7 +99,7 @@ def main():
     logging.info("args = %s", args)
     genotype = eval("genotypes.%s" % args.arch)
     model = Network(args.init_channels, ImageNet_CLASSES, args.layers,
-                    args.auxiliary, genotype)
+                    genotype)
     steps_one_epoch = math.ceil(dataset_train_size /
                                 (devices_num * args.batch_size))
     train(model, args, image_shape, steps_one_epoch, devices_num)
@@ -117,8 +112,8 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
     py_reader = model.build_input(im_shape, is_train)
     if is_train:
         with fluid.unique_name.guard():
-            loss = model.train_model(py_reader, args.init_channels,
-                                     args.auxiliary, args.auxiliary_weight)
+            loss = model.train_model(py_reader,
+                                     args.auxiliary_weight)
             optimizer = fluid.optimizer.Momentum(
                 learning_rate=cosine_with_warmup_decay(\
                     args.learning_rate, args.lr_min, steps_one_epoch,\
@@ -130,8 +125,7 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
             out = [py_reader, loss]
     else:
         with fluid.unique_name.guard():
-            prob, acc_1, acc_5 = model.test_model(py_reader,
-                                                  args.init_channels)
+            prob, acc_1, acc_5 = model.test_model(py_reader)
             out = [py_reader, prob, acc_1, acc_5]
 
     return out
@@ -163,19 +157,20 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu):
 #    return os.path.exists(os.path.join(args.pretrained_model, var.name))
 #    fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)
 
-    #build_strategy = fluid.BuildStrategy()
-    #build_strategy.enable_inplace = False
-    #build_strategy.memory_optimize = False
+    build_strategy = fluid.BuildStrategy()
+    build_strategy.enable_inplace = True
+    build_strategy.memory_optimize = False
     train_fetch_list = [loss_train]
-    fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
+    #fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
 
     exec_strategy = fluid.ExecutionStrategy()
-    #exec_strategy.num_threads = 1
+    exec_strategy.num_threads = 1
     train_exe = fluid.ParallelExecutor(
         main_program=train_prog,
         use_cuda=True,
         loss_name=loss_train.name,
-        exec_strategy=exec_strategy)
+        exec_strategy=exec_strategy,
+        build_strategy=build_strategy)
 
     train_batch_size = args.batch_size
     test_batch_size = 256
@@ -187,7 +182,7 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu):
     test_py_reader.decorate_paddle_reader(test_reader)
 
     fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog)
-    print(train_prog.to_string(True))
+    #print(train_prog.to_string(True))
 
     def save_model(postfix, main_prog):
         model_path = os.path.join(args.save_model_path, postfix)
@@ -246,7 +241,7 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu):
                            np.array(loss_v).mean(), start_time-prev_start_time))
                 step_id += 1
                 sys.stdout.flush()
-                os._exit(1)
+                #os._exit(1)
         except fluid.core.EOFException:
             train_py_reader.reset()
         if epoch_id % 50 == 0 or epoch_id == args.epochs - 1:
diff --git a/LRC/train_run_imagenet.sh b/LRC/train_run_imagenet.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fc472baf8e1a7daabd770808b867a8dc17c18b34
--- /dev/null
+++ b/LRC/train_run_imagenet.sh
@@ -0,0 +1,5 @@
+export FLAGS_eager_delete_tensor_gb=0.0
+export FLAGS_fast_eager_deletion_mode=1
+export FLAGS_fraction_of_gpu_memory_to_use=1.
+nohup env CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u train_imagenet.py --batch_size=64 > imagenet.log 2>&1 &
+