diff --git a/ce_tests/dygraph/quant/src/imagenet_dataset.py b/ce_tests/dygraph/quant/src/imagenet_dataset.py
index fe74f871f667d3cc2b040b2cffc71836483ebaec..d8234012131036414e6eff3cc1a1bbef6fe56a64 100644
--- a/ce_tests/dygraph/quant/src/imagenet_dataset.py
+++ b/ce_tests/dygraph/quant/src/imagenet_dataset.py
@@ -1,35 +1,21 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 import os
-import cv2
-import math
-import random
 import numpy as np
 from PIL import Image
-
-from paddle.vision.datasets import DatasetFolder
+from paddle.io import Dataset
 from paddle.vision.transforms import transforms
 
 
-class ImageNetDataset(DatasetFolder):
+class ImageNetDataset(Dataset):
     def __init__(self,
-                 path,
+                 data_dir,
                  mode='train',
                  image_size=224,
                  resize_short_size=256):
-        super(ImageNetDataset, self).__init__(path)
+        super(ImageNetDataset, self).__init__()
+        train_file_list = os.path.join(data_dir, 'train_list.txt')
+        val_file_list = os.path.join(data_dir, 'val_list.txt')
+        test_file_list = os.path.join(data_dir, 'test_list.txt')
+        self.data_dir = data_dir
         self.mode = mode
 
         normalize = transforms.Normalize(
@@ -47,11 +33,35 @@ class ImageNetDataset(DatasetFolder):
             normalize
         ])
 
-    def __getitem__(self, idx):
-        img_path, label = self.samples[idx]
+        if mode == 'train':
+            with open(train_file_list) as flist:
+                full_lines = [line.strip() for line in flist]
+                np.random.shuffle(full_lines)
+                if os.getenv('PADDLE_TRAINING_ROLE'):
+                    # distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
+                    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
+                    trainer_count = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
+                    per_node_lines = len(full_lines) // trainer_count
+                    lines = full_lines[trainer_id * per_node_lines:(
+                        trainer_id + 1) * per_node_lines]
+                    print(
+                        "read images from %d, length: %d, lines length: %d, total: %d"
+                        % (trainer_id * per_node_lines, per_node_lines,
+                           len(lines), len(full_lines)))
+                else:
+                    lines = full_lines
+                self.data = [line.split() for line in lines]
+        else:
+            with open(val_file_list) as flist:
+                lines = [line.strip() for line in flist]
+                self.data = [line.split() for line in lines]
+
+    def __getitem__(self, index):
+        img_path, label = self.data[index]
+        img_path = os.path.join(self.data_dir, img_path)
         img = Image.open(img_path).convert('RGB')
         label = np.array([label]).astype(np.int64)
         return self.transform(img), label
 
     def __len__(self):
-        return len(self.samples)
+        return len(self.data)
diff --git a/ce_tests/dygraph/quant/src/ptq.py b/ce_tests/dygraph/quant/src/ptq.py
index bb1138dfc280a270ab971e0399d196adcb219739..49c673caf6eefa09e56d72e37b32b37bcf7e02c0 100644
--- a/ce_tests/dygraph/quant/src/ptq.py
+++ b/ce_tests/dygraph/quant/src/ptq.py
@@ -60,8 +60,7 @@ def main():
     fp32_model = models.__dict__[FLAGS.arch](pretrained=True)
     fp32_model.eval()
 
-    val_dataset = ImageNetDataset(
-        os.path.join(FLAGS.data, FLAGS.val_dir), mode='val')
+    val_dataset = ImageNetDataset(FLAGS.data, mode='val')
 
     # 2 quantizations
     ptq = PTQ()
diff --git a/ce_tests/dygraph/quant/src/qat.py b/ce_tests/dygraph/quant/src/qat.py
index 0737505f6a16ddaccba2e6e43fad805386973328..e093fc91e55ce111a4606fc29452c04e61755f23 100644
--- a/ce_tests/dygraph/quant/src/qat.py
+++ b/ce_tests/dygraph/quant/src/qat.py
@@ -86,10 +86,8 @@ def main():
         print("Resume from " + FLAGS.resume)
         model.load(FLAGS.resume)
 
-    train_dataset = ImageNetDataset(
-        os.path.join(FLAGS.data, 'train'), mode='train')
-    val_dataset = ImageNetDataset(
-        os.path.join(FLAGS.data, FLAGS.val_dir), mode='val')
+    train_dataset = ImageNetDataset(FLAGS.data, mode='train')
+    val_dataset = ImageNetDataset(FLAGS.data, mode='val')
 
     optim = make_optimizer(
         np.ceil(
@@ -152,10 +150,6 @@ if __name__ == '__main__':
         default="",
         help='path to dataset '
         '(should have subdirectories named "train" and "val"')
-    parser.add_argument(
-        '--val_dir',
-        default="val",
-        help='the dir that saves val images for paddle.Model')
 
     # train
     parser.add_argument(
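
Note: a minimal usage sketch of the reworked dataset, for reference only and not part of the change. It assumes the data directory contains val_list.txt with lines of the form "relative/image/path label" (inferred from the line.split() call in __getitem__), that the script is run from ce_tests/dygraph/quant/src so the module imports by its file name, and that the directory path and batch settings below are hypothetical.

# usage_sketch.py -- illustration only; paths and batch settings are assumptions.
import paddle
from imagenet_dataset import ImageNetDataset

data_dir = '/datasets/ILSVRC2012'  # hypothetical root holding val_list.txt and the images
val_dataset = ImageNetDataset(data_dir, mode='val')

# Standard paddle.io.DataLoader iteration over the paddle.io.Dataset subclass.
val_loader = paddle.io.DataLoader(val_dataset, batch_size=32, num_workers=4)
for images, labels in val_loader:
    pass  # e.g. feed images to the model and compare predictions against labels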