提交 50717d68 编写于 作者: S shippingwang

mixup on gpu

上级 39f946aa
......@@ -279,47 +279,52 @@ def _to_Tensor(lod_tensor, dtype):
return data_tensor
def post_mix(settings, batch):
batch_size = settings.TRAIN.batch_size // paddle.fluid.core.get_cuda_device_count(
)
def normalize(feeds, config):
image, label = feeds['image'], feeds['label']
print(np.array(image).shape)
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
image = fluid.layers.cast(image, 'float32')
#image = fluid.layers.transpose(image, perm=[0,3,1,2])
batch_imgs = _to_Tensor(batch[0]['feed_image'], 'float32')
batch_label = _to_Tensor(batch[0]['feed_label'], 'int64')
alpha = 0.2
idx = _to_Tensor(np.random.permutation(batch_size), 'int32')
lam = np.random.beta(alpha, alpha)
#image = fluid.layers.cast(image,'float32')
# Scale the image by 1/255 using a one-element float32 constant tensor.
pixel_max = fluid.layers.fill_constant(
    shape=[1], dtype='float32', value=255.0)
image = fluid.layers.elementwise_div(image, pixel_max)
batch_imgs = lam * batch_imgs + (1 - lam) * paddle.fluid.layers.gather(
batch_imgs, idx)
mean = fluid.layers.create_tensor(dtype="float32")
fluid.layers.assign(input=img_mean.astype("float32"), output=mean)
std = fluid.layers.create_tensor(dtype="float32")
fluid.layers.assign(input=img_std.astype("float32"), output=std)
# print(type(batch_label))
feed = [{
'feed_image': batch_imgs,
'feed_y_a': batch_label,
'feed_y_b': paddle.fluid.layers.gather(batch_label, idx),
'feed_lam': _to_Tensor([lam] * batch_size, 'float32')
}]
image = fluid.layers.elementwise_sub(image, mean)
image = fluid.layers.elementwise_div(image, std)
return feed
image.stop_gradient = True
print(image)
feeds['image'] = image
return feeds
def post_mix_numpy(settings, batch):
batch_size = settings.TRAIN.batch_size // paddle.fluid.core.get_cuda_device_count(
)
batch_imgs = np.array(batch[0]['feed_image'])
batch_label = np.array(batch[0]['feed_label'])
def mix(feeds, config, is_train=True):
batch_size = config.TRAIN.batch_size // paddle.fluid.core.get_cuda_device_count(
)
#batch_imgs = _to_Tensor(feeds['feed_image'], 'float32')
#batch_label = _to_Tensor(feeds['feed_label'], 'int64')
images = feeds['image']
label = feeds['label']
alpha = 0.2
idx = np.random.permutation(batch_size)
idx = _to_Tensor(np.random.permutation(batch_size), 'int32')
lam = np.random.beta(alpha, alpha)
batch_imgs = lam * batch_imgs + (1 - lam) * batch_imgs[idx]
images = lam * images + (1 - lam) * paddle.fluid.layers.gather(images, idx)
feed = [{
'feed_image': batch_imgs,
'feed_y_a': batch_label,
'feed_y_b': batch_label[idx],
'feed_lam': np.array([lam] * batch_size).astype('float32')
}]
feed = {
'image': images,
'feed_y_a': label,
'feed_y_b': paddle.fluid.layers.gather(label, idx),
'feed_lam': _to_Tensor([lam] * batch_size, 'float32')
}
return feed
return feed if is_train else feeds
......@@ -39,7 +39,7 @@ from paddle.fluid.incubate.fleet.collective import DistributedStrategy
from ema import ExponentialMovingAverage
def create_feeds(image_shape, use_mix=None):
def create_feeds(image_shape, use_mix=None, use_dali=None):
"""
Create feeds as model input
......@@ -53,7 +53,8 @@ def create_feeds(image_shape, use_mix=None):
feeds = OrderedDict()
feeds['image'] = fluid.data(
name="feed_image", shape=[None] + image_shape, dtype="float32")
if use_mix:
if use_mix and not use_dali:
feeds['feed_y_a'] = fluid.data(
name="feed_y_a", shape=[None, 1], dtype="int64")
feeds['feed_y_b'] = fluid.data(
......@@ -61,6 +62,7 @@ def create_feeds(image_shape, use_mix=None):
feeds['feed_lam'] = fluid.data(
name="feed_lam", shape=[None, 1], dtype="float32")
else:
feeds['label'] = fluid.data(
name="feed_label", shape=[None, 1], dtype="int64")
......@@ -149,6 +151,7 @@ def create_loss(out,
loss = JSDivLoss(class_dim=classes_num, epsilon=epsilon)
return loss(out[1], out[0])
print("++++++", use_mix)
if use_mix:
loss = MixCELoss(class_dim=classes_num, epsilon=epsilon)
feed_y_a = feeds['feed_y_a']
......@@ -336,8 +339,15 @@ def build(config, main_prog, startup_prog, is_train=True):
with fluid.program_guard(main_prog, startup_prog):
with fluid.unique_name.guard():
use_mix = config.get('use_mix') and is_train
use_dali = config.get('use_dali')
use_distillation = config.get('use_distillation')
feeds = create_feeds(config.image_shape, use_mix=use_mix)
# use_dali must be passed by keyword: a positional argument may not follow
# a keyword argument (the original call was a SyntaxError). create_feeds
# uses it to skip the feed_y_a / feed_y_b / feed_lam inputs when DALI is on.
feeds = create_feeds(
    config.image_shape, use_mix=use_mix, use_dali=use_dali)
if use_dali and use_mix:
import dali
#feeds = dali.normalize(feeds,config)
feeds = dali.mix(feeds, config, is_train)
dataloader = create_dataloader(feeds.values()) if not config.get(
'use_dali') else None
out = create_model(config.ARCHITECTURE, feeds['image'],
......@@ -428,13 +438,19 @@ def run(dataloader,
batch_time = AverageMeter('elapse', '.3f')
tic = time.time()
dataloader = dataloader if config.get('use_dali') else dataloader()()
#sta = 0
for idx, batch in enumerate(dataloader):
if config.get('use_dali'):
import dali
batch = dali.post_mix_numpy(config, batch)
#start_time = time.time()
metrics = exe.run(program=program, feed=batch, fetch_list=fetch_list)
#end_time = time.time()
#statistics = end_time - start_time
# if idx >= 10:
# sta = sta+statistics
# if idx == 110 and int(os.getenv("PADDLE_TRAINER_ID", 0)) == 0:
# print("10-100batch speed 000", sta/100, 's/batch', 'bs', config.TRAIN.batch_size)
# if idx == 110 and int(os.getenv("PADDLE_TRAINER_ID", 0)) == 1:
# print("10-100batch speed 111", sta/100, 's/batch', 'bs', config.TRAIN.batch_size)
batch_time.update(time.time() - tic)
tic = time.time()
for i, m in enumerate(metrics):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册