未验证 提交 5d3fe63f 编写于 作者: L littletomatodonkey 提交者: GitHub

Merge pull request #185 from littletomatodonkey/dyg_ls

Add label smoothing support for dygraph
...@@ -13,3 +13,6 @@ ...@@ -13,3 +13,6 @@
# limitations under the License. # limitations under the License.
from .resnet_name import * from .resnet_name import *
from .dpn import DPN68
from .densenet import DenseNet121
from .hrnet import HRNet_W18_C
\ No newline at end of file
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -268,26 +286,26 @@ class DenseNet(fluid.dygraph.Layer): ...@@ -268,26 +286,26 @@ class DenseNet(fluid.dygraph.Layer):
return y return y
def DenseNet121(): def DenseNet121(**args):
model = DenseNet(layers=121) model = DenseNet(layers=121, **args)
return model return model
def DenseNet161(): def DenseNet161(**args):
model = DenseNet(layers=161) model = DenseNet(layers=161, **args)
return model return model
def DenseNet169(): def DenseNet169(**args):
model = DenseNet(layers=169) model = DenseNet(layers=169, **args)
return model return model
def DenseNet201(): def DenseNet201(**args):
model = DenseNet(layers=201) model = DenseNet(layers=201, **args)
return model return model
def DenseNet264(): def DenseNet264(**args):
model = DenseNet(layers=264) model = DenseNet(layers=264, **args)
return model return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np import numpy as np
import sys import sys
import paddle import paddle
...@@ -386,26 +404,26 @@ class DPN(fluid.dygraph.Layer): ...@@ -386,26 +404,26 @@ class DPN(fluid.dygraph.Layer):
return net_arg return net_arg
def DPN68(): def DPN68(**args):
model = DPN(layers=68) model = DPN(layers=68, **args)
return model return model
def DPN92(): def DPN92(**args):
model = DPN(layers=92) model = DPN(layers=92, **args)
return model return model
def DPN98(): def DPN98(**args):
model = DPN(layers=98) model = DPN(layers=98, **args)
return model return model
def DPN107(): def DPN107(**args):
model = DPN(layers=107) model = DPN(layers=107, **args)
return model return model
def DPN131(): def DPN131(**args):
model = DPN(layers=131) model = DPN(layers=131, **args)
return model return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -647,81 +665,81 @@ class HRNet(fluid.dygraph.Layer): ...@@ -647,81 +665,81 @@ class HRNet(fluid.dygraph.Layer):
return y return y
def HRNet_W18_C(): def HRNet_W18_C(**args):
model = HRNet(width=18) model = HRNet(width=18, **args)
return model return model
def HRNet_W30_C(): def HRNet_W30_C(**args):
model = HRNet(width=30) model = HRNet(width=30, **args)
return model return model
def HRNet_W32_C(): def HRNet_W32_C(**args):
model = HRNet(width=32) model = HRNet(width=32, **args)
return model return model
def HRNet_W40_C(): def HRNet_W40_C(**args):
model = HRNet(width=40) model = HRNet(width=40, **args)
return model return model
def HRNet_W44_C(): def HRNet_W44_C(**args):
model = HRNet(width=44) model = HRNet(width=44, **args)
return model return model
def HRNet_W48_C(): def HRNet_W48_C(**args):
model = HRNet(width=48) model = HRNet(width=48, **args)
return model return model
def HRNet_W60_C(): def HRNet_W60_C(**args):
model = HRNet(width=60) model = HRNet(width=60, **args)
return model return model
def HRNet_W64_C(): def HRNet_W64_C(**args):
model = HRNet(width=64) model = HRNet(width=64, **args)
return model return model
def SE_HRNet_W18_C(): def SE_HRNet_W18_C(**args):
model = HRNet(width=18, has_se=True) model = HRNet(width=18, has_se=True, **args)
return model return model
def SE_HRNet_W30_C(): def SE_HRNet_W30_C(**args):
model = HRNet(width=30, has_se=True) model = HRNet(width=30, has_se=True, **args)
return model return model
def SE_HRNet_W32_C(): def SE_HRNet_W32_C(**args):
model = HRNet(width=32, has_se=True) model = HRNet(width=32, has_se=True, **args)
return model return model
def SE_HRNet_W40_C(): def SE_HRNet_W40_C(**args):
model = HRNet(width=40, has_se=True) model = HRNet(width=40, has_se=True, **args)
return model return model
def SE_HRNet_W44_C(): def SE_HRNet_W44_C(**args):
model = HRNet(width=44, has_se=True) model = HRNet(width=44, has_se=True, **args)
return model return model
def SE_HRNet_W48_C(): def SE_HRNet_W48_C(**args):
model = HRNet(width=48, has_se=True) model = HRNet(width=48, has_se=True, **args)
return model return model
def SE_HRNet_W60_C(): def SE_HRNet_W60_C(**args):
model = HRNet(width=60, has_se=True) model = HRNet(width=60, has_se=True, **args)
return model return model
def SE_HRNet_W64_C(): def SE_HRNet_W64_C(**args):
model = HRNet(width=64, has_se=True) model = HRNet(width=64, has_se=True, **args)
return model return model
...@@ -49,11 +49,9 @@ def create_dataloader(): ...@@ -49,11 +49,9 @@ def create_dataloader():
dataloader(fluid dataloader): dataloader(fluid dataloader):
""" """
trainer_num = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) trainer_num = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
capacity = 64 if trainer_num <= 1 else 8 capacity = 64 if trainer_num == 1 else 8
dataloader = fluid.io.DataLoader.from_generator( dataloader = fluid.io.DataLoader.from_generator(
capacity=capacity, capacity=capacity, use_double_buffer=True, iterable=True)
use_double_buffer=True,
iterable=True)
return dataloader return dataloader
...@@ -76,8 +74,8 @@ def create_model(architecture, classes_num): ...@@ -76,8 +74,8 @@ def create_model(architecture, classes_num):
return architectures.__dict__[name](class_dim=classes_num, **params) return architectures.__dict__[name](class_dim=classes_num, **params)
def create_loss(out, def create_loss(feeds,
label, out,
architecture, architecture,
classes_num=1000, classes_num=1000,
epsilon=None, epsilon=None,
...@@ -106,7 +104,7 @@ def create_loss(out, ...@@ -106,7 +104,7 @@ def create_loss(out,
if architecture["name"] == "GoogLeNet": if architecture["name"] == "GoogLeNet":
assert len(out) == 3, "GoogLeNet should have 3 outputs" assert len(out) == 3, "GoogLeNet should have 3 outputs"
loss = GoogLeNetLoss(class_dim=classes_num, epsilon=epsilon) loss = GoogLeNetLoss(class_dim=classes_num, epsilon=epsilon)
return loss(out[0], out[1], out[2], label) return loss(out[0], out[1], out[2], feeds["label"])
if use_distillation: if use_distillation:
assert len(out) == 2, ("distillation output length must be 2, " assert len(out) == 2, ("distillation output length must be 2, "
...@@ -116,14 +114,13 @@ def create_loss(out, ...@@ -116,14 +114,13 @@ def create_loss(out,
if use_mix: if use_mix:
loss = MixCELoss(class_dim=classes_num, epsilon=epsilon) loss = MixCELoss(class_dim=classes_num, epsilon=epsilon)
raise NotImplementedError feed_y_a = feeds['y_a']
#feed_y_a = feeds['feed_y_a'] feed_y_b = feeds['y_b']
#feed_y_b = feeds['feed_y_b'] feed_lam = feeds['lam']
#feed_lam = feeds['feed_lam'] return loss(out, feed_y_a, feed_y_b, feed_lam)
#return loss(out, feed_y_a, feed_y_b, feed_lam)
else: else:
loss = CELoss(class_dim=classes_num, epsilon=epsilon) loss = CELoss(class_dim=classes_num, epsilon=epsilon)
return loss(out, label) return loss(out, feeds["label"])
def create_metric(out, def create_metric(out,
...@@ -166,14 +163,7 @@ def create_metric(out, ...@@ -166,14 +163,7 @@ def create_metric(out,
return fetchs return fetchs
def create_fetchs(out, def create_fetchs(feeds, net, config, mode="train"):
label,
architecture,
topk=5,
classes_num=1000,
epsilon=None,
use_mix=False,
use_distillation=False):
""" """
Create fetchs as model outputs(included loss and measures), Create fetchs as model outputs(included loss and measures),
will call create_loss and create_metric(if use_mix). will call create_loss and create_metric(if use_mix).
...@@ -192,12 +182,21 @@ def create_fetchs(out, ...@@ -192,12 +182,21 @@ def create_fetchs(out,
Returns: Returns:
fetchs(dict): dict of model outputs(included loss and measures) fetchs(dict): dict of model outputs(included loss and measures)
""" """
architecture = config.ARCHITECTURE
topk = config.topk
classes_num = config.classes_num
epsilon = config.get('ls_epsilon')
use_mix = config.get('use_mix') and mode == 'train'
use_distillation = config.get('use_distillation')
out = net(feeds["image"])
fetchs = OrderedDict() fetchs = OrderedDict()
fetchs['loss'] = create_loss(out, label, architecture, classes_num, epsilon, use_mix, fetchs['loss'] = create_loss(feeds, out, architecture, classes_num,
use_distillation) epsilon, use_mix, use_distillation)
if not use_mix: if not use_mix:
metric = create_metric(out, label, architecture, topk, classes_num, metric = create_metric(out, feeds["label"], architecture, topk,
use_distillation) classes_num, use_distillation)
fetchs.update(metric) fetchs.update(metric)
return fetchs return fetchs
...@@ -278,36 +277,17 @@ def mixed_precision_optimizer(config, optimizer): ...@@ -278,36 +277,17 @@ def mixed_precision_optimizer(config, optimizer):
return optimizer return optimizer
def compute(config, out, label, mode='train'): def create_feeds(batch, use_mix):
""" image = to_variable(batch[0].numpy().astype("float32"))
Build a program using a model and an optimizer if use_mix:
1. create feeds y_a = to_variable(batch[1].numpy().astype("int64").reshape(-1, 1))
2. create a dataloader y_b = to_variable(batch[2].numpy().astype("int64").reshape(-1, 1))
3. create a model lam = to_variable(batch[3].numpy().astype("float32").reshape(-1, 1))
4. create fetchs feeds = {"image": image, "y_a": y_a, "y_b": y_b, "lam": lam}
5. create an optimizer else:
label = to_variable(batch[1].numpy().astype('int64').reshape(-1, 1))
Args: feeds = {"image": image, "label": label}
config(dict): config return feeds
main_prog(): main program
startup_prog(): startup program
is_train(bool): train or valid
Returns:
dataloader(): a bridge between the model and the data
fetchs(dict): dict of model outputs(included loss and measures)
"""
fetchs = create_fetchs(
out,
label,
config.ARCHITECTURE,
config.topk,
config.classes_num,
epsilon=config.get('ls_epsilon'),
use_mix=config.get('use_mix') and mode == 'train',
use_distillation=config.get('use_distillation'))
return fetchs
def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
...@@ -324,19 +304,30 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): ...@@ -324,19 +304,30 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
Returns: Returns:
""" """
topk_name = 'top{}'.format(config.topk) use_mix = config.get("use_mix", False) and mode == "train"
metric_list = OrderedDict([ if use_mix:
("loss", AverageMeter('loss', '7.4f')), metric_list = OrderedDict([
("top1", AverageMeter('top1', '.4f')), ("loss", AverageMeter('loss', '7.4f')),
(topk_name, AverageMeter(topk_name, '.4f')), ("lr", AverageMeter(
("lr", AverageMeter('lr', 'f', need_avg=False)), 'lr', 'f', need_avg=False)),
("batch_time", AverageMeter('elapse', '.3f')), ("batch_time", AverageMeter('elapse', '.3f')),
]) ])
else:
topk_name = 'top{}'.format(config.topk)
metric_list = OrderedDict([
("loss", AverageMeter('loss', '7.4f')),
("top1", AverageMeter('top1', '.4f')),
(topk_name, AverageMeter(topk_name, '.4f')),
("lr", AverageMeter(
'lr', 'f', need_avg=False)),
("batch_time", AverageMeter('elapse', '.3f')),
])
tic = time.time() tic = time.time()
for idx, (img, label) in enumerate(dataloader()): for idx, batch in enumerate(dataloader()):
label = to_variable(label.numpy().astype('int64').reshape(-1, 1)) batch_size = len(batch[0])
fetchs = compute(config, net(img), label, mode) feeds = create_feeds(batch, use_mix)
fetchs = create_fetchs(feeds, net, config, mode)
if mode == 'train': if mode == 'train':
avg_loss = net.scale_loss(fetchs['loss']) avg_loss = net.scale_loss(fetchs['loss'])
avg_loss.backward() avg_loss.backward()
...@@ -345,10 +336,10 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): ...@@ -345,10 +336,10 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
optimizer.minimize(avg_loss) optimizer.minimize(avg_loss)
net.clear_gradients() net.clear_gradients()
metric_list['lr'].update( metric_list['lr'].update(
optimizer._global_learning_rate().numpy()[0], len(img)) optimizer._global_learning_rate().numpy()[0], batch_size)
for name, fetch in fetchs.items(): for name, fetch in fetchs.items():
metric_list[name].update(fetch.numpy()[0], len(img)) metric_list[name].update(fetch.numpy()[0], batch_size)
metric_list['batch_time'].update(time.time() - tic) metric_list['batch_time'].update(time.time() - tic)
tic = time.time() tic = time.time()
...@@ -365,7 +356,8 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): ...@@ -365,7 +356,8 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
logger.coloring(step_str, "PURPLE"), logger.coloring(step_str, "PURPLE"),
logger.coloring(fetchs_str, 'OKGREEN'))) logger.coloring(fetchs_str, 'OKGREEN')))
end_str = ' '.join([str(m.mean) for m in metric_list.values()] + [metric_list['batch_time'].total]) end_str = ' '.join([str(m.mean) for m in metric_list.values()] +
[metric_list['batch_time'].total])
if mode == 'eval': if mode == 'eval':
logger.info("END {:s} {:s}s".format(mode, end_str)) logger.info("END {:s} {:s}s".format(mode, end_str))
else: else:
...@@ -378,4 +370,4 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): ...@@ -378,4 +370,4 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
# return top1_acc in order to save the best model # return top1_acc in order to save the best model
if mode == 'valid': if mode == 'valid':
return metric_list['top1'].avg return metric_list['top1'].avg
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册