未验证 提交 3d53d8b9 编写于 作者: C cuicheng01 提交者: GitHub

Merge pull request #1971 from cuicheng01/Add_PULC_demo

[WIP]Add pulc demo
...@@ -189,6 +189,7 @@ class PPLCNet(TheseusLayer): ...@@ -189,6 +189,7 @@ class PPLCNet(TheseusLayer):
dropout_prob=0.2, dropout_prob=0.2,
class_expand=1280, class_expand=1280,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
stride_list=[2, 2, 2, 2, 2],
use_last_conv=True, use_last_conv=True,
return_patterns=None, return_patterns=None,
return_stages=None, return_stages=None,
...@@ -198,6 +199,8 @@ class PPLCNet(TheseusLayer): ...@@ -198,6 +199,8 @@ class PPLCNet(TheseusLayer):
self.class_expand = class_expand self.class_expand = class_expand
self.lr_mult_list = lr_mult_list self.lr_mult_list = lr_mult_list
self.use_last_conv = use_last_conv self.use_last_conv = use_last_conv
self.stride_list = stride_list
self.net_config = NET_CONFIG
if isinstance(self.lr_mult_list, str): if isinstance(self.lr_mult_list, str):
self.lr_mult_list = eval(self.lr_mult_list) self.lr_mult_list = eval(self.lr_mult_list)
...@@ -206,17 +209,27 @@ class PPLCNet(TheseusLayer): ...@@ -206,17 +209,27 @@ class PPLCNet(TheseusLayer):
)), "lr_mult_list should be in (list, tuple) but got {}".format( )), "lr_mult_list should be in (list, tuple) but got {}".format(
type(self.lr_mult_list)) type(self.lr_mult_list))
assert len(self.lr_mult_list assert len(self.lr_mult_list
) == 6, "lr_mult_list length should be 5 but got {}".format( ) == 6, "lr_mult_list length should be 6 but got {}".format(
len(self.lr_mult_list)) len(self.lr_mult_list))
assert isinstance(self.stride_list, (
list, tuple
)), "stride_list should be in (list, tuple) but got {}".format(
type(self.stride_list))
assert len(self.stride_list
) == 5, "stride_list length should be 5 but got {}".format(
len(self.stride_list))
for i, stride in enumerate(stride_list[1:]):
self.net_config["blocks{}".format(i + 3)][0][3] = stride
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
num_channels=3, num_channels=3,
filter_size=3, filter_size=3,
num_filters=make_divisible(16 * scale), num_filters=make_divisible(16 * scale),
stride=2, stride=stride_list[0],
lr_mult=self.lr_mult_list[0]) lr_mult=self.lr_mult_list[0])
self.blocks2 = nn.Sequential(* [ self.blocks2 = nn.Sequential(*[
DepthwiseSeparable( DepthwiseSeparable(
num_channels=make_divisible(in_c * scale), num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale), num_filters=make_divisible(out_c * scale),
...@@ -224,10 +237,11 @@ class PPLCNet(TheseusLayer): ...@@ -224,10 +237,11 @@ class PPLCNet(TheseusLayer):
stride=s, stride=s,
use_se=se, use_se=se,
lr_mult=self.lr_mult_list[1]) lr_mult=self.lr_mult_list[1])
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks2"])
]) ])
self.blocks3 = nn.Sequential(* [ self.blocks3 = nn.Sequential(*[
DepthwiseSeparable( DepthwiseSeparable(
num_channels=make_divisible(in_c * scale), num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale), num_filters=make_divisible(out_c * scale),
...@@ -235,10 +249,11 @@ class PPLCNet(TheseusLayer): ...@@ -235,10 +249,11 @@ class PPLCNet(TheseusLayer):
stride=s, stride=s,
use_se=se, use_se=se,
lr_mult=self.lr_mult_list[2]) lr_mult=self.lr_mult_list[2])
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks3"])
]) ])
self.blocks4 = nn.Sequential(* [ self.blocks4 = nn.Sequential(*[
DepthwiseSeparable( DepthwiseSeparable(
num_channels=make_divisible(in_c * scale), num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale), num_filters=make_divisible(out_c * scale),
...@@ -246,10 +261,11 @@ class PPLCNet(TheseusLayer): ...@@ -246,10 +261,11 @@ class PPLCNet(TheseusLayer):
stride=s, stride=s,
use_se=se, use_se=se,
lr_mult=self.lr_mult_list[3]) lr_mult=self.lr_mult_list[3])
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks4"])
]) ])
self.blocks5 = nn.Sequential(* [ self.blocks5 = nn.Sequential(*[
DepthwiseSeparable( DepthwiseSeparable(
num_channels=make_divisible(in_c * scale), num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale), num_filters=make_divisible(out_c * scale),
...@@ -257,10 +273,11 @@ class PPLCNet(TheseusLayer): ...@@ -257,10 +273,11 @@ class PPLCNet(TheseusLayer):
stride=s, stride=s,
use_se=se, use_se=se,
lr_mult=self.lr_mult_list[4]) lr_mult=self.lr_mult_list[4])
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks5"])
]) ])
self.blocks6 = nn.Sequential(* [ self.blocks6 = nn.Sequential(*[
DepthwiseSeparable( DepthwiseSeparable(
num_channels=make_divisible(in_c * scale), num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale), num_filters=make_divisible(out_c * scale),
...@@ -268,13 +285,14 @@ class PPLCNet(TheseusLayer): ...@@ -268,13 +285,14 @@ class PPLCNet(TheseusLayer):
stride=s, stride=s,
use_se=se, use_se=se,
lr_mult=self.lr_mult_list[5]) lr_mult=self.lr_mult_list[5])
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks6"])
]) ])
self.avg_pool = AdaptiveAvgPool2D(1) self.avg_pool = AdaptiveAvgPool2D(1)
if self.use_last_conv: if self.use_last_conv:
self.last_conv = Conv2D( self.last_conv = Conv2D(
in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * in_channels=make_divisible(self.net_config["blocks6"][-1][2] *
scale), scale),
out_channels=self.class_expand, out_channels=self.class_expand,
kernel_size=1, kernel_size=1,
...@@ -286,8 +304,9 @@ class PPLCNet(TheseusLayer): ...@@ -286,8 +304,9 @@ class PPLCNet(TheseusLayer):
else: else:
self.last_conv = None self.last_conv = None
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
self.fc = Linear(self.class_expand if self.use_last_conv else self.fc = Linear(
NET_CONFIG["blocks6"][-1][2], class_num) self.class_expand if self.use_last_conv else
make_divisible(self.net_config["blocks6"][-1][2]), class_num)
super().init_res( super().init_res(
stages_pattern, stages_pattern,
......
...@@ -122,21 +122,18 @@ class ConvBNLayer(TheseusLayer): ...@@ -122,21 +122,18 @@ class ConvBNLayer(TheseusLayer):
self.is_vd_mode = is_vd_mode self.is_vd_mode = is_vd_mode
self.act = act self.act = act
self.avg_pool = AvgPool2D( self.avg_pool = AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True) kernel_size=2, stride=stride, padding="SAME", ceil_mode=True)
self.conv = Conv2D( self.conv = Conv2D(
in_channels=num_channels, in_channels=num_channels,
out_channels=num_filters, out_channels=num_filters,
kernel_size=filter_size, kernel_size=filter_size,
stride=stride, stride=1 if is_vd_mode else stride,
padding=(filter_size - 1) // 2, padding=(filter_size - 1) // 2,
groups=groups, groups=groups,
weight_attr=ParamAttr(learning_rate=lr_mult), weight_attr=ParamAttr(learning_rate=lr_mult),
bias_attr=False, bias_attr=False,
data_format=data_format) data_format=data_format)
weight_attr = ParamAttr(learning_rate=lr_mult, trainable=True)
bias_attr = ParamAttr(learning_rate=lr_mult, trainable=True)
self.bn = BatchNorm( self.bn = BatchNorm(
num_filters, num_filters,
param_attr=ParamAttr(learning_rate=lr_mult), param_attr=ParamAttr(learning_rate=lr_mult),
...@@ -164,7 +161,6 @@ class BottleneckBlock(TheseusLayer): ...@@ -164,7 +161,6 @@ class BottleneckBlock(TheseusLayer):
lr_mult=1.0, lr_mult=1.0,
data_format="NCHW"): data_format="NCHW"):
super().__init__() super().__init__()
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
num_channels=num_channels, num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
...@@ -193,7 +189,7 @@ class BottleneckBlock(TheseusLayer): ...@@ -193,7 +189,7 @@ class BottleneckBlock(TheseusLayer):
num_channels=num_channels, num_channels=num_channels,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
stride=stride if if_first else 1, stride=stride,
is_vd_mode=False if if_first else True, is_vd_mode=False if if_first else True,
lr_mult=lr_mult, lr_mult=lr_mult,
data_format=data_format) data_format=data_format)
...@@ -248,7 +244,7 @@ class BasicBlock(TheseusLayer): ...@@ -248,7 +244,7 @@ class BasicBlock(TheseusLayer):
num_channels=num_channels, num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=1, filter_size=1,
stride=stride if if_first else 1, stride=stride,
is_vd_mode=False if if_first else True, is_vd_mode=False if if_first else True,
lr_mult=lr_mult, lr_mult=lr_mult,
data_format=data_format) data_format=data_format)
...@@ -287,6 +283,7 @@ class ResNet(TheseusLayer): ...@@ -287,6 +283,7 @@ class ResNet(TheseusLayer):
stem_act="relu", stem_act="relu",
class_num=1000, class_num=1000,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
stride_list=[2, 2, 2, 2, 2],
data_format="NCHW", data_format="NCHW",
input_image_channel=3, input_image_channel=3,
return_patterns=None, return_patterns=None,
...@@ -296,6 +293,7 @@ class ResNet(TheseusLayer): ...@@ -296,6 +293,7 @@ class ResNet(TheseusLayer):
self.cfg = config self.cfg = config
self.lr_mult_list = lr_mult_list self.lr_mult_list = lr_mult_list
self.stride_list = stride_list
self.is_vd_mode = version == "vd" self.is_vd_mode = version == "vd"
self.class_num = class_num self.class_num = class_num
self.num_filters = [64, 128, 256, 512] self.num_filters = [64, 128, 256, 512]
...@@ -312,11 +310,19 @@ class ResNet(TheseusLayer): ...@@ -312,11 +310,19 @@ class ResNet(TheseusLayer):
) == 5, "lr_mult_list length should be 5 but got {}".format( ) == 5, "lr_mult_list length should be 5 but got {}".format(
len(self.lr_mult_list)) len(self.lr_mult_list))
assert isinstance(self.stride_list, (
list, tuple
)), "stride_list should be in (list, tuple) but got {}".format(
type(self.stride_list))
assert len(self.stride_list
) == 5, "stride_list length should be 5 but got {}".format(
len(self.stride_list))
self.stem_cfg = { self.stem_cfg = {
#num_channels, num_filters, filter_size, stride #num_channels, num_filters, filter_size, stride
"vb": [[input_image_channel, 64, 7, 2]], "vb": [[input_image_channel, 64, 7, self.stride_list[0]]],
"vd": "vd": [[input_image_channel, 32, 3, self.stride_list[0]],
[[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]] [32, 32, 3, 1], [32, 64, 3, 1]]
} }
self.stem = nn.Sequential(*[ self.stem = nn.Sequential(*[
...@@ -332,7 +338,10 @@ class ResNet(TheseusLayer): ...@@ -332,7 +338,10 @@ class ResNet(TheseusLayer):
]) ])
self.max_pool = MaxPool2D( self.max_pool = MaxPool2D(
kernel_size=3, stride=2, padding=1, data_format=data_format) kernel_size=3,
stride=stride_list[1],
padding=1,
data_format=data_format)
block_list = [] block_list = []
for block_idx in range(len(self.block_depth)): for block_idx in range(len(self.block_depth)):
shortcut = False shortcut = False
...@@ -341,7 +350,8 @@ class ResNet(TheseusLayer): ...@@ -341,7 +350,8 @@ class ResNet(TheseusLayer):
num_channels=self.num_channels[block_idx] if i == 0 else num_channels=self.num_channels[block_idx] if i == 0 else
self.num_filters[block_idx] * self.channels_mult, self.num_filters[block_idx] * self.channels_mult,
num_filters=self.num_filters[block_idx], num_filters=self.num_filters[block_idx],
stride=2 if i == 0 and block_idx != 0 else 1, stride=self.stride_list[block_idx + 1]
if i == 0 and block_idx != 0 else 1,
shortcut=shortcut, shortcut=shortcut,
if_first=block_idx == i == 0 if version == "vd" else True, if_first=block_idx == i == 0 if version == "vd" else True,
lr_mult=self.lr_mult_list[block_idx + 1], lr_mult=self.lr_mult_list[block_idx + 1],
......
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 5
eval_during_train: True
eval_interval: 1
epochs: 20
print_batch_step: 20
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 192]
save_inference_dir: "./inference"
use_multilabel: True
# model architecture
Arch:
name: "MobileNetV3_large_x1_0"
pretrained: True
class_num: 26
# loss function config for training/eval process
Loss:
Train:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Eval:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.01
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.0005
#clip_norm: 10
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/train_val_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- Padv2:
size: [212, 276]
pad_mode: 1
fill_value: 0
- RandomCropImage:
size: [192, 256]
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: True
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/test_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- ATTRMetric:
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 192]
save_inference_dir: "./inference"
use_multilabel: True
# model architecture
Arch:
name: "PPLCNet_x1_0"
pretrained: True
use_ssld: True
class_num: 26
# loss function config for training/eval process
Loss:
Train:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Eval:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.01
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/train_val_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- TimmAutoAugment:
prob: 0.8
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: [192, 256]
- Padv2:
size: [212, 276]
pad_mode: 1
fill_value: 0
- RandomCropImage:
size: [192, 256]
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.4
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: True
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/test_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- ATTRMetric:
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output
device: gpu
save_interval: 1
eval_during_train: True
start_eval_epoch: 1
eval_interval: 1
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 192]
save_inference_dir: ./inference
# training model under @to_static
to_static: False
use_dali: False
use_multilabel: True
# model architecture
Arch:
name: "DistillationModel"
class_num: &class_num 26
# if not null, its lengths should be same as models
pretrained_list:
# if not null, its lengths should be same as models
freeze_params_list:
- True
- False
use_sync_bn: True
models:
- Teacher:
name: ResNet101_vd
class_num: *class_num
- Student:
name: PPLCNet_x1_0
class_num: *class_num
pretrained: True
use_ssld: True
infer_model_name: "Student"
# loss function config for training/eval process
Loss:
Train:
- DistillationDMLLoss:
weight: 1.0
model_name_pairs:
- ["Student", "Teacher"]
- DistillationMultiLabelLoss:
weight: 1.0
weight_ratio: True
model_names: ["Student"]
size_sum: True
Eval:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.01
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/train_val_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- TimmAutoAugment:
prob: 0.8
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: [192, 256]
- Padv2:
size: [212, 276]
pad_mode: 1
fill_value: 0
- RandomCropImage:
size: [192, 256]
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.4
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: True
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/test_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- ATTRMetric:
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 192]
save_inference_dir: "./inference"
use_multilabel: True
# model architecture
Arch:
name: "PPLCNet_x1_0"
pretrained: True
use_ssld: True
class_num: 26
# loss function config for training/eval process
Loss:
Train:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Eval:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.01
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/train_val_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- TimmAutoAugment:
prob: 0.0
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: [192, 256]
- Padv2:
size: [212, 276]
pad_mode: 1
fill_value: 0
- RandomCropImage:
size: [192, 256]
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.0
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: True
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: MultiLabelDataset
      image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/test_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- ATTRMetric:
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 5
eval_during_train: True
eval_interval: 1
epochs: 20
print_batch_step: 20
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 192]
save_inference_dir: "./inference"
use_multilabel: True
# model architecture
Arch:
name: "Res2Net200_vd_26w_4s"
pretrained: True
class_num: 26
# loss function config for training/eval process
Loss:
Train:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Eval:
- MultiLabelLoss:
weight: 1.0
weight_ratio: True
size_sum: True
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.01
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.0005
#clip_norm: 10
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/train_val_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- Padv2:
size: [212, 276]
pad_mode: 1
fill_value: 0
- RandomCropImage:
size: [192, 256]
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: True
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: MultiLabelDataset
image_root: "dataset/pa100k/"
cls_label_path: "dataset/pa100k/test_list.txt"
label_ratio: True
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 256]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- ATTRMetric:
base_config_file: ppcls/configs/PULC/person_attribute/PPLCNet_x1_0_search.yaml
distill_config_file: ppcls/configs/PULC/person_attribute/PPLCNet_x1_0_Distillation.yaml
gpus: 0,1,2,3
output_dir: output/search_attr
search_times: 1
search_dict:
- search_key: lrs
replace_config:
- Optimizer.lr.learning_rate
search_values: [0.0001, 0.005, 0.01, 0.02, 0.05]
- search_key: resolutions
replace_config:
- DataLoader.Train.dataset.transform_ops.1.ResizeImage.size
- DataLoader.Train.dataset.transform_ops.4.RandomCropImage.size
- DataLoader.Train.dataset.transform_ops.2.TimmAutoAugment.img_size
search_values: [[192, 256]]
- search_key: ra_probs
replace_config:
- DataLoader.Train.dataset.transform_ops.2.TimmAutoAugment.prob
search_values: [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
- search_key: re_probs
replace_config:
- DataLoader.Train.dataset.transform_ops.7.RandomErasing.EPSILON
search_values: [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
- search_key: lr_mult_list
replace_config:
- Arch.lr_mult_list
search_values:
- [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
- [0.0, 0.4, 0.4, 0.8, 0.8, 1.0]
- [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
teacher:
rm_keys:
- Arch.lr_mult_list
search_values:
- ResNet101_vd
- ResNet50_vd
final_replace:
Arch.lr_mult_list: Arch.models.1.Student.lr_mult_list
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
start_eval_epoch: 18
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 224, 224]
save_inference_dir: ./inference
# training model under @to_static
to_static: False
use_dali: False
# model architecture
Arch:
name: MobileNetV3_large_x1_0
class_num: 2
pretrained: True
use_sync_bn: True
# loss function config for traing/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.13
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.00002
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 512
drop_last: False
shuffle: True
loader:
num_workers: 8
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 2]
Eval:
- TopkAcc:
topk: [1, 2]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
start_eval_epoch: 18
eval_interval: 1
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 80, 160]
save_inference_dir: ./inference
# training model under @to_static
to_static: False
use_dali: False
# model architecture
Arch:
name: PPLCNet_x1_0
class_num: 2
pretrained: True
use_ssld: True
stride_list: [2, [2, 1], [2, 1], [2, 1], [2, 1]]
# loss function config for traing/eval process
Loss:
Train:
- CELoss:
weight: 1.0
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.8
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.00004
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [160, 80]
- TimmAutoAugment:
prob: 1.0
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: [160, 80]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.0
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
sampler:
name: DistributedBatchSampler
batch_size: 256
drop_last: False
shuffle: True
loader:
num_workers: 16
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [160, 80]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 8
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [160, 80]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 2]
Eval:
- TopkAcc:
topk: [1, 2]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
start_eval_epoch: 18
eval_interval: 1
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 80, 160]
save_inference_dir: ./inference
# training model under @to_static
to_static: False
use_dali: False
# model architecture
Arch:
name: "DistillationModel"
class_num: &class_num 2
# if not null, its lengths should be same as models
pretrained_list:
# if not null, its lengths should be same as models
freeze_params_list:
- True
- False
use_sync_bn: True
models:
- Teacher:
name: ResNet101_vd
class_num: *class_num
stride_list: [2, [2, 1], [2, 1], [2, 1], [2, 1]]
- Student:
name: PPLCNet_x1_0
class_num: *class_num
stride_list: [2, [2, 1], [2, 1], [2, 1], [2, 1]]
pretrained: True
use_ssld: True
infer_model_name: "Student"
# loss function config for traing/eval process
Loss:
Train:
- DistillationDMLLoss:
weight: 1.0
model_name_pairs:
- ["Student", "Teacher"]
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.8
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.00004
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [160, 80]
- TimmAutoAugment:
prob: 1.0
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: [160, 80]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.0
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
sampler:
name: DistributedBatchSampler
batch_size: 256
drop_last: False
shuffle: True
loader:
num_workers: 16
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [160, 80]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 8
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [160, 80]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- DistillationTopkAcc:
model_key: "Student"
topk: [1, 2]
Eval:
- TopkAcc:
topk: [1, 2]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
start_eval_epoch: 18
eval_interval: 1
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 48, 192]
save_inference_dir: ./inference
# training model under @to_static
to_static: False
use_dali: False
# model architecture
Arch:
name: PPLCNet_x1_0
class_num: 2
pretrained: True
use_ssld: True
stride_list: [2, [2, 1], [2, 1], [2, 1], [2, 1]]
# loss function config for traing/eval process
Loss:
Train:
- CELoss:
weight: 1.0
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.5
warmup_epoch: 5
regularizer:
name: 'L2'
coeff: 0.00004
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 48]
- TimmAutoAugment:
prob: 0.0
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: [192, 48]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.0
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
sampler:
name: DistributedBatchSampler
batch_size: 256
drop_last: False
shuffle: True
loader:
num_workers: 16
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 48]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 8
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [192, 48]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 2]
Eval:
- TopkAcc:
topk: [1, 2]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
start_eval_epoch: 10
epochs: 20
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 224, 224]
save_inference_dir: ./inference
# training model under @to_static
to_static: False
use_dali: False
# mixed precision training
AMP:
scale_loss: 128.0
use_dynamic_loss_scaling: True
# O1: mixed fp16
level: O1
# model architecture
Arch:
name: SwinTransformer_tiny_patch4_window7_224
class_num: 2
pretrained: True
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
name: Cosine
learning_rate: 1e-4
eta_min: 2e-6
warmup_epoch: 5
warmup_start_lr: 2e-7
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: True
loader:
num_workers: 8
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/text_direction/
cls_label_path: ./dataset/text_direction/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 8
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 2]
Eval:
- TopkAcc:
topk: [1, 2]
base_config_file: ppcls/configs/PULC/text_direction/PPLCNet_x1_0.yaml
distill_config_file: ppcls/configs/PULC/text_direction/PPLCNet_x1_0_distillation.yaml
gpus: 0,1,2,3
output_dir: output/search_text
search_times: 1
search_dict:
- search_key: lrs
replace_config:
- Optimizer.lr.learning_rate
search_values: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
- search_key: resolutions
replace_config:
- DataLoader.Train.dataset.transform_ops.1.ResizeImage.size
- DataLoader.Train.dataset.transform_ops.2.TimmAutoAugment.img_size
- DataLoader.Eval.dataset.transform_ops.1.ResizeImage.size
search_values: [[192, 48], [180, 60], [160, 80]]
- search_key: ra_probs
replace_config:
- DataLoader.Train.dataset.transform_ops.2.TimmAutoAugment.prob
search_values: [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
- search_key: re_probs
replace_config:
- DataLoader.Train.dataset.transform_ops.4.RandomErasing.EPSILON
search_values: [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
- search_key: lr_mult_list
replace_config:
- Arch.lr_mult_list
search_values:
- [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
- [0.0, 0.4, 0.4, 0.8, 0.8, 1.0]
- [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
teacher:
rm_keys:
- Arch.lr_mult_list
search_values:
- ResNet101_vd
- ResNet50_vd
final_replace:
Arch.lr_mult_list: Arch.models.1.Student.lr_mult_list
# NOTE(review): the lines below are web-page extraction residue (code-review UI
# text), not part of any config file; commented out so they cannot break parsing.
# Markdown is supported
# 0% .
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# To comment, please register.