diff --git a/ppcls/configs/reid/MetaBIN_ResNet50_single_source.yaml b/ppcls/configs/reid/MetaBIN_ResNet50_single_source.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a8808a7a090096f121d916fd2e94713b24dec22b
--- /dev/null
+++ b/ppcls/configs/reid/MetaBIN_ResNet50_single_source.yaml
@@ -0,0 +1,282 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null #"metabin_resnet50_final"
+  output_dir: "./output/"
+  device: "gpu"
+  iter_per_epoch: 145
+  warmup_iter: 10
+  save_interval: 40
+  eval_during_train: True
+  eval_interval: 10
+  epochs: &epochs 120
+  print_batch_step: 20
+  use_visualdl: False
+  eval_mode: "retrieval"
+  retrieval_feature_from: "features" # 'backbone' or 'features'
+  re_ranking: False
+  # used for static mode and model export
+  image_shape: [3, 256, 128]
+  save_inference_dir: "./inference"
+  train_mode: 'metabin'
+
+# model architecture
+Arch:
+  name: "RecModel"
+  Backbone:
+    name: "ResNet50_metabin"
+    pretrained: False # "metabin_resnet50_backbone_pretrained"
+    bias_lr_factor: 2.0
+    gate_lr_factor: 20.0
+  BackboneStopLayer:
+    name: "flatten"
+  Neck:
+    name: BNNeck
+    num_features: &feat_dim 2048
+    weight_attr:
+      initializer:
+        name: Constant
+        value: 1.0
+  Head:
+    name: "FC"
+    embedding_size: *feat_dim
+    class_num: &class_num 751
+    weight_attr:
+      initializer:
+        name: KaimingUniform
+        negative_slope: 2.23606 # math.sqrt(5)
+        nonlinearity: "leaky_relu"
+    bias_attr: False
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: "Market1501"
+      image_root: "./dataset/"
+      cls_label_path: "bounding_box_train"
+      backend: "pil"
+      transform_ops:
+        - ResizeImage:
+            size: [128, 256]
+            return_numpy: False
+            interpolation: "bicubic"
+            backend: "pil"
+        - RandFlipImage:
+            flip_code: 1
+        - Pad:
+            padding: 10
+        - RandCropImageV2:
+            size: [128, 256]
+        - ColorJitter:
+            brightness: 0.15
+            contrast: 0.15
+            saturation: 0.1
+            hue: 0.1
+        - ToTensor:
+        - Normalize:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+    sampler:
+      name: NaiveIdentityBatchSampler
+      batch_size: 96
+      num_instances: 4
+      drop_last: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Metalearning:
+    Train:
+      dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_train"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              interpolation: "bicubic"
+              backend: "pil"
+          - RandFlipImage:
+              flip_code: 1
+          - Pad:
+              padding: 10
+          - RandCropImageV2:
+              size: [128, 256]
+          - ColorJitter:
+              brightness: 0.15
+              contrast: 0.15
+              saturation: 0.1
+              hue: 0.1
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DomainShuffleBatchSampler
+        batch_size: 96
+        num_instances: 4
+        drop_last: True
+        camera_to_domain: True
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+  Eval:
+    Query:
+      dataset:
+        name: "DukeMTMC"
+        image_root: "./dataset/"
+        cls_label_path: "query"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              interpolation: "bicubic"
+              backend: "pil"
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 128
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset:
+        name: "DukeMTMC"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_test"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
interpolation: "bicubic" + backend: "pil" + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +# loss function config for traing/eval process +Loss: + Train: + - CELossForMetaBIN: + weight: 1.0 + epsilon: 0.1 + - TripletLossForMetaBIN: + weight: 1.0 + margin: 0.3 + feature_from: "backbone" + - IntraDomainScatterLoss: + weight: 1.0 + normalize_feature: True + feature_from: "backbone" + - InterDomainShuffleLoss: + weight: 1.0 + normalize_feature: False + feature_from: "backbone" + Basic: + - CELossForMetaBIN: + weight: 1.0 + epsilon: 0.1 + - TripletLossForMetaBIN: + weight: 1.0 + margin: 0.3 + feature_from: "backbone" + MetaTrain: + - CELossForMetaBIN: + weight: 1.0 + epsilon: 0.1 + - TripletLossForMetaBIN: + weight: 1.0 + margin: 0.3 + feature_from: "backbone" + - IntraDomainScatterLoss: + weight: 1.0 + normalize_feature: True + feature_from: "backbone" + - InterDomainShuffleLoss: + weight: 1.0 + normalize_feature: False + feature_from: "backbone" + MetaTest: + - CELossForMetaBIN: + weight: 1.0 + epsilon: 0.1 + - TripletLossForMetaBIN: + weight: 1.0 + margin: 0.3 + feature_from: "backbone" + Eval: + - TripletLossForMetaBIN: + weight: 1.0 + margin: 0.3 + feature_from: "backbone" + +Optimizer: + - Momentum: + scope: ".*(conv|batch_norm|instance_norm|feat_bn|fc)" + lr: + name: MultiStepDecay + epochs: *epochs + learning_rate: 0.01 + step_each_epoch: 145 + milestones: [50, 90] + gamma: 0.1 + warmup_epoch: 10 + warmup_start_lr: 0.0001 + by_epoch: False + last_epoch: -1 + momentum: 0.9 + regularizer: + name: "L2" + coeff: 0.0005 + - Momentum: + scope: "backbone.*gate" + lr: + name: Constant + learning_rate: 0.01 + last_epoch: 0 + momentum: 0.9 + - SGD: + scope: "RecModel" + lr: + name: Cyclic + epochs: *epochs + step_each_epoch: 145 + base_learning_rate: 0.001 + max_learning_rate: 0.1 + warmup_epoch: 0 + warmup_start_lr: 1 + step_size_up: 1095 + step_size_down: 1095 + by_epoch: False + last_epoch: 0 + +AMP: + scale_loss: 65536 + use_dynamic_loss_scaling: True + +Metric: + Eval: + - Recallk: + topk: [1, 5, 10] + - mAP: {} \ No newline at end of file diff --git a/ppcls/engine/train/__init__.py b/ppcls/engine/train/__init__.py index a3fea6938905cda3a1b65d1fe82321d025a7d6b3..50bf9037f4982354724d56f5814f47cf8b92decc 100644 --- a/ppcls/engine/train/__init__.py +++ b/ppcls/engine/train/__init__.py @@ -15,3 +15,4 @@ from ppcls.engine.train.train import train_epoch from ppcls.engine.train.train_fixmatch import train_epoch_fixmatch from ppcls.engine.train.train_fixmatch_ccssl import train_epoch_fixmatch_ccssl from ppcls.engine.train.train_progressive import train_epoch_progressive +from ppcls.engine.train.train_metabin import train_epoch_metabin diff --git a/ppcls/engine/train/train_metabin.py b/ppcls/engine/train/train_metabin.py new file mode 100644 index 0000000000000000000000000000000000000000..d9348408114412c93e5ec37b61c329c902f20d57 --- /dev/null +++ b/ppcls/engine/train/train_metabin.py @@ -0,0 +1,251 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# reference: https://arxiv.org/abs/2011.14670v2
+
+from __future__ import absolute_import, division, print_function
+
+import time
+import paddle
+import numpy as np
+from collections import defaultdict
+
+from ppcls.engine.train.utils import update_loss, update_metric, log_info, type_name
+from ppcls.utils import profiler
+from ppcls.data import build_dataloader
+from ppcls.arch.backbone.variant_models.resnet_variant import MetaBIN, BINGate
+from ppcls.loss import build_loss
+
+
+def train_epoch_metabin(engine, epoch_id, print_batch_step):
+    tic = time.time()
+
+    if not hasattr(engine, "train_dataloader_iter"):
+        engine.train_dataloader_iter = iter(engine.train_dataloader)
+
+    if not hasattr(engine, "meta_dataloader"):
+        engine.meta_dataloader = build_dataloader(
+            config=engine.config['DataLoader']['Metalearning'],
+            mode='Train',
+            device=engine.device)
+        engine.meta_dataloader_iter = iter(engine.meta_dataloader)
+
+    num_domain = engine.train_dataloader.dataset.num_cams
+    for iter_id in range(engine.iter_per_epoch):
+        # fetch data batch from dataloader
+        try:
+            train_batch = next(engine.train_dataloader_iter)
+        except StopIteration:
+            engine.train_dataloader_iter = iter(engine.train_dataloader)
+            train_batch = next(engine.train_dataloader_iter)
+
+        try:
+            mtrain_batch, mtest_batch = get_meta_data(
+                engine.meta_dataloader_iter, num_domain)
+        except (StopIteration, RuntimeError):
+            # restart the iterator when it is exhausted, or when the batch
+            # cannot be split into meta-train and meta-test domains
+            engine.meta_dataloader_iter = iter(engine.meta_dataloader)
+            mtrain_batch, mtest_batch = get_meta_data(
+                engine.meta_dataloader_iter, num_domain)
+
+        profiler.add_profiler_step(engine.config["profiler_options"])
+        if iter_id == 5:
+            for key in engine.time_info:
+                engine.time_info[key].reset()
+        engine.time_info["reader_cost"].update(time.time() - tic)
+
+        train_batch_size = train_batch[0].shape[0]
+        mtrain_batch_size = mtrain_batch[0].shape[0]
+        mtest_batch_size = mtest_batch[0].shape[0]
+        if not engine.config["Global"].get("use_multilabel", False):
+            train_batch[1] = train_batch[1].reshape([train_batch_size, -1])
+            mtrain_batch[1] = mtrain_batch[1].reshape([mtrain_batch_size, -1])
+            mtest_batch[1] = mtest_batch[1].reshape([mtest_batch_size, -1])
+
+        engine.global_step += 1
+
+        # warm up the model (balancing gates excluded) before meta-learning
+        if engine.global_step == 1:
+            for i in range(engine.config["Global"]["warmup_iter"] - 1):
+                out, basic_loss_dict = basic_update(engine, train_batch)
+                try:
+                    train_batch = next(engine.train_dataloader_iter)
+                except StopIteration:
+                    engine.train_dataloader_iter = iter(
+                        engine.train_dataloader)
+                    train_batch = next(engine.train_dataloader_iter)
+
+        out, basic_loss_dict = basic_update(engine=engine, batch=train_batch)
+        mtrain_loss_dict, mtest_loss_dict = metalearning_update(
+            engine=engine, mtrain_batch=mtrain_batch, mtest_batch=mtest_batch)
+        loss_dict = {
+            **{"train_" + key: value
+               for key, value in basic_loss_dict.items()},
+            **{"mtrain_" + key: value
+               for key, value in mtrain_loss_dict.items()},
+            **{"mtest_" + key: value
+               for key, value in mtest_loss_dict.items()}
+        }
+        # step lr (by iter)
+        # the last lr_sch is the cyclic lr, which is stepped in setup_opt("mtest")
+        for i in range(len(engine.lr_sch) - 1):
+            if not getattr(engine.lr_sch[i], "by_epoch", False):
+                engine.lr_sch[i].step()
+        # update ema
+        if engine.ema:
+            engine.model_ema.update(engine.model)
+
+        # below code just for logging
+        # update metric_for_logger
+        update_metric(engine, out, train_batch, train_batch_size)
+        # update_loss_for_logger
+        update_loss(engine, loss_dict, train_batch_size)
+        engine.time_info["batch_cost"].update(time.time() - tic)
+        if iter_id % print_batch_step == 0:
+            log_info(engine, train_batch_size, epoch_id, iter_id)
+        tic = time.time()
+
+    # step lr (by epoch)
+    # the last lr_sch is the cyclic lr, which is never stepped by epoch
+    for i in range(len(engine.lr_sch) - 1):
+        if getattr(engine.lr_sch[i], "by_epoch", False) and \
+                type_name(engine.lr_sch[i]) != "ReduceOnPlateau":
+            engine.lr_sch[i].step()
+
+
+def setup_opt(engine, stage):
+    assert stage in ["train", "mtrain", "mtest"]
+    opt = {}
+    if stage == "train":
+        opt["bn_mode"] = "general"
+        opt["enable_inside_update"] = False
+        opt["lr_gate"] = 0.0
+    elif stage == "mtrain":
+        opt["bn_mode"] = "hold"
+        opt["enable_inside_update"] = False
+        opt["lr_gate"] = 0.0
+    elif stage == "mtest":
+        norm_lr = engine.lr_sch[1].last_lr
+        cyclic_lr = engine.lr_sch[2].get_lr()
+        engine.lr_sch[2].step()  # update cyclic learning rate
+        opt["bn_mode"] = "hold"
+        opt["enable_inside_update"] = True
+        opt["lr_gate"] = norm_lr * cyclic_lr
+    for layer in engine.model.sublayers():
+        if isinstance(layer, MetaBIN):
+            layer.setup_opt(opt)
+
+
+def reset_opt(model):
+    for layer in model.sublayers():
+        if isinstance(layer, MetaBIN):
+            layer.reset_opt()
+
+
+def get_meta_data(meta_dataloader_iter, num_domain):
+    """
+    fetch a batch from the meta dataloader, then divide it by domain into
+    a meta-train batch and a meta-test batch
+    """
+    # randomly assign half of the domains to meta-train, the rest to meta-test
+    list_all = np.random.permutation(num_domain)
+    list_mtrain = list(list_all[:num_domain // 2])
+    batch = next(meta_dataloader_iter)
+    domain_idx = batch[2]
+    is_mtrain_domain = paddle.zeros_like(domain_idx, dtype="bool")
+    for sample in list_mtrain:
+        is_mtrain_domain = paddle.logical_or(is_mtrain_domain,
+                                             domain_idx == sample)
+
+    # meta-train batch
+    if not any(is_mtrain_domain):
+        raise RuntimeError("batch contains no sample from meta-train domains")
+    mtrain_batch = [batch[i][is_mtrain_domain] for i in range(len(batch))]
+
+    # meta-test batch
+    is_mtest_domain = paddle.logical_not(is_mtrain_domain)
+    if not any(is_mtest_domain):
+        raise RuntimeError("batch contains no sample from meta-test domains")
+    mtest_batch = [batch[i][is_mtest_domain] for i in range(len(batch))]
+    return mtrain_batch, mtest_batch
+
+
+def forward(engine, batch, loss_func):
+    batch_info = {"label": batch[1], "domain": batch[2]}
+    amp_level = engine.config["AMP"].get("level", "O1").upper()
+    with paddle.amp.auto_cast(
+            custom_black_list={"flatten_contiguous_range", "greater_than"},
+            level=amp_level):
+        out = engine.model(batch[0], batch[1])
+        loss_dict = loss_func(out, batch_info)
+    return out, loss_dict
+
+
+def backward(engine, loss, optimizer):
+    scaled = engine.scaler.scale(loss)
+    scaled.backward()
+    engine.scaler.minimize(optimizer, scaled)
+    # keep every balancing gate inside its valid range after the update
+    for layer in engine.model.sublayers():
+        if isinstance(layer, BINGate):
+            layer.clip_gate()
+
+
+def basic_update(engine, batch):
+    setup_opt(engine, "train")
+    train_loss_func = build_loss(engine.config["Loss"]["Basic"])
+    out, train_loss_dict = forward(engine, batch, train_loss_func)
+    train_loss = train_loss_dict["loss"]
+    backward(engine, train_loss, engine.optimizer[0])
+    engine.optimizer[0].clear_grad()
+    reset_opt(engine.model)
+    return out, train_loss_dict
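+
+# basic_update (above) trains the conv/norm/fc parameters via optimizer[0];
+# metalearning_update (below) runs one simulated meta-train / meta-test
+# episode and updates only the balancing gates via optimizer[1], at the
+# combined rate norm_lr * cyclic_lr set in setup_opt("mtest").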
+
+
+def metalearning_update(engine, mtrain_batch, mtest_batch):
+    # meta train: accumulate gate gradients with the gates held fixed
+    mtrain_loss_func = build_loss(engine.config["Loss"]["MetaTrain"])
+    setup_opt(engine, "mtrain")
+
+    mtrain_batch_info = {"label": mtrain_batch[1], "domain": mtrain_batch[2]}
+    out = engine.model(mtrain_batch[0], mtrain_batch[1])
+    mtrain_loss_dict = mtrain_loss_func(out, mtrain_batch_info)
+    mtrain_loss = mtrain_loss_dict["loss"]
+    engine.optimizer[1].clear_grad()
+    mtrain_loss.backward()
+
+    # meta test: update the gates with the inside-update enabled
+    mtest_loss_func = build_loss(engine.config["Loss"]["MetaTest"])
+    setup_opt(engine, "mtest")
+
+    out, mtest_loss_dict = forward(engine, mtest_batch, mtest_loss_func)
+    engine.optimizer[1].clear_grad()
+    mtest_loss = mtest_loss_dict["loss"]
+    backward(engine, mtest_loss, engine.optimizer[1])
+
+    engine.optimizer[0].clear_grad()
+    engine.optimizer[1].clear_grad()
+    reset_opt(engine.model)
+
+    return mtrain_loss_dict, mtest_loss_dict
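
Note for reviewers: a minimal standalone sketch of the domain split performed by get_meta_data above; the helper name split_by_domain and the toy tensors are illustrative assumptions, not part of the patch.

import numpy as np
import paddle


def split_by_domain(batch, num_domain):
    # assign a random half of the domains to meta-train, the rest to meta-test
    perm = np.random.permutation(num_domain)
    mtrain_domains = perm[:num_domain // 2]
    domain_idx = batch[2]
    is_mtrain = paddle.zeros_like(domain_idx, dtype="bool")
    for d in mtrain_domains:
        is_mtrain = paddle.logical_or(is_mtrain, domain_idx == int(d))
    mtrain = [x[is_mtrain] for x in batch]
    mtest = [x[paddle.logical_not(is_mtrain)] for x in batch]
    return mtrain, mtest


# toy batch: 8 samples drawn from 4 camera-domains, laid out like the
# (image, label, domain) batches produced by the Metalearning dataloader
images = paddle.randn([8, 3, 256, 128])
labels = paddle.randint(0, 751, [8])
domains = paddle.to_tensor([0, 0, 1, 1, 2, 2, 3, 3])
mtrain, mtest = split_by_domain([images, labels, domains], num_domain=4)
print(mtrain[0].shape, mtest[0].shape)  # the two halves partition the batch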