diff --git a/ppcls/arch/gears/fc.py b/ppcls/arch/gears/fc.py index 0262c100bb80a7f12a9fe4085a444539e3a5cd27..b32474195e1ada4cd0a17b493f68f65a242d82cd 100644 --- a/ppcls/arch/gears/fc.py +++ b/ppcls/arch/gears/fc.py @@ -30,6 +30,6 @@ class FC(nn.Layer): self.fc = paddle.nn.Linear( self.embedding_size, self.class_num, weight_attr=weight_attr) - def forward(self, input): + def forward(self, input, label=None): out = self.fc(input) return out diff --git a/ppcls/configs/Products/ResNet50_vd_Aliproduct.yaml b/ppcls/configs/Products/ResNet50_vd_Aliproduct.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d440c5004e1a5ca527ea15f4d2514498f1f27f7 --- /dev/null +++ b/ppcls/configs/Products/ResNet50_vd_Aliproduct.yaml @@ -0,0 +1,127 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + class_num: 50030 + save_interval: 10 + eval_during_train: False + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "classification" + +# model architecture +Arch: + name: "RecModel" + Backbone: + name: "ResNet50_vd" + pretrained: False + BackboneStopLayer: + name: "flatten_0" + Neck: + name: "FC" + embedding_size: 2048 + class_num: 512 + Head: + name: "FC" + embedding_size: 512 + class_num: 50030 + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.05 + regularizer: + name: 'L2' + coeff: 0.00007 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "ImageNetDataset" + image_root: "./dataset/Aliproduct/" + cls_label_path: "./dataset/Aliproduct/train_list.txt" + transform_ops: + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: 
[0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: False + Eval: + # TODO: modify to the latest trainer + dataset: + name: "ImageNetDataset" + image_root: "./dataset/Aliproduct/" + cls_label_path: "./dataset/Aliproduct/val_list.txt" + transform_ops: + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: False +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + +Infer: + infer_imgs: "docs/images/whl/demo.jpg" + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/ppcls/configs/Products/ResNet50_vd_Inshop.yaml b/ppcls/configs/Products/ResNet50_vd_Inshop.yaml new file mode 100644 index 0000000000000000000000000000000000000000..108ab3ea9cad7e40ed609e008fb2be674eb217b8 --- /dev/null +++ b/ppcls/configs/Products/ResNet50_vd_Inshop.yaml @@ -0,0 +1,163 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + class_num: 3997 + save_interval: 10 + eval_during_train: False + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "retrieval" + +# model architecture +Arch: + name: "RecModel" + Backbone: + name: "ResNet50_vd" + pretrained: False + BackboneStopLayer: + name: "flatten_0" + Neck: + name: "FC" + embedding_size: 2048 + 
class_num: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 3997 + margin: 0.15 + scale: 30 + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - TripletLossV2: + weight: 1.0 + margin: 0.5 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: MultiStepDecay + learning_rate: 0.01 + milestones: [30, 60, 70, 80, 90, 100] + gamma: 0.5 + verbose: False + last_epoch: -1 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "ImageNetDataset" + image_root: "./dataset/Inshop/" + cls_label_path: "./dataset/Inshop/train_list.txt" + transform_ops: + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] + + sampler: + name: DistributedRandomIdentitySampler + batch_size: 64 + num_instances: 2 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: False + Eval: + Query: + # TODO: modify to the latest trainer + dataset: + name: "ImageNetDataset" + image_root: "./dataset/Inshop/" + cls_label_path: "./dataset/Inshop/query_list.txt" + transform_ops: + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: False + + Gallery: + # TODO: modify to the latest trainer + dataset: + name: "ImageNetDataset" + image_root: "./dataset/Inshop/" + cls_label_path: "./dataset/Inshop/gallery_list.txt" + transform_ops: + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: 
DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: False + +Metric: + Eval: + - Recallk: + topk: [1, 5] + +Infer: + infer_imgs: "docs/images/whl/demo.jpg" + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/ppcls/configs/Products/ResNet50_vd_SOP.yaml b/ppcls/configs/Products/ResNet50_vd_SOP.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a522a79e69bb55aa7dfc671cc2fc955aa466f2da --- /dev/null +++ b/ppcls/configs/Products/ResNet50_vd_SOP.yaml @@ -0,0 +1,163 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + class_num: 11319 + save_interval: 10 + eval_during_train: False + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "retrieval" + +# model architecture +Arch: + name: "RecModel" + Backbone: + name: "ResNet50_vd" + pretrained: False + BackboneStopLayer: + name: "flatten_0" + Neck: + name: "FC" + embedding_size: 2048 + class_num: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 11319 + margin: 0.15 + scale: 30 + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - TripletLossV2: + weight: 1.0 + margin: 0.5 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: MultiStepDecay + learning_rate: 0.01 + milestones: [30, 60, 70, 80, 90, 100] + gamma: 0.5 + verbose: False + last_epoch: -1 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "ImageNetDataset" + image_root: 
"./dataset/Stanford_Online_Products/" + cls_label_path: "./dataset/Stanford_Online_Products/train_list.txt" + transform_ops: + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] + + sampler: + name: DistributedRandomIdentitySampler + batch_size: 64 + num_instances: 2 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: False + Eval: + Query: + # TODO: modify to the latest trainer + dataset: + name: "ImageNetDataset" + image_root: "./dataset/Stanford_Online_Products/" + cls_label_path: "./dataset/Stanford_Online_Products/test_list.txt" + transform_ops: + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: False + + Gallery: + # TODO: modify to the latest trainer + dataset: + name: "ImageNetDataset" + image_root: "./dataset/Stanford_Online_Products/" + cls_label_path: "./dataset/Stanford_Online_Products/test_list.txt" + transform_ops: + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: False + +Metric: + Eval: + - Recallk: + topk: [1, 5] + +Infer: + infer_imgs: "docs/images/whl/demo.jpg" + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: