diff --git a/configs/CSPNet/CSPResNet50.yaml b/configs/CSPNet/CSPResNet50.yaml deleted file mode 100644 index 78b56af93b1a77f05510092b8236f1b1bfb596a5..0000000000000000000000000000000000000000 --- a/configs/CSPNet/CSPResNet50.yaml +++ /dev/null @@ -1,76 +0,0 @@ -mode: 'train' -ARCHITECTURE: - name: 'CSPResNet50_leaky' - -pretrained_model: "" -model_save_dir: "./output/" -classes_num: 1000 -total_images: 1281167 -save_interval: 1 -validate: True -valid_interval: 1 -epochs: 120 -topk: 5 -image_shape: [3, 256, 256] - -use_mix: False -ls_epsilon: -1 - -LEARNING_RATE: - function: 'Piecewise' - params: - lr: 0.1 - decay_epochs: [30, 60, 90] - gamma: 0.1 - -OPTIMIZER: - function: 'Momentum' - params: - momentum: 0.9 - regularizer: - function: 'L2' - factor: 0.000100 - -TRAIN: - batch_size: 256 - num_workers: 4 - file_list: "./dataset/ILSVRC2012/train_list.txt" - data_dir: "./dataset/ILSVRC2012/" - shuffle_seed: 0 - transforms: - - DecodeImage: - to_rgb: True - to_np: False - channel_first: False - - RandCropImage: - size: 256 - - RandFlipImage: - flip_code: 1 - - NormalizeImage: - scale: 1./255. - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - order: '' - - ToCHWImage: - -VALID: - batch_size: 64 - num_workers: 4 - file_list: "./dataset/ILSVRC2012/val_list.txt" - data_dir: "./dataset/ILSVRC2012/" - shuffle_seed: 0 - transforms: - - DecodeImage: - to_rgb: True - to_np: False - channel_first: False - - ResizeImage: - resize_short: 256 - - CropImage: - size: 256 - - NormalizeImage: - scale: 1.0/255.0 - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - order: '' - - ToCHWImage: diff --git a/configs/ResNet_ACNet/ResNet_ACNet.yaml b/configs/ResNet_ACNet/ResNet_ACNet.yaml deleted file mode 100644 index 63faec8f9ee6612d5121b2d42ea224af31793ae0..0000000000000000000000000000000000000000 --- a/configs/ResNet_ACNet/ResNet_ACNet.yaml +++ /dev/null @@ -1,77 +0,0 @@ -mode: 'train' -ARCHITECTURE: - name: "ResNet50_ACNet" - -pretrained_model: "" -model_save_dir: "./output/" -classes_num: 1000 -total_images: 1281167 -save_interval: 1 -validate: True -valid_interval: 1 -epochs: 120 -topk: 5 -image_shape: [3, 224, 224] - -LEARNING_RATE: - function: 'Piecewise' - params: - lr: 0.1 - decay_epochs: [30, 60, 90] - gamma: 0.1 - -OPTIMIZER: - function: 'Momentum' - params: - momentum: 0.9 - regularizer: - function: 'L2' - factor: 0.0001 - -TRAIN: - batch_size: 256 - num_workers: 4 - file_list: "./dataset/ILSVRC2012/train_list.txt" - data_dir: "./dataset/ILSVRC2012/" - shuffle_seed: 0 - transforms: - - DecodeImage: - to_rgb: True - to_np: False - channel_first: False - - RandCropImage: - size: 224 - - RandFlipImage: - flip_code: 1 - - NormalizeImage: - scale: 1./255. - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - order: '' - - ToCHWImage: - - - -VALID: - batch_size: 64 - num_workers: 4 - file_list: "./dataset/ILSVRC2012/val_list.txt" - data_dir: "./dataset/ILSVRC2012/" - shuffle_seed: 0 - transforms: - - DecodeImage: - to_rgb: True - to_np: False - channel_first: False - - ResizeImage: - resize_short: 256 - - CropImage: - size: 224 - - NormalizeImage: - scale: 1.0/255.0 - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - order: '' - - ToCHWImage: - - diff --git a/ppcls/modeling/architectures/se_resnext.py b/ppcls/modeling/architectures/se_resnext.py index d323e852f492c3036476f5c6e96ff60f90b4a5d3..ff4d835c6110a41a5d0b4b371ff7c0f4f7a29f72 100644 --- a/ppcls/modeling/architectures/se_resnext.py +++ b/ppcls/modeling/architectures/se_resnext.py @@ -20,6 +20,7 @@ import numpy as np import paddle from paddle import ParamAttr import paddle.nn as nn +import paddle.nn.functional as F from paddle.nn import Conv2D, BatchNorm, Linear, Dropout from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D from paddle.nn.initializer import Uniform diff --git a/ppcls/utils/save_load.py b/ppcls/utils/save_load.py index 243d12de3c473c98a52a8887171f470679675ff4..ff50485c333e76e327a5e2a985fb46f2e6fb569e 100644 --- a/ppcls/utils/save_load.py +++ b/ppcls/utils/save_load.py @@ -80,15 +80,19 @@ def load_distillation_model(model, pretrained_model, load_static_weights): load_static_weights ) == 2, "load_static_weights length should be 2 but got {}".format( len(load_static_weights)) + teacher = model.teacher if hasattr(model, + "teacher") else model._layers.teacher + student = model.student if hasattr(model, + "student") else model._layers.student load_dygraph_pretrain( - model.teacher, + teacher, path=pretrained_model[0], load_static_weights=load_static_weights[0]) logger.info( logger.coloring("Finish initing teacher model from {}".format( pretrained_model), "HEADER")) load_dygraph_pretrain( - model.student, + student, path=pretrained_model[1], load_static_weights=load_static_weights[1]) logger.info( diff --git a/tools/export_model.py b/tools/export_model.py index 661b6b28af2374c747aae9a5d0bbae8afc3d21ff..cc896fbb50644d14017726121c26c193c6f9e65f 100644 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -43,10 +43,11 @@ def parse_args(): class Net(paddle.nn.Layer): - def __init__(self, net, to_static, class_dim): + def __init__(self, net, to_static, class_dim, model): super(Net, self).__init__() self.pre_net = net(class_dim=class_dim) self.to_static = to_static + self.model = model # Please modify the 'shape' according to actual needs @to_static(input_spec=[ @@ -55,6 +56,8 @@ class Net(paddle.nn.Layer): ]) def forward(self, inputs): x = self.pre_net(inputs) + if self.model == "GoogLeNet": + x = x[0] x = F.softmax(x) return x @@ -64,7 +67,7 @@ def main(): net = architectures.__dict__[args.model] - model = Net(net, to_static, args.class_dim) + model = Net(net, to_static, args.class_dim, args.model) load_dygraph_pretrain( model.pre_net, path=args.pretrained_model, diff --git a/tools/infer/predict.py b/tools/infer/predict.py index 870b518905eae6a36d585193b911f0f0c4a541a5..4a2e6d076415ee7b71191d52ade1dc60ce027b6b 100644 --- a/tools/infer/predict.py +++ b/tools/infer/predict.py @@ -53,7 +53,7 @@ def main(args): assert args.use_fp16 is False else: assert args.use_gpu is True - assert args.model_name is not None + assert args.model is not None # HALF precission predict only work when using tensorrt if args.use_fp16 is True: assert args.use_tensorrt is True @@ -105,8 +105,8 @@ def main(args): fp_message = "FP16" if args.use_fp16 else "FP32" trt_msg = "using tensorrt" if args.use_tensorrt else "not using tensorrt" print("{0}\t{1}\t{2}\tbatch size: {3}\ttime(ms): {4}".format( - args.model_name, trt_msg, fp_message, args.batch_size, 1000 * - test_time / test_num)) + args.model, trt_msg, fp_message, args.batch_size, 1000 * test_time + / test_num)) if __name__ == "__main__": diff --git a/tools/infer/utils.py b/tools/infer/utils.py index 33950aeca1237322da9e874b47f9d378762091ab..c078a0640a796c85866220097fbb9ed607b3979a 100644 --- a/tools/infer/utils.py +++ b/tools/infer/utils.py @@ -40,7 +40,6 @@ def parse_args(): parser.add_argument("--use_tensorrt", type=str2bool, default=False) parser.add_argument("--gpu_mem", type=int, default=8000) parser.add_argument("--enable_benchmark", type=str2bool, default=False) - parser.add_argument("--model_name", type=str) parser.add_argument("--top_k", type=int, default=1) parser.add_argument("--hubserving", type=str2bool, default=False) diff --git a/tools/train.py b/tools/train.py index fe8a77b4f6aca2159f70dd3a92860a37aab10d32..846abc4005a27535519115186ab2642fe2122606 100644 --- a/tools/train.py +++ b/tools/train.py @@ -52,7 +52,7 @@ def parse_args(): def main(args): - paddle.seed(123) + paddle.seed(12345) config = get_config(args.config, overrides=args.override, show=True) # assign the place @@ -68,7 +68,6 @@ def main(args): strategy = paddle.distributed.init_parallel_env() net = program.create_model(config.ARCHITECTURE, config.classes_num) - optimizer, lr_scheduler = program.create_optimizer( config, parameter_list=net.parameters())