Unverified commit d72acebb, authored by minghaoBD, committed by GitHub

Improve unstructured pruner ease of use (#738)

Parent 8eacc16b
@@ -67,7 +67,7 @@ python3.7 train.py --data imagenet --lr 0.05 --pruning_mode threshold --threshol
Prune by ratio (training is slower; pruning by threshold is recommended):
```bash
-python3.7 train.py --data imagenet --lr 0.05 --pruning_mode ratio --ratio 0.5
+python3.7 train.py --data imagenet --lr 0.05 --pruning_mode ratio --ratio 0.55
```
Multi-GPU training:
@@ -76,9 +76,11 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
python3.7 -m paddle.distributed.launch \
          --gpus="0,1,2,3" \
          --log_dir="train_mbv1_imagenet_threshold_001_log" \
-          train.py --data imagenet --lr 0.05 --pruning_mode threshold --threshold 0.01
+          train.py --data imagenet --lr 0.05 --pruning_mode threshold --threshold 0.01 --batch_size 256
```
+**Note**: the `batch_size` here is the per-card batch size.
Resume training (replace `dir/to/the/saved/pruned/model` and `INTERRUPTED_EPOCH` in the command):
```bash
python3.7 train.py --data imagenet --lr 0.05 --pruning_mode threshold --threshold 0.01 \
@@ -87,7 +89,7 @@ python3.7 train.py --data imagenet --lr 0.05 --pruning_mode threshol
## Inference:
```bash
-python3.7 eval --pruned_model models/ --data imagenet
+python3.7 evaluate.py --pruned_model models/ --data imagenet
```
Example of pruning-aware training code:
@@ -101,6 +103,7 @@ for epoch in range(epochs):
    loss = calculate_loss()
    loss.backward()
    opt.step()
+    learning_rate.step()
    opt.clear_grad()
    #STEP2: update the pruner's threshold given the updated parameters
    pruner.step()
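For readers who want to try this flow outside the demo scripts, the sketch below fills in the pieces around the snippet above as a self-contained dynamic-graph loop with a toy linear model and random data. It is a minimal sketch, not the demo itself: the `from paddleslim import UnstructuredPruner` import path and the pruner's default arguments are assumptions to verify against your installed PaddleSlim version, and the LR-scheduler step is omitted for brevity; the ordering of `opt.step()`, `pruner.step()`, and `pruner.update_params()` mirrors the code in this commit.

```python
import paddle
import paddle.nn.functional as F
# Assumed import path -- adjust to match the PaddleSlim release you use.
from paddleslim import UnstructuredPruner

# Toy stand-ins for the MobileNetV1 model and the ImageNet/CIFAR dataloader in train.py.
model = paddle.nn.Linear(16, 10)
opt = paddle.optimizer.Momentum(
    learning_rate=0.05, parameters=model.parameters())

# STEP1: initialize the pruner (threshold mode works the same way).
pruner = UnstructuredPruner(model, mode='ratio', ratio=0.55)

for epoch in range(2):
    for _ in range(10):
        x = paddle.randn([8, 16])
        y = paddle.randint(0, 10, [8, 1])
        loss = F.cross_entropy(model(x), y)
        loss.backward()
        opt.step()
        opt.clear_grad()
        # STEP2: refresh the pruning threshold/masks after each parameter update.
        pruner.step()
    # STEP3: write zeros back into the pruned parameters before evaluating or saving.
    pruner.update_params()
    print("density: {}%".format(
        round(100 * UnstructuredPruner.total_sparse(model), 2)))
```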
@@ -128,8 +131,8 @@ test()
For more options, see the shell scripts or run the following commands:
```bash
-python3.7 train --h
-python3.7 evaluate --h
+python3.7 train.py --h
+python3.7 evaluate.py --h
```

## Experimental results

@@ -138,5 +141,6 @@ python3.7 evaluate --h
|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|
| MobileNetV1 | ImageNet | Baseline | - | 70.99%/89.68% | - | - | - |
| MobileNetV1 | ImageNet | ratio | -55.19% | 70.87%/89.80% (-0.12%/+0.12%) | 0.005 | - | 68 |
+| MobileNetV1 | ImageNet | threshold | -49.49% | 71.22%/89.78% (+0.23%/+0.10%) | 0.05 | 0.01 | 93 |
| YOLO v3 | VOC | - | - | 76.24% | - | - | - |
-| YOLO v3 | VOC | threshold | -56.50% | 77.02% (+0.78%) | 0.001 | 0.01 | 102k iterations |
+| YOLO v3 | VOC | threshold | -56.50% | 77.21% (+0.97%) | 0.001 | 0.01 | 150k iterations |
@@ -15,13 +15,14 @@ import time
import logging
from paddleslim.common import get_logger
import paddle.distributed as dist
+from paddle.distributed import ParallelEnv

_logger = get_logger(__name__, level=logging.INFO)

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
-add_arg('batch_size', int, 64, "Minibatch size.")
+add_arg('batch_size', int, 64 * 4, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.")
add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
@@ -39,7 +40,7 @@ add_arg('pretrained_model', str, None, "The pretrained model the lo
add_arg('model_path', str, "./models", "The path to save model.")
add_arg('model_period', int, 10, "The period to save model in epochs.")
add_arg('resume_epoch', int, -1, "The epoch to resume training.")
-add_arg('num_workers', int, 4, "number of workers when loading dataset.")
+add_arg('num_workers', int, 16, "number of workers when loading dataset.")
# yapf: enable
@@ -75,13 +76,22 @@ def create_optimizer(args, step_per_epoch, model):
def compress(args):
-    dist.init_parallel_env()
+    if args.use_gpu:
+        place = paddle.set_device('gpu')
+    else:
+        place = paddle.set_device('cpu')
+    trainer_num = paddle.distributed.get_world_size()
+    use_data_parallel = trainer_num != 1
+    if use_data_parallel:
+        dist.init_parallel_env()

    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
-        train_dataset = reader.ImageNetDataset(data_dir='/data', mode='train')
-        val_dataset = reader.ImageNetDataset(data_dir='/data', mode='val')
+        train_dataset = reader.ImageNetDataset(mode='train')
+        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(
@@ -94,30 +104,33 @@ def compress(args):
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))
-    places = paddle.static.cuda_places(
-    ) if args.use_gpu else paddle.static.cpu_places()
-    batch_size_per_card = int(args.batch_size / len(places))
+    batch_sampler = paddle.io.DistributedBatchSampler(
+        train_dataset, batch_size=args.batch_size, shuffle=True, drop_last=True)
    train_loader = paddle.io.DataLoader(
        train_dataset,
-        places=places,
-        drop_last=True,
-        batch_size=args.batch_size,
-        shuffle=True,
+        places=place,
+        batch_sampler=batch_sampler,
        return_list=True,
        num_workers=args.num_workers,
        use_shared_memory=True)
    valid_loader = paddle.io.DataLoader(
        val_dataset,
-        places=places,
+        places=place,
        drop_last=False,
        return_list=True,
-        batch_size=args.batch_size,
+        batch_size=64,
        shuffle=False,
        use_shared_memory=True)
-    step_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size))
+    step_per_epoch = int(
+        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
+    if ParallelEnv().nranks > 1:
+        model = paddle.DataParallel(model)
    if args.pretrained_model is not None:
        model.set_state_dict(paddle.load(args.pretrained_model))
@@ -160,44 +173,65 @@ def compress(args):
    def train(epoch):
        model.train()
+        train_reader_cost = 0.0
+        train_run_cost = 0.0
+        total_samples = 0
+        reader_start = time.time()
        for batch_id, data in enumerate(train_loader):
-            start_time = time.time()
+            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)
+            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
-            end_time = time.time()
-            if batch_id % args.log_period == 0:
-                _logger.info(
-                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
-                    format(epoch, batch_id, args.lr,
-                           np.mean(loss.numpy()),
-                           np.mean(acc_top1.numpy()),
-                           np.mean(acc_top5.numpy()), end_time - start_time))
            loss.backward()
            opt.step()
+            learning_rate.step()
            opt.clear_grad()
            pruner.step()
+            train_run_cost += time.time() - train_start
+            total_samples += args.batch_size * ParallelEnv().nranks
+            if batch_id % args.log_period == 0:
+                _logger.info(
+                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                    format(epoch, batch_id,
+                           opt.get_lr(),
+                           np.mean(loss.numpy()),
+                           np.mean(acc_top1.numpy()),
+                           np.mean(acc_top5.numpy()), train_reader_cost /
+                           args.log_period, (train_reader_cost + train_run_cost
+                                             ) / args.log_period, total_samples
+                           / args.log_period, total_samples / (
+                               train_reader_cost + train_run_cost)))
+                train_reader_cost = 0.0
+                train_run_cost = 0.0
+                total_samples = 0
+            reader_start = time.time()

    pruner = UnstructuredPruner(
        model,
        mode=args.pruning_mode,
        ratio=args.ratio,
        threshold=args.threshold)

    for i in range(args.resume_epoch + 1, args.num_epochs):
        train(i)
-        if i % args.test_period == 0:
+        if (i + 1) % args.test_period == 0:
            pruner.update_params()
            _logger.info(
                "The current density of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)
-        if i > args.resume_epoch and i % args.model_period == 0:
+        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model-pruned.pdparams"))
...
#!/bin/bash
export CUDA_VISIBLE_DEVICES=3
python3.7 train.py \
-    --batch_size=128 \
+    --batch_size=256 \
    --lr=0.05 \
-    --ratio=0.45 \
-    --threshold=1e-5 \
+    --threshold=0.01 \
    --pruning_mode="threshold" \
    --data="cifar10" \

#!/bin/bash
export CUDA_VISIBLE_DEVICES=3
python3.7 train.py \
-    --batch_size=64 \
+    --batch_size=256 \
    --lr=0.05 \
-    --ratio=0.45 \
-    --threshold=1e-5 \
+    --threshold=0.01 \
    --pruning_mode="threshold" \
    --data="imagenet" \
@@ -15,7 +15,7 @@ DATA_DIM = 224
THREAD = 16
BUF_SIZE = 10240

-DATA_DIR = './data/ILSVRC2012/'
+DATA_DIR = 'data/ILSVRC2012/'
DATA_DIR = os.path.join(os.path.split(os.path.realpath(__file__))[0], DATA_DIR)

img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
...
@@ -68,20 +68,22 @@ def _get_skip_params(program):
Prune by threshold:
```bash
-CUDA_VISIBLE_DEVICES=2,3 python3.7 train.py --data imagenet --lr 0.05 --pruning_mode threshold --threshold 0.01
+CUDA_VISIBLE_DEVICES=2,3 python3.7 train.py --batch_size 512 --data imagenet --lr 0.05 --pruning_mode threshold --threshold 0.01
```
Prune by ratio (training is slower; pruning by threshold is recommended):
```bash
-CUDA_VISIBLE_DEVICES=2,3 python3.7 train.py --data imagenet --lr 0.05 --pruning_mode ratio --ratio 0.5
+CUDA_VISIBLE_DEVICES=2,3 python3.7 train.py --batch_size 512 --data imagenet --lr 0.05 --pruning_mode ratio --ratio 0.55
```
Resume training (replace `dir/to/the/saved/pruned/model` and `INTERRUPTED_EPOCH` in the command):
```
-CUDA_VISIBLE_DEVICES=2,3 python3.7 train.py --data imagenet --lr 0.05 --pruning_mode threshold --threshold 0.01 \
+CUDA_VISIBLE_DEVICES=2,3 python3.7 train.py --batch_size 512 --data imagenet --lr 0.05 --pruning_mode threshold --threshold 0.01 \
          --pretrained_model dir/to/the/saved/pruned/model --resume_epoch INTERRUPTED_EPOCH
```
+**Note**: the `batch_size` in the commands above is the total batch size across all cards, i.e. 256 per card.

## Inference
```bash
CUDA_VISIBLE_DEVICES=0 python3.7 evaluate.py --pruned_model models/ --data imagenet
@@ -107,7 +109,7 @@ opt.minimize(avg_cost)
#STEP1: initialize the pruner
pruner = UnstructuredPruner(paddle.static.default_main_program(), mode='threshold', threshold=0.01, place=place) # prune by threshold
-# pruner = UnstructuredPruner(paddle.static.default_main_program(), mode='ratio', ratio=0.5, place=place) # prune by ratio
+# pruner = UnstructuredPruner(paddle.static.default_main_program(), mode='ratio', ratio=0.55, place=place) # prune by ratio
exe.run(paddle.static.default_startup_program())
paddle.fluid.io.load_vars(exe, args.pretrained_model)
@@ -116,10 +118,7 @@ for epoch in range(epochs):
    for batch_id, data in enumerate(train_loader):
        loss_n, acc_top1_n, acc_top5_n = exe.run(
            train_program,
-            feed={
-                "image": data[0].get('image'),
-                "label": data[0].get('label')
-            },
+            feed=data,
            fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
        learning_rate.step()
        #STEP2: update the pruner's threshold given the updated parameters
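For completeness, here is a compact, runnable sketch of the same static-graph flow with a toy fully-connected network and random numpy batches, so the STEP1/STEP2 ordering can be tried without the ImageNet pipeline. The `from paddleslim.prune.unstructured_pruner import UnstructuredPruner` import path is an assumption (this commit only shows the class name); the constructor arguments, the `pruner.step()` call right after each `exe.run` update (the step the `#STEP2` comment above introduces), and `pruner.update_params()` before evaluation follow the snippet above.

```python
import numpy as np
import paddle
# Assumed import path for the static-graph pruner -- verify against your PaddleSlim install.
from paddleslim.prune.unstructured_pruner import UnstructuredPruner

paddle.enable_static()
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)

main_prog = paddle.static.default_main_program()
startup_prog = paddle.static.default_startup_program()
with paddle.static.program_guard(main_prog, startup_prog):
    # Toy network standing in for MobileNetV1 in train.py.
    image = paddle.static.data(name='image', shape=[None, 16], dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    logits = paddle.static.nn.fc(image, size=10)
    avg_cost = paddle.nn.functional.cross_entropy(logits, label)
    opt = paddle.optimizer.SGD(learning_rate=0.05)
    opt.minimize(avg_cost)

# STEP1: initialize the pruner on the built program, as in the snippet above.
pruner = UnstructuredPruner(
    main_prog, mode='threshold', threshold=0.01, place=place)
exe.run(startup_prog)

for epoch in range(2):
    for _ in range(10):
        feed = {
            'image': np.random.rand(8, 16).astype('float32'),
            'label': np.random.randint(0, 10, (8, 1)).astype('int64'),
        }
        exe.run(main_prog, feed=feed, fetch_list=[avg_cost.name])
        # STEP2: update the pruner's threshold/masks after each training step.
        pruner.step()
    # Zero out the pruned weights before evaluating or saving the program.
    pruner.update_params()
```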
@@ -157,5 +156,6 @@ python3.7 evaluate.py --h
|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|
| MobileNetV1 | ImageNet | Baseline | - | 70.99%/89.68% | - | - | - |
| MobileNetV1 | ImageNet | ratio | -55.19% | 70.87%/89.80% (-0.12%/+0.12%) | 0.05 | - | 68 |
+| MobileNetV1 | ImageNet | threshold | -49.49% | 71.22%/89.78% (+0.23%/+0.10%) | 0.05 | 0.01 | 93 |
| YOLO v3 | VOC | - | - | 76.24% | - | - | - |
-| YOLO v3 | VOC | threshold | -56.50% | 77.02% (+0.78%) | 0.001 | 0.01 | 102k iterations |
+| YOLO v3 | VOC | threshold | -56.50% | 77.21% (+0.97%) | 0.001 | 0.01 | 150k iterations |
@@ -19,12 +19,12 @@ _logger = get_logger(__name__, level=logging.INFO)
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
-add_arg('batch_size', int, 64, "Minibatch size.")
+add_arg('batch_size', int, 64 * 4, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('model', str, "MobileNet", "The target model.")
add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretrained", "Whether to use pretrained model.")
add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.")
-add_arg('lr_strategy', str, "cosine_decay", "The learning rate decay strategy.")
+add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
add_arg('threshold', float, 1e-5, "The threshold to set zeros, the abs(weights) lower than which will be zeros.")
@@ -86,8 +86,8 @@ def compress(args):
        args.pretrained_model = False
    elif args.data == "imagenet":
        import imagenet_reader as reader
-        train_dataset = reader.ImageNetDataset(data_dir='/data', mode='train')
-        val_dataset = reader.ImageNetDataset(data_dir='/data', mode='val')
+        train_dataset = reader.ImageNetDataset(mode='train')
+        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
@@ -95,14 +95,16 @@ def compress(args):
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(args.model,
                                                                     model_list)
-    places = paddle.static.cuda_places(
-    ) if args.use_gpu else paddle.static.cpu_places()
+    if args.use_gpu:
+        places = paddle.static.cuda_places()
+    else:
+        places = paddle.static.cpu_places()
    place = places[0]
    exe = paddle.static.Executor(place)
    image = paddle.static.data(
        name='image', shape=[None] + image_shape, dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    batch_size_per_card = int(args.batch_size / len(places))
    train_loader = paddle.io.DataLoader(
        train_dataset,
@@ -148,6 +150,10 @@ def compress(args):
    exe.run(paddle.static.default_startup_program())
    if args.pretrained_model:
+        assert os.path.exists(args.pretrained_model), \
+            "Pretrained model path {} doesn't exist".format(args.pretrained_model)

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model, var.name))
@@ -169,12 +175,7 @@ def compress(args):
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
-                program,
-                feed={
-                    "image": data[0].get('image'),
-                    "label": data[0].get('label')
-                },
-                fetch_list=[acc_top1.name, acc_top5.name])
+                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
@@ -190,28 +191,38 @@ def compress(args):
                np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
+        train_reader_cost = 0.0
+        train_run_cost = 0.0
+        total_samples = 0
+        reader_start = time.time()
        for batch_id, data in enumerate(train_loader):
-            start_time = time.time()
+            train_reader_cost += time.time() - reader_start
+            train_start = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
-                feed={
-                    "image": data[0].get('image'),
-                    "label": data[0].get('label')
-                },
+                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
-            end_time = time.time()
+            pruner.step()
+            train_run_cost += time.time() - train_start
+            total_samples += args.batch_size
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
-                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
+                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                    format(epoch, batch_id,
                           learning_rate.get_lr(), loss_n, acc_top1_n,
-                           acc_top5_n, end_time - start_time))
+                           acc_top5_n, train_reader_cost / args.log_period, (
+                               train_reader_cost + train_run_cost
+                           ) / args.log_period, total_samples / args.log_period,
+                           total_samples / (train_reader_cost + train_run_cost
+                           )))
+                train_reader_cost = 0.0
+                train_run_cost = 0.0
+                total_samples = 0
            learning_rate.step()
-            pruner.step()
+            reader_start = time.time()
+            batch_id += 1

    build_strategy = paddle.static.BuildStrategy()
    exec_strategy = paddle.static.ExecutionStrategy()
@@ -227,10 +238,10 @@ def compress(args):
                round(100 * UnstructuredPruner.total_sparse(
                    paddle.static.default_main_program()), 2)))
-        if i % args.test_period == 0:
+        if (i + 1) % args.test_period == 0:
            pruner.update_params()
            test(i, val_program)
-        if i > args.resume_epoch and i % args.model_period == 0:
+        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            # NOTE: We are using fluid.io.save_params() because the pretrained model is from an older version which requires this API.
            # Please consider using paddle.static.save(program, model_path) as long as it becomes possible.
...
@@ -2,9 +2,9 @@
export CUDA_VISIBLE_DEVICES=2,3
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python3.7 train.py \
-    --batch_size 256 \
+    --batch_size 512 \
    --data imagenet \
    --pruning_mode ratio \
-    --ratio 0.45 \
-    --lr 0.075 \
-    --pretrained_model /PaddleSlim/demo/pretrained_model/MobileNetV1_pretrained
+    --ratio 0.55 \
+    --lr 0.05 \
+    --pretrained_model ./MobileNetV1_pretrained

@@ -2,9 +2,8 @@
export CUDA_VISIBLE_DEVICES=2,3
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python3.7 train.py \
-    --batch_size=256 \
+    --batch_size=512 \
    --data="mnist" \
    --pruning_mode="threshold" \
-    --ratio=0.45 \
-    --threshold=1e-5 \
-    --lr=0.075 \
+    --threshold=0.01 \
+    --lr=0.05 \