Commit 49eab558 authored by xujinanne

python install

Parent 73e97d39
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
@@ -12,9 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from paddle.check_import_scipy import check_import_scipy
check_import_scipy(os.name)
try:
from paddle.version import full_version as __version__
......
File mode changed from 100644 to 100755
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def check_import_scipy(OsName):
    print_info = ""
    if OsName == 'nt':
        # On Windows, importing scipy can fail with a DLL load error when the
        # Visual C++ runtime is missing; surface a more actionable message.
        try:
            import scipy.io as scio
        except ImportError as e:
            print_info = str(e)
        if len(print_info) > 0:
            if 'DLL load failed' in print_info:
                raise ImportError(
                    print_info +
                    "\nPlease download the Visual C++ Redistributable for VS 2015: https://www.microsoft.com/en-us/download/details.aspx?id=48145"
                )
    return
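A minimal usage sketch (this mirrors the paddle/__init__.py hunk above; os.name evaluates to 'nt' only on Windows, so the check is a no-op elsewhere):

import os
from paddle.check_import_scipy import check_import_scipy

# On Windows, a scipy DLL load failure is re-raised with a pointer to the
# VC++ 2015 redistributable; on other platforms nothing is imported or checked.
check_import_scipy(os.name)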
File mode changed from 100644 to 100755
@@ -22,10 +22,8 @@ import paddle.dataset.cifar
import paddle.dataset.movielens
import paddle.dataset.conll05
import paddle.dataset.uci_housing
import paddle.dataset.sentiment
import paddle.dataset.wmt14
import paddle.dataset.wmt16
import paddle.dataset.mq2007
import paddle.dataset.flowers
import paddle.dataset.voc2012
import paddle.dataset.image
@@ -37,11 +35,9 @@ __all__ = [
'cifar',
'movielens',
'conll05',
'sentiment',
'uci_housing',
'wmt14',
'wmt16',
'mq2007',
'flowers',
'voc2012',
'image',
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MQ2007 dataset.
MQ2007 is a query set from the Million Query track of TREC 2007. It contains about 1700 queries with labeled documents. MQ2007 adopts the 5-fold cross
validation strategy, and the 5-fold partitions are included in the package. Each fold provides three subsets for learning: a training set,
a validation set and a testing set.
This module downloads the MQ2007 dataset from
http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar and parses the training set and test set into paddle reader creators.
"""
from __future__ import print_function
import os
import sys  # used by Query._parse_ for error reporting
import functools
import rarfile
from .common import download
import numpy as np
# URL = "http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar"
URL = "http://www.bigdatalab.ac.cn/benchmark/upload/download_source/7b6dbbe2-842c-11e4-a536-bcaec51b9163_MQ2007.rar"
MD5 = "7be1640ae95c6408dab0ae7207bdc706"
def __initialize_meta_info__():
"""
download and extract the MQ2007 dataset
"""
fn = fetch()
rar = rarfile.RarFile(fn)
dirpath = os.path.dirname(fn)
rar.extractall(path=dirpath)
return dirpath
class Query(object):
"""
    queries used by learning-to-rank algorithms; a Query is created from a relevance score, a query-document feature vector and a description
    Parameters:
    ----------
    query_id : int
        query_id in the dataset, mapping the query to its relevant documents
    relevance_score : int
        relevance score of the query-document pair
    feature_vector : array, dense feature
        feature in vector format
    description : string
        comment section in the query-document pair data
"""
def __init__(self,
query_id=-1,
relevance_score=-1,
feature_vector=None,
description=""):
self.query_id = query_id
self.relevance_score = relevance_score
if feature_vector is None:
self.feature_vector = []
else:
self.feature_vector = feature_vector
self.description = description
def __str__(self):
string = "%s %s %s" % (str(self.relevance_score), str(self.query_id),
" ".join(str(f) for f in self.feature_vector))
return string
    # @classmethod
    def _parse_(self, text):
        """
        parse a line of text into a Query
        """
        comment_position = text.find('#')
        line = text[:comment_position].strip()
        self.description = text[comment_position + 1:].strip()
        parts = line.split()
        if len(parts) != 48:
            sys.stdout.write("expected 48 space-separated parts, got %d\n" %
                             (len(parts)))
            return None
# format : 0 qid:10 1:0.000272 2:0.000000 ....
self.relevance_score = int(parts[0])
self.query_id = int(parts[1].split(':')[1])
for p in parts[2:]:
pair = p.split(':')
self.feature_vector.append(float(pair[1]))
return self
class QueryList(object):
"""
group query into list, every item in list is a Query
"""
def __init__(self, querylist=None):
self.query_id = -1
if querylist is None:
self.querylist = []
else:
self.querylist = querylist
for query in self.querylist:
if self.query_id == -1:
self.query_id = query.query_id
else:
if self.query_id != query.query_id:
                    raise ValueError("queries in a list must share the same query_id")
def __iter__(self):
for query in self.querylist:
yield query
def __len__(self):
return len(self.querylist)
def __getitem__(self, i):
return self.querylist[i]
def _correct_ranking_(self):
if self.querylist is None:
return
self.querylist.sort(key=lambda x: x.relevance_score, reverse=True)
def _add_query(self, query):
if self.query_id == -1:
self.query_id = query.query_id
else:
if self.query_id != query.query_id:
                raise ValueError("queries in a list must share the same query_id")
self.querylist.append(query)
def gen_plain_txt(querylist):
"""
    gen plain text in list for other usage
    Parameters:
    --------
    querylist : querylist, one query matches many document pairs in the list, see QueryList
    return :
    ------
    query_id : np.array, shape=(samples_num, )
    label : np.array, shape=(samples_num, )
    querylist : np.array, shape=(samples_num, feature_dimension)
    """
if not isinstance(querylist, QueryList):
querylist = QueryList(querylist)
querylist._correct_ranking_()
for query in querylist:
yield querylist.query_id, query.relevance_score, np.array(
query.feature_vector)
def gen_point(querylist):
"""
    gen items in list for point-wise learning to rank algorithms
    Parameters:
    --------
    querylist : querylist, one query matches many document pairs in the list, see QueryList
    return :
    ------
    label : np.array, shape=(samples_num, )
    querylist : np.array, shape=(samples_num, feature_dimension)
    """
if not isinstance(querylist, QueryList):
querylist = QueryList(querylist)
querylist._correct_ranking_()
for query in querylist:
yield query.relevance_score, np.array(query.feature_vector)
def gen_pair(querylist, partial_order="full"):
"""
    gen pairs for pair-wise learning to rank algorithms
    Parameters:
    --------
    querylist : querylist, one query matches many document pairs in the list, see QueryList
    partial_order : "full" or "neighbour"
        there is redundancy among all possible pair combinations, which can be simplified;
        gen pairs for neighbouring items or for the full partial-order pairs
    return :
    ------
    label : np.array, shape=(1)
    query_left : np.array, shape=(1, feature_dimension)
    query_right : same as left
    """
if not isinstance(querylist, QueryList):
querylist = QueryList(querylist)
querylist._correct_ranking_()
labels = []
docpairs = []
# C(n,2)
for i in range(len(querylist)):
query_left = querylist[i]
for j in range(i + 1, len(querylist)):
query_right = querylist[j]
if query_left.relevance_score > query_right.relevance_score:
labels.append([1])
docpairs.append([
np.array(query_left.feature_vector),
np.array(query_right.feature_vector)
])
elif query_left.relevance_score < query_right.relevance_score:
labels.append([1])
docpairs.append([
np.array(query_right.feature_vector),
np.array(query_left.feature_vector)
])
for label, pair in zip(labels, docpairs):
yield np.array(label), pair[0], pair[1]
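# Illustrative walk-through (not part of the dataset API): for three documents
# with relevance scores [2, 0, 1], the C(n,2) loop above keeps only pairs with
# unequal scores and always puts the more relevant document on the left:
#   (doc0, doc1): 2 > 0 -> label [1], left = doc0, right = doc1
#   (doc0, doc2): 2 > 1 -> label [1], left = doc0, right = doc2
#   (doc1, doc2): 0 < 1 -> label [1], left = doc2, right = doc1
# Tied scores produce no pair, which is why the yielded label is always [1].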
def gen_list(querylist):
"""
    gen items in list for list-wise learning to rank algorithms
    Parameters:
    --------
    querylist : querylist, one query matches many document pairs in the list, see QueryList
    return :
    ------
    label : np.array, shape=(samples_num, )
    querylist : np.array, shape=(samples_num, feature_dimension)
    """
if not isinstance(querylist, QueryList):
querylist = QueryList(querylist)
querylist._correct_ranking_()
relevance_score_list = [[query.relevance_score] for query in querylist]
feature_vector_list = [query.feature_vector for query in querylist]
yield np.array(relevance_score_list), np.array(feature_vector_list)
def query_filter(querylists):
"""
    filter out query lists in which every document has label 0.
    labels 0, 1 and 2 are the relevance scores of a document with respect to the query
    parameters :
        querylists : list of QueryList
    return :
        filter_query : list of QueryList
    """
filter_query = []
for querylist in querylists:
relevance_score_list = [query.relevance_score for query in querylist]
if sum(relevance_score_list) != .0:
filter_query.append(querylist)
return filter_query
def load_from_text(filepath, shuffle=False, fill_missing=-1):
"""
    parse the data file into queries
"""
prev_query_id = -1
querylists = []
querylist = None
fn = __initialize_meta_info__()
with open(os.path.join(fn, filepath)) as f:
for line in f:
query = Query()
query = query._parse_(line)
            if query is None:
continue
if query.query_id != prev_query_id:
if querylist is not None:
querylists.append(querylist)
querylist = QueryList()
prev_query_id = query.query_id
querylist._add_query(query)
if querylist is not None:
querylists.append(querylist)
return querylists
def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
"""
    Parameters
    --------
    filepath : string
    format : "plain_txt" | "pointwise" | "pairwise" | "listwise"
    fill_missing : fill the missing value. default in MQ2007 is -1
    Returns
    ------
    yield
        label, query_left, query_right  # format = "pairwise"
        label, querylist  # format = "listwise"
    """
querylists = query_filter(
load_from_text(
filepath, shuffle=shuffle, fill_missing=fill_missing))
for querylist in querylists:
if format == "plain_txt":
yield next(gen_plain_txt(querylist))
elif format == "pointwise":
yield next(gen_point(querylist))
elif format == "pairwise":
for pair in gen_pair(querylist):
yield pair
elif format == "listwise":
yield next(gen_list(querylist))
train = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/train.txt")
test = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/test.txt")
def fetch():
return download(URL, "MQ2007", MD5)
if __name__ == "__main__":
fetch()
mytest = functools.partial(
__reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise")
for label, query in mytest():
print(label, query)
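A hedged usage sketch for the reader creators defined above (it assumes rarfile is installed and the download succeeds; train defaults to the pairwise format):

import paddle.dataset.mq2007 as mq2007

# Each pairwise sample is (label, left_features, right_features); the label is
# always [1] and the left document is the more relevant one of the pair.
for label, left, right in mq2007.train():
    print(label, left.shape, right.shape)
    break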
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script fetches and preprocesses the movie_reviews data set provided by NLTK
TODO(yuyang18): Complete dataset.
"""
from __future__ import print_function
import six
import collections
from itertools import chain
import os
import nltk
from nltk.corpus import movie_reviews
import zipfile
from functools import cmp_to_key
import paddle.dataset.common
URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip"
MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
__all__ = ['train', 'test', 'get_word_dict']
NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000
def download_data_if_not_yet():
"""
    Download the data set if it has not been downloaded yet.
"""
try:
# download and extract movie_reviews.zip
paddle.dataset.common.download(
URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
filename = os.path.join(path, 'movie_reviews.zip')
zip_file = zipfile.ZipFile(filename)
zip_file.extractall(path)
zip_file.close()
# make sure that nltk can find the data
if paddle.dataset.common.DATA_HOME not in nltk.data.path:
nltk.data.path.append(paddle.dataset.common.DATA_HOME)
movie_reviews.categories()
except LookupError:
print("Downloading movie_reviews data set, please wait.....")
nltk.download(
'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
        print("Download data set succeeded.....")
print("Path is " + nltk.data.find('corpora/movie_reviews').path)
def get_word_dict():
"""
    Sort the words by the frequency with which they occur in the samples
:return:
words_freq_sorted
"""
words_freq_sorted = list()
word_freq_dict = collections.defaultdict(int)
download_data_if_not_yet()
for category in movie_reviews.categories():
for field in movie_reviews.fileids(category):
for words in movie_reviews.words(field):
word_freq_dict[words] += 1
words_sort_list = list(six.iteritems(word_freq_dict))
words_sort_list.sort(key=cmp_to_key(lambda a, b: b[1] - a[1]))
for index, word in enumerate(words_sort_list):
words_freq_sorted.append((word[0], index))
return words_freq_sorted
def sort_files():
"""
    Sort the sample files for cross reading, so that negative and positive samples alternate
:return:
files_list
"""
files_list = list()
neg_file_list = movie_reviews.fileids('neg')
pos_file_list = movie_reviews.fileids('pos')
files_list = list(
chain.from_iterable(list(zip(neg_file_list, pos_file_list))))
return files_list
def load_sentiment_data():
"""
Load the data set
:return:
data_set
"""
data_set = list()
download_data_if_not_yet()
words_ids = dict(get_word_dict())
for sample_file in sort_files():
words_list = list()
category = 0 if 'neg' in sample_file else 1
for word in movie_reviews.words(sample_file):
words_list.append(words_ids[word.lower()])
data_set.append((words_list, category))
return data_set
def reader_creator(data):
"""
    Reader creator; generates an iterator over the data set
:param data:
train data set or test data set
"""
for each in data:
yield each[0], each[1]
def train():
"""
Default training set reader creator
"""
data_set = load_sentiment_data()
return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
def test():
"""
Default test set reader creator
"""
data_set = load_sentiment_data()
return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
def fetch():
nltk.download('movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
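A hedged usage sketch (it assumes the NLTK movie_reviews corpus can be fetched; the word ids come from get_word_dict above):

import paddle.dataset.sentiment as st

# Each sample is (list_of_word_ids, label), where label 0 is negative and 1 is positive.
for word_ids, label in st.train():
    print(len(word_ids), label)
    break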
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import nltk
import paddle.dataset.sentiment as st
from nltk.corpus import movie_reviews
class TestSentimentMethods(unittest.TestCase):
def test_get_word_dict(self):
word_dict = st.get_word_dict()[0:10]
test_word_list = [(',', 0), ('the', 1), ('.', 2), ('a', 3), ('and', 4),
('of', 5), ('to', 6), ("'", 7), ('is', 8), ('in', 9)]
for idx, each in enumerate(word_dict):
self.assertEqual(each, test_word_list[idx])
self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
def test_sort_files(self):
last_label = ''
for sample_file in st.sort_files():
current_label = sample_file.split("/")[0]
self.assertNotEqual(current_label, last_label)
last_label = current_label
def test_data_set(self):
data_set = st.load_sentiment_data()
last_label = -1
for each in st.test():
self.assertNotEqual(each[1], last_label)
last_label = each[1]
self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
self.assertEqual(
len(list(st.test())),
(st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
if __name__ == '__main__':
unittest.main()
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
UCI Housing dataset.
This module will download dataset from
https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
parse training set and test set into paddle reader creators.
"""
from __future__ import print_function
import numpy as np
import six
import tempfile
import tarfile
import os
import paddle.dataset.common
__all__ = ['train', 'test']
URL = 'http://paddlemodels.bj.bcebos.com/uci_housing/housing.data'
MD5 = 'd4accdce7a25600298819f8e28e8d593'
feature_names = [
'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
'PTRATIO', 'B', 'LSTAT'
]
UCI_TRAIN_DATA = None
UCI_TEST_DATA = None
FLUID_URL_MODEL = 'https://github.com/PaddlePaddle/book/raw/develop/01.fit_a_line/fluid/fit_a_line.fluid.tar'
FLUID_MD5_MODEL = '6e6dd637ccd5993961f68bfbde46090b'
def feature_range(maximums, minimums):
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
feature_num = len(maximums)
ax.bar(list(range(feature_num)),
maximums - minimums,
color='r',
align='center')
ax.set_title('feature scale')
plt.xticks(list(range(feature_num)), feature_names)
plt.xlim([-1, feature_num])
fig.set_figheight(6)
fig.set_figwidth(10)
if not os.path.exists('./image'):
os.makedirs('./image')
fig.savefig('image/ranges.png', dpi=48)
plt.close(fig)
def load_data(filename, feature_num=14, ratio=0.8):
global UCI_TRAIN_DATA, UCI_TEST_DATA
if UCI_TRAIN_DATA is not None and UCI_TEST_DATA is not None:
return
data = np.fromfile(filename, sep=' ')
data = data.reshape(data.shape[0] // feature_num, feature_num)
maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(
axis=0) / data.shape[0]
feature_range(maximums[:-1], minimums[:-1])
for i in six.moves.range(feature_num - 1):
data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
offset = int(data.shape[0] * ratio)
UCI_TRAIN_DATA = data[:offset]
UCI_TEST_DATA = data[offset:]
def train():
"""
UCI_HOUSING training set creator.
    It returns a reader creator; each sample in the reader is the normalized
    features together with the price.
:return: Training reader creator
:rtype: callable
"""
global UCI_TRAIN_DATA
load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TRAIN_DATA:
yield d[:-1], d[-1:]
return reader
def test():
"""
UCI_HOUSING test set creator.
    It returns a reader creator; each sample in the reader is the normalized
    features together with the price.
:return: Test reader creator
:rtype: callable
"""
global UCI_TEST_DATA
load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TEST_DATA:
yield d[:-1], d[-1:]
return reader
def fluid_model():
parameter_tar = paddle.dataset.common.download(
FLUID_URL_MODEL, 'uci_housing', FLUID_MD5_MODEL, 'fit_a_line.fluid.tar')
tar = tarfile.TarFile(parameter_tar, mode='r')
dirpath = tempfile.mkdtemp()
tar.extractall(path=dirpath)
return dirpath
def predict_reader():
"""
    It returns just one tuple of data for inference.
:return: one tuple data
:rtype: tuple
"""
global UCI_TEST_DATA
load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
return (UCI_TEST_DATA[0][:-1], )
def fetch():
paddle.dataset.common.download(URL, 'uci_housing', MD5)
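A hedged usage sketch (the 80/20 train/test split comes from load_data's ratio argument; note that the first load also writes image/ranges.png as a side effect of feature_range):

import paddle.dataset.uci_housing as uci_housing

# train() returns a reader; each sample is (13 normalized features, price).
reader = uci_housing.train()
for features, price in reader():
    print(features.shape, price)  # (13,) and a length-1 array
    break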
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
@@ -24,14 +24,10 @@ from . import quantize
from .quantize import *
from . import reader
from .reader import *
from . import slim
from .slim import *
from . import utils
from .utils import *
from . import extend_optimizer
from .extend_optimizer import *
from . import model_stat
from .model_stat import *
from . import mixed_precision
from .mixed_precision import *
from . import layers
@@ -43,7 +39,6 @@ __all__ += memory_usage_calc.__all__
__all__ += op_frequence.__all__
__all__ += quantize.__all__
__all__ += reader.__all__
__all__ += slim.__all__
__all__ += utils.__all__
__all__ += extend_optimizer.__all__
__all__ += ['mixed_precision']
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Example:
    >>> from paddle.fluid.contrib.model_stat import summary
    >>> main_program = ...
    >>> summary(main_program)
+-----+------------+----------------+----------------+---------+------------+
| No. | TYPE | INPUT | OUTPUT | PARAMs | FLOPs |
+-----+------------+----------------+----------------+---------+------------+
| 0 | conv2d | (3, 200, 200) | (64, 100, 100) | 9408 | 188160000 |
| 1 | batch_norm | (64, 100, 100) | (64, 100, 100) | 256 | 640000 |
| 2 | relu | (64, 100, 100) | (64, 100, 100) | 0 | 640000 |
| 3 | pool2d | (64, 100, 100) | (64, 50, 50) | 0 | 1440000 |
...
| 176 | conv2d | (512, 7, 7) | (512, 7, 7) | 2359296 | 231211008 |
| 177 | relu | (512, 7, 7) | (512, 7, 7) | 0 | 25088 |
| 178 | conv2d | (512, 7, 7) | (2048, 7, 7) | 1048576 | 102760448 |
| 179 | relu | (2048, 7, 7) | (2048, 7, 7) | 0 | 100352 |
| 180 | pool2d | (2048, 7, 7) | (2048, 1, 1) | 0 | 100352 |
+-----+------------+----------------+----------------+---------+------------+
Total PARAMs: 48017344(48.0173M)
Total FLOPs: 11692747751(11.69G)
'''
from collections import OrderedDict
from prettytable import PrettyTable
def summary(main_prog):
'''
    Summarize a model's PARAMs and FLOPs collected so far.
    It supports common operators like conv, fc, pool, relu, sigmoid, bn etc.
    Args:
        main_prog: main program
    Returns:
        None. The summary table is printed to the terminal.
    '''
collected_ops_list = []
for one_b in main_prog.blocks:
block_vars = one_b.vars
for one_op in one_b.ops:
op_info = OrderedDict()
spf_res = _summary_model(block_vars, one_op)
if spf_res is None:
continue
# TODO: get the operator name
op_info['type'] = one_op.type
op_info['input_shape'] = spf_res[0][1:]
op_info['out_shape'] = spf_res[1][1:]
op_info['PARAMs'] = spf_res[2]
op_info['FLOPs'] = spf_res[3]
collected_ops_list.append(op_info)
summary_table, total = _format_summary(collected_ops_list)
_print_summary(summary_table, total)
def _summary_model(block_vars, one_op):
'''
    Compute one operator's params and flops.
    Args:
        block_vars: all vars of one block
        one_op: one operator to count
    Returns:
        in_data_shape: one operator's input data shape
        out_data_shape: one operator's output data shape
        params: one operator's PARAMs
        flops: one operator's FLOPs
    '''
if one_op.type in ['conv2d', 'depthwise_conv2d']:
k_arg_shape = block_vars[one_op.input("Filter")[0]].shape
in_data_shape = block_vars[one_op.input("Input")[0]].shape
out_data_shape = block_vars[one_op.output("Output")[0]].shape
c_out, c_in, k_h, k_w = k_arg_shape
_, c_out_, h_out, w_out = out_data_shape
assert c_out == c_out_, 'shape error!'
k_groups = one_op.attr("groups")
kernel_ops = k_h * k_w * (c_in / k_groups)
bias_ops = 0 if one_op.input("Bias") == [] else 1
params = c_out * (kernel_ops + bias_ops)
flops = h_out * w_out * c_out * (kernel_ops + bias_ops)
# base nvidia paper, include mul and add
flops = 2 * flops
elif one_op.type == 'pool2d':
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Out")[0]].shape
_, c_out, h_out, w_out = out_data_shape
k_size = one_op.attr("ksize")
params = 0
flops = h_out * w_out * c_out * (k_size[0] * k_size[1])
elif one_op.type == 'mul':
k_arg_shape = block_vars[one_op.input("Y")[0]].shape
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Out")[0]].shape
# TODO: fc has mul ops
# add attr to mul op, tell us whether it belongs to 'fc'
        # this is not the best way
if 'fc' not in one_op.output("Out")[0]:
return None
k_in, k_out = k_arg_shape
# bias in sum op
params = k_in * k_out + 1
flops = k_in * k_out
elif one_op.type in ['sigmoid', 'tanh', 'relu', 'leaky_relu', 'prelu']:
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Out")[0]].shape
params = 0
if one_op.type == 'prelu':
params = 1
flops = 1
for one_dim in in_data_shape:
flops *= one_dim
elif one_op.type == 'batch_norm':
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Y")[0]].shape
_, c_in, h_out, w_out = in_data_shape
# gamma, beta
params = c_in * 2
# compute mean and std
flops = h_out * w_out * c_in * 2
else:
return None
return in_data_shape, out_data_shape, params, flops
def _format_summary(collected_ops_list):
'''
Format summary report.
Args:
collected_ops_list: the collected operator with summary
Returns:
summary_table: summary report format
total: sum param and flops
'''
summary_table = PrettyTable(
["No.", "TYPE", "INPUT", "OUTPUT", "PARAMs", "FLOPs"])
summary_table.align = 'r'
total = {}
total_params = []
total_flops = []
for i, one_op in enumerate(collected_ops_list):
# notice the order
table_row = [
i,
one_op['type'],
one_op['input_shape'],
one_op['out_shape'],
int(one_op['PARAMs']),
int(one_op['FLOPs']),
]
summary_table.add_row(table_row)
total_params.append(int(one_op['PARAMs']))
total_flops.append(int(one_op['FLOPs']))
total['params'] = total_params
total['flops'] = total_flops
return summary_table, total
def _print_summary(summary_table, total):
'''
Print all the summary on terminal.
Args:
summary_table: summary report format
total: sum param and flops
'''
    params = total['params']
    flops = total['flops']
    print(summary_table)
    print('Total PARAMs: {}({:.4f}M)'.format(
        sum(params), sum(params) / (10**6)))
    print('Total FLOPs: {}({:.2f}G)'.format(sum(flops), sum(flops) / 10**9))
    print(
        "Notice: \n now supported ops include [Conv, DepthwiseConv, FC(mul), BatchNorm, Pool, Activation(sigmoid, tanh, relu, leaky_relu, prelu)]"
    )
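A quick sanity check on the conv2d rule in _summary_model (assuming a 7x7 kernel with groups=1 and no bias, which reproduces the first row of the example table above):

c_in, c_out, k_h, k_w = 3, 64, 7, 7
h_out, w_out = 100, 100
kernel_ops = k_h * k_w * c_in                    # 147 multiply-adds per output element
params = c_out * kernel_ops                      # 9408, matching the PARAMs column
flops = 2 * h_out * w_out * c_out * kernel_ops   # 188160000, matching the FLOPs column
print(params, flops)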
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .core import *
__all__ = ['Compressor', ]
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import config
from .config import *
from . import compressor
from .compressor import *
from . import strategy
from .strategy import *
__all__ = config.__all__ + compressor.__all__ + strategy.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ....core import CPUPlace, EOFException
from .... import compiler
from ....framework import Variable
from .... import io
from .... import profiler
from .... import scope_guard
from ....data_feeder import DataFeeder
from ....log_helper import get_logger
from ....reader import DataLoaderBase
from ..graph import *
from .config import ConfigFactory
import numpy as np
from collections import Iterable
import time
import os
import logging
import sys
import pickle
import functools
import traceback
__all__ = ['Context', 'Compressor']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def cached_reader(reader, sampled_rate, cache_path, cached_id):
"""
Sample partial data from reader and cache them into local file system.
Args:
reader: Iterative data source.
sampled_rate(float): The sampled rate used to sample partial data for evaluation. None means using all data in eval_reader. default: None.
cache_path(str): The path to cache the sampled data.
cached_id(int): The id of dataset sampled. Evaluations with same cached_id use the same sampled dataset. default: 0.
"""
np.random.seed(cached_id)
cache_path = os.path.join(cache_path, str(cached_id))
_logger.debug('read data from: {}'.format(cache_path))
def s_reader():
if os.path.isdir(cache_path):
for file_name in open(os.path.join(cache_path, "list")):
yield np.load(
os.path.join(cache_path, file_name.strip()),
allow_pickle=True)
else:
os.makedirs(cache_path)
list_file = open(os.path.join(cache_path, "list"), 'w')
batch = 0
dtype = None
for data in reader():
if batch == 0 or (np.random.uniform() < sampled_rate):
np.save(
os.path.join(cache_path, 'batch' + str(batch)), data)
list_file.write('batch' + str(batch) + '.npy\n')
batch += 1
yield data
return s_reader
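# Usage sketch (reader name is illustrative): the first pass saves batch 0 plus
# a random ~30% of batches under ./eval_cache/0/ together with a "list" index
# file; later calls with the same cached_id replay exactly that cached subset.
#
#   sampled = cached_reader(my_eval_reader, sampled_rate=0.3,
#                           cache_path='./eval_cache', cached_id=0)
#   for data in sampled():
#       ...  # evaluate on the sampled batches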
class Context(object):
"""
The context in the process of compression.
"""
def __init__(self,
place,
scope,
train_graph=None,
train_reader=None,
eval_graph=None,
eval_reader=None,
teacher_graphs=None,
train_optimizer=None,
distiller_optimizer=None,
search_space=None):
"""
Args:
place: The device place where the compression job running.
scope: The scope used in compression job.
train_graph: The graph with loss as output node.
eval_graph: The graph used for evaluation.
eval_reader: The data reader used for evaluation.
teacher_graphs: The teacher graphs used in distillation strategies.
train_optimizer: The optimizer used to append backward ops and
optimization ops into train_graph.
distiller_optimizer: The optimizer used by distillation strategies.
"""
        # The total number of epochs to be trained.
self.epoch = 0
# Current epoch
self.epoch_id = 0
# Current batch
self.batch_id = 0
self.k_v = {}
self.place = place
self.scope = scope
self.train_graph = train_graph
self.train_reader = train_reader
self.eval_graph = eval_graph
self.eval_reader = eval_reader
self.executor = None
self.teacher_graphs = teacher_graphs
self.train_optimizer = train_optimizer
self.distiller_optimizer = distiller_optimizer
self.optimize_graph = None
self.cache_path = './eval_cache'
self.eval_results = {}
self.skip_training = False
self.search_space = search_space
def to_file(self, file_name):
"""
Save the context into file.
"""
data = {}
data['epoch_id'] = self.epoch_id
data['eval_results'] = self.eval_results
with open(file_name, 'wb') as context_file:
pickle.dump(data, context_file)
def from_file(self, file_name):
"""
Load the context from file.
"""
with open(file_name, 'rb') as context_file:
if sys.version_info < (3, 0):
data = pickle.load(context_file)
else:
data = pickle.load(context_file, encoding='bytes')
self.epoch_id = data['epoch_id']
self.eval_results = data['eval_results']
def eval_converged(self, metric_name, delta=0.001):
"""
        Check whether the training has converged.
        Args:
            metric_name(str): The metric used to check convergence.
            delta(float): 'abs(metric[k] - metric[k-1]) / metric[k-1] < delta'
                          means that the training has converged.
        Returns:
            bool: True means the training has converged.
        """
        # TODO(wanghaoshuang@baidu.com): enhance this method.
if (metric_name not in self.eval_results
) or len(self.eval_results[metric_name]) < 2:
return False
results = self.eval_results[metric_name][-2:]
_logger.info('Latest evaluations: {}'.format(results))
return abs(results[1] - results[0]) / results[0] < delta
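    # Numeric example (illustrative): with eval_results['acc'] == [0.90, 0.9005],
    # abs(0.9005 - 0.90) / 0.90 is about 0.00056 < delta (0.001), so the method
    # returns True; a jump to 0.92 would give about 0.022 and return False.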
def run_eval_graph(self, sampled_rate=None, cached_id=0):
"""
        Evaluate the current model in the context.
Args:
sampled_rate(float): The sampled rate used to sample partial data
for evaluation. None means using all data in eval_reader. default: None.
cached_id(int): The id of dataset sampled. Evaluations with same
cached_id use the same sampled dataset. default: 0.
"""
_logger.info('Running evaluation')
assert self.eval_graph is not None
assert self.eval_reader is not None
eval_graph = self.eval_graph.clone(for_test=True)
executor = SlimGraphExecutor(self.place)
results = []
batch_id = 0
s_time = time.time()
reader = self.eval_reader
if sampled_rate:
assert (not isinstance(reader, Variable))
assert (sampled_rate > 0)
assert (self.cache_path is not None)
_logger.info('sampled_rate: {}; cached_id: {}'.format(sampled_rate,
cached_id))
reader = cached_reader(reader, sampled_rate, self.cache_path,
cached_id)
if isinstance(reader, Variable) or (
isinstance(reader, DataLoaderBase) and (not reader.iterable)):
reader.start()
try:
while True:
result = executor.run(eval_graph, self.scope)
result = [np.mean(r) for r in result]
results.append(result)
if batch_id % 20 == 0:
_logger.info("batch-{}; {}={}".format(
batch_id, eval_graph.out_nodes.keys(), result))
batch_id += 1
except EOFException:
reader.reset()
else:
for data in reader():
result = executor.run(eval_graph, self.scope, data=data)
result = [np.mean(r) for r in result]
results.append(result)
if batch_id % 20 == 0:
_logger.info("batch-{}; {}={}".format(
batch_id, eval_graph.out_nodes.keys(), result))
batch_id += 1
result = np.mean(np.array(results), axis=0)
_logger.info("Final eval result: {}={}".format(
eval_graph.out_nodes.keys(), result))
if not isinstance(result, Iterable):
result = [result]
_logger.info('Finish evaluation')
return result, eval_graph.out_nodes.keys()
def put(self, key, value):
self.k_v[key] = value
def get(self, key):
return self.k_v.get(key)
class Compressor(object):
"""
    The pass used to compress the model.
"""
def __init__(self,
place,
scope,
train_program,
train_reader=None,
train_feed_list=None,
train_fetch_list=None,
eval_program=None,
eval_reader=None,
eval_feed_list=None,
eval_fetch_list=None,
eval_func=None,
save_eval_model=True,
prune_infer_model=None,
teacher_programs=[],
checkpoint_path=None,
train_optimizer=None,
distiller_optimizer=None,
search_space=None,
log_period=20):
"""
Args:
place(fluid.Place): The device place where the compression job running.
scope(fluid.core.Scope): The scope used to run graph.
train_program(Program): The main program to be compressed. It must have loss op.
train_reader: The data reader used for training.
train_feed_list(dict): A dict to indicate the input variable of the training program.
The key is user-defined and human-readable name.
The value is the name of Variable.
train_fetch_list(dict): A dict to indicate the output variable of the training program.
The key is user-defined and human-readable name.
The value is the name of Variable.
eval_program(Program): The program used for evaluation.
eval_reader: The data reader used for evaluation. It can be None if eval_func is not None.
eval_feed_list(dict): A dict to indicate the input variable of the evaluation program.
The key is user-defined and human-readable name.
The value is the name of Variable.
It can be None if eval_func is not None.
eval_fetch_list(dict): A dict to indicate the output variable of the evaluation program.
The key is user-defined and human-readable name.
The value is the name of Variable.
eval_func(dict|function): Callback functions used to evaluate the compressed model.
The eval_func is a dict, the key is user-defined name and the value is
a callback function. And the score returned from callback functions
can be referenced in config file by the key of eval_func.
The args of callback function are compressed eval_program and scope which
store the compressed parameters.
Default: None.
save_eval_model(bool): Whether to save eval model when saving checkpoints. Default: True.
prune_infer_model(tuple|list): If prune_infer_model is not None, compressor will prune
eval program into inference program according to inputs and outputs
defined in prune_infer_model. prune_infer_model[0] is a list of input
variables' names and prune_infer_model[1] is a list of output variables'
names. If prune_infer_model is None, it will not save inference model.
Default: None.
teacher_programs: The teacher graphs used in distillation strategies.
train_optimizer: The optimizer used to append backward ops and
optimization ops into train_graph.
distiller_optimizer: The optimizer used by distillation strategies. In distillation strategy,
this optimizer is used to minimize the combined loss of student-net and
teacher-net while train_optimizer is used to minimize loss of
student-net in fine-tune stage.
            search_space(slim.nas.SearchSpace): The instance that defines the search space. It must inherit
                                   the slim.nas.SearchSpace class and override the abstract methods.
            log_period(int): The period, in batches, at which training logs are printed.
"""
assert train_feed_list is None or isinstance(
train_feed_list, list
), "train_feed_list should be a list of tuple, such as [('image', image.name), ('label', gt.name)]"
assert eval_feed_list is None or isinstance(
eval_feed_list, list
), "eval_feed_list should be a list of tuple, such as [('image', image.name), ('label', gt.name)]"
self.strategies = []
self.epoch = 0
self.place = CPUPlace() if place is None else place
self.scope = scope
self.train_graph = GraphWrapper(
train_program, in_nodes=train_feed_list, out_nodes=train_fetch_list)
self.eval_graph = GraphWrapper(
eval_program, in_nodes=eval_feed_list, out_nodes=eval_fetch_list)
self.train_reader = train_reader
self.eval_reader = eval_reader
self.eval_func = eval_func
self.save_eval_model = save_eval_model
self.prune_infer_model = prune_infer_model
self.teacher_graphs = []
for teacher in teacher_programs:
self.teacher_graphs.append(GraphWrapper(teacher))
self.checkpoint = None
self.checkpoint_path = checkpoint_path
self.eval_epoch = 1
self.train_optimizer = train_optimizer
self.distiller_optimizer = distiller_optimizer
self.init_model = None
self.search_space = search_space
self.log_period = log_period
assert (log_period > 0)
def _add_strategy(self, strategy):
"""
Add a strategy to current compress pass.
Args:
strategy: The strategy to be added into current compress pass.
"""
self.strategies.append(strategy)
self.epoch = max(strategy.end_epoch, self.epoch)
def config(self, config_file):
"""
Configure the compress pass from file with yaml format.
Args:
config_file(str): The config file in local file system.
"""
factory = ConfigFactory(config_file)
self.epoch = factory.compressor['epoch']
for strategy in factory.compressor['strategies']:
self._add_strategy(strategy)
if 'checkpoint_path' in factory.compressor:
self.checkpoint_path = factory.compressor['checkpoint_path']
if 'init_model' in factory.compressor:
self.init_model = factory.compressor['init_model']
if 'eval_epoch' in factory.compressor:
self.eval_epoch = factory.compressor['eval_epoch']
assert (self.eval_epoch > 0)
def _init_model(self, context):
"""
Load model that has been compressed.
"""
if self.init_model and os.path.exists(self.init_model):
exe = SlimGraphExecutor(context.place)
with scope_guard(context.scope):
context.train_graph.load_persistables(self.init_model, exe)
flops = context.eval_graph.flops()
conv_flops = context.eval_graph.flops(only_conv=True)
context.eval_graph.update_param_shape(context.scope)
context.eval_graph.update_groups_of_conv()
_logger.info("conv flops: -{}".format(1 - float(
context.eval_graph.flops(only_conv=True)) / conv_flops))
_logger.info("total flops: -{}".format(1 - float(
context.eval_graph.flops()) / flops))
context.train_graph.update_param_shape(context.scope)
context.train_graph.update_groups_of_conv()
context.train_graph.infer_shape()
_logger.info("Init model from: {}".format(self.init_model))
def _load_checkpoint(self, context):
"""
Load checkpoints from file.
"""
_logger.debug('_load_checkpoint')
strategies = self.strategies
if self.checkpoint_path:
if not os.path.exists(self.checkpoint_path):
                _logger.warning("Checkpoints path doesn't exist: [{}]".format(
self.checkpoint_path))
return context, strategies
checkpoints = [
dir for dir in os.listdir(self.checkpoint_path)
if os.path.isdir(os.path.join(self.checkpoint_path, dir))
]
_logger.debug('self.checkpoint_path: {}'.format(
self.checkpoint_path))
_logger.info('checkpoints: {}'.format(checkpoints))
if len(checkpoints) > 0:
latest = max([int(ck) for ck in checkpoints])
latest_ck_path = os.path.join(self.checkpoint_path, str(latest))
model_path = os.path.join(latest_ck_path, 'model')
context_path = os.path.join(latest_ck_path, 'context')
strategy_path = os.path.join(latest_ck_path, 'strategies')
if os.path.exists(context_path):
context.from_file(context_path)
context.epoch_id += 1
if os.path.exists(strategy_path):
with open(strategy_path, 'rb') as strategy_file:
if sys.version_info < (3, 0):
strategies = pickle.load(strategy_file)
else:
strategies = pickle.load(
strategy_file, encoding='bytes')
assert (len(self.strategies) == len(strategies))
for s, s1 in zip(self.strategies, strategies):
s1.__dict__.update(s.__dict__)
for strategy in strategies:
strategy.restore_from_checkpoint(context)
if os.path.exists(model_path):
exe = SlimGraphExecutor(context.place)
with scope_guard(context.scope):
context.optimize_graph.load_persistables(model_path,
exe)
_logger.info("Loaded params from: {}".format(model_path))
return context, strategies
def _save_checkpoint(self, context):
"""
Save checkpoints to file.
"""
if context.epoch_id % 1 == 0 and self.checkpoint_path:
checkpoint_path = os.path.join(self.checkpoint_path,
str(context.epoch_id))
model_path = os.path.join(checkpoint_path, 'model')
eval_model_path = os.path.join(checkpoint_path, 'eval_model')
context_path = os.path.join(checkpoint_path, 'context')
strategy_path = os.path.join(checkpoint_path, 'strategies')
if not os.path.isdir(model_path):
os.makedirs(model_path)
exe = SlimGraphExecutor(context.place)
with scope_guard(context.scope):
context.optimize_graph.save_persistables(model_path, exe)
if self.save_eval_model:
context.eval_graph.save_model(eval_model_path, exe)
if self.prune_infer_model:
context.eval_graph.save_infer_model(
eval_model_path,
exe,
self.prune_infer_model,
program_only=self.save_eval_model)
context.to_file(context_path)
with open(strategy_path, 'wb') as strategy_file:
pickle.dump(self.strategies, strategy_file)
_logger.info('Saved checkpoint to: {}'.format(checkpoint_path))
def _train_one_epoch(self, context):
"""
Train one epoch.
"""
if context.skip_training:
return
executor = SlimGraphExecutor(self.place)
if context.optimize_graph.compiled_graph is None:
build_strategy = compiler.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
context.optimize_graph.compiled_graph = compiler.CompiledProgram(
context.optimize_graph.program).with_data_parallel(
loss_name=context.optimize_graph.out_nodes['loss'],
build_strategy=build_strategy)
if isinstance(context.train_reader, Variable) or (
isinstance(context.train_reader, DataLoaderBase) and
(not context.train_reader.iterable)):
context.train_reader.start()
try:
while True:
for strategy in self.strategies:
strategy.on_batch_begin(context)
results = executor.run(context.optimize_graph,
context.scope)
results = [float(np.mean(result)) for result in results]
if context.batch_id % self.log_period == 0:
_logger.info("epoch:{}; batch_id:{}; {} = {}".format(
context.epoch_id, context.batch_id,
context.optimize_graph.out_nodes.keys(
), [round(r, 6) for r in results]))
for strategy in self.strategies:
strategy.on_batch_end(context)
context.batch_id += 1
except EOFException:
context.train_reader.reset()
else:
for data in context.train_reader():
for strategy in self.strategies:
strategy.on_batch_begin(context)
results = executor.run(context.optimize_graph,
context.scope,
data=data)
results = [float(np.mean(result)) for result in results]
if context.batch_id % self.log_period == 0:
_logger.info("epoch:{}; batch_id:{}; {} = {}".format(
context.epoch_id, context.batch_id,
context.optimize_graph.out_nodes.keys(
), [round(r, 6) for r in results]))
for strategy in self.strategies:
strategy.on_batch_end(context)
context.batch_id += 1
context.batch_id = 0
def _eval(self, context):
"""
        Run evaluation.
"""
if self.eval_func is not None:
for key in self.eval_func:
func = self.eval_func[key]
if key not in context.eval_results:
context.eval_results[key] = []
context.eval_results[key].append(
func(self.eval_graph.program, self.scope))
else:
results, names = context.run_eval_graph()
for name, result in zip(names, results):
if name not in context.eval_results:
context.eval_results[name] = []
context.eval_results[name].append(result)
def run(self):
"""
        Execute the compression pass.
"""
context = Context(
place=self.place,
scope=self.scope,
train_graph=self.train_graph,
train_reader=self.train_reader,
eval_graph=self.eval_graph,
eval_reader=self.eval_reader,
teacher_graphs=self.teacher_graphs,
train_optimizer=self.train_optimizer,
distiller_optimizer=self.distiller_optimizer,
search_space=self.search_space)
self.context = context
if self.teacher_graphs:
context.put('teachers', self.teacher_graphs)
self._init_model(context)
if not context.optimize_graph:
if context.train_optimizer:
context.train_optimizer._name = 'train_opt'
context.optimize_graph = context.train_graph.get_optimize_graph(
context.train_optimizer, context.place, context.scope)
else:
context.optimize_graph = context.train_graph
context, self.strategies = self._load_checkpoint(context)
for strategy in self.strategies:
strategy.on_compression_begin(context)
if 'MKLDNNPostTrainingQuantStrategy' in [
i.__class__.__name__ for i in self.strategies
]:
return None
start = context.epoch_id
for epoch in range(start, self.epoch):
context.epoch_id = epoch
try:
for strategy in self.strategies:
strategy.on_epoch_begin(context)
self._train_one_epoch(context)
if self.eval_epoch and epoch % self.eval_epoch == 0:
self._eval(context)
self._save_checkpoint(context)
for strategy in self.strategies:
strategy.on_epoch_end(context)
except Exception:
                # format_exc() returns the traceback string; print_exc() prints
                # and returns None, which would have logged "None" here.
                _logger.error(traceback.format_exc())
continue
for strategy in self.strategies:
strategy.on_compression_end(context)
return context.eval_graph
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import funcsigs
import yaml
from collections import OrderedDict
from ..prune import *
from ..quantization import *
from .strategy import *
from ..distillation import *
from ..searcher import *
from ..nas import *
__all__ = ['ConfigFactory']
"""This factory is used to create instances by loading and parsing a configuration file in yaml format.
"""
PLUGINS = ['pruners', 'quantizers', 'distillers', 'strategies', 'controllers']
class ConfigFactory(object):
    def __init__(self, config):
        """Init a factory from a configuration file."""
self.instances = {}
self.compressor = {}
self.version = None
self._parse_config(config)
def instance(self, name):
"""
Get instance from factory.
"""
if name in self.instances:
return self.instances[name]
else:
return None
def _new_instance(self, name, attrs):
if name not in self.instances:
class_ = globals()[attrs['class']]
sig = funcsigs.signature(class_.__init__)
keys = [
param.name for param in sig.parameters.values()
if (param.kind == param.POSITIONAL_OR_KEYWORD)
][1:]
keys = set(attrs.keys()).intersection(set(keys))
args = {}
for key in keys:
value = attrs[key]
if isinstance(value, str) and value.lower() == 'none':
value = None
if isinstance(value, str) and value in self.instances:
value = self.instances[value]
if isinstance(value, list):
for i in range(len(value)):
if isinstance(value[i],
str) and value[i] in self.instances:
value[i] = self.instances[value[i]]
args[key] = value
self.instances[name] = class_(**args)
return self.instances.get(name)
def _parse_config(self, config):
assert config
with open(config, 'r') as config_file:
key_values = self._ordered_load(config_file)
for key in key_values:
# parse version
if key == 'version' and self.version is None:
self.version = int(key_values['version'])
assert self.version == int(key_values['version'])
# parse pruners
if key in PLUGINS:
instances = key_values[key]
for name in instances:
self._new_instance(name, instances[name])
if key == 'compressor':
self.compressor['strategies'] = []
self.compressor['epoch'] = key_values[key]['epoch']
if 'init_model' in key_values[key]:
self.compressor['init_model'] = key_values[key][
'init_model']
if 'checkpoint_path' in key_values[key]:
self.compressor['checkpoint_path'] = key_values[key][
'checkpoint_path']
if 'eval_epoch' in key_values[key]:
self.compressor['eval_epoch'] = key_values[key][
'eval_epoch']
if 'strategies' in key_values[key]:
for name in key_values[key]['strategies']:
strategy = self.instance(name)
self.compressor['strategies'].append(strategy)
if key == 'include':
for config_file in key_values[key]:
self._parse_config(config_file.strip())
def _ordered_load(self,
stream,
Loader=yaml.Loader,
object_pairs_hook=OrderedDict):
"""
See: https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
"""
class OrderedLoader(Loader):
pass
def construct_mapping(loader, node):
loader.flatten_mapping(node)
return object_pairs_hook(loader.construct_pairs(node))
OrderedLoader.add_constructor(
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping)
return yaml.load(stream, OrderedLoader)
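A hedged configuration sketch (the YAML layout is inferred from _parse_config above; the file name, strategy name and class are illustrative):

# compress.yaml, illustrative contents:
#     version: 1
#     strategies:
#         my_strategy:
#             class: 'Strategy'
#     compressor:
#         epoch: 120
#         checkpoint_path: './checkpoints/'
#         strategies:
#             - my_strategy
factory = ConfigFactory('compress.yaml')
print(factory.compressor['epoch'])       # 120
print(factory.compressor['strategies'])  # [the 'my_strategy' instance]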
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['Strategy']
class Strategy(object):
"""
Base class for all strategies.
"""
def __init__(self, start_epoch=0, end_epoch=0):
"""
Args:
start_epoch: The first epoch to apply the strategy.
end_epoch: The last epoch to apply the strategy.
"""
self.start_epoch = start_epoch
self.end_epoch = end_epoch
def __getstate__(self):
d = {}
for key in self.__dict__:
if key not in ["start_epoch", "end_epoch"]:
d[key] = self.__dict__[key]
return d
def on_compression_begin(self, context):
pass
def on_epoch_begin(self, context):
pass
def on_epoch_end(self, context):
pass
def on_batch_begin(self, context):
pass
def on_batch_end(self, context):
pass
def on_compression_end(self, context):
pass
def restore_from_checkpoint(self, context):
pass
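A minimal sketch of a concrete strategy (illustrative; subclasses override only the callbacks they need, and note that __getstate__ above deliberately drops start_epoch/end_epoch when pickling to a checkpoint):

class LoggingStrategy(Strategy):
    """Toy strategy that logs epoch boundaries while it is active."""

    def __init__(self, start_epoch=0, end_epoch=10):
        super(LoggingStrategy, self).__init__(start_epoch, end_epoch)

    def on_epoch_begin(self, context):
        if self.start_epoch <= context.epoch_id < self.end_epoch:
            print('epoch {} begins'.format(context.epoch_id))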
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import distiller
from .distiller import *
from . import distillation_strategy
from .distillation_strategy import *
__all__ = distiller.__all__
__all__ += distillation_strategy.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..core.strategy import Strategy
from ....framework import Program, Variable, program_guard
from ....log_helper import get_logger
from .... import Executor
import logging
__all__ = ['DistillationStrategy']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class DistillationStrategy(Strategy):
def __init__(self, distillers=None, start_epoch=0, end_epoch=0):
"""
Args:
            distillers(list): A list of distillers used to combine the student graph and the teacher graph
                              by adding some loss.
start_epoch(int): The epoch when to merge student graph and teacher graph for
distillation training. default: 0
end_epoch(int): The epoch when to finish distillation training. default: 0
"""
super(DistillationStrategy, self).__init__(start_epoch, end_epoch)
self.distillers = distillers
def restore_from_checkpoint(self, context):
# load from checkpoint
if context.epoch_id > 0:
if context.epoch_id > self.start_epoch and context.epoch_id < self.end_epoch:
_logger.info('Restore DistillationStrategy')
self._create_distillation_graph(context)
_logger.info('Restore DistillationStrategy finish.')
def on_epoch_begin(self, context):
if self.start_epoch == context.epoch_id:
_logger.info('DistillationStrategy::on_epoch_begin.')
self._create_distillation_graph(context)
_logger.info('DistillationStrategy set optimize_graph.')
def _create_distillation_graph(self, context):
"""
step 1: Merge student graph and teacher graph into distillation graph.
step 2: Add loss into distillation graph by distillers.
step 3: Append backward ops and optimize ops into distillation graph for training.
"""
# step 1
teacher = context.teacher_graphs[0]
for var in teacher.program.list_vars():
var.stop_gradient = True
graph = context.train_graph.clone()
graph.merge(teacher)
if 'loss' in graph.out_nodes:
graph.out_nodes['student_loss'] = graph.out_nodes['loss']
# step 2
for distiller in self.distillers:
graph = distiller.distiller_loss(graph)
# step 3
startup_program = Program()
with program_guard(graph.program, startup_program):
context.distiller_optimizer._name = 'distillation_optimizer'
# The learning rate variable may be created in another program.
# Update information in the optimizer to make the learning
# rate variable accessible in the current program.
optimizer = context.distiller_optimizer
if isinstance(optimizer._learning_rate, Variable):
optimizer._learning_rate_map[
graph.program] = optimizer._learning_rate
optimizer.minimize(graph.var(graph.out_nodes['loss'])._var)
exe = Executor(context.place)
exe.run(startup_program, scope=context.scope)
# backup graph for fine-tune after distillation
context.put('distillation_backup_optimize_graph',
context.optimize_graph)
context.optimize_graph = graph
def on_epoch_end(self, context):
if context.epoch_id == (self.end_epoch - 1):
_logger.info('DistillationStrategy::on_epoch_end.')
# restore optimize_graph for fine-tune or other strategy in next stage.
context.optimize_graph = context.get(
'distillation_backup_optimize_graph')
_logger.info(
'DistillationStrategy restored context.optimize_graph.')
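# Hedged configuration sketch: how a DistillationStrategy might be wired up
# with a single distiller. The relative import path follows this package's
# layout; the variable names inside the pairs are illustrative assumptions.
def example_build_distillation_strategy():
    from .distiller import FSPDistiller
    distiller = FSPDistiller(
        student_pairs=[('student_conv1_out', 'student_conv2_out')],
        teacher_pairs=[('teacher_conv1_out', 'teacher_conv2_out')],
        distillation_loss_weight=1)
    return DistillationStrategy(
        distillers=[distiller], start_epoch=0, end_epoch=10)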
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .... import layers
from .... import optimizer
from .... import Executor
from .... import Program
from .... import program_guard
from .... import regularizer
__all__ = ['FSPDistiller', 'L2Distiller', 'SoftLabelDistiller']
class L2Distiller(object):
"""
Combine a layer from the student net with a layer from the teacher net by l2-loss,
and add this loss into the total loss used for distillation training.
"""
def __init__(self,
student_feature_map,
teacher_feature_map,
distillation_loss_weight=1):
"""
Args:
student_feature_map(str): The name of feature map from student network.
teacher_feature_map(str): The name of feature map from teacher network.
Its shape should be the same as that of the student feature map.
distillation_loss_weight(float): The weight of the l2-loss.
"""
self.student_feature_map = student_feature_map
self.teacher_feature_map = teacher_feature_map
self.distillation_loss_weight = distillation_loss_weight
def distiller_loss(self, graph):
"""
Modify graph inplace to add l2-loss.
Args:
graph(GraphWrapper): The graph to be modified.
Returns:
GraphWrapper: The modified graph.
"""
distiller_pass = L2DistillerPass(self.student_feature_map,
self.teacher_feature_map,
self.distillation_loss_weight)
dis_graph = distiller_pass.apply(graph)
return dis_graph
class L2DistillerPass(object):
"""
The pass used to add l2-loss.
"""
def __init__(self,
student_feature_map,
teacher_feature_map,
distillation_loss_weight=1):
"""
Args:
student_feature_map(str): The name of feature map from student network.
teacher_feature_map(str): The name of feature map from teacher network.
Its shape should be the same as that of the student feature map.
distillation_loss_weight(float): The weight of the l2-loss.
"""
self.student_feature_map = student_feature_map
self.teacher_feature_map = teacher_feature_map
self.distillation_loss_weight = distillation_loss_weight
def apply(self, graph):
ret_graph = graph
with program_guard(ret_graph.program):
student_feature_map = ret_graph.var(self.student_feature_map)._var
teacher_feature_map = ret_graph.var(self.teacher_feature_map)._var
l2loss = layers.reduce_mean(
layers.square(student_feature_map - teacher_feature_map))
distillation_loss = l2loss * self.distillation_loss_weight
student_loss = 0
if 'loss' in ret_graph.out_nodes:
student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var
loss = distillation_loss + student_loss
ret_graph.out_nodes['loss'] = loss.name
ret_graph.out_nodes[
'l2loss_' + self.student_feature_map + "_" +
self.teacher_feature_map] = distillation_loss.name
return ret_graph
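# Hedged numeric sketch of the loss assembled by L2DistillerPass above: the
# mean of squared differences between the two feature maps, scaled by
# distillation_loss_weight. Plain numpy stands in for the fluid layers.
def example_l2_distillation_loss(student, teacher, weight=1.0):
    import numpy as np
    # reduce_mean(square(student - teacher)) * weight
    return weight * np.mean(
        np.square(np.asarray(student) - np.asarray(teacher)))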
class FSPDistiller(object):
"""
Combine layers from student net and teacher net by fsp-loss.
"""
def __init__(self, student_pairs, teacher_pairs,
distillation_loss_weight=1):
"""
Args:
student_pairs(list<tuple>): Each tuple, with two variable names, in student_pairs indicates
a section in student network. The variables in a tuple should
have the same feature map size.
teacher_pairs(list<tuple>): Each tuple, with two variable names, in teacher_pairs indicates
a section in teacher network. The variables in a tuple should
have the same feature map size. The variable named teacher_pairs[i][j]
should have the same channel number as the variable named
student_pairs[i][j].
distillation_loss_weight(float): The weight of the fsp-loss. default: 1.
"""
self.student_pairs = student_pairs
self.teacher_pairs = teacher_pairs
self.distillation_loss_weight = distillation_loss_weight
def distiller_loss(self, graph):
"""
Modify graph inplace to add fsp-loss.
Args:
graph(GraphWrapper): The graph to be modified.
Returns:
GraphWrapper: The modified graph.
"""
distiller_pass = FSPDistillerPass(self.student_pairs,
self.teacher_pairs,
self.distillation_loss_weight)
dis_graph = distiller_pass.apply(graph)
return dis_graph
class FSPDistillerPass(object):
'''
Combine layers from student net and teacher net by fsp-loss.
'''
def __init__(self, s_pairs, t_pairs, distillation_loss_weight=1):
"""
Args:
s_pairs(list<tuple>): Each tuple, with two variable names, in student_pairs indicates
a section in student network. The variables in a tuple should
have the same feature map size.
t_pairs(list<tuple>): Each tuple, with two variable names, in teacher_pairs indicates
a section in teacher network. The variables in a tuple should
have the same feature map size. The variable named teacher_pairs[i][j]
should have the same channel number as the variable named
student_pairs[i][j].
distillation_loss_weight(float): The weight of the fsp-loss. default: 1.
"""
self.s_pairs = s_pairs
self.t_pairs = t_pairs
self.distillation_loss_weight = distillation_loss_weight
def apply(self, graph):
ret_graph = graph
with program_guard(ret_graph.program):
losses = []
for s_pair, t_pair in zip(self.s_pairs, self.t_pairs):
s_pair_start = ret_graph.var(s_pair[0])._var
s_pair_end = ret_graph.var(s_pair[1])._var
s_fsp_matrix = self._fsp_matrix(s_pair_start, s_pair_end)
t_pair_start = ret_graph.var(t_pair[0])._var
t_pair_end = ret_graph.var(t_pair[1])._var
t_fsp_matrix = self._fsp_matrix(t_pair_start, t_pair_end)
l2_loss = layers.reduce_mean(
layers.square(s_fsp_matrix - t_fsp_matrix))
losses.append(l2_loss)
distillation_loss = layers.sum(
losses) * self.distillation_loss_weight
student_loss = 0
if 'loss' in ret_graph.out_nodes:
student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var
loss = distillation_loss + student_loss
ret_graph.out_nodes['loss'] = loss.name
ret_graph.out_nodes[
'fsp_distillation_loss'] = distillation_loss.name
return ret_graph
def _fsp_matrix(self, fea_map_0, fea_map_1):
return layers.fsp_matrix(fea_map_0, fea_map_1)
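# Hedged numpy sketch of the FSP matrix computed by layers.fsp_matrix above.
# Under the usual FSP ("flow of solution procedure") definition, for feature
# maps a (c1 x h x w) and b (c2 x h x w) from the same section, entry (i, j)
# is the spatial average of a[i] * b[j].
def example_fsp_matrix(a, b):
    import numpy as np
    h, w = a.shape[1], a.shape[2]
    return np.einsum('ihw,jhw->ij', a, b) / (h * w)  # shape: (c1, c2)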
class SoftLabelDistiller(object):
"""
Combine a layer from the student net with a layer from the teacher net by softmax_with_cross_entropy loss,
and add this loss into the total loss used for distillation training.
"""
def __init__(self,
student_feature_map=None,
teacher_feature_map=None,
student_temperature=1.0,
teacher_temperature=1.0,
distillation_loss_weight=1):
"""
Args:
student_feature_map(str): The name of feature map from student network.
teacher_feature_map(str): The name of feature map from teacher network.
Its shape should be the same as that of the student feature map.
student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy. default: 1.0
teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy. default: 1.0
distillation_loss_weight(float): The weight of the soft-label loss.
"""
self.student_feature_map = student_feature_map
self.teacher_feature_map = teacher_feature_map
self.distillation_loss_weight = distillation_loss_weight
self.student_temperature = student_temperature
self.teacher_temperature = teacher_temperature
def distiller_loss(self, graph):
"""
Modify graph inplace to add softmax_with_cross_entropy loss.
Args:
graph(GraphWrapper): The graph to be modified.
Returns:
GraphWrapper: The modified graph.
"""
distiller_pass = SoftLabelDistillerPass(
self.student_feature_map, self.teacher_feature_map,
self.student_temperature, self.teacher_temperature,
self.distillation_loss_weight)
dis_graph = distiller_pass.apply(graph)
return dis_graph
class SoftLabelDistillerPass(object):
def __init__(self,
student_feature_map,
teacher_feature_map,
student_temperature,
teacher_temperature,
distillation_loss_weight=1):
"""
Args:
student_feature_map(str): The name of feature map from student network.
teacher_feature_map(str): The name of feature map from teacher network.
Its shape should be the same as that of the student feature map.
student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy.
teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy.
distillation_loss_weight(float): The weight of the soft-label loss.
"""
self.student_feature_map = student_feature_map
self.teacher_feature_map = teacher_feature_map
self.student_temperature = student_temperature
self.teacher_temperature = teacher_temperature
self.distillation_loss_weight = distillation_loss_weight
def apply(self, graph):
ret_graph = graph
with program_guard(ret_graph.program):
student_feature_map = ret_graph.var(self.student_feature_map)._var
teacher_feature_map = ret_graph.var(self.teacher_feature_map)._var
s_fea = layers.softmax(student_feature_map /
self.student_temperature)
t_fea = layers.softmax(teacher_feature_map /
self.teacher_temperature)
t_fea.stop_gradient = True
ce_loss = layers.reduce_mean(
layers.cross_entropy(
s_fea, t_fea, soft_label=True))
distillation_loss = ce_loss * self.distillation_loss_weight
student_loss = 0
if 'loss' in ret_graph.out_nodes:
student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var
loss = distillation_loss + student_loss
ret_graph.out_nodes['loss'] = loss.name
ret_graph.out_nodes[
'soft_label_loss_' + self.student_feature_map + "_" +
self.teacher_feature_map] = distillation_loss.name
return ret_graph
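# Hedged numpy sketch of the soft-label loss built by SoftLabelDistillerPass
# above: temperature-scaled softmax on both feature maps (assumed 2-D,
# batch x classes), then soft-label cross entropy averaged over the batch.
def example_soft_label_loss(student_logits, teacher_logits,
                            student_t=1.0, teacher_t=1.0, weight=1.0):
    import numpy as np

    def softmax(x, t):
        z = np.asarray(x) / t
        e = np.exp(z - np.max(z, axis=1, keepdims=True))
        return e / np.sum(e, axis=1, keepdims=True)

    s = softmax(student_logits, student_t)
    t = softmax(teacher_logits, teacher_t)
    ce = -np.sum(t * np.log(s), axis=1)  # cross_entropy(..., soft_label=True)
    return weight * np.mean(ce)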
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import executor
from .executor import *
from . import graph_wrapper
from .graph_wrapper import *
__all__ = executor.__all__
__all__ += graph_wrapper.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ....compiler import CompiledProgram
from ....data_feeder import DataFeeder
from .... import executor
from .graph_wrapper import GraphWrapper
__all__ = ['SlimGraphExecutor']
class SlimGraphExecutor(object):
"""
Wrapper of executor used to run GraphWrapper.
"""
def __init__(self, place):
self.exe = executor.Executor(place)
self.place = place
def run(self, graph, scope, data=None):
"""
Run a graph with a batch of data.
Args:
graph(GraphWrapper): The graph to be executed.
scope(fluid.core.Scope): The scope to be used.
data(list<tuple>): A batch of data. Each tuple in this list is a sample.
The items of each tuple are fed to the in_nodes of the graph.
Returns:
results(list): A list of results in the order indicated by graph.out_nodes.
"""
assert isinstance(graph, GraphWrapper)
feed = None
if data is not None and isinstance(data[0], dict):
# return list = False
feed = data
elif data is not None:
feeder = DataFeeder(
feed_list=list(graph.in_nodes.values()),
place=self.place,
program=graph.program)
feed = feeder.feed(data)
fetch_list = list(graph.out_nodes.values())
program = graph.compiled_graph if graph.compiled_graph else graph.program
results = self.exe.run(program,
scope=scope,
fetch_list=fetch_list,
feed=feed)
return results
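# Hedged usage sketch for SlimGraphExecutor. `graph` is assumed to be a
# GraphWrapper whose in_nodes/out_nodes were filled in by the caller; `batch`
# is a list of samples, each a tuple whose items match graph.in_nodes.
def example_run_graph(graph, scope, place, batch):
    exe = SlimGraphExecutor(place)
    results = exe.run(graph, scope, data=batch)
    return results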
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from .... import io
from .... import compiler
from ....framework import Program
from ....framework import program_guard
from ....framework import Parameter
from ....framework import Variable
from ....executor import Executor
import copy
try:
    from collections.abc import Iterable  # Python 3.3+
except ImportError:
    from collections import Iterable  # Python 2 fallback
from ....io import save_inference_model, load_inference_model, save_persistables
import numpy as np
import pickle
import os
__all__ = ['GraphWrapper', 'VarWrapper', 'OpWrapper']
OPTIMIZER_OPS = [
'momentum',
'lars_momentum',
'adagrad',
'adam',
'adamax',
'dpsgd',
'decayed_adagrad',
'adadelta',
'rmsprop',
]
class VarWrapper(object):
def __init__(self, var, graph):
assert isinstance(var, Variable)
assert isinstance(graph, GraphWrapper)
self._var = var
self._graph = graph
def __eq__(self, v):
"""
Override this function to support the `in` operator in Python.
"""
return self._var.name == v._var.name
def name(self):
"""
Get the name of the variable.
"""
return self._var.name
def shape(self):
"""
Get the shape of the variable.
"""
return self._var.shape
def set_shape(self, shape):
"""
Set the shape of the variable.
"""
self._var.desc.set_shape(shape)
def inputs(self):
"""
Get all the operators that use this variable as output.
Returns:
list<OpWrapper>: A list of operators.
"""
ops = []
for op in self._graph.ops():
if self in op.all_outputs():
ops.append(op)
return ops
def outputs(self):
"""
Get all the operators that use this variable as input.
Returns:
list<OpWrapper>: A list of operators.
"""
ops = []
for op in self._graph.ops():
if self in op.all_inputs():
ops.append(op)
return ops
class OpWrapper(object):
def __init__(self, op, graph):
assert isinstance(graph, GraphWrapper)
self._op = op
self._graph = graph
def __eq__(self, op):
"""
Override this function to support the `in` operator in Python.
"""
return self.idx() == op.idx()
def all_inputs(self):
"""
Get all the input variables of this operator.
"""
return [
self._graph.var(var_name) for var_name in self._op.input_arg_names
]
def all_outputs(self):
"""
Get all the output variables of this operator.
"""
return [
self._graph.var(var_name) for var_name in self._op.output_arg_names
]
def idx(self):
"""
Get the id of this operator.
"""
return self._op.idx
def type(self):
"""
Get the type of this operator.
"""
return self._op.type
def is_bwd_op(self):
"""
Whether this operator is backward op.
"""
return self.type().endswith('_grad')
def is_opt_op(self):
"""
Whether this operator is optimizer op.
"""
return self.type() in OPTIMIZER_OPS
def inputs(self, name):
"""
Get all the variables by the input name.
"""
return [self._graph.var(var_name) for var_name in self._op.input(name)]
def outputs(self, name):
"""
Get all the variables by the output name.
"""
return [self._graph.var(var_name) for var_name in self._op.output(name)]
def set_attr(self, key, value):
"""
Set the value of attribute by attribute's name.
Args:
key(str): the attribute name.
value(bool|int|str|float|list): the value of the attribute.
"""
self._op._set_attr(key, value)
def attr(self, name):
"""
Get the attribute by name.
Args:
name(str): the attribute name.
Returns:
bool|int|str|float|list: The attribute value. The return value
can be any valid attribute type.
"""
return self._op.attr(name)
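# Hedged helper sketch built on the wrappers above: collect every conv op in
# a GraphWrapper together with the shape of its filter parameter.
def example_list_convs(graph):
    convs = []
    for op in graph.ops():
        if op.type() in ('conv2d', 'depthwise_conv2d'):
            convs.append((op.idx(), op.inputs('Filter')[0].shape()))
    return convs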
class GraphWrapper(object):
"""
It is a wrapper of paddle.fluid.framework.Program with some special functions
for the paddle slim framework.
"""
def __init__(self, program=None, in_nodes=[], out_nodes=[]):
"""
Args:
program(framework.Program): The program to be wrapped. A new Program is created if it is None.
in_nodes(dict): A dict to indicate the input nodes of the graph.
The key is user-defined and human-readable name.
The value is the name of Variable.
out_nodes(dict): A dict to indicate the output nodes of the graph.
The key is user-defined and human-readable name.
The value is the name of Variable.
"""
super(GraphWrapper, self).__init__()
self.program = Program() if program is None else program
self.persistables = {}
self.teacher_persistables = {}
for var in self.program.list_vars():
if var.persistable:
self.persistables[var.name] = var
self.compiled_graph = None
in_nodes = [] if in_nodes is None else in_nodes
out_nodes = [] if out_nodes is None else out_nodes
self.in_nodes = OrderedDict(in_nodes)
self.out_nodes = OrderedDict(out_nodes)
self._attrs = OrderedDict()
def all_parameters(self):
"""
Get all the parameters in this graph.
Returns:
list<VarWrapper>: A list of VarWrapper instances.
"""
params = []
for block in self.program.blocks:
for param in block.all_parameters():
params.append(VarWrapper(param, self))
return params
def is_parameter(self, var):
"""
Whether the given variable is parameter.
Args:
var(VarWrapper): The given variable.
"""
return isinstance(var._var, Parameter)
def is_persistable(self, var):
"""
Whether the given variable is persistable.
Args:
var(VarWrapper): The given variable.
"""
return var._var.persistable
def compile(self, for_parallel=True, for_test=False, mem_opt=False):
"""
Compile the program in this wrapper to framework.CompiledProgram for subsequent running.
This function must be called if the program is modified.
Args:
for_parallel(bool): Whether to run the program in a data-parallel way. default: True.
for_test(bool): Whether the compiled program is used for test.
mem_opt(bool): Whether to enable inplace and memory optimization passes. default: False.
"""
target = self.program
if for_test:
loss = None
else:
loss = self.out_nodes['loss']
if for_parallel:
# disable memory optimize for stable training
build_strategy = compiler.BuildStrategy()
build_strategy.enable_inplace = mem_opt
build_strategy.memory_optimize = mem_opt
build_strategy.fuse_all_reduce_ops = False
# build_strategy.async_mode = False
self.compiled_graph = compiler.CompiledProgram(
target).with_data_parallel(
loss_name=loss, build_strategy=build_strategy)
else:
self.compiled_graph = compiler.CompiledProgram(target)
def ops(self):
"""
Return all operator nodes included in the graph as a list.
"""
ops = []
for block in self.program.blocks:
for op in block.ops:
ops.append(OpWrapper(op, self))
return ops
def vars(self):
"""
Get all the variables.
"""
return [VarWrapper(var, self) for var in self.program.list_vars()]
def var(self, name):
"""
Get the variable by variable name.
"""
return VarWrapper(self.program.global_block().var(name), self)
def clone(self, for_test=False):
"""
Clone a new graph from current graph.
Returns:
(GraphWrapper): The wrapper of a new graph.
"""
return GraphWrapper(
self.program.clone(for_test),
copy.deepcopy(self.in_nodes), copy.deepcopy(self.out_nodes))
def merge(self, graph):
"""
Merge a graph into current graph.
Args:
graph(GraphWrapper): The graph to be merged into the current graph.
"""
for var in graph.program.list_vars():
if var.persistable:
self.teacher_persistables[var.name] = var
new_var = self.program.global_block()._clone_variable(
var, force_persistable=False)
new_var.stop_gradient = var.stop_gradient
# TODO: parameters should be cloned
for op in graph.ops():
op = op._op
inputs = {}
outputs = {}
attrs = {}
for input_name in op.input_names:
inputs[input_name] = [
self.var(in_var_name)._var
for in_var_name in op.input(input_name)
]
for output_name in op.output_names:
outputs[output_name] = [
self.var(out_var_name)._var
for out_var_name in op.output(output_name)
]
for attr_name in op.attr_names:
attrs[attr_name] = op.attr(attr_name)
self.program.global_block().append_op(
type=op.type, inputs=inputs, outputs=outputs, attrs=attrs)
def program(self):
"""
Get the program in current wrapper.
"""
return self.program
def pre_ops(self, op):
"""
Get all the previous operators of target operator.
Args:
op(OpWrapper): Target operator.
Returns:
list<OpWrapper>: A list of operators.
"""
ops = []
for p in self.ops():
for in_var in op.all_inputs():
if in_var in p.all_outputs():
ops.append(p)
return ops
def next_ops(self, op):
"""
Get all the next operators of target operator.
Args:
op(OpWrapper): Target operator.
Returns:
list<OpWrapper>: A list of operators.
"""
ops = []
for p in self.ops():
for out_var in op.all_outputs():
if out_var in p.all_inputs():
ops.append(p)
return ops
def get_param_by_op(self, op):
"""
Get the parameters used by target operator.
"""
assert isinstance(op, OpWrapper)
params = []
for var in op.all_inputs():
if isinstance(var._var, Parameter):
params.append(var)
assert len(params) > 0
return params
def numel_params(self):
"""
Get the number of elements in all parameters.
"""
ret = 0
for param in self.all_parameters():
ret += np.prod(param.shape())
return ret
def get_optimize_graph(self, optimizer, place, scope, no_grad_var_names=[]):
"""
Get a new graph for training by appending some backward operators and optimization operators.
Args:
optimizer: The optimizer used to generate the training graph.
place: The place on which to run the graph.
scope: The scope used to run the graph. Some new variables will be added to this scope.
no_grad_var_names(list<str>): Names of variables that should be ignored while computing gradients. default: [].
Returns:
(GraphWrapper): The wrapper of new graph with backward ops and optimization ops.
"""
graph = self.clone()
startup_program = Program()
with program_guard(
main_program=graph.program, startup_program=startup_program):
target_name = None
if 'loss' in graph.out_nodes:
target_name = graph.out_nodes['loss']
elif 'cost' in graph.out_nodes:
target_name = graph.out_nodes['cost']
else:
return None
target = graph.var(target_name)._var
# The learning rate variable may be created in another program.
# Update information in the optimizer to make the learning
# rate variable accessible in the current program.
if isinstance(optimizer._learning_rate, Variable):
optimizer._learning_rate_map[
graph.program] = optimizer._learning_rate
optimizer.minimize(target, no_grad_set=no_grad_var_names)
exe = Executor(place)
exe.run(program=startup_program, scope=scope)
return graph
def flops(self, only_conv=False):
"""
Get the flops of current graph.
Args:
only_conv(bool): If True, only count conv (and depthwise_conv) layers. default: False.
Returns:
int: The flops of current graph.
"""
flops = 0
for op in self.ops():
if op.type() in ['conv2d', 'depthwise_conv2d']:
filter_shape = op.inputs("Filter")[0].shape()
input_shape = op.inputs("Input")[0].shape()
output_shape = op.outputs("Output")[0].shape()
c_out, c_in, k_h, k_w = filter_shape
_, _, h_out, w_out = output_shape
groups = op.attr("groups")
kernel_ops = k_h * k_w * (c_in / groups)
if len(op.inputs("Bias")) > 0:
with_bias = 1
else:
with_bias = 0
flops += 2 * h_out * w_out * c_out * (kernel_ops + with_bias)
elif op.type() == 'pool2d' and not only_conv:
input_shape = op.inputs("X")[0].shape()
output_shape = op.outputs("Out")[0].shape()
_, c_out, h_out, w_out = output_shape
k_size = op.attr("ksize")
flops += h_out * w_out * c_out * (k_size[0]**2)
elif op.type() == 'mul' and not only_conv:
x_shape = list(op.inputs("X")[0].shape())
y_shape = op.inputs("Y")[0].shape()
if x_shape[0] == -1:
x_shape[0] = 1
flops += 2 * x_shape[0] * x_shape[1] * y_shape[1]
elif op.type() in ['relu', 'sigmoid', 'batch_norm'
] and not only_conv:
input_shape = list(op.inputs("X")[0].shape())
if input_shape[0] == -1:
input_shape[0] = 1
flops += np.prod(input_shape)
return flops
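# Worked example (hedged) of the conv branch above: a 3x3 conv with c_in=64,
# c_out=128, groups=1, bias, and a 56x56 output feature map gives
#   kernel_ops = 3 * 3 * (64 / 1) = 576
#   flops += 2 * 56 * 56 * 128 * (576 + 1) = 463,224,832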
def save_model(self, path, exe):
"""
Save the network and parameters into files that can be loaded by the load_inference_model API.
Args:
path(str): The path to save the persistables.
exe(framework.Executor): The executor used to save the persistables.
"""
out_vars = [
self.var(var_name)._var for var_name in self.out_nodes.values()
]
in_vars = list(self.in_nodes.values())
assert (len(in_vars) > 0)
assert (len(out_vars) > 0)
io.save_inference_model(
path,
in_vars,
out_vars,
exe.exe,
model_filename="__model__",
params_filename="__params__",
main_program=self.program.clone(),
export_for_deployment=True)
def save_infer_model(self, path, exe, in_out, program_only=False):
"""
Save the network and parameters into files that can be loaded by the load_inference_model API.
Args:
path(str): The path to save the persistables.
exe(framework.Executor): The executor used to save the persistables.
in_out(tuple|list): in_out[0] is a list of input nodes' names
and in_out[1] is a list of output nodes' names.
program_only(bool): Whether to save program only.
"""
out_vars = [self.var(var_name)._var for var_name in in_out[1]]
in_vars = list(in_out[0])
assert (len(in_vars) > 0)
assert (len(out_vars) > 0)
io.save_inference_model(
path,
in_vars,
out_vars,
exe.exe,
model_filename="__model__.infer",
params_filename="__params__",
program_only=program_only,
main_program=self.program.clone(),
export_for_deployment=True)
def save_persistables(self, path, exe):
"""
Save all the persistable variables into file.
Args:
path(str): The path to save the persistables.
exe(framework.Executor): The executor used to save the persistables.
"""
# update persistables from program
for var in self.program.list_vars():
if var.persistable and var.name not in self.persistables:
self.persistables[var.name] = var
persistables = []
for var in self.persistables:
if 'reader' not in var and 'double_buffer' not in var and var not in self.teacher_persistables:
persistables.append(self.persistables[var])
io.save_vars(exe.exe, path, vars=persistables)
def load_persistables(self, path, exe):
"""
Load the persistable variables from file.
Args:
path(str): The path to load the persistables.
exe(framework.Executor): The executor used to load the persistables.
"""
def if_exist(var):
return os.path.exists(os.path.join(path, var.name))
persistables = []
for var in self.persistables:
if 'reader' not in var and 'double_buffer' not in var:
persistables.append(self.persistables[var])
io.load_vars(exe.exe, path, vars=persistables, predicate=if_exist)
def update_param_shape(self, scope):
"""
Update the shape of parameters in the graph according to tensors in scope.
It is used after loading pruned parameters from file.
"""
for param in self.all_parameters():
tensor_shape = np.array(scope.find_var(param.name()).get_tensor(
)).shape
param.set_shape(tensor_shape)
def infer_shape(self):
"""
Re-run shape inference to update the shapes of variables in the graph.
It is used after loading pruned parameters from file.
"""
for op in self.ops():
if op.type() != 'conditional_block':
op._op.desc.infer_shape(op._op.block.desc)
def update_groups_of_conv(self):
for op in self.ops():
if op.type() == 'depthwise_conv2d' or op.type(
) == 'depthwise_conv2d_grad':
op.set_attr('groups', op.inputs('Filter')[0].shape()[0])
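# Hedged end-to-end sketch: wrapping a tiny fluid program in a GraphWrapper
# and querying it. The layer configuration is an illustrative assumption
# based on the Paddle 1.x fluid API.
def example_wrap_program():
    import paddle.fluid as fluid
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        image = fluid.layers.data(name='image', shape=[1, 28, 28])
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        out = fluid.layers.fc(input=image, size=10, act='softmax')
        loss = fluid.layers.mean(
            fluid.layers.cross_entropy(input=out, label=label))
    graph = GraphWrapper(
        program=main,
        in_nodes={'image': 'image', 'label': 'label'},
        out_nodes={'loss': loss.name})
    return graph.flops(), graph.numel_params()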
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import light_nas_strategy
from .light_nas_strategy import *
from . import controller_server
from .controller_server import *
from . import search_agent
from .search_agent import *
from . import search_space
from .search_space import *
from . import lock
from .lock import *
__all__ = light_nas_strategy.__all__
__all__ += controller_server.__all__
__all__ += search_agent.__all__
__all__ += search_space.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import socket
from threading import Thread
from ....log_helper import get_logger
__all__ = ['ControllerServer']
_logger = get_logger(
__name__,
logging.INFO,
fmt='ControllerServer-%(asctime)s-%(levelname)s: %(message)s')
class ControllerServer(object):
"""
The controller wrapper with a socket server that handles the requests of search agents.
"""
def __init__(self,
controller=None,
address=('', 0),
max_client_num=100,
search_steps=None,
key=None):
"""
Args:
controller(slim.searcher.Controller): The controller used to generate tokens.
address(tuple): The (ip, port) address that the current server binds to.
Default: ('', 0), which means picking the ip and port automatically.
max_client_num(int): The maximum number of clients connecting to current server simultaneously. Default: 100.
search_steps(int): The total steps of searching. None means never stopping. Default: None
key(str): The key used to identify legal requests from search agents. Default: None.
"""
self._controller = controller
self._address = address
self._max_client_num = max_client_num
self._search_steps = search_steps
self._closed = False
self._port = address[1]
self._ip = address[0]
self._key = key
def start(self):
self._socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._socket_server.bind(self._address)
self._socket_server.listen(self._max_client_num)
self._port = self._socket_server.getsockname()[1]
self._ip = self._socket_server.getsockname()[0]
_logger.info("listen on: [{}:{}]".format(self._ip, self._port))
thread = Thread(target=self.run)
thread.start()
return str(thread)
def close(self):
"""Close the server."""
self._closed = True
def port(self):
"""Get the port."""
return self._port
def ip(self):
"""Get the ip."""
return self._ip
def run(self):
_logger.info("Controller Server run...")
while ((self._search_steps is None) or
(self._controller._iter <
(self._search_steps))) and not self._closed:
conn, addr = self._socket_server.accept()
message = conn.recv(1024).decode()
if message.strip("\n") == "next_tokens":
tokens = self._controller.next_tokens()
tokens = ",".join([str(token) for token in tokens])
conn.send(tokens.encode())
else:
_logger.info("recv message from {}: [{}]".format(addr, message))
messages = message.strip('\n').split("\t")
if (len(messages) < 3) or (messages[0] != self._key):
_logger.info("recv noise from {}: [{}]".format(addr,
message))
continue
tokens = messages[1]
reward = messages[2]
tokens = [int(token) for token in tokens.split(",")]
self._controller.update(tokens, float(reward))
tokens = self._controller.next_tokens()
tokens = ",".join([str(token) for token in tokens])
conn.send(tokens.encode())
_logger.info("send message to {}: [{}]".format(addr, tokens))
conn.close()
self._socket_server.close()
_logger.info("server closed!")
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..core.strategy import Strategy
from ..graph import GraphWrapper
from .controller_server import ControllerServer
from .search_agent import SearchAgent
from ....executor import Executor
from ....log_helper import get_logger
import re
import logging
import functools
import socket
from .lock import lock, unlock
__all__ = ['LightNASStrategy']
_logger = get_logger(
__name__,
logging.INFO,
fmt='LightNASStrategy-%(asctime)s-%(levelname)s: %(message)s')
class LightNASStrategy(Strategy):
"""
Light-NAS search strategy.
"""
def __init__(self,
controller=None,
end_epoch=1000,
target_flops=629145600,
target_latency=0,
retrain_epoch=1,
metric_name='top1_acc',
server_ip=None,
server_port=0,
is_server=False,
max_client_num=100,
search_steps=None,
key="light-nas"):
"""
Args:
controller(searcher.Controller): The searching controller. Default: None.
end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. Default: 1000
target_flops(int): The constraint of FLOPS.
target_latency(float): The constraint of latency.
retrain_epoch(int): The number of training epochs before evaluating structure generated by controller. Default: 1.
metric_name(str): The metric used to evaluate the model.
It should be one of keys in out_nodes of graph wrapper. Default: 'top1_acc'
server_ip(str): The ip that controller server listens on. None means getting the ip automatically. Default: None.
server_port(int): The port that controller server listens on. 0 means getting usable port automatically. Default: 0.
is_server(bool): Whether current host is controller server. Default: False.
max_client_num(int): The maximum number of clients that connect to controller server concurrently. Default: 100.
search_steps(int): The total steps of searching. Default: None.
key(str): The key used to identify legal agent for controller server. Default: "light-nas"
"""
self.start_epoch = 0
self.end_epoch = end_epoch
self._max_flops = target_flops
self._max_latency = target_latency
self._metric_name = metric_name
self._controller = controller
self._server_ip = server_ip
self._server_port = server_port
self._is_server = is_server
self._retrain_epoch = retrain_epoch
self._search_steps = search_steps
self._max_client_num = max_client_num
self._max_try_times = 100
self._key = key
if self._server_ip is None:
self._server_ip = self._get_host_ip()
def _get_host_ip(self):
return socket.gethostbyname(socket.gethostname())
def on_compression_begin(self, context):
self._current_tokens = context.search_space.init_tokens()
self._controller.reset(context.search_space.range_table(),
self._current_tokens, None)
# create controller server
if self._is_server:
open("./slim_LightNASStrategy_controller_server.socket",
'a').close()
socket_file = open(
"./slim_LightNASStrategy_controller_server.socket", 'r+')
lock(socket_file)
tid = socket_file.readline()
if tid == '':
_logger.info("start controller server...")
self._server = ControllerServer(
controller=self._controller,
address=(self._server_ip, self._server_port),
max_client_num=self._max_client_num,
search_steps=self._search_steps,
key=self._key)
tid = self._server.start()
self._server_port = self._server.port()
socket_file.write(tid)
_logger.info("started controller server...")
unlock(socket_file)
socket_file.close()
_logger.info("self._server_ip: {}; self._server_port: {}".format(
self._server_ip, self._server_port))
# create client
self._search_agent = SearchAgent(
self._server_ip, self._server_port, key=self._key)
def __getstate__(self):
"""Socket can't be pickled."""
d = {}
for key in self.__dict__:
if key not in ["_search_agent", "_server"]:
d[key] = self.__dict__[key]
return d
def on_epoch_begin(self, context):
if context.epoch_id >= self.start_epoch and context.epoch_id <= self.end_epoch and (
self._retrain_epoch == 0 or
(context.epoch_id - self.start_epoch) % self._retrain_epoch == 0):
_logger.info("light nas strategy on_epoch_begin")
min_flops = -1
for _ in range(self._max_try_times):
startup_p, train_p, test_p, _, _, train_reader, test_reader = context.search_space.create_net(
self._current_tokens)
context.eval_graph.program = test_p
flops = context.eval_graph.flops()
if min_flops == -1:
min_flops = flops
min_tokens = self._current_tokens[:]
else:
if flops < min_flops:
min_tokens = self._current_tokens[:]
if self._max_latency > 0:
latency = context.search_space.get_model_latency(test_p)
_logger.info("try [{}] with latency {} flops {}".format(
self._current_tokens, latency, flops))
else:
_logger.info("try [{}] with flops {}".format(
self._current_tokens, flops))
if flops > self._max_flops or (self._max_latency > 0 and
latency > self._max_latency):
self._current_tokens = self._controller.next_tokens(
min_tokens)
else:
break
context.train_reader = train_reader
context.eval_reader = test_reader
exe = Executor(context.place)
exe.run(startup_p)
context.optimize_graph.program = train_p
context.optimize_graph.compile()
context.skip_training = (self._retrain_epoch == 0)
def on_epoch_end(self, context):
if context.epoch_id >= self.start_epoch and context.epoch_id < self.end_epoch and (
self._retrain_epoch == 0 or
(context.epoch_id - self.start_epoch + 1
) % self._retrain_epoch == 0):
self._current_reward = context.eval_results[self._metric_name][-1]
flops = context.eval_graph.flops()
if flops > self._max_flops:
self._current_reward = 0.0
if self._max_latency > 0:
test_p = context.search_space.create_net(self._current_tokens)[
2]
latency = context.search_space.get_model_latency(test_p)
if latency > self._max_latency:
self._current_reward = 0.0
_logger.info("reward: {}; latency: {}; flops: {}; tokens: {}".
format(self._current_reward, latency, flops,
self._current_tokens))
else:
_logger.info("reward: {}; flops: {}; tokens: {}".format(
self._current_reward, flops, self._current_tokens))
self._current_tokens = self._search_agent.update(
self._current_tokens, self._current_reward)
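# Hedged configuration sketch for LightNASStrategy on a host that acts as
# both controller server and search agent. The controller object and the
# numeric budgets are illustrative assumptions.
def example_build_light_nas(controller):
    return LightNASStrategy(
        controller=controller,
        end_epoch=500,
        target_flops=629145600,
        retrain_epoch=1,
        metric_name='top1_acc',
        is_server=True,
        search_steps=300)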
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
__all__ = ['lock', 'unlock']
if os.name == 'nt':
def lock(file):
raise NotImplementedError('Windows is not supported.')
def unlock(file):
raise NotImplementedError('Windows is not supported.')
elif os.name == 'posix':
from fcntl import flock, LOCK_EX, LOCK_UN
def lock(file):
"""Lock the file in local file system."""
flock(file.fileno(), LOCK_EX)
def unlock(file):
"""Unlock the file in local file system."""
flock(file.fileno(), LOCK_UN)
else:
raise RuntimeError("File Locker only support NT and Posix platforms!")
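# Hedged usage sketch (POSIX only, as implemented above): serialize writes to
# a shared file between cooperating processes. The file path is illustrative.
def example_exclusive_write(line):
    f = open('/tmp/slim_example.txt', 'a+')
    lock(f)
    try:
        f.write(line + '\n')
    finally:
        unlock(f)
        f.close()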
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import socket
from ....log_helper import get_logger
__all__ = ['SearchAgent']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class SearchAgent(object):
"""
Search agent.
"""
def __init__(self, server_ip=None, server_port=None, key=None):
"""
Args:
server_ip(str): The ip of the controller server to connect to. Default: None.
server_port(int): The port of the controller server to connect to. Default: None.
key(str): The key used to identify this agent as legal to the controller server. Default: None.
"""
self.server_ip = server_ip
self.server_port = server_port
self.socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._key = key
def update(self, tokens, reward):
"""
Update the controller according to latest tokens and reward.
Args:
tokens(list<int>): The tokens generated in last step.
reward(float): The reward of tokens.
"""
socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
socket_client.connect((self.server_ip, self.server_port))
tokens = ",".join([str(token) for token in tokens])
socket_client.send("{}\t{}\t{}".format(self._key, tokens, reward)
.encode())
tokens = socket_client.recv(1024).decode()
tokens = [int(token) for token in tokens.strip("\n").split(",")]
return tokens
def next_tokens(self):
"""
Get next tokens.
"""
socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
socket_client.connect((self.server_ip, self.server_port))
socket_client.send("next_tokens".encode())
tokens = socket_client.recv(1024).decode()
tokens = [int(token) for token in tokens.strip("\n").split(",")]
return tokens
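# Hedged sketch of the agent-side protocol implemented above: fetch an
# initial token list, then report a reward to receive the next tokens. The
# server address and the reward value are illustrative assumptions.
def example_agent_round_trip():
    agent = SearchAgent('127.0.0.1', 8989, key='light-nas')
    tokens = agent.next_tokens()
    # ... train and evaluate the model described by `tokens` here ...
    reward = 0.91
    return agent.update(tokens, reward)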
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The search space used to search neural architecture"""
__all__ = ['SearchSpace']
class SearchSpace(object):
"""Controller for Neural Architecture Search.
"""
def __init__(self, *args, **kwargs):
pass
def init_tokens(self):
"""Get init tokens in search space.
"""
raise NotImplementedError('Abstract method.')
def range_table(self):
"""Get range table of current search space.
"""
raise NotImplementedError('Abstract method.')
def create_net(self, tokens):
"""Create networks for training and evaluation according to tokens.
Args:
tokens(list<int>): The tokens which represent a network.
Return:
(tuple): startup_program, train_program, evaluation_program, train_metrics, test_metrics, train_reader, test_reader
"""
raise NotImplementedError('Abstract method.')
def get_model_latency(self, program):
"""Get model latency according to program.
Args:
program(Program): The program whose latency is to be measured.
Return:
(float): model latency.
"""
raise NotImplementedError('Abstract method.')
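# Hedged sketch of a concrete search space: each token selects a channel
# count from a fixed list. create_net and get_model_latency are left to the
# user because their bodies depend on the actual network definition.
class ExampleChannelSearchSpace(SearchSpace):
    _choices = [16, 32, 64, 128]

    def init_tokens(self):
        return [0, 0, 0]  # three searchable layers, all starting at 16

    def range_table(self):
        return [len(self._choices)] * 3  # each token ranges over [0, 4)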
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import pruner
from .pruner import *
from . import prune_strategy
from .prune_strategy import *
from . import auto_prune_strategy
from .auto_prune_strategy import *
__all__ = pruner.__all__
__all__ += prune_strategy.__all__
__all__ += auto_prune_strategy.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .prune_strategy import PruneStrategy
import re
import logging
import functools
import copy
from ....log_helper import get_logger
__all__ = ['AutoPruneStrategy']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class AutoPruneStrategy(PruneStrategy):
"""
Automatic pruning strategy.
"""
def __init__(self,
pruner=None,
controller=None,
start_epoch=0,
end_epoch=10,
min_ratio=0.5,
max_ratio=0.7,
metric_name='top1_acc',
pruned_params='conv.*_weights',
retrain_epoch=0,
uniform_range=None,
init_tokens=None):
"""
Args:
pruner(slim.Pruner): The pruner used to prune the parameters. Default: None.
controller(searcher.Controller): The searching controller. Default: None.
start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. Default: 0
end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. Default: 10
min_ratio(float): The minimum pruned ratio. Default: 0.5
max_ratio(float): The maximum pruned ratio. Default: 0.7
metric_name(str): The metric used to evaluate the model.
It should be one of keys in out_nodes of graph wrapper. Default: 'top1_acc'
pruned_params(str): The pattern str to match the parameter names to be pruned. Default: 'conv.*_weights'
retrain_epoch(int): The training epochs in each searching step. Default: 0
uniform_range(int): The token range in each position of tokens generated by controller. None means getting the range automatically. Default: None.
init_tokens(list<int>): The initial tokens. None means getting the initial tokens automatically. Default: None.
"""
super(AutoPruneStrategy, self).__init__(pruner, start_epoch, end_epoch,
0.0, metric_name, pruned_params)
self._max_ratio = max_ratio
self._min_ratio = min_ratio
self._controller = controller
self._metric_name = metric_name
self._pruned_param_names = []
self._retrain_epoch = retrain_epoch
self._uniform_range = uniform_range
self._init_tokens = init_tokens
self._current_tokens = None
def on_compression_begin(self, context):
"""
Prepare some information for searching strategy.
step 1: Find all the parameters to be pruned.
step 2: Get initial tokens and setup controller.
"""
pruned_params = []
for param in context.eval_graph.all_parameters():
if re.match(self.pruned_params, param.name()):
self._pruned_param_names.append(param.name())
if self._init_tokens is not None:
self._current_tokens = self._init_tokens
else:
self._current_tokens = self._get_init_tokens(context)
if self._uniform_range is not None:
self._range_table = [round(self._uniform_range, 2) / 0.01] * len(
self._pruned_param_names)
else:
self._range_table = copy.deepcopy(self._current_tokens)
_logger.info('init tokens: {}'.format(self._current_tokens))
_logger.info("range_table: {}".format(self._range_table))
constrain_func = functools.partial(
self._constrain_func, context=context)
self._controller.reset(self._range_table, self._current_tokens,
constrain_func)
def _constrain_func(self, tokens, context=None):
"""Check whether the tokens meet constraint."""
ori_flops = context.eval_graph.flops()
ratios = self._tokens_to_ratios(tokens)
params = self._pruned_param_names
param_shape_backup = {}
self._prune_parameters(
context.eval_graph,
context.scope,
params,
ratios,
context.place,
only_graph=True,
param_shape_backup=param_shape_backup)
context.eval_graph.update_groups_of_conv()
flops = context.eval_graph.flops()
for param in param_shape_backup.keys():
context.eval_graph.var(param).set_shape(param_shape_backup[param])
flops_ratio = (1 - float(flops) / ori_flops)
if flops_ratio >= self._min_ratio and flops_ratio <= self._max_ratio:
_logger.info("Success try [{}]; flops: -{}".format(tokens,
flops_ratio))
return True
else:
_logger.info("Failed try [{}]; flops: -{}".format(tokens,
flops_ratio))
return False
def _get_init_tokens(self, context):
"""Get initial tokens.
"""
ratios = self._get_uniform_ratios(context)
_logger.info('Get init ratios: {}'.format(
[round(r, 2) for r in ratios]))
return self._ratios_to_tokens(ratios)
def _ratios_to_tokens(self, ratios):
"""Convert pruned ratios to tokens.
"""
return [int(ratio / 0.01) for ratio in ratios]
def _tokens_to_ratios(self, tokens):
"""Convert tokens to pruned ratios.
"""
return [token * 0.01 for token in tokens]
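    # Worked example (hedged): ratios [0.35, 0.50] map to tokens [35, 50] and
    # back, since each token counts hundredths of a pruned ratio.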
def _get_uniform_ratios(self, context):
"""
Search a group of uniform ratios.
"""
min_ratio = 0.
max_ratio = 1.
target = (self._min_ratio + self._max_ratio) / 2
flops = context.eval_graph.flops()
model_size = context.eval_graph.numel_params()
ratios = None
while min_ratio < max_ratio:
ratio = (max_ratio + min_ratio) / 2
ratios = [ratio] * len(self._pruned_param_names)
param_shape_backup = {}
self._prune_parameters(
context.eval_graph,
context.scope,
self._pruned_param_names,
ratios,
context.place,
only_graph=True,
param_shape_backup=param_shape_backup)
pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
pruned_size = 1 - (float(context.eval_graph.numel_params()) /
model_size)
for param in param_shape_backup.keys():
context.eval_graph.var(param).set_shape(param_shape_backup[
param])
if abs(pruned_flops - target) < 1e-2:
break
if pruned_flops > target:
max_ratio = ratio
else:
min_ratio = ratio
_logger.info('Get ratios: {}'.format([round(r, 2) for r in ratios]))
return ratios
def on_epoch_begin(self, context):
"""
step 1: Get new tokens from the controller.
step 2: Prune eval_graph and optimize_graph according to the tokens.
"""
if context.epoch_id >= self.start_epoch and context.epoch_id <= self.end_epoch and (
self._retrain_epoch == 0 or
(context.epoch_id - self.start_epoch) % self._retrain_epoch == 0):
_logger.info("on_epoch_begin")
params = self._pruned_param_names
ratios = self._tokens_to_ratios(self._current_tokens)
self._param_shape_backup = {}
self._param_backup = {}
self._prune_parameters(
context.optimize_graph,
context.scope,
params,
ratios,
context.place,
param_backup=self._param_backup,
param_shape_backup=self._param_shape_backup)
self._prune_graph(context.eval_graph, context.optimize_graph)
context.optimize_graph.update_groups_of_conv()
context.eval_graph.update_groups_of_conv()
context.optimize_graph.compile(
mem_opt=False) # to update the compiled program
context.skip_training = (self._retrain_epoch == 0)
def on_epoch_end(self, context):
"""
step 1: Get reward of current tokens and update controller.
step 2: Restore eval_graph and optimize_graph
"""
if context.epoch_id >= self.start_epoch and context.epoch_id < self.end_epoch and (
self._retrain_epoch == 0 or
(context.epoch_id - self.start_epoch + 1
) % self._retrain_epoch == 0):
_logger.info("on_epoch_end")
reward = context.eval_results[self._metric_name][-1]
self._controller.update(self._current_tokens, reward)
self._current_tokens = self._controller.next_tokens()
# restore pruned parameters
for param_name in self._param_backup.keys():
param_t = context.scope.find_var(param_name).get_tensor()
param_t.set(self._param_backup[param_name], context.place)
self._param_backup = {}
# restore shape of parameters
for param in self._param_shape_backup.keys():
context.optimize_graph.var(param).set_shape(
self._param_shape_backup[param])
self._param_shape_backup = {}
self._prune_graph(context.eval_graph, context.optimize_graph)
context.optimize_graph.update_groups_of_conv()
context.eval_graph.update_groups_of_conv()
context.optimize_graph.compile(
mem_opt=False) # to update the compiled program
elif context.epoch_id == self.end_epoch: # restore graph for final training
# restore pruned parameters
for param_name in self._param_backup.keys():
param_t = context.scope.find_var(param_name).get_tensor()
param_t.set(self._param_backup[param_name], context.place)
# restore shape of parameters
for param in self._param_shape_backup.keys():
context.eval_graph.var(param).set_shape(
self._param_shape_backup[param])
context.optimize_graph.var(param).set_shape(
self._param_shape_backup[param])
context.optimize_graph.update_groups_of_conv()
context.eval_graph.update_groups_of_conv()
params, ratios = self._get_prune_ratios(
self._controller._best_tokens)
self._prune_parameters(context.optimize_graph, context.scope,
params, ratios, context.place)
self._prune_graph(context.eval_graph, context.optimize_graph)
context.optimize_graph.update_groups_of_conv()
context.eval_graph.update_groups_of_conv()
context.optimize_graph.compile(
mem_opt=True) # to update the compiled program
context.skip_training = False
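# Hedged sketch of the bisection idea used by _get_uniform_ratios above:
# search for one uniform ratio whose induced FLOPs reduction hits the target.
# `flops_after_pruning` is a stand-in for the prune-then-flops() round trip.
def example_bisect_uniform_ratio(flops_after_pruning, base_flops, target,
                                 tol=1e-2):
    lo, hi = 0.0, 1.0
    ratio = 0.5
    for _ in range(50):  # 50 halvings are ample for a 1e-2 tolerance
        ratio = (lo + hi) / 2
        pruned = 1 - float(flops_after_pruning(ratio)) / base_flops
        if abs(pruned - target) < tol:
            break
        if pruned > target:
            hi = ratio
        else:
            lo = ratio
    return ratio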
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..core.strategy import Strategy
from ..graph import VarWrapper, OpWrapper, GraphWrapper
from ....framework import Program, program_guard, Parameter
from ....log_helper import get_logger
from .... import layers
import prettytable as pt
import numpy as np
from scipy.optimize import leastsq
import copy
import re
import os
import pickle
import logging
import sys
__all__ = ['SensitivePruneStrategy', 'UniformPruneStrategy', 'PruneStrategy']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class PruneStrategy(Strategy):
"""
The base class of all pruning strategies.
"""
def __init__(self,
pruner=None,
start_epoch=0,
end_epoch=0,
target_ratio=0.5,
metric_name=None,
pruned_params='conv.*_weights'):
"""
Args:
pruner(slim.Pruner): The pruner used to prune the parameters.
start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
target_ratio(float): The flops ratio to be pruned from current model.
metric_name(str): The metric used to evaluate the model.
It should be one of keys in out_nodes of graph wrapper.
pruned_params(str): The pattern str to match the parameter names to be pruned.
"""
super(PruneStrategy, self).__init__(start_epoch, end_epoch)
self.pruner = pruner
self.target_ratio = target_ratio
self.metric_name = metric_name
self.pruned_params = pruned_params
self.pruned_list = []
def _eval_graph(self, context, sampled_rate=None, cached_id=0):
"""
Evaluate the current model in context.
Args:
context(slim.core.Context): The context storing all information used to evaluate the current model.
sampled_rate(float): The sampled rate used to sample partial data for evaluation. None means using all data in eval_reader. default: None.
cached_id(int): The id of dataset sampled. Evaluations with same cached_id use the same sampled dataset. default: 0.
"""
results, names = context.run_eval_graph(sampled_rate, cached_id)
metric = np.mean(results[list(names).index(self.metric_name)])
return metric
def _prune_filters_by_ratio(self,
scope,
params,
ratio,
place,
lazy=False,
only_graph=False,
param_shape_backup=None,
param_backup=None):
"""
        Prune filters by the given ratio.
        Args:
            scope(fluid.core.Scope): The scope used to prune filters.
params(list<VarWrapper>): A list of filter parameters.
ratio(float): The ratio to be pruned.
place(fluid.Place): The device place of filter parameters.
lazy(bool): True means setting the pruned elements to zero.
False means cutting down the pruned elements.
only_graph(bool): True means only modifying the graph.
False means modifying graph and variables in scope.
"""
if params[0].name() in self.pruned_list[0]:
return
param_t = scope.find_var(params[0].name()).get_tensor()
pruned_idx = self.pruner.cal_pruned_idx(
params[0].name(), np.array(param_t), ratio, axis=0)
for param in params:
assert isinstance(param, VarWrapper)
param_t = scope.find_var(param.name()).get_tensor()
if param_backup is not None and (param.name() not in param_backup):
param_backup[param.name()] = copy.deepcopy(np.array(param_t))
pruned_param = self.pruner.prune_tensor(
np.array(param_t), pruned_idx, pruned_axis=0, lazy=lazy)
if not only_graph:
param_t.set(pruned_param, place)
ori_shape = param.shape()
if param_shape_backup is not None and (
param.name() not in param_shape_backup):
param_shape_backup[param.name()] = copy.deepcopy(param.shape())
new_shape = list(param.shape())
new_shape[0] = pruned_param.shape[0]
param.set_shape(new_shape)
_logger.debug(
'|----------------------------------------+----+------------------------------+------------------------------|'
)
_logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format(
str(param.name()),
str(ratio), str(ori_shape), str(param.shape())))
self.pruned_list[0].append(param.name())
return pruned_idx
def _prune_parameter_by_idx(self,
scope,
params,
pruned_idx,
pruned_axis,
place,
lazy=False,
only_graph=False,
param_shape_backup=None,
param_backup=None):
"""
        Prune parameters along the given axis.
        Args:
            scope(fluid.core.Scope): The scope storing parameters to be pruned.
            params(list<VarWrapper>): The parameters to be pruned.
pruned_idx(list): The index of elements to be pruned.
pruned_axis(int): The pruning axis.
place(fluid.Place): The device place of filter parameters.
lazy(bool): True means setting the pruned elements to zero.
False means cutting down the pruned elements.
only_graph(bool): True means only modifying the graph.
False means modifying graph and variables in scope.
"""
if params[0].name() in self.pruned_list[pruned_axis]:
return
for param in params:
assert isinstance(param, VarWrapper)
param_t = scope.find_var(param.name()).get_tensor()
if param_backup is not None and (param.name() not in param_backup):
param_backup[param.name()] = copy.deepcopy(np.array(param_t))
pruned_param = self.pruner.prune_tensor(
np.array(param_t), pruned_idx, pruned_axis, lazy=lazy)
if not only_graph:
param_t.set(pruned_param, place)
ori_shape = param.shape()
if param_shape_backup is not None and (
param.name() not in param_shape_backup):
param_shape_backup[param.name()] = copy.deepcopy(param.shape())
new_shape = list(param.shape())
new_shape[pruned_axis] = pruned_param.shape[pruned_axis]
param.set_shape(new_shape)
_logger.debug(
'|----------------------------------------+----+------------------------------+------------------------------|'
)
_logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format(
str(param.name()),
str(pruned_axis), str(ori_shape), str(param.shape())))
self.pruned_list[pruned_axis].append(param.name())
def _forward_search_related_op(self, graph, param):
"""
        Search forward for the operators that will be affected by the pruning of param.
Args:
graph(GraphWrapper): The graph to be searched.
param(VarWrapper): The current pruned parameter.
Returns:
list<OpWrapper>: A list of operators.
"""
assert isinstance(param, VarWrapper)
visited = {}
for op in graph.ops():
visited[op.idx()] = False
stack = []
for op in graph.ops():
if (not op.is_bwd_op()) and (param in op.all_inputs()):
stack.append(op)
visit_path = []
while len(stack) > 0:
top_op = stack[len(stack) - 1]
if visited[top_op.idx()] == False:
visit_path.append(top_op)
visited[top_op.idx()] = True
next_ops = None
if top_op.type() == "conv2d" and param not in top_op.all_inputs():
next_ops = None
elif top_op.type() == "mul":
next_ops = None
else:
next_ops = self._get_next_unvisited_op(graph, visited, top_op)
            if next_ops is None:
stack.pop()
else:
stack += next_ops
return visit_path
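    # Note: the walk above is a depth-first search over the forward graph; a
    # downstream conv2d that owns its own filter, or a mul op, is recorded in
    # the visit path but not expanded further, so the pruning effect stops at
    # the next layer that owns weights.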
def _get_next_unvisited_op(self, graph, visited, top_op):
"""
        Get the next unvisited adjacent operators of the given operator.
        Args:
            graph(GraphWrapper): The graph used to search.
            visited(dict): The ids of operators that have been visited.
            top_op: The given operator.
Returns:
list<OpWrapper>: A list of operators.
"""
assert isinstance(top_op, OpWrapper)
next_ops = []
for op in graph.next_ops(top_op):
if (visited[op.idx()] == False) and (not op.is_bwd_op()):
next_ops.append(op)
return next_ops if len(next_ops) > 0 else None
def _get_accumulator(self, graph, param):
"""
        Get the accumulators of the given parameter. The accumulators are created by the optimizer.
Args:
graph(GraphWrapper): The graph used to search.
param(VarWrapper): The given parameter.
Returns:
list<VarWrapper>: A list of accumulators which are variables.
"""
assert isinstance(param, VarWrapper)
params = []
for op in param.outputs():
if op.is_opt_op():
for out_var in op.all_outputs():
if graph.is_persistable(out_var) and out_var.name(
) != param.name():
params.append(out_var)
return params
    def _forward_pruning_related_params(self,
graph,
scope,
param,
place,
ratio=None,
pruned_idxs=None,
lazy=False,
only_graph=False,
param_backup=None,
param_shape_backup=None):
"""
        Prune all the parameters affected by the pruning of the given parameter.
        Args:
            graph(GraphWrapper): The graph to be searched.
            scope(fluid.core.Scope): The scope storing parameters to be pruned.
            param(VarWrapper): The given parameter.
            place(fluid.Place): The device place of filter parameters.
            ratio(float): The target ratio to be pruned.
            pruned_idxs(list): The indexes of elements to be pruned.
lazy(bool): True means setting the pruned elements to zero.
False means cutting down the pruned elements.
only_graph(bool): True means only modifying the graph.
False means modifying graph and variables in scope.
"""
assert isinstance(
graph,
GraphWrapper), "graph must be instance of slim.core.GraphWrapper"
assert isinstance(
param, VarWrapper), "param must be instance of slim.core.VarWrapper"
if param.name() in self.pruned_list[0]:
return
related_ops = self._forward_search_related_op(graph, param)
if ratio is None:
assert pruned_idxs is not None
self._prune_parameter_by_idx(
scope, [param] + self._get_accumulator(graph, param),
pruned_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
else:
pruned_idxs = self._prune_filters_by_ratio(
scope, [param] + self._get_accumulator(graph, param),
ratio,
place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
corrected_idxs = pruned_idxs[:]
for idx, op in enumerate(related_ops):
if op.type() == "conv2d" and (param not in op.all_inputs()):
for in_var in op.all_inputs():
if graph.is_parameter(in_var):
conv_param = in_var
self._prune_parameter_by_idx(
scope, [conv_param] + self._get_accumulator(
graph, conv_param),
corrected_idxs,
pruned_axis=1,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
if op.type() == "depthwise_conv2d":
for in_var in op.all_inputs():
if graph.is_parameter(in_var):
conv_param = in_var
self._prune_parameter_by_idx(
scope, [conv_param] + self._get_accumulator(
graph, conv_param),
corrected_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
elif op.type() == "elementwise_add":
# pruning bias
for in_var in op.all_inputs():
if graph.is_parameter(in_var):
bias_param = in_var
self._prune_parameter_by_idx(
scope, [bias_param] + self._get_accumulator(
graph, bias_param),
pruned_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
elif op.type() == "mul": # pruning fc layer
fc_input = None
fc_param = None
for in_var in op.all_inputs():
if graph.is_parameter(in_var):
fc_param = in_var
else:
fc_input = in_var
idx = []
feature_map_size = fc_input.shape()[2] * fc_input.shape()[3]
range_idx = np.array(range(feature_map_size))
for i in corrected_idxs:
idx += list(range_idx + i * feature_map_size)
corrected_idxs = idx
self._prune_parameter_by_idx(
scope, [fc_param] + self._get_accumulator(graph, fc_param),
corrected_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
elif op.type() == "concat":
concat_inputs = op.all_inputs()
last_op = related_ops[idx - 1]
for out_var in last_op.all_outputs():
if out_var in concat_inputs:
concat_idx = concat_inputs.index(out_var)
offset = 0
for ci in range(concat_idx):
offset += concat_inputs[ci].shape()[1]
corrected_idxs = [x + offset for x in pruned_idxs]
elif op.type() == "batch_norm":
bn_inputs = op.all_inputs()
mean = bn_inputs[2]
variance = bn_inputs[3]
alpha = bn_inputs[0]
beta = bn_inputs[1]
self._prune_parameter_by_idx(
scope, [mean] + self._get_accumulator(graph, mean),
corrected_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
self._prune_parameter_by_idx(
scope, [variance] + self._get_accumulator(graph, variance),
corrected_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
self._prune_parameter_by_idx(
scope, [alpha] + self._get_accumulator(graph, alpha),
corrected_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
self._prune_parameter_by_idx(
scope, [beta] + self._get_accumulator(graph, beta),
corrected_idxs,
pruned_axis=0,
place=place,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
def _prune_parameters(self,
graph,
scope,
params,
ratios,
place,
lazy=False,
only_graph=False,
param_backup=None,
param_shape_backup=None):
"""
        Prune the given parameters.
        Args:
            graph(GraphWrapper): The graph to be searched.
            scope(fluid.core.Scope): The scope storing parameters to be pruned.
            params(list<str>): A list of parameter names to be pruned.
            ratios(list<float>): A list of ratios used to prune the parameters.
            place(fluid.Place): The device place of filter parameters.
lazy(bool): True means setting the pruned elements to zero.
False means cutting down the pruned elements.
only_graph(bool): True means only modifying the graph.
False means modifying graph and variables in scope.
"""
_logger.debug('\n################################')
_logger.debug('# pruning parameters #')
_logger.debug('################################\n')
_logger.debug(
'|----------------------------------------+----+------------------------------+------------------------------|'
)
_logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format('parameter', 'axis',
'from', 'to'))
assert len(params) == len(ratios)
self.pruned_list = [[], []]
for param, ratio in zip(params, ratios):
            assert isinstance(param, str) or (sys.version_info[0] == 2 and
                                              isinstance(param, unicode))
param = graph.var(param)
            self._forward_pruning_related_params(
graph,
scope,
param,
place,
ratio=ratio,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
ops = param.outputs()
for op in ops:
if op.type() == 'conv2d':
brother_ops = self._search_brother_ops(graph, op)
                    for brother in brother_ops:
                        for p in graph.get_param_by_op(brother):
                            self._forward_pruning_related_params(
graph,
scope,
p,
place,
ratio=ratio,
lazy=lazy,
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
_logger.debug(
'|----------------------------------------+----+------------------------------+------------------------------|'
)
def _search_brother_ops(self, graph, op_node):
"""
        Search the brother operators that are affected by the pruning of the given operator.
Args:
graph(GraphWrapper): The graph to be searched.
op_node(OpWrapper): The start node for searching.
Returns:
            list<OpWrapper>: A list of operators.
"""
visited = [op_node.idx()]
stack = []
brothers = []
for op in graph.next_ops(op_node):
            if (op.type() != 'conv2d') and (op.type() != 'fc') and (
                    not op.is_bwd_op()):
stack.append(op)
visited.append(op.idx())
while len(stack) > 0:
top_op = stack.pop()
for parent in graph.pre_ops(top_op):
                if parent.idx() not in visited and (not parent.is_bwd_op()):
                    if (parent.type() == 'conv2d') or (parent.type() == 'fc'):
brothers.append(parent)
else:
stack.append(parent)
visited.append(parent.idx())
for child in graph.next_ops(top_op):
                if (child.type() != 'conv2d') and (child.type() != 'fc') and (
                        child.idx() not in visited) and (
                            not child.is_bwd_op()):
stack.append(child)
visited.append(child.idx())
return brothers
def _prune_graph(self, graph, target_graph):
"""
        Prune the parameters of graph according to the target graph.
Args:
graph(GraphWrapper): The graph to be pruned.
target_graph(GraphWrapper): The reference graph.
Return: None
"""
count = 1
_logger.debug(
'|----+----------------------------------------+------------------------------+------------------------------|'
)
        _logger.debug('|{:^4}|{:^40}|{:^30}|{:^30}|'.format('id', 'parameter',
'from', 'to'))
for param in target_graph.all_parameters():
var = graph.var(param.name())
ori_shape = var.shape()
var.set_shape(param.shape())
_logger.debug(
'|----+----------------------------------------+------------------------------+------------------------------|'
)
_logger.debug('|{:^4}|{:^40}|{:^30}|{:^30}|'.format(
str(count),
str(param.name()), str(ori_shape), str(param.shape())))
count += 1
_logger.debug(
'|----+----------------------------------------+------------------------------+------------------------------|'
)
class UniformPruneStrategy(PruneStrategy):
"""
    The uniform pruning strategy. All parameters are pruned with the same ratio.
"""
def __init__(self,
pruner=None,
start_epoch=0,
end_epoch=0,
target_ratio=0.5,
metric_name=None,
pruned_params='conv.*_weights'):
"""
Args:
pruner(slim.Pruner): The pruner used to prune the parameters.
start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
            target_ratio(float): The flops ratio to be pruned from the current model.
metric_name(str): The metric used to evaluate the model.
It should be one of keys in out_nodes of graph wrapper.
pruned_params(str): The pattern str to match the parameter names to be pruned.
"""
super(UniformPruneStrategy, self).__init__(pruner, start_epoch,
end_epoch, target_ratio,
metric_name, pruned_params)
def _get_best_ratios(self, context):
"""
Search a group of ratios for pruning target flops.
"""
_logger.info('_get_best_ratios')
pruned_params = []
for param in context.eval_graph.all_parameters():
if re.match(self.pruned_params, param.name()):
pruned_params.append(param.name())
min_ratio = 0.
max_ratio = 1.
flops = context.eval_graph.flops()
model_size = context.eval_graph.numel_params()
while min_ratio < max_ratio:
ratio = (max_ratio + min_ratio) / 2
_logger.debug(
'-----------Try pruning ratio: {:.2f}-----------'.format(ratio))
ratios = [ratio] * len(pruned_params)
param_shape_backup = {}
self._prune_parameters(
context.eval_graph,
context.scope,
pruned_params,
ratios,
context.place,
only_graph=True,
param_shape_backup=param_shape_backup)
pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
pruned_size = 1 - (float(context.eval_graph.numel_params()) /
model_size)
_logger.debug('Pruned flops: {:.2f}'.format(pruned_flops))
_logger.debug('Pruned model size: {:.2f}'.format(pruned_size))
for param in param_shape_backup.keys():
context.eval_graph.var(param).set_shape(param_shape_backup[
param])
if abs(pruned_flops - self.target_ratio) < 1e-2:
break
if pruned_flops > self.target_ratio:
max_ratio = ratio
else:
min_ratio = ratio
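        # The loop above is a plain binary search on one uniform ratio:
        # pruning more filters monotonically removes more flops, so a ratio
        # hitting the target reduction within 1e-2 is found in a few steps.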
_logger.info('Get ratios: {}'.format([round(r, 2) for r in ratios]))
return pruned_params, ratios
def restore_from_checkpoint(self, context):
self._prune(context, self.params, self.ratios)
def _prune(self, context, params, ratios):
self._prune_parameters(context.optimize_graph, context.scope, params,
ratios, context.place)
model_size = context.eval_graph.numel_params()
flops = context.eval_graph.flops()
_logger.debug('\n################################')
_logger.debug('# pruning eval graph #')
_logger.debug('################################\n')
self._prune_graph(context.eval_graph, context.optimize_graph)
context.optimize_graph.update_groups_of_conv()
context.eval_graph.update_groups_of_conv()
_logger.info(
'------------------finish pruning--------------------------------')
_logger.info('Pruned size: {:.2f}'.format(1 - (float(
context.eval_graph.numel_params()) / model_size)))
_logger.info('Pruned flops: {:.2f}'.format(1 - (float(
context.eval_graph.flops()) / flops)))
def on_epoch_begin(self, context):
if context.epoch_id == self.start_epoch:
params, ratios = self._get_best_ratios(context)
self.params = params
self.ratios = ratios
self._prune(context, params, ratios)
_logger.info(
                '------------------UniformPruneStrategy.on_epoch_begin finish--------------------------------'
)
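# A minimal construction sketch for UniformPruneStrategy (hedged: the import
# path, pruner and epoch settings below are assumptions; strategies are
# normally instantiated from a compression config file rather than by hand):
#
#     from paddle.fluid.contrib.slim.prune import StructurePruner
#     pruner = StructurePruner(pruning_axis={'*': 0},
#                              criterions={'*': 'l1_norm'})
#     strategy = UniformPruneStrategy(pruner=pruner, start_epoch=0,
#                                     end_epoch=0, target_ratio=0.5,
#                                     metric_name='top1_acc')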
class SensitivePruneStrategy(PruneStrategy):
"""
    Sensitivity-based pruning strategy. A different pruning ratio is applied to each layer.
"""
def __init__(self,
pruner=None,
start_epoch=0,
end_epoch=0,
delta_rate=0.20,
target_ratio=0.5,
metric_name='top1_acc',
pruned_params='conv.*_weights',
sensitivities_file='./sensitivities.data',
sensitivities={},
num_steps=1,
eval_rate=None):
"""
Args:
pruner(slim.Pruner): The pruner used to prune the parameters.
start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0.
            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0.
delta_rate(float): The delta used to generate ratios when calculating sensitivities. default: 0.2
target_ratio(float): The flops ratio to be pruned from current model. default: 0.5
metric_name(str): The metric used to evaluate the model.
It should be one of keys in out_nodes of graph wrapper. default: 'top1_acc'
pruned_params(str): The pattern str to match the parameter names to be pruned. default: 'conv.*_weights'.
sensitivities_file(str): The sensitivities file. default: './sensitivities.data'
sensitivities(dict): The user-defined sensitivities. default: {}.
num_steps(int): The number of pruning steps. default: 1.
eval_rate(float): The rate of sampled data used to calculate sensitivities.
None means using all the data. default: None.
"""
super(SensitivePruneStrategy, self).__init__(pruner, start_epoch,
end_epoch, target_ratio,
metric_name, pruned_params)
self.delta_rate = delta_rate
self.pruned_list = []
self.sensitivities = sensitivities
self.sensitivities_file = sensitivities_file
self.num_steps = num_steps
self.eval_rate = eval_rate
self.pruning_step = 1 - pow((1 - target_ratio), 1.0 / self.num_steps)
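        # Compounding check: pruning `pruning_step` of the remaining flops in
        # each of the `num_steps` rounds keeps
        # (1 - pruning_step) ** num_steps == 1 - target_ratio overall, e.g.
        # target_ratio=0.5 with num_steps=2 gives pruning_step ~= 0.293.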
def _save_sensitivities(self, sensitivities, sensitivities_file):
"""
Save sensitivities into file.
"""
with open(sensitivities_file, 'wb') as f:
pickle.dump(sensitivities, f)
def _load_sensitivities(self, sensitivities_file):
"""
Load sensitivities from file.
"""
sensitivities = {}
if sensitivities_file and os.path.exists(sensitivities_file):
with open(sensitivities_file, 'rb') as f:
if sys.version_info < (3, 0):
sensitivities = pickle.load(f)
else:
sensitivities = pickle.load(f, encoding='bytes')
for param in sensitivities:
sensitivities[param]['pruned_percent'] = [
round(p, 2) for p in sensitivities[param]['pruned_percent']
]
self._format_sensitivities(sensitivities)
return sensitivities
def _format_sensitivities(self, sensitivities):
"""
        Print formatted sensitivities at debug log level.
"""
tb = pt.PrettyTable()
tb.field_names = ["parameter", "size"] + [
str(round(i, 2))
for i in np.arange(self.delta_rate, 1, self.delta_rate)
]
for param in sensitivities:
if len(sensitivities[param]['loss']) == (len(tb.field_names) - 2):
tb.add_row([param, sensitivities[param]['size']] + [
round(loss, 2) for loss in sensitivities[param]['loss']
])
_logger.debug('\n################################')
_logger.debug('# sensitivities table #')
_logger.debug('################################\n')
_logger.debug(tb)
def _compute_sensitivities(self, context):
"""
        Compute the sensitivities of all parameters.
"""
_logger.info("calling _compute_sensitivities.")
cached_id = np.random.randint(1000)
if self.start_epoch == context.epoch_id:
sensitivities_file = self.sensitivities_file
else:
sensitivities_file = self.sensitivities_file + ".epoch" + str(
context.epoch_id)
sensitivities = self._load_sensitivities(sensitivities_file)
for param in context.eval_graph.all_parameters():
if not re.match(self.pruned_params, param.name()):
continue
if param.name() not in sensitivities:
sensitivities[param.name()] = {
'pruned_percent': [],
'loss': [],
'size': param.shape()[0]
}
metric = None
        for param in list(sensitivities.keys()):  # new keys may be added below
ratio = self.delta_rate
while ratio < 1:
ratio = round(ratio, 2)
if ratio in sensitivities[param]['pruned_percent']:
                    _logger.debug('{}, {} has been computed.'.format(param, ratio))
ratio += self.delta_rate
continue
if metric is None:
metric = self._eval_graph(context, self.eval_rate,
cached_id)
param_backup = {}
# prune parameter by ratio
self._prune_parameters(
context.eval_graph,
context.scope, [param], [ratio],
context.place,
lazy=True,
param_backup=param_backup)
# get accuracy after pruning and update self.sensitivities
pruned_metric = self._eval_graph(context, self.eval_rate,
cached_id)
loss = metric - pruned_metric
_logger.info("pruned param: {}; {}; loss={}".format(
param, ratio, loss))
for brother in self.pruned_list[0]:
if re.match(self.pruned_params, brother):
if brother not in sensitivities:
sensitivities[brother] = {
'pruned_percent': [],
'loss': []
}
sensitivities[brother]['pruned_percent'].append(ratio)
sensitivities[brother]['loss'].append(loss)
self._save_sensitivities(sensitivities, sensitivities_file)
# restore pruned parameters
for param_name in param_backup.keys():
param_t = context.scope.find_var(param_name).get_tensor()
param_t.set(param_backup[param_name], context.place)
# pruned_metric = self._eval_graph(context)
ratio += self.delta_rate
return sensitivities
def _get_best_ratios(self, context, sensitivities, target_ratio):
"""
Search a group of ratios for pruning target flops.
"""
        _logger.info('_get_best_ratios for pruning ratio: {}'.format(
target_ratio))
def func(params, x):
a, b, c, d = params
return a * x * x * x + b * x * x + c * x + d
def error(params, x, y):
return func(params, x) - y
        def solve_coefficient(x, y):
init_coefficient = [10, 10, 10, 10]
coefficient, loss = leastsq(error, init_coefficient, args=(x, y))
return coefficient
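        # Toy sanity check of the cubic fit above (hypothetical data): fitting
        # y = x**3 on x in [0.1, 0.9] should recover coefficients close to
        # [1, 0, 0, 0], e.g.
        #   x = np.linspace(0.1, 0.9, 9); solve_coefficient(x, x ** 3)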
min_loss = 0.
max_loss = 0.
# step 1: fit curve by sensitivities
coefficients = {}
for param in sensitivities:
losses = np.array([0] * 5 + sensitivities[param]['loss'])
            percents = np.array([0] * 5 + sensitivities[param][
                'pruned_percent'])
            coefficients[param] = solve_coefficient(percents, losses)
loss = np.max(losses)
max_loss = np.max([max_loss, loss])
# step 2: Find a group of ratios by binary searching.
flops = context.eval_graph.flops()
model_size = context.eval_graph.numel_params()
ratios = []
while min_loss < max_loss:
loss = (max_loss + min_loss) / 2
_logger.info(
'-----------Try pruned ratios while acc loss={:.4f}-----------'.
format(loss))
ratios = []
# step 2.1: Get ratios according to current loss
for param in sensitivities:
coefficient = copy.deepcopy(coefficients[param])
coefficient[-1] = coefficient[-1] - loss
roots = np.roots(coefficient)
                # keep the smallest real root in (0, 1): the lowest pruning
                # ratio whose predicted accuracy loss reaches the current
                # threshold; fall back to 1.0 when no such root exists
                selected_root = 1.0
                for root in roots:
                    if np.isreal(root) and 0 < root.real < 1:
                        selected_root = min(root.real, selected_root)
                ratios.append(selected_root)
_logger.info('Pruned ratios={}'.format(
[round(ratio, 3) for ratio in ratios]))
# step 2.2: Pruning by current ratios
param_shape_backup = {}
self._prune_parameters(
context.eval_graph,
context.scope,
sensitivities.keys(),
ratios,
context.place,
only_graph=True,
param_shape_backup=param_shape_backup)
pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
pruned_size = 1 - (float(context.eval_graph.numel_params()) /
model_size)
_logger.info('Pruned flops: {:.4f}'.format(pruned_flops))
_logger.info('Pruned model size: {:.4f}'.format(pruned_size))
for param in param_shape_backup.keys():
context.eval_graph.var(param).set_shape(param_shape_backup[
param])
# step 2.3: Check whether current ratios is enough
if abs(pruned_flops - target_ratio) < 0.015:
break
if pruned_flops > target_ratio:
max_loss = loss
else:
min_loss = loss
return sensitivities.keys(), ratios
def _current_pruning_target(self, context):
'''
Get the target pruning rate in current epoch.
'''
_logger.info('Left number of pruning steps: {}'.format(self.num_steps))
if self.num_steps <= 0:
return None
if (self.start_epoch == context.epoch_id) or context.eval_converged(
self.metric_name, 0.005):
self.num_steps -= 1
return self.pruning_step
def on_epoch_begin(self, context):
current_ratio = self._current_pruning_target(context)
if current_ratio is not None:
sensitivities = self._compute_sensitivities(context)
params, ratios = self._get_best_ratios(context, sensitivities,
current_ratio)
self._prune_parameters(context.optimize_graph, context.scope,
params, ratios, context.place)
model_size = context.eval_graph.numel_params()
flops = context.eval_graph.flops()
_logger.debug('################################')
_logger.debug('# pruning eval graph #')
_logger.debug('################################')
self._prune_graph(context.eval_graph, context.optimize_graph)
context.optimize_graph.update_groups_of_conv()
context.eval_graph.update_groups_of_conv()
context.optimize_graph.compile() # to update the compiled program
context.eval_graph.compile(
for_parallel=False,
for_test=True) # to update the compiled program
_logger.info(
'------------------finish pruning--------------------------------'
)
_logger.info('Pruned size: {:.3f}'.format(1 - (float(
context.eval_graph.numel_params()) / model_size)))
_logger.info('Pruned flops: {:.3f}'.format(1 - (float(
context.eval_graph.flops()) / flops)))
metric = self._eval_graph(context)
_logger.info('Metric after pruning: {:.2f}'.format(metric))
_logger.info(
'------------------SensitivePruneStrategy.on_epoch_begin finish--------------------------------'
)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import collections
from .... import layers
__all__ = ['Pruner', 'StructurePruner']
class Pruner(object):
"""
Base class of all pruners.
"""
def __init__(self):
pass
def prune(self, param):
pass
class StructurePruner(Pruner):
"""
    Pruner used to prune parameters by groups.
"""
def __init__(self, pruning_axis, criterions):
"""
Args:
pruning_axis(dict): The key is the name of parameter to be pruned,
'*' means all the parameters.
The value is the axis to be used. Given a parameter
                            with shape [3, 4], the result of pruning 50% on axis 1
is a parameter with shape [3, 2].
criterions(dict): The key is the name of parameter to be pruned,
'*' means all the parameters.
The value is the criterion used to sort groups for pruning.
It only supports 'l1_norm' currently.
"""
self.pruning_axis = pruning_axis
self.criterions = criterions
def cal_pruned_idx(self, name, param, ratio, axis=None):
"""
        Calculate the indexes to be pruned on the given axis by the given pruning ratio.
Args:
name(str): The name of parameter to be pruned.
param(np.array): The data of parameter to be pruned.
ratio(float): The ratio to be pruned.
axis(int): The axis to be used for pruning given parameter.
If it is None, the value in self.pruning_axis will be used.
default: None.
Returns:
list<int>: The indexes to be pruned on axis.
"""
criterion = self.criterions[
name] if name in self.criterions else self.criterions['*']
if axis is None:
            assert self.pruning_axis is not None, "pruning_axis should be set if axis is None."
axis = self.pruning_axis[
name] if name in self.pruning_axis else self.pruning_axis['*']
prune_num = int(round(param.shape[axis] * ratio))
reduce_dims = [i for i in range(len(param.shape)) if i != axis]
if criterion == 'l1_norm':
criterions = np.sum(np.abs(param), axis=tuple(reduce_dims))
pruned_idx = criterions.argsort()[:prune_num]
return pruned_idx
def prune_tensor(self, tensor, pruned_idx, pruned_axis, lazy=False):
"""
        Prune an array by indexes on the given axis.
Args:
tensor(numpy.array): The target array to be pruned.
pruned_idx(list<int>): The indexes to be pruned.
pruned_axis(int): The axis of given array to be pruned on.
lazy(bool): True means setting the pruned elements to zero.
False means remove the pruned elements from memory.
default: False.
Returns:
numpy.array: The pruned array.
"""
mask = np.zeros(tensor.shape[pruned_axis], dtype=bool)
mask[pruned_idx] = True
def func(data):
return data[~mask]
def lazy_func(data):
data[mask] = 0
return data
if lazy:
return np.apply_along_axis(lazy_func, pruned_axis, tensor)
else:
return np.apply_along_axis(func, pruned_axis, tensor)
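# A minimal usage sketch of StructurePruner (hedged: the weight array below is
# a stand-in for a conv filter tensor that is normally fetched from a scope):
#
#     import numpy as np
#     weight = np.random.rand(8, 4, 3, 3).astype('float32')
#     pruner = StructurePruner(pruning_axis={'*': 0},
#                              criterions={'*': 'l1_norm'})
#     pruned_idx = pruner.cal_pruned_idx('conv1_weights', weight, 0.5, axis=0)
#     pruned = pruner.prune_tensor(weight, pruned_idx, pruned_axis=0)
#     assert pruned.shape == (4, 4, 3, 3)  # half of the filters removed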
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from . import quantization_pass
from .quantization_pass import *
from . import quantization_strategy
from .quantization_strategy import *
from . import mkldnn_post_training_strategy
from .mkldnn_post_training_strategy import *
from . import quantization_mkldnn_pass
from .quantization_mkldnn_pass import *
from . import post_training_quantization
from .post_training_quantization import *
__all__ = quantization_pass.__all__ + quantization_strategy.__all__
__all__ += mkldnn_post_training_strategy.__all__
__all__ += quantization_mkldnn_pass.__all__
__all__ += post_training_quantization.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logging
import six
import numpy as np
from .... import core
from ..core.strategy import Strategy
from ....log_helper import get_logger
__all__ = ['MKLDNNPostTrainingQuantStrategy']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class MKLDNNPostTrainingQuantStrategy(Strategy):
"""
    The MKL-DNN post-training quantization strategy.
"""
def __init__(self,
int8_model_save_path=None,
fp32_model_path=None,
cpu_math_library_num_threads=1):
"""
Args:
int8_model_save_path(str): int8_model_save_path is used to save an int8 ProgramDesc
with fp32 weights which is used for MKL-DNN int8 inference. For post training quantization,
MKLDNNPostTrainingQuantStrategy only supports converting a fp32 ProgramDesc
with fp32 weights to an int8 ProgramDesc with fp32 weights now. The saved
                int8 ProgramDesc with fp32 weights can only be executed with MKL-DNN enabled.
None means it doesn't save int8 ProgramDesc with fp32 weights. default: None.
fp32_model_path(str): fp32_model_path is used to load an original fp32 ProgramDesc with fp32 weights.
None means it doesn't have a fp32 ProgramDesc with fp32 weights. default: None.
cpu_math_library_num_threads(int): The number of cpu math library threads which is used on
MKLDNNPostTrainingQuantStrategy. 1 means it only uses one cpu math library
thread. default: 1
"""
super(MKLDNNPostTrainingQuantStrategy, self).__init__(0, 0)
self.int8_model_save_path = int8_model_save_path
if fp32_model_path is None:
raise Exception("fp32_model_path is None")
self.fp32_model_path = fp32_model_path
self.cpu_math_library_num_threads = cpu_math_library_num_threads
def on_compression_begin(self, context):
"""
        Prepare the data and quantize the model.
"""
super(MKLDNNPostTrainingQuantStrategy,
self).on_compression_begin(context)
        _logger.info('MKLDNNPostTrainingQuantStrategy::on_compression_begin')
# Prepare the Analysis Config
infer_config = core.AnalysisConfig("AnalysisConfig")
infer_config.switch_ir_optim(True)
infer_config.disable_gpu()
infer_config.set_model(self.fp32_model_path)
infer_config.enable_mkldnn()
infer_config.set_cpu_math_library_num_threads(
self.cpu_math_library_num_threads)
# Prepare the data for calculating the quantization scales
warmup_reader = context.eval_reader()
if six.PY2:
data = warmup_reader.next()
if six.PY3:
data = warmup_reader.__next__()
num_images = len(data)
image_data = [img.tolist() for (img, _) in data]
image_data = np.array(image_data).astype("float32").reshape(
[num_images, ] + list(data[0][0].shape))
image_data = image_data.ravel()
images = core.PaddleTensor(image_data, "x")
images.shape = [num_images, ] + list(data[0][0].shape)
label_data = [label for (_, label) in data]
labels = core.PaddleTensor(
np.array(label_data).astype("int64").reshape([num_images, 1]), "y")
warmup_data = [images, labels]
# Enable the INT8 Quantization
infer_config.enable_quantizer()
infer_config.quantizer_config().set_quant_data(warmup_data)
infer_config.quantizer_config().set_quant_batch_size(num_images)
# Run INT8 MKL-DNN Quantization
predictor = core.create_paddle_predictor(infer_config)
if self.int8_model_save_path:
if not os.path.exists(self.int8_model_save_path):
os.makedirs(self.int8_model_save_path)
predictor.SaveOptimModel(self.int8_model_save_path)
_logger.info(
            'Finish MKLDNNPostTrainingQuantStrategy::on_compression_begin')
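# A minimal construction sketch (hedged: the paths are placeholders; the
# strategy runs once at compression begin and expects an eval_reader in the
# compression context to supply a warm-up batch):
#
#     strategy = MKLDNNPostTrainingQuantStrategy(
#         int8_model_save_path='./int8_model',
#         fp32_model_path='./fp32_model',
#         cpu_math_library_num_threads=4)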
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import logging
import numpy as np
from ....executor import global_scope
from .... import io
from .... import core
from .... import framework
from ....framework import IrGraph
from ....log_helper import get_logger
from .quantization_pass import QuantizationTransformPass
from .quantization_pass import QuantizationFreezePass
from .quantization_pass import AddQuantDequantPass
from .quantization_pass import _op_real_in_out_name
__all__ = ['PostTrainingQuantization']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class PostTrainingQuantization(object):
def __init__(self,
executor,
sample_generator,
model_dir,
model_filename=None,
params_filename=None,
batch_size=10,
batch_nums=None,
scope=None,
algo="KL",
quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
is_full_quantize=False):
'''
        The class utilizes the post training quantization method to quantize
        the fp32 model. It uses calibrate data to calculate the scale factor
        of quantized variables, and inserts fake quant/dequant ops to obtain
        the quantized model.
Args:
executor(fluid.Executor): The executor to load, run and save the
quantized model.
sample_generator(Python Generator): The sample generator provides
calibrate data for DataLoader, and it only returns a sample every
time.
model_dir(str): The path of the fp32 model that will be quantized,
and the model and params files are under the path.
model_filename(str, optional): The name of file to load the inference
program. If it is None, the default filename '__model__' will
be used. Default is 'None'.
params_filename(str, optional): The name of file to load all parameters.
When all parameters were saved in a single binary file, set it
as the real filename. If parameters were saved in separate files,
set it as 'None'. Default is 'None'.
batch_size(int, optional): The batch size of DataLoader. Default is 10.
batch_nums(int, optional): If batch_nums is not None, the number of
calibrate data is batch_size*batch_nums. If batch_nums is None, use
all data provided by sample_generator as calibrate data.
scope(fluid.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by global_scope().
            algo(str, optional): If algo='KL', use the KL-divergence method to
                get the more precise scale factor. If algo='direct', use the
                abs_max method to get the scale factor. Default is 'KL'.
quantizable_op_type(list[str], optional): List the type of ops
that will be quantized. Default is ["conv2d", "depthwise_conv2d",
"mul"].
            is_full_quantize(bool, optional): If is_full_quantize is True,
                apply quantization to all supported quantizable op types. If
                is_full_quantize is False, only apply quantization to the op
                types given in quantizable_op_type.
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
exe = fluid.Executor(fluid.CPUPlace())
                model_dir = "path/to/fp32_model_params"
# set model_filename as None when the filename is __model__,
# otherwise set it as the real filename
model_filename = None
# set params_filename as None when all parameters were saved in
# separate files, otherwise set it as the real filename
params_filename = None
                save_model_path = "path/to/save_model_path"
# prepare the sample generator according to the model, and the
# sample generator must return a sample every time. The reference
# document: https://www.paddlepaddle.org.cn/documentation/docs/zh
# /user_guides/howto/prepare_data/use_py_reader.html
sample_generator = your_sample_generator
batch_size = 10
batch_nums = 10
algo = "KL"
quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
ptq = PostTrainingQuantization(
executor=exe,
sample_generator=sample_generator,
model_dir=model_dir,
model_filename=model_filename,
params_filename=params_filename,
batch_size=batch_size,
batch_nums=batch_nums,
algo=algo,
quantizable_op_type=quantizable_op_type)
ptq.quantize()
ptq.save_quantized_model(save_model_path)
'''
self._executor = executor
self._sample_generator = sample_generator
self._model_dir = model_dir
self._model_filename = model_filename
self._params_filename = params_filename
self._batch_size = batch_size
self._batch_nums = batch_nums
        self._scope = global_scope() if scope is None else scope
self._algo = algo
supported_quantizable_op_type = \
QuantizationTransformPass._supported_quantizable_op_type + \
AddQuantDequantPass._supported_quantizable_op_type
if is_full_quantize:
self._quantizable_op_type = supported_quantizable_op_type
else:
self._quantizable_op_type = quantizable_op_type
for op_type in self._quantizable_op_type:
assert op_type in supported_quantizable_op_type + \
AddQuantDequantPass._activation_type, \
op_type + " is not supported for quantization."
self._place = self._executor.place
self._program = None
self._feed_list = None
self._fetch_list = None
self._data_loader = None
self._op_real_in_out_name = _op_real_in_out_name
self._bit_length = 8
self._quantized_weight_var_name = []
self._quantized_act_var_name = []
self._sampling_data = {}
self._quantized_var_scale_factor = {}
def quantize(self):
'''
        Quantize the fp32 model. Use calibrate data to calculate the scale
        factor of quantized variables, and insert fake quant/dequant ops to
        obtain the quantized model.
Args:
None
Returns:
the program of quantized model.
'''
self._preprocess()
batch_id = 0
for data in self._data_loader():
self._executor.run(program=self._program,
feed=data,
fetch_list=self._fetch_list,
return_numpy=False)
self._sample_data()
if batch_id % 5 == 0:
_logger.info("run batch: " + str(batch_id))
batch_id += 1
if self._batch_nums and batch_id >= self._batch_nums:
break
_logger.info("all run batch: " + str(batch_id))
_logger.info("calculate scale factor ...")
self._calculate_scale_factor()
_logger.info("update the program ...")
self._update_program()
self._save_output_scale()
return self._program
def save_quantized_model(self, save_model_path):
'''
Save the quantized model to the disk.
Args:
save_model_path(str): The path to save the quantized model
Returns:
None
'''
io.save_inference_model(
dirname=save_model_path,
feeded_var_names=self._feed_list,
target_vars=self._fetch_list,
executor=self._executor,
main_program=self._program)
def _preprocess(self):
'''
Load model and set data loader, collect the variable names for sampling,
and set activation variables to be persistable.
'''
# load model and set data loader
[self._program, self._feed_list, self._fetch_list] = \
io.load_inference_model(dirname=self._model_dir,
executor=self._executor,
model_filename=self._model_filename,
params_filename=self._params_filename)
feed_vars = [framework._get_var(str(var_name), self._program) \
for var_name in self._feed_list]
self._data_loader = io.DataLoader.from_generator(
feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True)
self._data_loader.set_sample_generator(
self._sample_generator,
batch_size=self._batch_size,
drop_last=True,
places=self._place)
# collect the variable names for sampling
persistable_var_names = []
for var in self._program.list_vars():
if var.persistable:
persistable_var_names.append(var.name)
for op in self._program.global_block().ops:
op_type = op.type
if op_type in self._quantizable_op_type:
if op_type in ("conv2d", "depthwise_conv2d"):
self._quantized_act_var_name.append(op.input("Input")[0])
self._quantized_weight_var_name.append(
op.input("Filter")[0])
self._quantized_act_var_name.append(op.output("Output")[0])
elif op_type == "mul":
if self._is_input_all_not_persistable(
op, persistable_var_names):
op._set_attr("skip_quant", True)
_logger.warning("Skip quant a mul op for two "
"input variables are not persistable")
else:
self._quantized_act_var_name.append(op.input("X")[0])
self._quantized_weight_var_name.append(op.input("Y")[0])
self._quantized_act_var_name.append(op.output("Out")[0])
else:
                    # process other quantizable op types; all inputs must not be persistable
if self._is_input_all_not_persistable(
op, persistable_var_names):
input_output_name_list = self._op_real_in_out_name[
op_type]
for input_name in input_output_name_list[0]:
for var_name in op.input(input_name):
self._quantized_act_var_name.append(var_name)
for output_name in input_output_name_list[1]:
for var_name in op.output(output_name):
self._quantized_act_var_name.append(var_name)
        # set activation variables to be persistable, so that we can obtain
        # the tensor data in _sample_data
for var in self._program.list_vars():
if var.name in self._quantized_act_var_name:
var.persistable = True
def _sample_data(self):
'''
Sample the tensor data of quantized variables,
applied in every iteration.
'''
for var_name in self._quantized_weight_var_name:
if var_name not in self._sampling_data:
var_tensor = self._load_var_value(var_name)
self._sampling_data[var_name] = var_tensor
for var_name in self._quantized_act_var_name:
if var_name not in self._sampling_data:
self._sampling_data[var_name] = []
var_tensor = self._load_var_value(var_name)
self._sampling_data[var_name].append(var_tensor)
def _calculate_scale_factor(self):
'''
Calculate the scale factor of quantized variables.
'''
# apply channel_wise_abs_max quantization for weights
for var_name in self._quantized_weight_var_name:
data = self._sampling_data[var_name]
scale_factor_per_channel = []
for i in range(data.shape[0]):
abs_max_value = np.max(np.abs(data[i]))
scale_factor_per_channel.append(abs_max_value)
self._quantized_var_scale_factor[
var_name] = scale_factor_per_channel
# apply kl quantization for activation
for var_name in self._quantized_act_var_name:
if self._algo == "KL":
self._quantized_var_scale_factor[var_name] = \
self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name]))
else:
self._quantized_var_scale_factor[var_name] = \
np.max(np.abs(self._sampling_data[var_name]))
def _update_program(self):
'''
Insert fake_quantize/fake_dequantize op to the program.
'''
# reset quantized activation variable
for var in self._program.list_vars():
if var.name in self._quantized_act_var_name:
var.persistable = False
# use QuantizationTransformPass to insert fake_quantize/fake_dequantize op
graph = IrGraph(core.Graph(self._program.desc), for_test=True)
major_quantizable_op_types = []
for op_type in QuantizationTransformPass._supported_quantizable_op_type:
if op_type in self._quantizable_op_type:
major_quantizable_op_types.append(op_type)
transform_pass = QuantizationTransformPass(
scope=self._scope,
place=self._place,
weight_bits=self._bit_length,
activation_bits=self._bit_length,
activation_quantize_type='moving_average_abs_max',
weight_quantize_type='channel_wise_abs_max',
quantizable_op_type=major_quantizable_op_types)
transform_pass.apply(graph)
# use AddQuantDequantPass to insert fake_quant_dequant op
minor_quantizable_op_types = []
for op_type in AddQuantDequantPass._supported_quantizable_op_type:
if op_type in self._quantizable_op_type:
minor_quantizable_op_types.append(op_type)
add_quant_dequant_pass = AddQuantDequantPass(
scope=self._scope,
place=self._place,
quantizable_op_type=minor_quantizable_op_types)
add_quant_dequant_pass.apply(graph)
# save scale factor to scale var node
for key, val in self._quantized_var_scale_factor.items():
self._set_var_node_value(
key + ".scale", np.array(
[val], dtype=np.float32))
self._set_var_node_value(
key + ".quant_dequant.scale", np.array(
[val], dtype=np.float32))
# apply QuantizationFreezePass, and obtain the final quant model
freeze_pass = QuantizationFreezePass(
scope=self._scope,
place=self._place,
weight_bits=self._bit_length,
activation_bits=self._bit_length,
weight_quantize_type='channel_wise_abs_max',
quantizable_op_type=major_quantizable_op_types)
freeze_pass.apply(graph)
self._program = graph.to_program()
def _save_output_scale(self):
'''
Save output scale to the quantized op.
'''
output_scale_name = "output_scale"
for op in self._program.global_block().ops:
if op.type in self._quantizable_op_type:
output_name_list = self._op_real_in_out_name[op.type][1]
for output_name in output_name_list:
for output_var_name in op.output(output_name):
if output_var_name in self._quantized_var_scale_factor:
op._set_attr(output_scale_name,
self._quantized_var_scale_factor[
output_var_name])
def _load_var_value(self, var_name):
'''
Load variable value from scope
'''
return np.array(self._scope.find_var(var_name).get_tensor())
def _set_var_node_value(self, var_node_name, np_value):
'''
        Set the value of a var node by name, if the node exists.
'''
assert isinstance(np_value, np.ndarray), \
'The type of value should be numpy array.'
var_node = self._scope.find_var(var_node_name)
        if var_node is not None:
tensor = var_node.get_tensor()
tensor.set(np_value, self._place)
def _is_input_all_not_persistable(self, op, persistable_var_names):
'''
        Check whether all the real inputs of the op are not persistable.
'''
is_input_all_not_persistable = True
input_name_list = self._op_real_in_out_name[op.type][0]
for input_name in input_name_list:
for var_name in op.input(input_name):
if var_name in persistable_var_names:
is_input_all_not_persistable = False
break
return is_input_all_not_persistable
def _get_kl_scaling_factor(self, activation_blob, num_quantized_bins=255):
'''
        Use the KL-divergence method to get a more precise scaling factor.
'''
max_val = np.max(activation_blob)
min_val = np.min(activation_blob)
if min_val >= 0:
            hist, hist_edges = np.histogram(
activation_blob, bins=2048, range=(min_val, max_val))
ending_iter = 2047
starting_iter = int(ending_iter * 0.7)
else:
_logger.error("Please first apply abs to activation_blob.")
        bin_width = hist_edges[1] - hist_edges[0]
P_sum = len(np.array(activation_blob).ravel())
min_kl_divergence = 0
min_kl_index = 0
kl_inited = False
for i in range(starting_iter, ending_iter + 1):
reference_distr_P = hist[0:i].tolist()
outliers_count = sum(hist[i:2048])
if reference_distr_P[i - 1] == 0:
continue
reference_distr_P[i - 1] += outliers_count
reference_distr_bins = reference_distr_P[:]
candidate_distr_Q = hist[0:i].tolist()
num_merged_bins = int(i / num_quantized_bins)
candidate_distr_Q_quantized = [0] * num_quantized_bins
j_start = 0
j_end = num_merged_bins
for idx in range(num_quantized_bins):
candidate_distr_Q_quantized[idx] = sum(candidate_distr_Q[
j_start:j_end])
j_start += num_merged_bins
j_end += num_merged_bins
if (idx + 1) == num_quantized_bins - 1:
j_end = i
candidate_distr_Q = self._expand_quantized_bins(
candidate_distr_Q_quantized, reference_distr_bins)
Q_sum = sum(candidate_distr_Q)
kl_divergence = self._safe_entropy(reference_distr_P, P_sum,
candidate_distr_Q, Q_sum)
if not kl_inited:
min_kl_divergence = kl_divergence
min_kl_index = i
kl_inited = True
elif kl_divergence < min_kl_divergence:
min_kl_divergence = kl_divergence
min_kl_index = i
else:
pass
if min_kl_index == 0:
while starting_iter > 0:
if hist[starting_iter] == 0:
starting_iter -= 1
continue
else:
break
min_kl_index = starting_iter
return (min_kl_index + 0.5) * bin_width
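    # Note: the threshold returned above is the center of the selected bin of
    # the abs-activation histogram; activations beyond it are saturated
    # (clipped) when quantizing with this scale.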
def _expand_quantized_bins(self, quantized_bins, reference_bins):
        '''
        Expand the quantized bins back to the length of reference_bins,
        spreading each quantized bin evenly over the non-zero reference
        slots it covers.
        '''
expanded_quantized_bins = [0] * len(reference_bins)
num_merged_bins = int(len(reference_bins) / len(quantized_bins))
j_start = 0
j_end = num_merged_bins
for idx in range(len(quantized_bins)):
zero_count = reference_bins[j_start:j_end].count(0)
num_merged_bins = j_end - j_start
if zero_count == num_merged_bins:
avg_bin_ele = 0
else:
avg_bin_ele = quantized_bins[idx] / (
num_merged_bins - zero_count + 0.0)
for idx1 in range(j_start, j_end):
expanded_quantized_bins[idx1] = (0 if reference_bins[idx1] == 0
else avg_bin_ele)
j_start += num_merged_bins
j_end += num_merged_bins
if (idx + 1) == len(quantized_bins) - 1:
j_end = len(reference_bins)
return expanded_quantized_bins
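    # Toy trace of the expansion above (hypothetical values):
    #   _expand_quantized_bins([4, 6], [1, 0, 3, 3]) -> [4, 0, 3, 3]
    # the 4 lands in the only non-zero slot of the first pair, while the 6 is
    # split evenly over the two non-zero slots of the second pair.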
def _safe_entropy(self, reference_distr_P, P_sum, candidate_distr_Q, Q_sum):
'''
Calculate the entropy.
'''
assert len(reference_distr_P) == len(candidate_distr_Q)
tmp_sum1 = 0
tmp_sum2 = 0
for idx in range(len(reference_distr_P)):
p_idx = reference_distr_P[idx]
q_idx = candidate_distr_Q[idx]
if p_idx == 0:
tmp_sum1 += 0
tmp_sum2 += 0
else:
if q_idx == 0:
_logger.error("Fatal error!, idx = " + str(idx) +
" qindex = 0! p_idx = " + str(p_idx))
tmp_sum1 += p_idx * (math.log(Q_sum * p_idx))
tmp_sum2 += p_idx * (math.log(P_sum * q_idx))
return (tmp_sum1 - tmp_sum2) / P_sum
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
__all__ = ['FakeQAT2MkldnnINT8KernelPass', 'FakeQAT2MkldnnINT8PerfPass']
class FakeQAT2MkldnnINT8KernelPass(object):
"""
Convert QuantizationFreezePass generated IrGraph to MKL-DNN supported INT8
    IrGraph. The following transformations are done in this pass:
1. Convert int8 range weights with float32 data type, which are generated by
the QuantizationFreezePass, to float32 range weights with float32 data type
by using the corresponding scales. This conversion is because MKL-DNN INT8
conv2d kernel and mul kernel now only support float32 weights input, hence
weights quantization will happen inside the conv2d and mul INT8 kernel.
    2. Create the new conv2d or mul op with the converted weights, link its
    output to the fake_dequantize_abs_max op's output, and set the conv2d
    attribute "force_fp32_output" to true
3. Transform fake_quantize_xx op to quantize op
4. Remove fake_dequantize_abs_max op
"""
def __init__(self, _scope=None, _place=None):
"""
Args:
scope(fluid.Scope): scope is used to initialize the new parameters.
place(fluid.CPUPlace): place is used to initialize the new parameters.
Examples:
.. code-block:: python
                # The original graph will be rewritten.
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization \
import FakeQAT2MkldnnINT8KernelPass
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False)
place = fluid.CPUPlace()
mkldnn_pass = FakeQAT2MkldnnINT8KernelPass(fluid.global_scope(),
place)
mkldnn_pass.apply(graph)
"""
self._scope = _scope
self._place = _place
self._quantize_type = [
'fake_quantize_moving_average_abs_max',
'fake_quantize_range_abs_max'
]
self._dequantize_type = ['fake_dequantize_max_abs']
self._quantize_dequantize_type = [
'fake_quantize_dequantize_moving_average_abs_max'
]
self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
self._conv_ops = ['conv2d', 'depthwise_conv2d']
self._pool_ops = ['pool2d']
self._in_scale = {}
self._max_range = {}
self._new_output = {}
self._s8_max = 127
def apply(self, graph):
"""
        Quantize the graph for running MKL-DNN INT8 inference. According
        to the activation quantization type, the pass transforms fake
        quantize ops into quantize ops and removes the fake dequantize ops.
Args:
graph(IrGraph): the applied graph.
"""
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
ops = graph.all_op_nodes()
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
        # Collect _in_scale and _max_range to calculate the new scales for
        # MKL-DNN INT8 conv2d and mul
for op_node in ops:
if op_node.name() in self._dequantize_type:
input_name = op_node.input("X")[0]
scale_name = op_node.input("Scale")[0]
self._in_scale[input_name] = self._load_param(self._scope,
scale_name)[0]
self._max_range[input_name] = op_node.op().attr("max_range")
self._new_output[input_name] = op_node.output("Out")[0]
if op_node.name() in self._quantize_dequantize_type:
inputs = op_node.op().input_names()
attrs = op_node.op().attr_names()
input_name = op_node.input("X")[0]
scale_name = op_node.input("InScale")[0]
self._in_scale[input_name] = self._load_param(self._scope,
scale_name)[0]
# self._max_range[input_name] = op_node.op().attr("max_range")
self._new_output[input_name] = op_node.output("Out")[0]
for op_node in ops:
if op_node.name() in self._quantizable_ops:
if op_node.name() in self._conv_ops:
self._transform_to_conv_mkldnn(graph, op_node)
elif op_node.name() in self._pool_ops:
self._transform_to_pool_mkldnn(graph, op_node)
else:
self._transform_to_mul_mkldnn(graph, op_node)
elif op_node.name() in self._quantize_type:
self._transform_to_quantize_mkldnn(graph, op_node)
elif op_node.name() in self._dequantize_type:
self._remove_fake_dequantize_op(graph, op_node)
self._remove_unused_var_nodes(graph)
return graph
def _transform_to_pool_mkldnn(self, graph, op):
output_name = op.output("Out")[0]
input_name = op.input("X")[0]
def _transform_to_conv_mkldnn(self, graph, op_node):
weight_name = op_node.input("Filter")[0]
output_name = op_node.output("Output")[0]
# Convert int8 range weights to fp32 range weights
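        # QuantizationFreezePass set max_range to
        # act_range * param_range / weight_scale, so multiplying by s8_max
        # (127) and dividing by max_range recovers the original fp32 values.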
weight = self._load_param(self._scope, weight_name)
w_fp32 = np.divide(
np.multiply(weight, self._s8_max), self._max_range[output_name])
w_fp32 = w_fp32.reshape(weight.shape)
self._restore_var(weight_name, w_fp32)
input_var_node = graph._find_node_by_name(op_node.inputs,
op_node.input("Input")[0])
weight_var_node = graph._find_node_by_name(op_node.inputs, weight_name)
# Set fake_dequantize_abs_max's output as new output of conv2d
output_var_node = graph._find_node_by_name(
graph.all_var_nodes(), self._new_output[output_name])
attrs = {
name: op_node.op().attr(name)
for name in op_node.op().attr_names()
}
conv_op_node = graph.create_op_node(
op_type='conv2d',
attrs=attrs,
inputs={'Input': input_var_node,
'Filter': weight_var_node},
outputs={'Output': output_var_node})
        # Calculate the scales of the MKL-DNN INT8 conv2d from the QAT scales
scale_in = self._s8_max / self._in_scale[output_name]
        scale_w = [self._max_range[output_name] / self._s8_max]
conv_op_node.set_attr("Scale_weights", scale_w)
conv_op_node.set_attr("Scale_in", scale_in)
conv_op_node.set_attr("Scale_out", 1.0)
conv_op_node.set_attr("use_mkldnn", 1)
conv_op_node.set_attr("force_fp32_output", 1)
graph.link_to(input_var_node, conv_op_node)
graph.link_to(weight_var_node, conv_op_node)
graph.link_to(conv_op_node, output_var_node)
graph.safe_remove_nodes(op_node)
def _transform_to_mul_mkldnn(self, graph, op_node):
# For MKL-DNN INT8 mul, input Y should be the weights
weight_name = op_node.input("Y")[0]
output_name = op_node.output("Out")[0]
# Convert int8 range weights to fp32 range weights
weight = self._load_param(self._scope, weight_name)
w_fp32 = np.divide(
np.multiply(weight, self._s8_max), self._max_range[output_name])
w_fp32 = w_fp32.reshape(weight.shape)
self._restore_var(weight_name, w_fp32)
input_var_node = graph._find_node_by_name(op_node.inputs,
op_node.input("X")[0])
weight_var_node = graph._find_node_by_name(op_node.inputs, weight_name)
# Set fake_dequantize_abs_max's output as new output of mul
output_var_node = graph._find_node_by_name(
graph.all_var_nodes(), self._new_output[output_name])
attrs = {
name: op_node.op().attr(name)
for name in op_node.op().attr_names()
}
mul_op_node = graph.create_op_node(
op_type='mul',
attrs=attrs,
inputs={'X': input_var_node,
'Y': weight_var_node},
outputs={'Out': output_var_node})
        # Calculate the scales of the MKL-DNN INT8 mul from the QAT scales
scale_in = self._s8_max / self._in_scale[output_name]
        scale_w = [self._max_range[output_name] / self._s8_max]
mul_op_node.set_attr("scale_y", scale_w)
mul_op_node.set_attr("scale_x", scale_in)
mul_op_node.set_attr("scale_out", 1.0)
mul_op_node.set_attr("use_mkldnn", 1)
mul_op_node.set_attr("force_fp32_output", 1)
graph.link_to(input_var_node, mul_op_node)
graph.link_to(weight_var_node, mul_op_node)
graph.link_to(mul_op_node, output_var_node)
graph.safe_remove_nodes(op_node)
def _transform_to_quantize_mkldnn(self, graph, op_node):
"""
Transform fake_quantize_xx op to quantize mkldnn op in the graph.
"""
input_var_node = graph._find_node_by_name(op_node.inputs,
op_node.input("X")[0])
output_var_node = graph._find_node_by_name(op_node.outputs,
op_node.output("Out")[0])
scale_in = self._s8_max / self._load_param(
self._scope, op_node.input("InScale")[0])[0]
quant_op_node = graph.create_op_node(
op_type='quantize',
attrs={
'data_format': 'MKLDNNLAYOUT',
'use_mkldnn': 1,
'Scale': scale_in,
'is_negative_input': 1
},
inputs={'Input': input_var_node},
outputs={'Output': output_var_node})
graph.link_to(input_var_node, quant_op_node)
graph.link_to(quant_op_node, output_var_node)
graph.safe_remove_nodes(op_node)
def _remove_fake_dequantize_op(self, graph, op_node):
input_var_node = graph._find_node_by_name(op_node.inputs,
op_node.input("X")[0])
graph.safe_remove_nodes(op_node)
def _load_param(self, scope, param_name):
return np.array(scope.find_var(param_name).get_tensor())
def _restore_var(self, name, array):
tensor = self._scope.find_var(name).get_tensor()
tensor.set(array, self._place)
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
ops = graph.all_op_nodes()
for op_node in ops:
for input_node in op_node.inputs:
all_used_vars.add(input_node)
for output_node in op_node.outputs:
all_used_vars.add(output_node)
all_used_vars = {n.node for n in all_used_vars}
all_unused_vars = {
n
for n in filter(lambda node: node.node not in all_used_vars,
graph.all_var_nodes())
}
graph.safe_remove_nodes(all_unused_vars)
class FakeQAT2MkldnnINT8PerfPass(object):
"""
Transform a QAT model IrGraph into MKL-DNN supported INT8 IrGraph.
The pass consists of the following transformations:
1. gather scale values from fake quantize/dequantize operators,
2. extract FP32 inference model graph from the QAT graph, i.e.
a. remove fake quantize/dequantize operators,
b. dequantize conv2d and mul's weights,
3. optimize the FP32 graph using standard FP32 optimization fuses
(e.g. `conv2d`+`bn` -> `conv2d`),
4. quantize the optimized FP32 graph using standard INT8v2 quantization
passes (`cpu_quantize_pass`, `cpu_quantize_squash_pass`).
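    Examples:
    .. code-block:: python
        # A minimal usage sketch; it assumes a QAT-trained inference
        # program whose parameters live in fluid.global_scope().
        import paddle.fluid as fluid
        from paddle.fluid.contrib.slim.quantization \
            import FakeQAT2MkldnnINT8PerfPass
        from paddle.fluid.framework import IrGraph
        from paddle.fluid import core
        graph = IrGraph(core.Graph(fluid.Program().desc), for_test=True)
        place = fluid.CPUPlace()
        mkldnn_pass = FakeQAT2MkldnnINT8PerfPass(fluid.global_scope(),
                                                 place, core)
        graph = mkldnn_pass.apply(graph)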
"""
def __init__(self, _scope=None, _place=None, _core=None, _debug=False):
self._scope = _scope
self._place = _place
self._core = _core
self._debug = _debug
self._quantize_types = [
'fake_quantize_moving_average_abs_max',
'fake_quantize_range_abs_max',
'fake_quantize_dequantize_moving_average_abs_max'
]
self._fake_quantize_types = [
'fake_quantize_moving_average_abs_max',
'fake_quantize_dequantize_moving_average_abs_max'
]
self._fake_dequantize_types = ['fake_dequantize_max_abs']
self._conv_ops = ['conv2d', 'depthwise_conv2d']
self._pool_ops = ['pool2d']
self._mul_ops = ['mul']
self._fc_ops = ['fc']
self._weight_scales = {}
        # Collect the Input and Output scales from Fake QAT models
self._var_quant_scales = {}
self._max_range = {}
self._s8_max = 127
def apply(self, graph):
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
graph = self._gather_scales(graph)
graph = self._remove_fake_ops(graph)
graph = self._dequantize_weights(graph)
graph = self._optimize_fp32_graph(graph)
graph = self._compute_weight_scales(graph)
graph = self._update_conv_relu_scales(graph)
graph = self._update_pooling_scales(graph)
graph = self._quantize_fp32_graph(graph)
graph = self._remove_unused_var_nodes(graph)
return graph
def apply_fp32(self, graph):
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
graph = self._gather_scales(graph)
graph = self._remove_fake_ops(graph)
graph = self._dequantize_weights(graph)
graph = self._optimize_fp32_graph(graph)
graph = self._remove_unused_var_nodes(graph)
return graph
def _convert_scale2tensor(self, scale):
tensor = core.LoDTensor()
tensor.set(scale, core.CPUPlace())
return tensor
def _gather_scales(self, graph):
for op in graph.all_op_nodes():
if op.name() in self._quantize_types:
bit_length = op.op().attr("bit_length")
                assert bit_length == 8, 'Unsupported number of quantization bits ({}). Only 8 is supported now.'.format(
bit_length)
input_name = op.input("X")[0]
scale_name = op.input("InScale")[0]
                # Note: the scales are stored as reciprocals of the values
                # read from the fake quantize ops.
scale = np.array(1.0 / self._load_param(
self._scope, scale_name)[0]).astype(np.float64)
lod_tensor = self._convert_scale2tensor(scale)
use_unsigned_int = False
self._var_quant_scales[input_name] = (use_unsigned_int,
lod_tensor)
self._var_quant_scales[scale_name.replace(".scale", "")] = (
use_unsigned_int, lod_tensor)
if op.name() in self._fake_dequantize_types:
input_name = op.input("X")[0]
_max_range = op.op().attr("max_range")
self._weight_scales[input_name] = _max_range
return graph
def _update_pooling_scales(self, graph):
for op in graph.all_op_nodes():
if op.name() in self._pool_ops:
input_name = op.input("X")[0]
output_name = op.output("Out")[0]
if input_name in self._var_quant_scales:
self._var_quant_scales[
output_name] = self._var_quant_scales[input_name]
return graph
def _load_param(self, scope, param_name):
return np.array(scope.find_var(param_name).get_tensor())
def _remove_fake_ops(self, graph):
for op in graph.all_op_nodes():
if op.name() in self._fake_quantize_types:
op_out = graph._find_node_by_name(op.outputs,
op.output("Out")[0])
next_op = op_out.outputs[0]
if next_op.name() not in self._mul_ops:
self._remove_fake_quantize(graph, op)
for op in graph.all_op_nodes():
if op.name() in self._fake_dequantize_types:
op_in = graph._find_node_by_name(op.inputs, op.input("X")[0])
prev_op = op_in.inputs[0]
if prev_op.name() not in self._mul_ops:
self._remove_fake_dequantize(graph, op)
return graph
def _remove_fake_quantize(self, graph, op):
fake_quant_in = graph._find_node_by_name(op.inputs, op.input("X")[0])
fake_quant_in_scale = graph._find_node_by_name(op.inputs,
op.input("InScale")[0])
fake_quant_out = graph._find_node_by_name(op.outputs,
op.output("Out")[0])
fake_quant_out_scale = graph._find_node_by_name(
op.outputs, op.output("OutScale")[0])
next_ops = fake_quant_out.outputs
for next_op in next_ops:
self._swap_inputs(next_op, fake_quant_out, fake_quant_in)
graph.link_to(fake_quant_in, next_op)
graph.safe_remove_nodes(
{op, fake_quant_in_scale, fake_quant_out, fake_quant_out_scale})
return graph
def _remove_fake_dequantize(self, graph, op):
fake_dequant_in = graph._find_node_by_name(op.inputs, op.input("X")[0])
fake_dequant_out = graph._find_node_by_name(op.outputs,
op.output("Out")[0])
next_ops = fake_dequant_out.outputs
for next_op in next_ops:
self._swap_inputs(next_op, fake_dequant_out, fake_dequant_in)
graph.link_to(fake_dequant_in, next_op)
graph.safe_remove_nodes({op, fake_dequant_out})
return graph
def _swap_inputs(self, op, old_input, new_input):
for input_name in op.op().input_names():
if old_input.name() in op.input(input_name):
op.op().set_input(input_name, [
new_input.name() if x == old_input.name() else x
for x in op.input(input_name)
])
def _dequantize_weights(self, graph):
for op in graph.all_op_nodes():
if op.name() in self._conv_ops:
self._dequantize_conv_weights(graph, op)
return graph
def _dequantize_conv_weights(self, graph, op_node):
weight_name = op_node.input("Filter")[0]
output_name = op_node.output("Output")[0]
# Convert int8 range weights to fp32 range weights
scales = self._weight_scales[output_name]
weight = self._load_param(self._scope, weight_name)
w_fp32 = np.divide(np.multiply(weight, self._s8_max), scales)
w_fp32 = w_fp32.reshape(weight.shape)
self._restore_var(weight_name, w_fp32)
def _dequantize_mul_weights(self, graph, op_node):
weight_name = op_node.input("Y")[0]
output_name = op_node.output("Out")[0]
scales = self._weight_scales[output_name]
weight = self._load_param(self._scope, weight_name)
w_fp32 = np.divide(np.multiply(weight, self._s8_max), scales)
w_fp32 = w_fp32.reshape(weight.shape)
self._restore_var(weight_name, w_fp32)
def _restore_var(self, name, array):
tensor = self._scope.find_var(name).get_tensor()
tensor.set(array, self._place)
def _optimize_fp32_graph(self, graph):
graph = self._apply_pass(graph, 'mkldnn_placement_pass',
['mkldnn_enabled_op_types'], [set()])
graph = self._apply_pass(graph, 'depthwise_conv_mkldnn_pass')
graph = self._apply_pass(graph, 'conv_bn_fuse_pass')
graph = self._apply_pass(graph, 'conv_eltwiseadd_bn_fuse_pass')
graph = self._apply_pass(graph, 'conv_bias_mkldnn_fuse_pass')
graph = self._apply_pass(graph, 'conv_elementwise_add_mkldnn_fuse_pass')
graph = self._apply_pass(graph, 'conv_relu_mkldnn_fuse_pass')
graph = self._apply_pass(graph, 'conv_relu6_mkldnn_fuse_pass')
return graph
def _apply_pass(self, graph, pass_name, attrs=None, attr_values=None):
ir_pass = core.get_pass(pass_name)
cpp_graph = graph.graph
if not cpp_graph.has('__param_scope__'):
cpp_graph.set_not_owned('__param_scope__', self._scope)
if attrs:
assert attr_values and len(attrs) == len(
attr_values
), "Different number of pass attributes and their values."
for attr, value in zip(attrs, attr_values):
ir_pass.set(attr, value)
ir_pass.apply(cpp_graph)
if self._debug:
graph.draw('.', 'qat_fp32_{}'.format(pass_name),
graph.all_op_nodes())
self._remove_unused_var_nodes(graph)
return graph
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
ops = graph.all_op_nodes()
for op_node in ops:
for input_node in op_node.inputs:
all_used_vars.add(input_node)
for output_node in op_node.outputs:
all_used_vars.add(output_node)
all_used_vars = {n.node for n in all_used_vars}
all_unused_vars = {
n
for n in filter(lambda node: node.node not in all_used_vars,
graph.all_var_nodes())
}
graph.safe_remove_nodes(all_unused_vars)
return graph
def _compute_weight_scales(self, graph):
def _compute_var_scales(ops, out_name, w_name, axis):
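            # Compute the per-channel scales as reciprocals of the abs-max
            # values taken along the given axis.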
for op in graph.all_op_nodes():
if op.op().type() in ops:
weight_var_name = op.input(w_name)[0]
weights = np.array(
self._load_param(self._scope, weight_var_name))
scales = 1.0 / np.amax(
np.abs(weights.reshape(weights.shape[0], -1)).astype(
np.float64),
axis=axis)
lod_tensor = self._convert_scale2tensor(scales)
use_unsigned_int = False
self._var_quant_scales[weight_var_name] = (use_unsigned_int,
lod_tensor)
_compute_var_scales(self._conv_ops, "Output", "Filter", axis=1)
_compute_var_scales(self._fc_ops, "Out", "W", axis=0)
return graph
def _find_avg_pooling_ids(self, graph):
ids = []
for op in graph.all_op_nodes():
if op.name() in self._pool_ops:
if op.op().attr("pooling_type") == "avg":
ids.append(op.id())
return set(ids) if len(ids) else set([-1])
def _transform_to_quantize_mkldnn(self, graph, op_node):
"""
Transform fake_quantize_xx op to quantize mkldnn op in the graph.
"""
input_var_node = graph._find_node_by_name(op_node.inputs,
op_node.input("X")[0])
output_var_node = graph._find_node_by_name(op_node.outputs,
op_node.output("Out")[0])
scale_in = self._s8_max / self._load_param(
self._scope, op_node.input("InScale")[0])[0]
quant_op_node = graph.create_op_node(
op_type='quantize',
attrs={
'data_format': 'MKLDNNLAYOUT',
'use_mkldnn': 1,
'Scale': scale_in,
'is_negative_input': 1
},
inputs={'Input': input_var_node},
outputs={'Output': output_var_node})
graph.link_to(input_var_node, quant_op_node)
graph.link_to(quant_op_node, output_var_node)
graph.safe_remove_nodes(op_node)
return quant_op_node
def _update_conv_relu_scales(self, graph):
for op in graph.all_op_nodes():
if op.name() in self._conv_ops:
out_name = op.output("Output")[0]
if out_name in self._var_quant_scales and \
op.op().attr("fuse_activation") == 'relu' and \
op.op().attr("fuse_residual_connection") == False:
_, tensor = self._var_quant_scales[out_name]
self._var_quant_scales[out_name] = (True, tensor)
return graph
def _quantize_fp32_graph(self, graph):
ir_pass = self._core.get_pass('cpu_quantize_placement_pass')
cpp_graph = graph.graph
ir_pass.set('quantize_enabled_op_types', {'conv2d', 'pool2d'})
ir_pass.set('quantize_excluded_op_ids',
self._find_avg_pooling_ids(graph))
ir_pass.apply(cpp_graph)
if self._debug:
graph.draw('.', 'qat_int8_{}'.format(ir_pass.type()),
graph.all_op_nodes())
graph = self._apply_pass(graph, 'cpu_quantize_pass',
['quant_var_scales'],
[self._var_quant_scales])
graph = self._apply_pass(graph, 'cpu_quantize_squash_pass')
return graph
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import numpy as np
from ..... import compat as cpt
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
from .... import unique_name
__all__ = [
'QuantizationTransformPass', 'QuantizationFreezePass', 'ConvertToInt8Pass',
'TransformForMobilePass', 'ScaleForTrainingPass', 'ScaleForInferencePass',
'AddQuantDequantPass'
]
_fake_quant_op_list = [
'fake_quantize_abs_max', 'fake_quantize_range_abs_max',
'fake_quantize_moving_average_abs_max', 'fake_channel_wise_quantize_abs_max'
]
_fake_dequant_op_list = [
'fake_dequantize_max_abs', 'fake_channel_wise_dequantize_max_abs'
]
_out_scale_op_list = [
"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid", "depthwise_conv2d",
"batch_norm", "concat", "tanh", "pad", "elementwise_add", "elementwise_mul",
"dropout", "split", "prelu", "conv2d_transpose", "leaky_relu"
]
# List the real input and output names of ops, to avoid processing inputs such as AxisTensor.
_op_real_in_out_name = {
"conv2d": [["Input", "Filter"], ["Output"]],
"depthwise_conv2d": [["Input"], ["Output"]],
"mul": [["X", "Y"], ["Out"]],
"pool2d": [["X"], ["Out"]],
"elementwise_add": [["X", "Y"], ["Out"]],
"concat": [["X"], ["Out"]],
"softmax": [["X"], ["Out"]],
"argmax": [["X"], ["Out"]],
"transpose": [["X"], ["Out"]],
"equal": [["X", "Y"], ["Out"]],
"gather": [["X"], ["Out"]],
"greater_equal": [["X", "Y"], ["Out"]],
"greater_than": [["X", "Y"], ["Out"]],
"less_equal": [["X", "Y"], ["Out"]],
"less_than": [["X", "Y"], ["Out"]],
"mean": [["X"], ["Out"]],
"not_equal": [["X", "Y"], ["Out"]],
"reshape": [["X"], ["Out"]],
"reshape2": [["X"], ["Out"]],
"bilinear_interp": [["X"], ["Out"]],
"nearest_interp": [["X"], ["Out"]],
"trilinear_interp": [["X"], ["Out"]],
"slice": [["Input"], ["Out"]],
"squeeze": [["X"], ["Out"]],
"elementwise_sub": [["X", "Y"], ["Out"]],
"relu": [["X"], ["Out"]],
"relu6": [["X"], ["Out"]],
"leaky_relu": [["X"], ["Out"]],
"tanh": [["X"], ["Out"]],
"swish": [["X"], ["Out"]],
}
def _init_var_node(var_node, value, scope, place):
assert isinstance(value,
np.ndarray), 'The type of value should be numpy array.'
assert scope is not None, \
'The scope cannot be set None.'
assert place is not None, \
'The place cannot be set None.'
tensor = scope.var(var_node.name()).get_tensor()
tensor.set(value, place)
class QuantizationTransformPass(object):
_supported_quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul']
def __init__(self,
scope=None,
place=None,
weight_bits=8,
activation_bits=8,
activation_quantize_type='abs_max',
weight_quantize_type='abs_max',
window_size=10000,
moving_rate=0.9,
skip_pattern=['skip_quant'],
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
"""
Convert and rewrite the IrGraph according to weight and
activation quantization type.
Args:
scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
type, this pass will create some new parameters. The scope is used to
initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to initialize new
parameters described above.
weight_bits(int): quantization bit number for weights,
the bias is not quantized.
activation_bits(int): quantization bit number for activation.
activation_quantize_type(str): quantization type for activation,
now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
If use 'abs_max' mode, the quantization scale will be calculated
dynamically each step in both training and testing period. If use
'range_abs_max', a static quantization scale will be calculated
during training and used in inference.
weight_quantize_type(str): quantization type for weights,
support 'abs_max' and 'channel_wise_abs_max'. The 'range_abs_max'
usually is not used for weight, since weights are fixed once the
model is well trained.
window_size(int): the window size for 'range_abs_max' quantization.
moving_rate(float): the param for 'moving_average_abs_max' quantization.
skip_pattern(str or str list): The user-defined quantization skip pattern, which
will be presented in the name scope of an op. When the skip pattern is
detected in an op's name scope, the corresponding op will not be quantized.
quantizable_op_type(list[str]): List the type of ops that will be quantized.
Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
Examples:
.. code-block:: python
            # The original graph will be rewritten.
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization \
import QuantizationTransformPass
from paddle.fluid.contrib.slim.graph import IrGraph
from paddle.fluid import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
place = fluid.CPUPlace()
transform_pass = QuantizationTransformPass(fluid.global_scope(),
place)
transform_pass.apply(graph)
"""
self._scope = scope
self._place = place
self._weight_bits = weight_bits
self._activation_bits = activation_bits
self._skip_pattern = skip_pattern
quant_type = [
'abs_max', 'channel_wise_abs_max', 'range_abs_max',
'moving_average_abs_max'
]
assert activation_quantize_type != 'channel_wise_abs_max', \
"The activation quantization type does not support 'channel_wise_abs_max'."
if activation_quantize_type not in quant_type:
raise ValueError(
"Unknown activation_quantize_type : '%s'. It can only be "
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'." %
(str(activation_quantize_type)))
if weight_quantize_type not in quant_type:
raise ValueError(
"Unknown weight_quantize_type: '%s'. It can only be "
"'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
% (str(weight_quantize_type)))
self._activation_quantize_type = activation_quantize_type
self._weight_quantize_type = weight_quantize_type
self._window_size = window_size
self._moving_rate = moving_rate
self._quantizable_ops = quantizable_op_type
for op in self._quantizable_ops:
assert op in QuantizationTransformPass._supported_quantizable_op_type, \
op + " is not supported for quantization."
self._conv_ops = ['conv2d', 'depthwise_conv2d']
self._quantizable_grad_ops = [
'%s_grad' % (op) for op in self._quantizable_ops
]
self._is_test = None
self._global_step = None
def apply(self, graph):
"""
        Quantize the graph for the training process. According to the weight
        and activation quantization type, fake quantize operators and fake
        dequantize operators will be added to the graph.
Args:
graph(IrGraph): the applied graph.
Returns:
            the transformed graph.
"""
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
self._is_test = graph.is_test()
        # maps an original variable name to its dequantized version
dequantized_vars = collections.OrderedDict()
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
def _quant_preprocess(op_node):
user_skipped = False
if isinstance(self._skip_pattern, list):
user_skipped = op_node.op().has_attr("op_namescope") and \
any(pattern in op_node.op().attr("op_namescope") for pattern in self._skip_pattern)
elif isinstance(self._skip_pattern, str):
user_skipped = op_node.op().has_attr("op_namescope") and \
op_node.op().attr("op_namescope").find(self._skip_pattern) != -1
if user_skipped:
op_node.op()._set_attr("skip_quant", True)
def _transform_forward(graph, op):
for var_node in op.inputs:
if var_node.name() not in op.input_arg_names():
continue
if var_node.name() in dequantized_vars:
dequant_var_node = dequantized_vars[var_node.name()]
else:
quant_bits = self._weight_bits if var_node.name() in persistable_vars \
else self._activation_bits
quant_type = self._weight_quantize_type if var_node.name() \
in persistable_vars else self._activation_quantize_type
if quant_type == 'channel_wise_abs_max':
assert var_node.name(
) in persistable_vars, "'channel_wise_abs_max' can only be applied on weights."
if op.name() in self._conv_ops:
quant_var_node, scale_var_node = self._insert_channel_quant_op(
graph, var_node, quant_bits)
dequant_var_node = self._insert_channel_dequant_op(
graph, quant_var_node, [scale_var_node],
[quant_bits])
else:
quant_var_node, scale_var_node = self._insert_quant_op(
graph, var_node, quant_bits, 'abs_max')
dequant_var_node = self._insert_dequant_op(
graph, quant_var_node, scale_var_node,
quant_bits)
else:
quant_var_node, scale_var_node = self._insert_quant_op(
graph, var_node, quant_bits, quant_type)
dequant_var_node = self._insert_dequant_op(
graph, quant_var_node, scale_var_node, quant_bits)
dequantized_vars[var_node.name()] = dequant_var_node
graph.update_input_link(var_node, dequant_var_node, op)
        def _transform_backward(graph, op):
            no_dequantized_input_vars = True
            for var_node in op.inputs:
                if var_node.name() not in op.input_arg_names():
                    continue
                if var_node.name() in dequantized_vars:
                    dequant_var_node = dequantized_vars[var_node.name()]
                    graph.update_input_link(var_node, dequant_var_node, op)
                    no_dequantized_input_vars = False
            if no_dequantized_input_vars:
                raise ValueError("There is no dequantized input for op %s." %
                                 (op.name()))
if not self._is_test:
self._create_global_step(graph)
ops = graph.all_op_nodes()
        # Do the preprocessing for quantization, such as marking the ops
        # that should be skipped and not quantized.
for op in ops:
if op.name() in self._quantizable_ops or \
op.name() in self._quantizable_grad_ops:
_quant_preprocess(op)
# The process of _transform_forward and _transform_backward is needed in two for loops.
# The loop for transforming the forward graph:
for op in ops:
if op.name() in self._quantizable_ops:
skipped = op.op().has_attr("skip_quant") and \
op.op().attr("skip_quant")
if skipped:
continue
_transform_forward(graph, op)
# The loop for renaming the inputs of backward op.
for op in ops:
if op.name() in self._quantizable_grad_ops:
skipped = op.op().has_attr("skip_quant") and \
op.op().attr("skip_quant")
if skipped:
continue
_transform_backward(graph, op)
graph.resolve_hazard()
return graph
def _create_global_step(self, graph):
if self._weight_quantize_type == 'range_abs_max' or \
self._activation_quantize_type == 'range_abs_max':
counter_name = cpt.to_text('@STEP_COUNTER@')
for node in graph.all_var_nodes():
if node.name() == counter_name:
self._global_step = node
if self._global_step is None:
global_step_in = graph.create_persistable_node(
name=counter_name,
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=core.VarDesc.VarType.INT64)
_init_var_node(
global_step_in,
np.zeros(
[1], dtype='int64'),
self._scope,
self._place)
global_step_out = graph.create_var_node_from_desc(
global_step_in.var())
# The attribute of `op_role` is needed by ParallelExecutor.
increment_op = graph.create_op_node(
op_type='increment',
attrs={
'step': 1.0,
'op_role':
core.op_proto_and_checker_maker.OpRole.Forward
},
inputs={'X': global_step_in},
outputs={'Out': global_step_out})
graph.link_to(global_step_in, increment_op)
graph.link_to(increment_op, global_step_out)
self._global_step = global_step_out
def _insert_quant_op(self, graph, var_node, quant_bits, quant_type):
"""
Insert fake_quantize_op in the graph.
"""
if quant_type == 'abs_max':
return self._insert_quant_abs_max_op(graph, var_node, quant_bits)
elif quant_type == 'range_abs_max':
return self._insert_quant_range_abs_max_op(graph, var_node,
quant_bits)
elif quant_type == 'moving_average_abs_max':
return self._insert_quant_moving_average_abs_max_op(graph, var_node,
quant_bits)
def _insert_quant_abs_max_op(self, graph, var_node, quant_bits):
"""
Insert fake_quantize_abs_max op in the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
scale_var_node = graph.create_var_node(
name=self._quantized_scale_name(var_node.name()),
var_type=var_node.type(),
shape=[1],
var_dtype=var_node.dtype())
quant_op_node = graph.create_op_node(
op_type='fake_quantize_abs_max',
attrs={
'bit_length': quant_bits,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
},
inputs={'X': var_node},
outputs={'Out': quant_var_node,
'OutScale': scale_var_node})
graph.link_to(var_node, quant_op_node)
graph.link_to(quant_op_node, quant_var_node)
graph.link_to(quant_op_node, scale_var_node)
return quant_var_node, scale_var_node
def _insert_quant_range_abs_max_op(self, graph, var_node, quant_bits):
"""
Insert fake_quantize_range_abs_max on the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
scale_in_node = graph.create_persistable_node(
name=self._quantized_scale_name(var_node.name()),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=var_node.dtype())
data_type = 'float64' if var_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
scale_in_node,
np.array(
[0.001], dtype=data_type),
self._scope,
self._place)
scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
inputs = {'X': var_node, 'InScale': scale_in_node}
outputs = {'Out': quant_var_node, 'OutScale': scale_out_node}
if not self._is_test:
            # The name of scales_var_node may be 'scales_0', 'scales_1', etc.
scales_node = graph.create_persistable_node(
name=unique_name.generate('scales'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[self._window_size],
var_dtype=var_node.dtype())
data_type = 'float64' if var_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
scales_node,
np.zeros(
[self._window_size], dtype=data_type),
self._scope,
self._place)
inputs['Iter'] = self._global_step
outputs['OutScales'] = scales_node
attrs = {
'window_size': self._window_size,
'bit_length': quant_bits,
'is_test': self._is_test,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
}
quant_op_node = graph.create_op_node(
op_type='fake_quantize_range_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs)
graph.link_to(var_node, quant_op_node)
graph.link_to(scale_in_node, quant_op_node)
graph.link_to(quant_op_node, quant_var_node)
graph.link_to(quant_op_node, scale_out_node)
if not self._is_test:
graph.link_to(self._global_step, quant_op_node)
graph.link_to(quant_op_node, scales_node)
return quant_var_node, scale_out_node
def _insert_quant_moving_average_abs_max_op(self, graph, var_node,
quant_bits):
"""Insert fake_quantize_moving_average_abs_max
"""
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
scale_in_node = graph.create_persistable_node(
name=self._quantized_scale_name(var_node.name()),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=var_node.dtype())
data_type = 'float64' if var_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
scale_in_node,
np.array(
[0.001], dtype=data_type),
self._scope,
self._place)
scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
ins = {'X': var_node, 'InScale': scale_in_node}
outs = {'Out': quant_var_node, 'OutScale': scale_out_node}
if not self._is_test:
state_in_node = graph.create_persistable_node(
name=unique_name.generate('state'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=var_node.dtype(),
shape=[1])
data_type = 'float64' if var_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
state_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
accum_in_node = graph.create_persistable_node(
name=unique_name.generate('accum'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=var_node.dtype(),
shape=[1])
_init_var_node(
accum_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
state_out_node = graph.create_var_node_from_desc(state_in_node.var(
))
accum_out_node = graph.create_var_node_from_desc(accum_in_node.var(
))
ins['InState'] = state_in_node
ins['InAccum'] = accum_in_node
outs['OutState'] = state_out_node
outs['OutAccum'] = accum_out_node
attrs = {
'bit_length': quant_bits,
'moving_rate': self._moving_rate,
'is_test': self._is_test,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
}
quant_op_node = graph.create_op_node(
op_type='fake_quantize_moving_average_abs_max',
attrs=attrs,
inputs=ins,
outputs=outs)
graph.link_to(var_node, quant_op_node)
graph.link_to(scale_in_node, quant_op_node)
graph.link_to(quant_op_node, quant_var_node)
graph.link_to(quant_op_node, scale_out_node)
if not self._is_test:
graph.link_to(state_in_node, quant_op_node)
graph.link_to(accum_in_node, quant_op_node)
graph.link_to(quant_op_node, state_out_node)
graph.link_to(quant_op_node, accum_out_node)
return quant_var_node, scale_out_node
def _insert_channel_quant_op(self, graph, var_node, quant_bits):
"""
Insert fake_channel_wise_quantize_abs_max op in the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
scale_var_node = graph.create_var_node(
name=self._quantized_scale_name(var_node.name()),
var_type=var_node.type(),
shape=[var_node.shape()[0]],
var_dtype=var_node.dtype())
quant_op_node = graph.create_op_node(
op_type='fake_channel_wise_quantize_abs_max',
attrs={
'bit_length': quant_bits,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
},
inputs={'X': var_node},
outputs={'Out': quant_var_node,
'OutScale': scale_var_node})
graph.link_to(var_node, quant_op_node)
graph.link_to(quant_op_node, quant_var_node)
graph.link_to(quant_op_node, scale_var_node)
return quant_var_node, scale_var_node
def _insert_dequant_op(self, graph, var_node, scale_var_node, quant_bits):
"""
Insert fake_dequantize_op in the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
dequant_var_node = graph.create_var_node(
name=self._dequantized_var_name(var_node.name()),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
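        # max_range = 2^(quant_bits - 1) - 1, e.g. 127 for 8 bits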
max_range = (1 << (quant_bits - 1)) - 1
dequant_op_node = graph.create_op_node(
op_type='fake_dequantize_max_abs',
attrs={
'max_range': float(max_range),
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
},
inputs={'X': var_node,
'Scale': scale_var_node},
outputs={'Out': dequant_var_node})
graph.link_to(var_node, dequant_op_node)
graph.link_to(scale_var_node, dequant_op_node)
graph.link_to(dequant_op_node, dequant_var_node)
return dequant_var_node
def _insert_channel_dequant_op(self, graph, var_node, scale_var_nodes,
quant_bits):
"""
Insert fake_channel_wise_dequantize_max_abs in the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
dequant_var_node = graph.create_var_node(
name=self._dequantized_var_name(var_node.name()),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
dequant_op_node = graph.create_op_node(
op_type='fake_channel_wise_dequantize_max_abs',
attrs={
'quant_bits': quant_bits,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
},
inputs={'X': var_node,
'Scales': scale_var_nodes},
outputs={'Out': dequant_var_node})
graph.link_to(var_node, dequant_op_node)
for scale_n in scale_var_nodes:
graph.link_to(scale_n, dequant_op_node)
graph.link_to(dequant_op_node, dequant_var_node)
return dequant_var_node
def _quantized_var_name(self, var_name):
"""
Return quantized variable name for the input `var_name`.
"""
return "%s.quantized" % (var_name)
def _dequantized_var_name(self, var_name):
"""
Return dequantized variable name for the input `var_name`.
"""
return "%s.dequantized" % (var_name)
def _quantized_scale_name(self, var_name):
"""
Return the scale name of quantized variable for the input `var_name`.
"""
return "%s.scale" % (var_name)
class QuantizationFreezePass(object):
_supported_quantizable_op_type = \
QuantizationTransformPass._supported_quantizable_op_type
def __init__(self,
scope,
place,
weight_bits=8,
activation_bits=8,
weight_quantize_type='abs_max',
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
"""
The freeze pass is used to adjust the quantize operator order, for example:
        1) `activation -> quant -> dequant -> conv2d` will be frozen into
        `activation -> quant -> conv2d -> dequant`
        2) `weight -> quant -> dequant -> conv2d` will be frozen into `weight -> conv2d`,
        and the weight will be scaled offline.
Args:
scope(fluid.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the weight tensors.
weight_bits(int): quantization bit number for weights.
activation_bits(int): quantization bit number for activation.
weight_quantize_type(str): quantization type for weights, support 'abs_max' and
'channel_wise_abs_max'. The 'range_abs_max' usually is not used for weight,
since weights are fixed once the model is well trained.
quantizable_op_type(list[str]): List the type of ops that will be quantized.
Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
QuantizationTransformPass and ConvertToInt8Pass must be the same as this.
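        Examples:
        .. code-block:: python
            # A minimal sketch; it assumes the graph was first rewritten by
            # QuantizationTransformPass and then trained.
            import paddle.fluid as fluid
            from paddle.fluid.contrib.slim.quantization \
                import QuantizationFreezePass
            from paddle.fluid.framework import IrGraph
            from paddle.fluid import core
            graph = IrGraph(core.Graph(fluid.Program().desc), for_test=True)
            place = fluid.CPUPlace()
            freeze_pass = QuantizationFreezePass(fluid.global_scope(), place)
            freeze_pass.apply(graph)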
"""
assert scope is not None, \
'The scope cannot be set None.'
assert place is not None, \
'The place cannot be set None.'
self._scope = scope
self._place = place
self._weight_bits = weight_bits
self._activation_bits = activation_bits
self._weight_quantize_type = weight_quantize_type
self._quantizable_ops = quantizable_op_type
for op in self._quantizable_ops:
assert op in QuantizationFreezePass._supported_quantizable_op_type, \
op + " is not supported for quantization."
self._conv_ops = ['conv2d', 'depthwise_conv2d']
self._fake_quant_op_names = _fake_quant_op_list
self._fake_dequant_op_names = _fake_dequant_op_list
self._op_input_rename_map = collections.OrderedDict()
self._op_output_rename_map = collections.OrderedDict()
self._var_scale_map = collections.OrderedDict()
def apply(self, graph):
"""
        Adjust the order of quantize/dequantize operators for the inference process.
Args:
graph(IrGraph): the applied graph.
Returns:
            the adjusted graph.
"""
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
ops = graph.all_op_nodes()
for op_node in ops:
op_name = op_node.name()
if op_name in self._fake_quant_op_names:
input_arg_name = op_node.input('X')[0]
if input_arg_name in persistable_vars:
if self._weight_quantize_type == 'abs_max':
param = self._load_var(input_arg_name)
scale_v = np.max(np.abs(param))
elif self._weight_quantize_type == 'channel_wise_abs_max':
param = self._load_var(input_arg_name)
if len(param.shape) == 4: # conv2d or depthwise_conv2d
scale_v = []
for i in range(param.shape[0]):
scale_v.append(np.max(np.abs(param[i])))
else:
scale_v = np.max(np.abs(param))
else:
scale_v = self._load_var(
op_node.output('OutScale')[0])[0]
self._var_scale_map[input_arg_name] = scale_v
self._remove_fake_quant_and_dequant_op(graph, op_node)
# quantize weight and restore
param_v = self._load_var(input_arg_name)
quantized_param_v = self._quant(param_v, scale_v,
self._weight_bits)
self._restore_var(input_arg_name, quantized_param_v)
else:
scale_v = graph._find_node_by_name(
op_node.outputs, op_node.output('OutScale')[0])
self._var_scale_map[input_arg_name] = scale_v
ops = graph.all_op_nodes()
for op_node in ops:
op_name = op_node.name()
if op_name in self._fake_dequant_op_names:
self._remove_fake_quant_and_dequant_op(graph, op_node)
ops = graph.all_op_nodes()
for op_node in ops:
op_name = op_node.name()
if op_name in self._quantizable_ops:
skipped = op_node.op().has_attr("skip_quant") and \
op_node.op().attr("skip_quant")
if skipped:
continue
if self._weight_quantize_type == 'channel_wise_abs_max' and op_name in self._conv_ops:
self._insert_post_channel_dequant_op(graph, op_node)
else:
self._insert_post_dequant_op(graph, op_node)
for op_node in ops:
            # insert dequant_op after fc/conv, need to rename inputs of the following ops
for var_node in op_node.inputs:
if var_node.node in self._op_output_rename_map:
old_in = var_node
new_in = self._op_output_rename_map[var_node.node]
graph.update_input_link(old_in, new_in, op_node)
# remove the unused var node in the graph
self._remove_unused_var_nodes(graph)
graph.resolve_hazard()
return graph
def _remove_fake_quant_and_dequant_op(self, graph, op_node):
k = graph._find_node_by_name(op_node.outputs, op_node.output('Out')[0])
v = graph._find_node_by_name(op_node.inputs, op_node.input('X')[0])
if v.node not in self._op_input_rename_map:
self._op_input_rename_map[k.node] = v
else:
self._op_input_rename_map[k.node] = self._op_input_rename_map[
v.node]
graph.safe_remove_nodes(op_node)
def _insert_post_channel_dequant_op(self, graph, op_node):
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
for var_node in op_node.inputs:
name = var_node.name()
if name not in op_node.input_arg_names():
continue
if var_node.node in self._op_input_rename_map:
old_in = var_node
new_in = self._op_input_rename_map[var_node.node]
new_in.clear_outputs()
graph.update_input_link(old_in, new_in, op_node)
original_var_name = self._original_var_name(name)
scale_v = self._var_scale_map[original_var_name]
if original_var_name in persistable_vars:
assert isinstance(
scale_v,
list), 'The scale of parameter %s is not a list.' % (
original_var_name)
channel_scale = np.array(scale_v)
else:
assert isinstance(scale_v, IrNode)
scale_var_node = self._var_scale_map[original_var_name]
if len(op_node.output_arg_names()) != 1:
raise ValueError("Only support one output, but op %s has"
" more than one output." % (op_node.name()))
output_var_node = graph._find_node_by_name(
op_node.outputs, op_node.output_arg_names()[0])
weight_scale_node = graph.create_persistable_node(
name=unique_name.generate('channel_scale'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[channel_scale.shape[0]],
var_dtype=output_var_node.dtype())
data_type = 'float64' if output_var_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(weight_scale_node,
channel_scale.astype(data_type), self._scope,
self._place)
dequant_var_node = graph.create_var_node(
name=self._dequantized_var_name(output_var_node.name()),
var_type=output_var_node.type(),
shape=output_var_node.shape(),
var_dtype=output_var_node.dtype())
dequant_op_node = graph.create_op_node(
op_type='fake_channel_wise_dequantize_max_abs',
attrs={
'quant_bits': [self._weight_bits, self._activation_bits],
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
},
inputs={
'X': output_var_node,
'Scales': [weight_scale_node, scale_var_node]
},
outputs={'Out': dequant_var_node})
graph.link_to(output_var_node, dequant_op_node)
graph.link_to(scale_var_node, dequant_op_node)
graph.link_to(weight_scale_node, dequant_op_node)
graph.link_to(dequant_op_node, dequant_var_node)
self._op_output_rename_map[output_var_node.node] = dequant_var_node
return dequant_var_node
def _insert_post_dequant_op(self, graph, op_node):
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
if len(op_node.input_arg_names()) >= 2 and len(persistable_vars) == 0:
raise ValueError("The op %s has more than one inputs "
"and all of them are not persistable. "
"Now, it is not supported!" % (op_node.name()))
max_range = 1
param_range = (1 << (self._weight_bits - 1)) - 1
act_range = (1 << (self._activation_bits - 1)) - 1
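        # max_range accumulates param_range / weight_scale for the weight
        # input and act_range for the activation input, so a single
        # fake_dequantize_max_abs op can undo both quantizations at once.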
for var_node in op_node.inputs:
name = var_node.name()
if name not in op_node.input_arg_names():
continue
if var_node.node in self._op_input_rename_map:
old_in = var_node
new_in = self._op_input_rename_map[var_node.node]
new_in.clear_outputs()
graph.update_input_link(old_in, new_in, op_node)
original_var_name = self._original_var_name(name)
scale_v = self._var_scale_map[original_var_name]
if original_var_name in persistable_vars:
assert self._is_float(
scale_v), 'The scale of parameter %s is not a float.' % (
original_var_name)
max_range *= param_range / scale_v
else:
max_range *= act_range
assert isinstance(scale_v, IrNode)
scale_var_node = self._var_scale_map[original_var_name]
if len(op_node.output_arg_names()) != 1:
raise ValueError("Only support one output, but op %s has"
" more than one output." % (op_node.name()))
output_var_node = graph._find_node_by_name(
op_node.outputs, op_node.output_arg_names()[0])
dequant_var_node = graph.create_var_node(
name=self._dequantized_var_name(output_var_node.name()),
var_type=output_var_node.type(),
shape=output_var_node.shape(),
var_dtype=output_var_node.dtype())
dequant_op_node = graph.create_op_node(
op_type='fake_dequantize_max_abs',
attrs={
'max_range': float(max_range),
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
},
inputs={'X': output_var_node,
'Scale': scale_var_node},
outputs={'Out': dequant_var_node})
graph.link_to(output_var_node, dequant_op_node)
graph.link_to(scale_var_node, dequant_op_node)
graph.link_to(dequant_op_node, dequant_var_node)
self._op_output_rename_map[output_var_node.node] = dequant_var_node
return dequant_var_node
def _load_var(self, name):
return np.array(self._scope.find_var(name).get_tensor())
def _restore_var(self, name, array):
tensor = self._scope.find_var(name).get_tensor()
tensor.set(array, self._place)
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
ops = graph.all_op_nodes()
for op_node in ops:
for input_node in op_node.inputs:
all_used_vars.add(input_node)
for output_node in op_node.outputs:
all_used_vars.add(output_node)
all_used_vars = {n.node for n in all_used_vars}
all_unused_vars = {
n
for n in filter(lambda node: node.node not in all_used_vars,
graph.all_var_nodes())
}
graph.safe_remove_nodes(all_unused_vars)
def _original_var_name(self, var_name):
"""
Return the original variable name.
"""
if var_name.endswith('.quantized.dequantized'):
return var_name[:-len('.quantized.dequantized')]
if var_name.endswith('.quantized'):
return var_name[:-len('.quantized')]
if var_name.endswith('.dequantized'):
return var_name[:-len('.dequantized')]
if var_name.endswith('.scale'):
return var_name[:-len('.scale')]
else:
return var_name
def _dequantized_var_name(self, var_name):
"""
Return dequantized variable name for the input `var_name`.
"""
return "%s.dequantized" % (var_name)
def _is_float(self, v):
return isinstance(v, float) or isinstance(v, np.float32) \
or isinstance(v, np.float64)
def _quant(self, x, scale, num_bits):
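        # `scale` is a list (one value per output channel) for
        # 'channel_wise_abs_max' weights, or a single float for layer-wise
        # 'abs_max' quantization.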
if isinstance(scale, list):
for i, s in enumerate(scale):
x[i] = np.round(x[i] / s * ((1 << (num_bits - 1)) - 1))
return x
else:
return np.round(x / scale * ((1 << (num_bits - 1)) - 1))
class ConvertToInt8Pass(object):
_supported_quantizable_op_type = \
QuantizationTransformPass._supported_quantizable_op_type
def __init__(self,
scope,
place,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
"""
Convert the weights into int8_t type.
Args:
scope(fluid.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the
8bits weight tensors.
quantizable_op_type(list[str]): List the type of ops that will be quantized.
Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
QuantizationTransformPass and QuantizationFreezePass must be the same as this.
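        Examples:
        .. code-block:: python
            # A minimal sketch; it assumes the graph was already processed
            # by QuantizationFreezePass.
            import paddle.fluid as fluid
            from paddle.fluid.contrib.slim.quantization \
                import ConvertToInt8Pass
            from paddle.fluid.framework import IrGraph
            from paddle.fluid import core
            graph = IrGraph(core.Graph(fluid.Program().desc), for_test=True)
            place = fluid.CPUPlace()
            convert_int8_pass = ConvertToInt8Pass(fluid.global_scope(), place)
            convert_int8_pass.apply(graph)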
"""
assert scope is not None, \
'The scope cannot be set None.'
assert place is not None, \
'The place cannot be set None.'
self._scope = scope
self._place = place
self._quantizable_ops = quantizable_op_type
for op in self._quantizable_ops:
assert op in ConvertToInt8Pass._supported_quantizable_op_type, \
op + " is not supported for quantization."
def apply(self, graph):
"""
        Convert the weights' type of the graph. After that, the data type of
        the graph weights is int8_t.
Args:
graph(IrGraph): the applied graph.
Returns:
            the converted graph.
"""
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
ops = graph.all_op_nodes()
input_map = {}
for op_node in ops:
op_name = op_node.name()
if op_name in self._quantizable_ops:
skipped = op_node.op().has_attr("skip_quant") and \
op_node.op().attr("skip_quant")
if skipped:
continue
for var_node in op_node.inputs:
name = var_node.name()
if name in persistable_vars:
if name not in input_map:
int8_var_node = self._convert_to_int8(graph,
var_node)
input_map[name] = int8_var_node
graph.update_input_link(var_node, input_map[name],
op_node)
# remove the unused var node in the graph
self._remove_unused_var_nodes(graph)
graph.resolve_hazard()
return graph
def _convert_to_int8(self, graph, var_node):
int8_var_node_name = var_node.name() + ".int8"
int8_var_node = graph.create_persistable_node(
name=cpt.to_text(int8_var_node_name),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=core.VarDesc.VarType.INT8)
array = self._load_var(var_node.name())
self._scope.var(int8_var_node_name)
self._store_var(int8_var_node_name, array, np.int8)
return int8_var_node
def _load_var(self, name):
return np.array(self._scope.find_var(name).get_tensor())
def _store_var(self, name, array, dtype):
tensor = self._scope.find_var(name).get_tensor()
tensor.set(array.astype(dtype), self._place)
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
ops = graph.all_op_nodes()
for op_node in ops:
for input_node in op_node.inputs:
all_used_vars.add(input_node)
for output_node in op_node.outputs:
all_used_vars.add(output_node)
all_used_vars = {n.node for n in all_used_vars}
all_unused_vars = {
n
for n in filter(lambda node: node.node not in all_used_vars,
graph.all_var_nodes())
}
graph.safe_remove_nodes(all_unused_vars)
class TransformForMobilePass(object):
def __init__(self):
"""
        This pass is used to convert the frozen graph for paddle-mobile execution.
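        Examples:
        .. code-block:: python
            # A minimal sketch; `graph` is assumed to be an IrGraph that
            # was already frozen by QuantizationFreezePass.
            from paddle.fluid.contrib.slim.quantization \
                import TransformForMobilePass
            mobile_pass = TransformForMobilePass()
            graph = mobile_pass.apply(graph)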
"""
self._fake_quant_op_names = _fake_quant_op_list
self._fake_dequant_op_names = _fake_dequant_op_list
def apply(self, graph):
"""
        Because paddle-mobile uses `quantize` and `dequantize` as the names of
        its quantize and dequantize operators, the `apply` function just
        performs this renaming.
Args:
graph(IrGraph): the graph will be transformed.
Returns:
            the transformed graph.
"""
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in self._fake_quant_op_names:
op_node.set_type('quantize')
quant_node = graph.create_op_node_from_desc(op_node.op())
for input_node in op_node.inputs:
graph.link_to(input_node, quant_node)
for output_node in op_node.outputs:
graph.link_to(quant_node, output_node)
graph.safe_remove_nodes(op_node)
if name in self._fake_dequant_op_names:
op_node.set_type('dequantize')
dequant_node = graph.create_op_node_from_desc(op_node.op())
for input_node in op_node.inputs:
graph.link_to(input_node, dequant_node)
for output_node in op_node.outputs:
graph.link_to(dequant_node, output_node)
graph.safe_remove_nodes(op_node)
graph.resolve_hazard()
return graph
class ScaleForTrainingPass(object):
def __init__(self, scope=None, place=None, moving_rate=0.9):
"""
This pass is used for calculating output scales of some operators.
These output scales may be used by tensorRT or some other inference engines.
Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace): The place is used to initialize new parameters.
moving_rate(float): The decay coefficient of moving average. The default value is 0.9.
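        Examples:
        .. code-block:: python
            # A minimal sketch; a training graph (for_test=False) is assumed.
            import paddle.fluid as fluid
            from paddle.fluid.contrib.slim.quantization \
                import ScaleForTrainingPass
            from paddle.fluid.framework import IrGraph
            from paddle.fluid import core
            graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False)
            place = fluid.CPUPlace()
            scale_pass = ScaleForTrainingPass(fluid.global_scope(), place)
            scale_pass.apply(graph)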
"""
self._scope = scope
self._place = place
self._moving_rate = moving_rate
self._is_test = None
self._teller_set = _out_scale_op_list
def apply(self, graph):
"""
Insert the `moving_average_abs_max_scale` op in order to calculate output scales
of operators in the teller_set.
Args:
graph(IrGraph): the target graph.
"""
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
self._is_test = graph.is_test()
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in self._teller_set:
if len(op_node.output_arg_names()) != 1:
continue
in_node = graph._find_node_by_name(
op_node.outputs, op_node.output_arg_names()[0])
out_node = graph.create_var_node_from_desc(in_node.var())
scale_node = graph.create_persistable_node(
name=self._scale_name(in_node.name()),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=in_node.dtype())
ins = {'X': in_node}
outs = {'Out': out_node, 'OutScale': scale_node}
if not self._is_test:
state_in_node = graph.create_persistable_node(
name=unique_name.generate('scale_state@'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=in_node.dtype(),
shape=[1])
data_type = 'float64' if in_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
state_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
accum_in_node = graph.create_persistable_node(
name=unique_name.generate('scale_accum@'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=in_node.dtype(),
shape=[1])
_init_var_node(
accum_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
state_out_node = graph.create_var_node_from_desc(
state_in_node.var())
accum_out_node = graph.create_var_node_from_desc(
accum_in_node.var())
ins['InState'] = state_in_node
ins['InAccum'] = accum_in_node
outs['OutState'] = state_out_node
outs['OutAccum'] = accum_out_node
attrs = {
'moving_rate': self._moving_rate,
'is_test': self._is_test,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
}
scale_op_node = graph.create_op_node(
op_type='moving_average_abs_max_scale',
attrs=attrs,
inputs=ins,
outputs=outs)
graph.link_to(in_node, scale_op_node)
graph.link_to(scale_op_node, out_node)
graph.link_to(scale_op_node, scale_node)
if not self._is_test:
graph.link_to(state_in_node, scale_op_node)
graph.link_to(accum_in_node, scale_op_node)
graph.link_to(scale_op_node, state_out_node)
graph.link_to(scale_op_node, accum_out_node)
graph.resolve_hazard()
return graph
def _scale_name(self, var_name):
"""
Return the scale name for the var named `var_name`.
"""
return "%s@scale" % (var_name)
class ScaleForInferencePass(object):
def __init__(self, scope=None):
"""
This pass is used for setting output scales of some operators.
These output scales may be used by tensorRT or some other inference engines.
Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
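        Examples:
        .. code-block:: python
            # A minimal sketch; the scope is assumed to already hold the
            # output scales computed by ScaleForTrainingPass.
            import paddle.fluid as fluid
            from paddle.fluid.contrib.slim.quantization \
                import ScaleForInferencePass
            from paddle.fluid.framework import IrGraph
            from paddle.fluid import core
            graph = IrGraph(core.Graph(fluid.Program().desc), for_test=True)
            scale_pass = ScaleForInferencePass(fluid.global_scope())
            scale_pass.apply(graph)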
"""
self._scope = scope
self._teller_set = _out_scale_op_list
def apply(self, graph):
"""
Get output scales from the scope and set these scales in op_descs
of operators in the teller_set.
Args:
graph(IrGraph): the target graph.
"""
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in self._teller_set:
if len(op_node.output_arg_names()) != 1:
continue
scale_name = self._scale_name(op_node.output_arg_names()[0])
scale_v = np.array(
self._scope.find_var(scale_name).get_tensor())[0]
op_node.op()._set_attr("out_scale", float(scale_v))
graph.resolve_hazard()
return graph
def _scale_name(self, var_name):
"""
Return the scale name for the var named `var_name`.
"""
return "%s@scale" % (var_name)
class AddQuantDequantPass(object):
_supported_quantizable_op_type = [
"pool2d", "elementwise_add", "concat", "softmax", "argmax", "transpose",
"equal", "gather", "greater_equal", "greater_than", "less_equal",
"less_than", "mean", "not_equal", "reshape", "reshape2",
"bilinear_interp", "nearest_interp", "trilinear_interp", "slice",
"squeeze", "elementwise_sub"
]
_activation_type = ["relu", "relu6", "leaky_relu", "tanh", "swish"]
def __init__(self,
scope=None,
place=None,
moving_rate=0.9,
quant_bits=8,
skip_pattern=["skip_quant"],
quantizable_op_type=["elementwise_add", "pool2d"],
is_full_quantized=False):
"""
        This pass adds quant_dequant ops before some ops, all of whose inputs
        must be non-persistable.
        The input scales can be obtained from the quant_dequant ops.
Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to initialize new
parameters described above.
moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
quantization. Default is 0.9.
quant_bits(int, optional): quantization bit number for activation. Default is 8.
skip_pattern(str, optional): The user-defined quantization skip pattern, which
will be presented in the name scope of an op. When the skip pattern is
detected in an op's name scope, the corresponding op will not be quantized.
Default is 'skip_quant'.
quantizable_op_type(list[str], optional): List the type of ops that will be
quantized. Default is ["elementwise_add", "pool2d"].
is_full_quantized(bool, optional): If set is_full_quantized as True, apply
quantization to all supported quantizable op type. If set is_full_quantized
as False, only apply quantization to the op type according to the input
quantizable_op_type.
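        Examples:
        .. code-block:: python
            # A minimal sketch; scope and place must be provided.
            import paddle.fluid as fluid
            from paddle.fluid.contrib.slim.quantization \
                import AddQuantDequantPass
            from paddle.fluid.framework import IrGraph
            from paddle.fluid import core
            graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False)
            place = fluid.CPUPlace()
            quant_dequant_pass = AddQuantDequantPass(fluid.global_scope(),
                                                     place)
            quant_dequant_pass.apply(graph)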
"""
self._scope = scope
self._place = place
self._moving_rate = moving_rate
self._quant_bits = quant_bits
self._is_test = None
self._skip_pattern = skip_pattern
if is_full_quantized:
self._quantizable_op_type = \
AddQuantDequantPass._supported_quantizable_op_type
else:
self._quantizable_op_type = quantizable_op_type
for op_type in quantizable_op_type:
assert op_type in AddQuantDequantPass._supported_quantizable_op_type + \
AddQuantDequantPass._activation_type, \
op_type + " is not supported for quantization."
self._quantizable_grad_op_type = [
'%s_grad' % (op) for op in self._quantizable_op_type
]
assert self._scope is not None, "scope must not be None."
assert self._place is not None, "place must not be None."
def apply(self, graph):
"""
Add quant_dequant before some ops, such as the 'elementwise_add' and
'pool2d' op.
Args:
graph(IrGraph): the target graph.
Returns:
None
"""
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
self._is_test = graph.is_test()
dequantized_vars_map = collections.OrderedDict()
# Forward stage, insert quant_dequant op
all_op_nodes = graph.all_op_nodes()
for op_node in all_op_nodes:
if op_node.name() in self._quantizable_op_type:
user_skipped = False
if isinstance(self._skip_pattern, list):
user_skipped = op_node.op().has_attr("op_namescope") and \
any(pattern in op_node.op().attr("op_namescope") for pattern in self._skip_pattern)
elif isinstance(self._skip_pattern, str):
user_skipped = op_node.op().has_attr("op_namescope") and \
op_node.op().attr("op_namescope").find(self._skip_pattern) != -1
if user_skipped:
continue
if not self._is_input_all_not_persistable(graph, op_node):
continue
input_name_list = _op_real_in_out_name[op_node.name()][0]
for input_name in input_name_list:
for arg_name in op_node.input(input_name):
in_node = graph._find_node_by_name(op_node.inputs,
arg_name)
if arg_name in dequantized_vars_map:
quant_var_node = dequantized_vars_map[arg_name]
else:
quant_var_node, _ = \
self._insert_quant_dequant_moving_average_abs_max_op(
graph, in_node, self._quant_bits)
dequantized_vars_map[arg_name] = quant_var_node
graph.update_input_link(in_node, quant_var_node,
op_node)
# Backward stage, update input link
for op_node in all_op_nodes:
if op_node.name() in self._quantizable_grad_op_type:
for input_name in op_node.input_arg_names():
if input_name in dequantized_vars_map:
in_node = graph._find_node_by_name(op_node.inputs,
input_name)
dequant_var_node = dequantized_vars_map[input_name]
graph.update_input_link(in_node, dequant_var_node,
op_node)
graph.resolve_hazard()
return graph
def _is_input_all_not_persistable(self, graph, op_node):
'''
Analyze whether the real inputs of the op node are all non-persistable.
'''
is_input_all_not_persistable = True
op_node_name = op_node.name()
input_name_list = _op_real_in_out_name[op_node_name][0]
for input_name in input_name_list:
for arg_name in op_node.input(input_name):
in_node = graph._find_node_by_name(op_node.inputs, arg_name)
is_input_all_not_persistable = (is_input_all_not_persistable and \
(not in_node.persistable()))
return is_input_all_not_persistable
def _insert_quant_dequant_moving_average_abs_max_op(self, graph, var_node,
quant_bits):
"""Insert fake_quantize_dequantize_moving_average_abs_max op.
"""
quant_var_node = graph.create_var_node(
name="{}.quant_dequant".format(var_node.name()),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
scale_in_node = graph.create_persistable_node(
name="{}.quant_dequant.scale".format(var_node.name()),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=var_node.dtype())
data_type = 'float64' if var_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
scale_in_node,
np.array(
[0.001], dtype=data_type),
self._scope,
self._place)
scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
ins = {'X': var_node, 'InScale': scale_in_node}
outs = {'Out': quant_var_node, 'OutScale': scale_out_node}
if not self._is_test:
state_in_node = graph.create_persistable_node(
name=unique_name.generate('quant_dequant.state'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=var_node.dtype(),
shape=[1])
data_type = 'float64' if var_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
state_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
accum_in_node = graph.create_persistable_node(
name=unique_name.generate('quant_dequant.accum'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=var_node.dtype(),
shape=[1])
_init_var_node(
accum_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
state_out_node = graph.create_var_node_from_desc(state_in_node.var(
))
accum_out_node = graph.create_var_node_from_desc(accum_in_node.var(
))
ins['InState'] = state_in_node
ins['InAccum'] = accum_in_node
outs['OutState'] = state_out_node
outs['OutAccum'] = accum_out_node
attrs = {
'bit_length': quant_bits,
'moving_rate': self._moving_rate,
'is_test': self._is_test,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
}
quant_op_node = graph.create_op_node(
op_type='fake_quantize_dequantize_moving_average_abs_max',
attrs=attrs,
inputs=ins,
outputs=outs)
graph.link_to(var_node, quant_op_node)
graph.link_to(scale_in_node, quant_op_node)
graph.link_to(quant_op_node, quant_var_node)
graph.link_to(quant_op_node, scale_out_node)
if not self._is_test:
graph.link_to(state_in_node, quant_op_node)
graph.link_to(accum_in_node, quant_op_node)
graph.link_to(quant_op_node, state_out_node)
graph.link_to(quant_op_node, accum_out_node)
return quant_var_node, scale_out_node
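# ---------------------------------------------------------------------------
# Usage sketch (illustrative): applying AddQuantDequantPass to a test graph.
# `scope`, `place` and `program` are assumed to exist from a prior session.
#
#   graph = IrGraph(core.Graph(program.desc), for_test=True)
#   quant_dequant_pass = AddQuantDequantPass(
#       scope=scope, place=place,
#       quantizable_op_type=["elementwise_add", "pool2d"])
#   quant_dequant_pass.apply(graph)
# ---------------------------------------------------------------------------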
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import sys
import numpy as np
from .... import Executor
from .... import io
from .... import core, scope_guard
from ....compiler import CompiledProgram
from ....compiler import BuildStrategy
from ....framework import IrGraph, Variable, Program
from ....log_helper import get_logger
from ..core.strategy import Strategy
from .quantization_pass import *
__all__ = ['QuantizationStrategy']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class QuantizationStrategy(Strategy):
"""
The strategy for Quantization.
"""
def __init__(self,
start_epoch=0,
end_epoch=0,
float_model_save_path=None,
mobile_model_save_path=None,
int8_model_save_path=None,
activation_bits=8,
weight_bits=8,
activation_quantize_type='abs_max',
weight_quantize_type='abs_max',
save_in_nodes=None,
save_out_nodes=None):
"""
Args:
start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
float_model_save_path(str): The path to save model with float weights.
None means it doesn't save float model. default: None.
mobile_model_save_path(str): The path to save model for paddle-mobile execution.
None means it doesn't save mobile model. default: None.
int8_model_save_path(str): The path to save model with int8_t weight.
None means it doesn't save int8 model. default: None.
activation_bits(int): quantization bit number for activation. default: 8.
weight_bits(int): quantization bit number for weights. The bias is not quantized.
default: 8.
activation_quantize_type(str): quantization type for activation,
now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
If use 'abs_max' mode, the quantization scale will be calculated
dynamically each step in both training and testing period. If use
'range_abs_max', a static quantization scale will be calculated
during training and used in inference.
weight_quantize_type (str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'.
The 'range_abs_max' usually is not used for weight, since weights are fixed once the model is well trained.
save_in_nodes(list<str>): A list of variable names used to prune graph
for saving inference model.
save_out_nodes(list<str>): A list of variable names used to prune graph
for saving inference model.
"""
super(QuantizationStrategy, self).__init__(start_epoch, end_epoch)
self.start_epoch = start_epoch
self.end_epoch = end_epoch
self.float_model_save_path = float_model_save_path
self.mobile_model_save_path = mobile_model_save_path
self.int8_model_save_path = int8_model_save_path
self.activation_bits = activation_bits
self.weight_bits = weight_bits
self.activation_quantize_type = activation_quantize_type
self.weight_quantize_type = weight_quantize_type
self.save_out_nodes = save_out_nodes
self.save_in_nodes = save_in_nodes
def restore_from_checkpoint(self, context):
"""
Restore the graph when the compression task is initialized from a checkpoint.
"""
# It was initialized from a checkpoint and has missed the start epoch.
if context.epoch_id != 0 and context.epoch_id > self.start_epoch:
_logger.info("Restore quantization task from checkpoint")
self._modify_graph_for_quantization(context)
_logger.info("Finish restoring quantization task from checkpoint")
def _modify_graph_for_quantization(self, context):
"""
Insert fake_quantize_op and fake_dequantize_op before training and testing.
"""
train_ir_graph = IrGraph(
core.Graph(context.optimize_graph.program.clone().desc),
for_test=False)
test_ir_graph = IrGraph(
core.Graph(context.eval_graph.program.clone().desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=context.scope,
place=context.place,
weight_bits=self.weight_bits,
activation_bits=self.activation_bits,
activation_quantize_type=self.activation_quantize_type,
weight_quantize_type=self.weight_quantize_type)
transform_pass.apply(train_ir_graph)
transform_pass.apply(test_ir_graph)
# Put persistables created by transform_pass into context.optimize_graph.persistables
# for saving checkpoint.
program_persistables = set()
for var in context.optimize_graph.program.list_vars():
if var.persistable:
program_persistables.add(var.name)
program = Program()
for var_node in train_ir_graph.all_persistable_nodes():
if var_node.name() not in program_persistables:
var_desc = var_node.var()
var = program.global_block().create_var(
name=var_node.name(),
shape=var_desc.shape(),
dtype=var_desc.dtype(),
type=var_desc.type(),
lod_level=var_desc.lod_level())
context.optimize_graph.persistables[var.name] = var
build_strategy = BuildStrategy()
build_strategy.enable_inplace = False
build_strategy.memory_optimize = False
build_strategy.fuse_all_reduce_ops = False
# for quantization training
context.optimize_graph.compiled_graph = CompiledProgram(
train_ir_graph.graph).with_data_parallel(
loss_name=context.optimize_graph.out_nodes['loss'],
build_strategy=build_strategy)
context.eval_graph.program = test_ir_graph.to_program()
# for saving inference model after training
context.put('quantization_test_ir_graph_backup', test_ir_graph)
def on_epoch_begin(self, context):
"""
Insert fake_quantize_op and fake_dequantize_op before training and testing.
"""
super(QuantizationStrategy, self).on_epoch_begin(context)
if self.start_epoch == context.epoch_id:
_logger.info('QuantizationStrategy::on_epoch_begin')
self._modify_graph_for_quantization(context)
_logger.info('Finish QuantizationStrategy::on_epoch_begin')
def on_epoch_end(self, context):
"""
Free and save inference model.
"""
super(QuantizationStrategy, self).on_epoch_end(context)
if context.epoch_id == self.end_epoch:
_logger.info('QuantizationStrategy::on_epoch_end')
test_ir_graph = context.get('quantization_test_ir_graph_backup')
# freeze the graph after training
freeze_pass = QuantizationFreezePass(
scope=context.scope,
place=context.place,
weight_bits=self.weight_bits,
activation_bits=self.activation_bits,
weight_quantize_type=self.weight_quantize_type)
freeze_pass.apply(test_ir_graph)
# for other strategies
context.eval_graph.program = test_ir_graph.to_program()
if self.save_out_nodes is None:
out_vars = [
context.eval_graph.var(var_name)._var
for var_name in context.eval_graph.out_nodes.values()
]
else:
out_vars = [
context.eval_graph.var(var_name)._var
for var_name in self.save_out_nodes
]
if self.save_in_nodes is None:
in_vars = list(context.eval_graph.in_nodes.values())
else:
in_vars = self.save_in_nodes
# save float model
if self.float_model_save_path:
executor = Executor(context.place)
with scope_guard(context.scope):
io.save_inference_model(
self.float_model_save_path,
in_vars,
out_vars,
executor,
main_program=test_ir_graph.to_program(),
model_filename='model',
params_filename='weights',
export_for_deployment=True)
# save int8 model
if self.int8_model_save_path:
convert_int8_pass = ConvertToInt8Pass(
scope=context.scope, place=context.place)
convert_int8_pass.apply(test_ir_graph)
executor = Executor(context.place)
with scope_guard(context.scope):
io.save_inference_model(
self.int8_model_save_path,
in_vars,
out_vars,
executor,
main_program=test_ir_graph.to_program(),
model_filename='model',
params_filename='weights',
export_for_deployment=True)
# save mobile model
if self.mobile_model_save_path:
if not self.int8_model_save_path:
# convert the weights as int8_t type
convert_int8_pass = ConvertToInt8Pass(
scope=context.scope, place=context.place)
convert_int8_pass.apply(test_ir_graph)
# make some changes on the graph for the mobile inference
mobile_pass = TransformForMobilePass()
mobile_pass.apply(test_ir_graph)
executor = Executor(context.place)
with scope_guard(context.scope):
io.save_inference_model(
self.mobile_model_save_path,
in_vars,
out_vars,
executor,
main_program=test_ir_graph.to_program(),
model_filename='model',
params_filename='weights',
export_for_deployment=True)
_logger.info('Finish QuantizationStrategy::on_epoch_end')
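# ---------------------------------------------------------------------------
# Usage sketch (illustrative, unverified): QuantizationStrategy is normally
# built from a compressor YAML config; a hypothetical snippet mirroring the
# constructor arguments could look like:
#
#   strategies:
#     quantization_strategy:
#       class: 'QuantizationStrategy'
#       start_epoch: 0
#       end_epoch: 0
#       float_model_save_path: './output/float'
#       weight_bits: 8
#       activation_bits: 8
#       weight_quantize_type: 'abs_max'
# ---------------------------------------------------------------------------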
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import controller
from .controller import *
__all__ = controller.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The controller used to search hyperparameters or neural architecture"""
import numpy as np
import copy
import math
import logging
from ....log_helper import get_logger
__all__ = ['EvolutionaryController', 'SAController']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class EvolutionaryController(object):
"""Abstract controller for all evolutionary searching method.
"""
def __init__(self, *args, **kwargs):
pass
def update(self, tokens, reward):
"""Update the status of controller according current tokens and reward.
Args:
tokens(list<int>): A solution of searching task.
reward(list<int>): The reward of tokens.
"""
raise NotImplementedError('Abstract method.')
def reset(self, range_table, constrain_func=None):
"""Reset the controller.
Args:
range_table(list<int>): It is used to define the searching space of controller.
The tokens[i] generated by controller should be in [0, range_table[i]).
constrain_func(function): It is used to check whether tokens meet the constraint.
None means there is no constraint. Default: None.
"""
raise NotImplementedError('Abstract method.')
def next_tokens(self):
"""Generate new tokens.
"""
raise NotImplementedError('Abstract method.')
class SAController(EvolutionaryController):
"""Simulated annealing controller."""
def __init__(self,
range_table=None,
reduce_rate=0.85,
init_temperature=1024,
max_iter_number=300):
"""Initialize.
Args:
range_table(list<int>): Range table.
reduce_rate(float): The decay rate of temperature.
init_temperature(float): Init temperature.
max_iter_number(int): max iteration number.
"""
super(SAController, self).__init__()
self._range_table = range_table
self._constrain_func = None
self._reduce_rate = reduce_rate
self._init_temperature = init_temperature
self._max_iter_number = max_iter_number
self._reward = -1
self._tokens = None
self._max_reward = -1
self._best_tokens = None
self._iter = 0
def __getstate__(self):
d = {}
for key in self.__dict__:
if key != "_constrain_func":
d[key] = self.__dict__[key]
return d
def reset(self, range_table, init_tokens, constrain_func=None):
"""
Reset the status of current controller.
Args:
range_table(list<int>): The range of value in each position of tokens generated by current controller. The range of tokens[i] is [0, range_table[i]).
init_tokens(list<int>): The initial tokens.
constrain_func(function): The callback function used to check whether the tokens meet constraint. None means there is no constraint. Default: None.
"""
self._range_table = range_table
self._constrain_func = constrain_func
self._tokens = init_tokens
self._iter = 0
def update(self, tokens, reward):
"""
Update the controller according to latest tokens and reward.
Args:
tokens(list<int>): The tokens generated in last step.
reward(float): The reward of tokens.
"""
self._iter += 1
temperature = self._init_temperature * self._reduce_rate**self._iter
if (reward > self._reward) or (np.random.random() <= math.exp(
(reward - self._reward) / temperature)):
self._reward = reward
self._tokens = tokens
if reward > self._max_reward:
self._max_reward = reward
self._best_tokens = tokens
_logger.info("iter: {}; max_reward: {}; best_tokens: {}".format(
self._iter, self._max_reward, self._best_tokens))
_logger.info("current_reward: {}; current tokens: {}".format(
self._reward, self._tokens))
def next_tokens(self, control_token=None):
"""
Get next tokens.
"""
if control_token:
tokens = control_token[:]
else:
tokens = self._tokens
new_tokens = tokens[:]
index = int(len(self._range_table) * np.random.random())
new_tokens[index] = (
new_tokens[index] + np.random.randint(self._range_table[index] - 1)
+ 1) % self._range_table[index]
_logger.info("change index[{}] from {} to {}".format(index, tokens[
index], new_tokens[index]))
if self._constrain_func is None:
return new_tokens
for _ in range(self._max_iter_number):
if not self._constrain_func(new_tokens):
index = int(len(self._range_table) * np.random.random())
new_tokens = tokens[:]
new_tokens[index] = np.random.randint(self._range_table[index])
else:
break
return new_tokens
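# ---------------------------------------------------------------------------
# Usage sketch (illustrative): driving SAController in a search loop.
# `evaluate` is a hypothetical user-supplied function mapping tokens to a
# reward; range_table and init_tokens are arbitrary example values.
#
#   controller = SAController(reduce_rate=0.85, init_temperature=1024,
#                             max_iter_number=300)
#   controller.reset(range_table=[4, 3, 3, 2, 2, 2],
#                    init_tokens=[0, 0, 0, 0, 0, 0])
#   for _ in range(100):
#       tokens = controller.next_tokens()
#       controller.update(tokens, evaluate(tokens))
# ---------------------------------------------------------------------------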
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
function(_inference_analysis_python_api_int8_test target model_dir data_dir filename use_mkldnn)
py_test(${target} SRCS ${filename}
ENVS CPU_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
FLAGS_use_mkldnn=${use_mkldnn}
ARGS --infer_model ${model_dir}/model
--infer_data ${data_dir}/data.bin
--int8_model_save_path int8_models/${target}
--warmup_batch_size 100
--batch_size 50)
endfunction()
function(inference_analysis_python_api_int8_test target model_dir data_dir filename)
_inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_dir} ${filename} False)
endfunction()
function(inference_analysis_python_api_int8_test_mkldnn target model_dir data_dir filename)
_inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_dir} ${filename} True)
endfunction()
function(inference_qat_int8_test target model_dir data_dir test_script use_mkldnn)
py_test(${target} SRCS ${test_script}
ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
FLAGS_use_mkldnn=${use_mkldnn}
ARGS --qat_model ${model_dir}/model
--infer_data ${data_dir}/data.bin
--batch_size 25
--batch_num 2
--acc_diff_threshold 0.1)
endfunction()
function(inference_qat2_int8_test target model_dir data_dir test_script use_mkldnn)
py_test(${target} SRCS ${test_script}
ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
FLAGS_use_mkldnn=${use_mkldnn}
ARGS --qat_model ${model_dir}/float
--infer_data ${data_dir}/data.bin
--batch_size 25
--batch_num 2
--acc_diff_threshold 0.1
--qat2)
endfunction()
function(save_qat_model_test target qat_model_dir fp32_model_save_path int8_model_save_path test_script)
py_test(${target} SRCS ${test_script}
ARGS --qat_model_path ${qat_model_dir}
--fp32_model_save_path ${fp32_model_save_path}
--int8_model_save_path ${int8_model_save_path})
endfunction()
if(WIN32)
list(REMOVE_ITEM TEST_OPS test_light_nas)
list(REMOVE_ITEM TEST_OPS test_post_training_quantization_mobilenetv1)
list(REMOVE_ITEM TEST_OPS test_post_training_quantization_resnet50)
endif()
# int8 image classification python api test
if(LINUX AND WITH_MKLDNN)
set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
set(MKLDNN_INT8_TEST_FILE "test_mkldnn_int8_quantization_strategy.py")
set(MKLDNN_INT8_TEST_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${MKLDNN_INT8_TEST_FILE}")
# googlenet int8
set(INT8_GOOGLENET_MODEL_DIR "${INT8_DATA_DIR}/googlenet")
inference_analysis_python_api_int8_test(test_slim_int8_googlenet ${INT8_GOOGLENET_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
# mobilenet int8
set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv1")
inference_analysis_python_api_int8_test(test_slim_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
inference_analysis_python_api_int8_test_mkldnn(test_slim_int8_mobilenet_mkldnn ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
# temporarily adding WITH_SLIM_MKLDNN_FULL_TEST FLAG for QA testing the following UTs locally,
# since the following UTs cost too much time on CI test.
if (WITH_SLIM_MKLDNN_FULL_TEST)
# resnet50 int8
set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50")
inference_analysis_python_api_int8_test(test_slim_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
# mobilenetv2 int8
set(INT8_MOBILENETV2_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv2")
inference_analysis_python_api_int8_test(test_slim_int8_mobilenetv2 ${INT8_MOBILENETV2_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
# resnet101 int8
set(INT8_RESNET101_MODEL_DIR "${INT8_DATA_DIR}/resnet101")
inference_analysis_python_api_int8_test(test_slim_int8_resnet101 ${INT8_RESNET101_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
# vgg16 int8
set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16")
inference_analysis_python_api_int8_test(test_slim_int8_vgg16 ${INT8_VGG16_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
# vgg19 int8
set(INT8_VGG19_MODEL_DIR "${INT8_DATA_DIR}/vgg19")
inference_analysis_python_api_int8_test(test_slim_int8_vgg19 ${INT8_VGG19_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE_PATH})
endif()
endif()
# Since test_mkldnn_int8_quantization_strategy only supports testing on Linux
# with MKL-DNN, we remove it here to avoid running it twice and to skip it on other systems.
list(REMOVE_ITEM TEST_OPS test_mkldnn_int8_quantization_strategy)
# QAT FP32 & INT8 comparison python api tests
if(LINUX AND WITH_MKLDNN)
set(DATASET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
set(QAT_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
set(QAT_MODELS_BASE_URL "${INFERENCE_URL}/int8/QAT_models")
set(MKLDNN_QAT_TEST_FILE "qat_int8_comparison.py")
set(MKLDNN_QAT_TEST_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${MKLDNN_QAT_TEST_FILE}")
# ImageNet small dataset
# May be already downloaded for INT8v2 unit tests
if (NOT EXISTS ${DATASET_DIR})
inference_download_and_uncompress(${DATASET_DIR} "${INFERENCE_URL}/int8" "imagenet_val_100_tail.tar.gz")
endif()
# QAT ResNet50
set(QAT_RESNET50_MODEL_DIR "${QAT_DATA_DIR}/ResNet50_QAT")
if (NOT EXISTS ${QAT_RESNET50_MODEL_DIR})
inference_download_and_uncompress(${QAT_RESNET50_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "ResNet50_qat_model.tar.gz" )
endif()
inference_qat_int8_test(test_qat_int8_resnet50_mkldnn ${QAT_RESNET50_MODEL_DIR} ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
# QAT ResNet101
set(QAT_RESNET101_MODEL_DIR "${QAT_DATA_DIR}/ResNet101_QAT")
if (NOT EXISTS ${QAT_RESNET101_MODEL_DIR})
inference_download_and_uncompress(${QAT_RESNET101_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "ResNet101_qat_model.tar.gz" )
endif()
# inference_qat_int8_test(test_qat_int8_resnet101_mkldnn ${QAT_RESNET101_MODEL_DIR} ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
# QAT GoogleNet
set(QAT_GOOGLENET_MODEL_DIR "${QAT_DATA_DIR}/GoogleNet_QAT")
if (NOT EXISTS ${QAT_GOOGLENET_MODEL_DIR})
inference_download_and_uncompress(${QAT_GOOGLENET_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "GoogleNet_qat_model.tar.gz" )
endif()
inference_qat_int8_test(test_qat_int8_googlenet_mkldnn ${QAT_GOOGLENET_MODEL_DIR} ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
# QAT MobileNetV1
set(QAT_MOBILENETV1_MODEL_DIR "${QAT_DATA_DIR}/MobileNetV1_QAT")
if (NOT EXISTS ${QAT_MOBILENETV1_MODEL_DIR})
inference_download_and_uncompress(${QAT_MOBILENETV1_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "MobileNetV1_qat_model.tar.gz" )
endif()
inference_qat_int8_test(test_qat_int8_mobilenetv1_mkldnn ${QAT_MOBILENETV1_MODEL_DIR} ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
# QAT MobileNetV2
set(QAT_MOBILENETV2_MODEL_DIR "${QAT_DATA_DIR}/MobileNetV2_QAT")
if (NOT EXISTS ${QAT_MOBILENETV2_MODEL_DIR})
inference_download_and_uncompress(${QAT_MOBILENETV2_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "MobileNetV2_qat_model.tar.gz" )
endif()
inference_qat_int8_test(test_qat_int8_mobilenetv2_mkldnn ${QAT_MOBILENETV2_MODEL_DIR} ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
# QAT VGG16
set(QAT_VGG16_MODEL_DIR "${QAT_DATA_DIR}/VGG16_QAT")
if (NOT EXISTS ${QAT_VGG16_MODEL_DIR})
inference_download_and_uncompress(${QAT_VGG16_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "VGG16_qat_model.tar.gz" )
endif()
# inference_qat_int8_test(test_qat_int8_vgg16_mkldnn ${QAT_VGG16_MODEL_DIR} ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
# QAT VGG19
set(QAT_VGG19_MODEL_DIR "${QAT_DATA_DIR}/VGG19_QAT")
if (NOT EXISTS ${QAT_VGG19_MODEL_DIR})
inference_download_and_uncompress(${QAT_VGG19_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "VGG19_qat_model.tar.gz" )
endif()
# inference_qat_int8_test(test_qat_int8_vgg19_mkldnn ${QAT_VGG19_MODEL_DIR} ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
set(QAT2_RESNET50_MODEL_DIR "${QAT_DATA_DIR}/ResNet50_qat_perf")
if (NOT EXISTS ${QAT2_RESNET50_MODEL_DIR})
inference_download_and_uncompress(${QAT2_RESNET50_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "ResNet50_qat_perf.tar.gz" )
endif()
inference_qat2_int8_test(test_qat2_int8_resnet50_mkldnn ${QAT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
set(QAT2_MOBILENETV1_MODEL_DIR "${QAT_DATA_DIR}/MobileNet_qat_perf")
if (NOT EXISTS ${QAT2_MOBILENETV1_MODEL_DIR})
inference_download_and_uncompress(${QAT2_MOBILENETV1_MODEL_DIR} "${QAT_MODELS_BASE_URL}" "MobileNet_qat_perf.tar.gz" )
endif()
inference_qat2_int8_test(test_qat2_int8_mobilenetv1_mkldnn ${QAT2_MOBILENETV1_MODEL_DIR}/MobileNet_qat_perf ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
# Save qat2 fp32 model or qat2 int8 model
set(QAT2_INT8_SAVE_PATH "${QAT_DATA_DIR}/ResNet50_qat2_int8")
set(QAT2_FP32_SAVE_PATH "${QAT_DATA_DIR}/ResNet50_qat2_fp32")
set(SAVE_QAT2_MODEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/save_qat_model.py")
save_qat_model_test(save_qat2_model_resnet50 ${QAT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float ${QAT2_FP32_SAVE_PATH} ${QAT2_INT8_SAVE_PATH} ${SAVE_QAT2_MODEL_SCRIPT} true)
endif()
# Since the test for QAT FP32 & INT8 comparison supports only testing on Linux
# with MKL-DNN, we remove it here so it is not run on other systems.
list(REMOVE_ITEM TEST_OPS qat_int8_comparison.py)
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# SLIM Quantization-aware training (QAT) on INT8 MKL-DNN
This document describes how to use [Paddle Slim](https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/advanced_usage/paddle_slim/paddle_slim.md) to convert a quantization-aware trained model into an INT8 MKL-DNN quantized model. In **Release 1.5**, we released QAT1.0 MKL-DNN, which enables the INT8 MKL-DNN kernel for QAT-trained models within a 0.05% accuracy difference on GoogleNet, MobileNet-V1, MobileNet-V2, ResNet-101, ResNet-50, VGG16 and VGG19. In **Release 1.6**, QAT2.0 MKL-DNN, we optimized performance based on the fake QAT models ResNet50, ResNet101, MobileNet-V1, MobileNet-V2, VGG16 and VGG19, with a minor accuracy drop. Compared with Release 1.5, QAT2.0 MKL-DNN achieves a larger inference performance gain over the fake QAT models, at the cost of a slightly larger accuracy difference. We provide accuracy benchmarks for both QAT1.0 MKL-DNN and QAT2.0 MKL-DNN, and a performance benchmark for QAT2.0 MKL-DNN.
Notes:
* MKL-DNN and MKL are required. The performance gain can only be obtained on CPU servers with AVX512 support.
## 0. Prerequisite
You need to install at least the PaddlePaddle 1.6 Python package: `pip install paddlepaddle==1.6`.
## 1. How to generate INT8 MKL-DNN QAT model
You can refer to the unit test in [test_quantization_mkldnn_pass.py](test_quantization_mkldnn_pass.py). Users first apply the PaddleSlim quantization strategy to obtain a saved fake QAT model via [QuantizationFreezePass](https://github.com/PaddlePaddle/models/tree/develop/PaddleSlim/quant_low_level_api), then use the `FakeQAT2MkldnnINT8KernelPass` to get a graph that can be run with MKL-DNN INT8 kernels. In Paddle Release 1.6, this pass supports `conv2d` and `depthwise_conv2d` ops with channel-wise quantization for weights. Apart from it, another pass called `FakeQAT2MkldnnINT8PerfPass` is available. This pass allows users to transform their QAT INT8 model into a highly performance-optimized model that is run using INT8 MKL-DNN kernels.
```python
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import FakeQAT2MkldnnINT8KernelPass
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
# Create the IrGraph by Program
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False)
place = fluid.CPUPlace()
# Convert the IrGraph to MKL-DNN supported INT8 IrGraph by using
# QAT1.0 MKL-DNN
# FakeQAT2MkldnnINT8KernelPass
mkldnn_pass = FakeQAT2MkldnnINT8KernelPass(fluid.global_scope(), place)
# Apply FakeQAT2MkldnnINT8KernelPass to IrGraph
mkldnn_pass.apply(graph)
# QAT2.0 MKL-DNN
# FakeQAT2MkldnnINT8PerfPass
mkldnn_pass = FakeQAT2MkldnnINT8PerfPass(fluid.global_scope(), place, fluid.core, False)
# Apply FakeQAT2MkldnnINT8PerfPass to IrGraph
mkldnn_pass.apply(graph)
```
## 2. Accuracy and Performance benchmark
>**I. QAT1.0 MKL-DNN Accuracy on Intel(R) Xeon(R) Gold 6271**
| Model | Fake QAT Top1 Accuracy | INT8 QAT Top1 Accuracy | Top1 Diff | Fake QAT Top5 Accuracy | INT8 QAT Top5 Accuracy | Top5 Diff |
|:------------:|:----------------------:|:----------------------:|:---------:|:----------------------:|:----------------------:|:---------:|
| GoogleNet | 70.40% | 70.39% | -0.01% | 89.46% | 89.46% | 0.00% |
| MobileNet-V1 | 70.84% | 70.85% | +0.01% | 89.59% | 89.58% | -0.01% |
| MobileNet-V2 | 72.07% | 72.06% | -0.01% | 90.71% | 90.69% | -0.02% |
| ResNet-101 | 77.49% | 77.52% | +0.03% | 93.68% | 93.67% | -0.01% |
| ResNet-50 | 76.61% | 76.62% | +0.01% | 93.08% | 93.10% | +0.02% |
| VGG16 | 72.71% | 72.69% | -0.02% | 91.11% | 91.09% | -0.02% |
| VGG19 | 73.37% | 73.37% | 0.00% | 91.40% | 91.41% | +0.01% |
>**II. QAT2.0 MKL-DNN Accuracy on Intel(R) Xeon(R) Gold 6271**
| Model | Fake QAT Top1 Accuracy | INT8 QAT Top1 Accuracy | Top1 Diff | Fake QAT Top5 Accuracy | INT8 QAT Top5 Accuracy | Top5 Diff |
|:------------:|:----------------------:|:----------------------:|:---------:|:----------------------:|:----------------------:|:---------:|
| MobileNet-V1 | 70.72% | 70.78% | +0.06% | 89.47% | 89.39% | -0.08% |
| MobileNet-V2 | 72.07% | 72.17% | +0.10% | 90.65% | 90.63% | -0.02% |
| ResNet101 | 77.86% | 77.59% | -0.27% | 93.54% | 93.54% | 0.00% |
| ResNet50 | 76.62% | 76.53% | -0.09% | 93.01% | 92.98% | -0.03% |
| VGG16 | 71.74% | 71.75% | +0.01% | 89.96% | 89.73% | -0.23% |
| VGG19 | 72.30% | 72.09% | -0.21% | 90.19% | 90.13% | -0.06% |
>**III. QAT2.0 MKL-DNN C-API Performance on Intel(R) Xeon(R) Gold 6271**
| Model | FP32 Optimized Throughput (images/s) | INT8 QAT Throughput(images/s) | Ratio(INT8/FP32) |
|:------------:|:------------------------------------:|:-----------------------------:|:----------------:|
| MobileNet-V1 | 73.98 | 227.73 | 3.08 |
| MobileNet-V2 | 86.59 | 206.74 | 2.39 |
| ResNet101 | 7.15 | 26.69 | 3.73 |
| ResNet50 | 13.15 | 49.33 | 3.75 |
| VGG16 | 3.34 | 10.15 | 3.04 |
| VGG19 | 2.83 | 8.67 | 3.07 |
Notes:
* FP32 Optimized Throughput (images/s) is from [int8_mkldnn_quantization.md](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/api/int8_mkldnn_quantization.md).
## 3. How to reproduce the results
Three steps are needed to reproduce the accuracy results given above; we take the ResNet50 benchmark as an example:
* ### Prepare dataset
```bash
cd /PATH/TO/PADDLE
python paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
```
The converted data binary file is saved by default in `$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin`.
* ### Prepare model
You can run the following commands to download the ResNet50 model. The exemplary code snippet below downloads a ResNet50 QAT model. The reason for having two different versions of the same model is that two different QAT training strategies were used: one with a non-optimized and one with an optimized graph transform, corresponding to QAT1.0 and QAT2.0 respectively.
```bash
mkdir -p /PATH/TO/DOWNLOAD/MODEL/
cd /PATH/TO/DOWNLOAD/MODEL/
# uncomment for QAT1.0 MKL-DNN
# export MODEL_NAME=ResNet50
# export MODEL_FILE_NAME=QAT_models/${MODEL_NAME}_qat_model.tar.gz
# uncomment for QAT2.0 MKL-DNN
# export MODEL_NAME=resnet50
# export MODEL_FILE_NAME=QAT2_models/${MODEL_NAME}_quant.tar.gz
wget http://paddle-inference-dist.bj.bcebos.com/int8/${MODEL_FILE_NAME}
```
Unpack the downloaded model into the folder. To verify all 7 models, you need to set `MODEL_NAME` to one of the following values on the command line:
```text
QAT1.0 models
MODEL_NAME=ResNet50, ResNet101, GoogleNet, MobileNetV1, MobileNetV2, VGG16, VGG19
QAT2.0 models
MODEL_NAME=resnet50, resnet101, mobilenetv1, mobilenetv2, vgg16, vgg19
```
* ### Commands to reproduce benchmark
You can run `qat_int8_comparison.py` with the following arguments to reproduce the accuracy result on ResNet50. The command-line difference between QAT1.0 MKL-DNN and QAT2.0 MKL-DNN is the argument `qat2`, which enables QAT2.0 MKL-DNN. To run the QAT2.0 MKL-DNN performance test, the environment variable `OMP_NUM_THREADS=1` and the `batch_size=1` parameter should be set.
>*QAT1.0*
- Accuracy benchmark command on QAT1.0 models
```bash
cd /PATH/TO/PADDLE
OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/slim/tests/qat_int8_comparison.py --qat_model=/PATH/TO/DOWNLOAD/MODEL/${MODEL_NAME}/model --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.001
```
>*QAT2.0*
- Accuracy benchmark command on QAT2.0 models
```bash
cd /PATH/TO/PADDLE
OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/slim/tests/qat_int8_comparison.py --qat_model=/PATH/TO/DOWNLOAD/MODEL/${MODEL_NAME} --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.01 --qat2
```
* Performance benchmark command on QAT2.0 models
```bash
# 1. Save QAT2.0 INT8 model
cd /PATH/TO/PADDLE/build
python ../python/paddle/fluid/contrib/slim/tests/save_qat_model.py --qat_model_path /PATH/TO/DOWNLOAD/MODEL/${QAT2_MODEL_NAME} --int8_model_save_path /PATH/TO/${QAT2_MODEL_NAME}_qat_int8
# 2. Run the QAT2.0 C-API for performance benchmark
cd /PATH/TO/PADDLE/build
OMP_NUM_THREADS=1 paddle/fluid/inference/tests/api/test_analyzer_qat_image_classification ARGS --enable_fp32=false --with_accuracy_layer=false --int8_model=/PATH/TO/${QAT2_MODEL_NAME}_qat_int8 --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=1 --paddle_num_threads=1
```
> Notes: Due to the large number of images in the `int8_full_val.bin` dataset (50 000), the accuracy benchmark, which includes a comparison of the unoptimized and optimized QAT models, may take a long time (even several hours). To accelerate the process, it is recommended to set `OMP_NUM_THREADS` to the maximum number of physical cores available on the server.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
controllers:
sa_controller:
class: 'SAController'
reduce_rate: 0.9
init_temperature: 1024
max_iter_number: 300
strategies:
auto_pruning_strategy:
class: 'AutoPruneStrategy'
pruner: 'pruner_1'
controller: 'sa_controller'
start_epoch: 0
end_epoch: 2
max_ratio: 0.7
min_ratio: 0.5
pruned_params: '.*_sep_weights'
metric_name: 'acc_top5'
compressor:
epoch: 2
checkpoint_path: './checkpoints_auto_pruning/'
strategies:
- auto_pruning_strategy
version: 1.0
compressor:
epoch: 1
checkpoint_path: './checkpoints/'
#start_epoch: The 'on_epoch_begin' function will be called in start_epoch. default: 0.
#end_epoch: The 'on_epoch_end' function will be called in end_epoch. default: 10.
#delta_rate: The delta used to generate ratios when calculating sensitivities.
#target_ratio: The flops ratio to be pruned from current model.
#metric_name: The metric used to evaluate the model.
#pruned_params: The pattern str to match the parameter names to be pruned.
#sensitivities_file: The sensitivities file.
#num_steps: The number of pruning steps.
#eval_rate: The rate of sampled data used to calculate sensitivities.
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
sensitive_pruning_strategy:
class: 'SensitivePruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
delta_rate: 0.1
target_ratio: 0.3
num_steps: 1
eval_rate: 0.5
pruned_params: '.*_sep_weights'
sensitivities_file: 'mobilenet_acc_top1_sensitive.data'
metric_name: 'acc_top1'
compressor:
epoch: 120
checkpoint_path: './checkpoints/'
strategies:
- sensitive_pruning_strategy
#start_epoch(int): The epoch when to merge student graph and teacher graph for
# distillation training. default: 0
#
#end_epoch(int): The epoch when to finish distillation training. default: 0
#
#student_feature_map(str): The name of feature map from student network.
#
#teacher_feature_map(str): The name of feature map from teacher network.
# Its shape should be the same as that of the student network.
#
#student_pairs(list<tuple>): Each tuple, with two variable names, in student_pairs indicates
# a section in student network. The variables in a tuple should
# have the same feature map size.
#
#teacher_pairs(list<tuple>): Each tuple, with two variable names, in teacher_pairs indicates
# a section in teacher network. The variables in a tuple should
# have the same feature map size. The variable named teacher_pairs[i][j]
# should have the same channel number as the variable named
# student_pairs[i][j].
#
#distillation_loss_weight(float): The weight of the loss.
version: 1.0
distillers:
fsp_distiller:
class: 'FSPDistiller'
# teacher_pairs: [['teacher_depthwise_conv2d_1.tmp_0', 'teacher_conv2d_3.tmp_0']]
# student_pairs: [['student_depthwise_conv2d_1.tmp_0', 'student_conv2d_3.tmp_0']]
teacher_pairs: [['teacher_conv2_1_dw.tmp_0', 'teacher_conv1.tmp_0']]
student_pairs: [['student_conv2_1_dw.tmp_0', 'student_conv1.tmp_0']]
distillation_loss_weight: 1
l2_distiller:
class: 'L2Distiller'
teacher_feature_map: 'teacher.tmp_1'
student_feature_map: 'student.tmp_1'
distillation_loss_weight: 1
soft_label_distiller:
class: 'SoftLabelDistiller'
student_temperature: 1.0
teacher_temperature: 1.0
teacher_feature_map: 'teacher.tmp_2'
student_feature_map: 'student.tmp_2'
distillation_loss_weight: 0.001
strategies:
distillation_strategy:
class: 'DistillationStrategy'
distillers: ['fsp_distiller', 'l2_distiller', 'soft_label_distiller']
start_epoch: 0
end_epoch: 1
compressor:
epoch: 1
checkpoint_path: './distillation_checkpoints/'
strategies:
- distillation_strategy
#start_epoch: The 'on_epoch_begin' function will be called in start_epoch. default: 0.
#end_epoch: The 'on_epoch_end' function will be called in end_epoch. default: 10.
#delta_rate: The delta used to generate ratios when calculating sensitivities.
#target_ratio: The flops ratio to be pruned from current model.
#metric_name: The metric used to evaluate the model.
#pruned_params: The pattern str to match the parameter names to be pruned.
#sensitivities_file: The sensitivities file.
#num_steps: The number of pruning steps.
#eval_rate: The rate of sampled data used to calculate sensitivities.
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
sensitive_pruning_strategy:
class: 'SensitivePruneStrategy'
pruner: 'pruner_1'
start_epoch: 1
delta_rate: 0.2
target_ratio: 0.08
num_steps: 1
eval_rate: 0.5
pruned_params: '_conv6_sep_weights'
sensitivities_file: 'mobilenet_acc_top1_sensitive.data'
metric_name: 'acc_top1'
compressor:
epoch: 2
checkpoint_path: './checkpoints_pruning/'
strategies:
- sensitive_pruning_strategy
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
uniform_pruning_strategy:
class: 'UniformPruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
target_ratio: 0.5
pruned_params: 'conv.*'
metric_name: 'acc_top1'
compressor:
epoch: 2
checkpoint_path: './checkpoints_uniform_restore_tmp/'
strategies:
- uniform_pruning_strategy
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
uniform_pruning_strategy:
class: 'UniformPruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
target_ratio: 0.5
pruned_params: 'conv.*'
metric_name: 'acc_top1'
compressor:
epoch: 1
checkpoint_path: './checkpoints_uniform_restore/'
strategies:
- uniform_pruning_strategy
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
uniform_pruning_strategy:
class: 'UniformPruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
target_ratio: 0.5
pruned_params: 'conv.*'
metric_name: 'acc_top1'
compressor:
epoch: 2
checkpoint_path: './checkpoints_uniform_restore/'
strategies:
- uniform_pruning_strategy
version: 1.0
controllers:
sa_controller:
class: 'SAController'
reduce_rate: 0.9
init_temperature: 1024
max_iter_number: 300
strategies:
light_nas_strategy:
class: 'LightNASStrategy'
controller: 'sa_controller'
target_flops: 629145600
target_latency: 1
end_epoch: 2
retrain_epoch: 1
metric_name: 'acc_top1'
is_server: 1
max_client_num: 100
search_steps: 2
compressor:
epoch: 2
strategies:
- light_nas_strategy
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.contrib.slim.nas import SearchSpace
from light_nasnet import LightNASNet
import paddle.fluid as fluid
import paddle
import json
import math
import random
total_images = 1281167
IMAGENET1000 = 1281167  # fallback image count used by optimizer_setting
lr = 0.1
num_epochs = 1
batch_size = 256
lr_strategy = "cosine_decay"
l2_decay = 4e-5
momentum_rate = 0.9
image_shape = [1, 28, 28]
__all__ = ['LightNASSpace']
NAS_FILTER_SIZE = [[18, 24, 30], [24, 32, 40], [48, 64, 80], [72, 96, 120],
[120, 160, 192]]
NAS_LAYERS_NUMBER = [[1, 2, 3], [2, 3, 4], [3, 4, 5], [2, 3, 4], [2, 3, 4]]
NAS_KERNEL_SIZE = [3, 5]
NAS_FILTERS_MULTIPLIER = [3, 4, 5, 6]
NAS_SHORTCUT = [0, 1]
NAS_SE = [0, 1]
def get_bottleneck_params_list(var):
"""Get bottleneck_params_list from var.
Args:
var: list, variable list.
Returns:
list, bottleneck_params_list.
"""
params_list = [
1, 16, 1, 1, 3, 1, 0, \
6, 24, 2, 2, 3, 1, 0, \
6, 32, 3, 2, 3, 1, 0, \
6, 64, 4, 2, 3, 1, 0, \
6, 96, 3, 1, 3, 1, 0, \
6, 160, 3, 2, 3, 1, 0, \
6, 320, 1, 1, 3, 1, 0, \
]
for i in range(5):
params_list[i * 7 + 7] = NAS_FILTERS_MULTIPLIER[var[i * 6]]
params_list[i * 7 + 8] = NAS_FILTER_SIZE[i][var[i * 6 + 1]]
params_list[i * 7 + 9] = NAS_LAYERS_NUMBER[i][var[i * 6 + 2]]
params_list[i * 7 + 11] = NAS_KERNEL_SIZE[var[i * 6 + 3]]
params_list[i * 7 + 12] = NAS_SHORTCUT[var[i * 6 + 4]]
params_list[i * 7 + 13] = NAS_SE[var[i * 6 + 5]]
return params_list
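# Worked example (derived from the tables above): with the block-0 slice
# var[0:6] = [0, 1, 2, 0, 1, 0] (as in LightNASSpace.init_tokens below),
# entries 7, 8, 9, 11, 12 and 13 of params_list become: expansion
# NAS_FILTERS_MULTIPLIER[0] = 3, filters NAS_FILTER_SIZE[0][1] = 24, layers
# NAS_LAYERS_NUMBER[0][2] = 3, kernel NAS_KERNEL_SIZE[0] = 3, shortcut
# NAS_SHORTCUT[1] = 1 and SE NAS_SE[0] = 0; entry 10 (the stride) is kept.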
class LightNASSpace(SearchSpace):
def __init__(self):
super(LightNASSpace, self).__init__()
def init_tokens(self):
"""Get init tokens in search space.
"""
return [
0, 1, 2, 0, 1, 0, 0, 2, 1, 1, 1, 0, 3, 2, 0, 1, 1, 0, 3, 1, 0, 0, 1,
0, 3, 2, 2, 1, 1, 0
]
def range_table(self):
"""Get range table of current search space.
"""
# [NAS_FILTER_SIZE, NAS_LAYERS_NUMBER, NAS_KERNEL_SIZE, NAS_FILTERS_MULTIPLIER, NAS_SHORTCUT, NAS_SE]
return [
4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 2,
2, 4, 3, 3, 2, 2, 2
]
def get_model_latency(self, program):
"""Get model latency according to program.
Returns a random number since it's only for testing.
Args:
program(Program): The program to get latency.
Return:
(float): model latency.
"""
return random.randint(1, 2)
def create_net(self, tokens=None):
"""Create a network for training by tokens.
"""
if tokens is None:
tokens = self.init_tokens()
bottleneck_params_list = get_bottleneck_params_list(tokens)
startup_prog = fluid.Program()
train_prog = fluid.Program()
test_prog = fluid.Program()
train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
is_train=True,
main_prog=train_prog,
startup_prog=startup_prog,
bottleneck_params_list=bottleneck_params_list)
test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
is_train=False,
main_prog=test_prog,
startup_prog=startup_prog,
bottleneck_params_list=bottleneck_params_list)
test_prog = test_prog.clone(for_test=True)
train_batch_size = batch_size // 1  # single device; integer division keeps an int
test_batch_size = batch_size
train_reader = paddle.batch(
paddle.dataset.mnist.train(),
batch_size=train_batch_size,
drop_last=True)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=test_batch_size)
with fluid.program_guard(train_prog, startup_prog):
train_py_reader.decorate_paddle_reader(train_reader)
with fluid.program_guard(test_prog, startup_prog):
test_py_reader.decorate_paddle_reader(test_reader)
return startup_prog, train_prog, test_prog, (
train_cost, train_acc1, train_acc5,
global_lr), (test_cost, test_acc1,
test_acc5), train_py_reader, test_py_reader
def build_program(is_train,
main_prog,
startup_prog,
bottleneck_params_list=None):
with fluid.program_guard(main_prog, startup_prog):
py_reader = fluid.layers.py_reader(
capacity=16,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
use_double_buffer=False)
with fluid.unique_name.guard():
image, label = fluid.layers.read_file(py_reader)
model = LightNASNet()
avg_cost, acc_top1, acc_top5 = net_config(
image,
label,
model,
class_dim=10,
bottleneck_params_list=bottleneck_params_list,
scale_loss=1.0)
avg_cost.persistable = True
acc_top1.persistable = True
acc_top5.persistable = True
if is_train:
params = model.params
params["total_images"] = total_images
params["lr"] = lr
params["num_epochs"] = num_epochs
params["learning_strategy"]["batch_size"] = batch_size
params["learning_strategy"]["name"] = lr_strategy
params["l2_decay"] = l2_decay
params["momentum_rate"] = momentum_rate
optimizer = optimizer_setting(params)
optimizer.minimize(avg_cost)
global_lr = optimizer._global_learning_rate()
if is_train:
return py_reader, avg_cost, acc_top1, acc_top5, global_lr
else:
return py_reader, avg_cost, acc_top1, acc_top5
def net_config(image,
label,
model,
class_dim=1000,
bottleneck_params_list=None,
scale_loss=1.0):
bottleneck_params_list = [
bottleneck_params_list[i:i + 7]
for i in range(0, len(bottleneck_params_list), 7)
]
out = model.net(input=image,
bottleneck_params_list=bottleneck_params_list,
class_dim=class_dim)
cost, pred = fluid.layers.softmax_with_cross_entropy(
out, label, return_softmax=True)
if scale_loss > 1:
avg_cost = fluid.layers.mean(x=cost) * float(scale_loss)
else:
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5)
return avg_cost, acc_top1, acc_top5
def optimizer_setting(params):
"""optimizer setting.
Args:
params: dict, params.
"""
ls = params["learning_strategy"]
l2_decay = params["l2_decay"]
momentum_rate = params["momentum_rate"]
if ls["name"] == "piecewise_decay":
if "total_images" not in params:
total_images = IMAGENET1000
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(total_images / batch_size + 1)
bd = [step * e for e in ls["epochs"]]
base_lr = params["lr"]
lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
elif ls["name"] == "cosine_decay":
if "total_images" not in params:
total_images = IMAGENET1000
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(total_images / batch_size + 1)
lr = params["lr"]
num_epochs = params["num_epochs"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.cosine_decay(
learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
elif ls["name"] == "cosine_warmup_decay":
if "total_images" not in params:
total_images = IMAGENET1000
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
l2_decay = params["l2_decay"]
momentum_rate = params["momentum_rate"]
step = int(math.ceil(float(total_images) / batch_size))
lr = params["lr"]
num_epochs = params["num_epochs"]
optimizer = fluid.optimizer.Momentum(
learning_rate=cosine_decay_with_warmup(
learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
elif ls["name"] == "linear_decay":
if "total_images" not in params:
total_images = IMAGENET1000
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
num_epochs = params["num_epochs"]
start_lr = params["lr"]
end_lr = 0
total_step = int((total_images / batch_size) * num_epochs)
lr = fluid.layers.polynomial_decay(
start_lr, total_step, end_lr, power=1)
optimizer = fluid.optimizer.Momentum(
learning_rate=lr,
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
elif ls["name"] == "adam":
lr = params["lr"]
optimizer = fluid.optimizer.Adam(learning_rate=lr)
else:
lr = params["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=lr,
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
return optimizer
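# ---------------------------------------------------------------------------
# Usage sketch (illustrative): building the optimizer for the cosine-decay
# strategy configured at the top of this file, mirroring build_program().
#
#   params = LightNASNet().params
#   params.update({"total_images": total_images, "lr": lr,
#                  "num_epochs": num_epochs, "l2_decay": l2_decay,
#                  "momentum_rate": momentum_rate})
#   params["learning_strategy"]["batch_size"] = batch_size
#   params["learning_strategy"]["name"] = lr_strategy
#   optimizer = optimizer_setting(params)
# ---------------------------------------------------------------------------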
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""LightNASNet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
__all__ = ['LightNASNet']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class LightNASNet(object):
"""LightNASNet."""
def __init__(self):
self.params = train_parameters
def net(self, input, bottleneck_params_list=None, class_dim=1000,
scale=1.0):
"""Build network.
Args:
input: Variable, input.
class_dim: int, class dim.
scale: float, scale.
Returns:
Variable, network output.
"""
if bottleneck_params_list is None:
# MobileNetV2
# bottleneck_params_list = [
# (1, 16, 1, 1, 3, 1, 0),
# (6, 24, 2, 2, 3, 1, 0),
# (6, 32, 3, 2, 3, 1, 0),
# (6, 64, 4, 2, 3, 1, 0),
# (6, 96, 3, 1, 3, 1, 0),
# (6, 160, 3, 2, 3, 1, 0),
# (6, 320, 1, 1, 3, 1, 0),
# ]
bottleneck_params_list = [
(1, 16, 1, 1, 3, 1, 0),
(3, 24, 3, 2, 3, 1, 0),
(3, 40, 3, 2, 5, 1, 0),
(6, 80, 3, 2, 5, 1, 0),
(6, 96, 2, 1, 3, 1, 0),
(6, 192, 4, 2, 5, 1, 0),
(6, 320, 1, 1, 3, 1, 0),
]
#conv1
input = self.conv_bn_layer(
input,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
if_act=True,
name='conv1_1')
# bottleneck sequences
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s, k, ifshortcut, ifse = layer_setting
i += 1
input = self.invresi_blocks(
input=input,
in_channel=in_c,
expansion=t,
out_channel=int(c * scale),
num_layers=n,
stride=s,
filter_size=k,
shortcut=ifshortcut,
squeeze=ifse,
name='conv' + str(i))
in_c = int(c * scale)
#last_conv
input = self.conv_bn_layer(
input=input,
num_filters=int(1280 * scale) if scale > 1.0 else 1280,
filter_size=1,
stride=1,
padding=0,
if_act=True,
name='conv9')
input = fluid.layers.pool2d(
input=input,
pool_size=7,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
param_attr=ParamAttr(name='fc10_weights'),
bias_attr=ParamAttr(name='fc10_offset'))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
if_act=True,
name=None,
use_cudnn=True):
"""Build convolution and batch normalization layers.
Args:
input: Variable, input.
filter_size: int, filter size.
num_filters: int, number of filters.
stride: int, stride.
padding: int, padding.
num_groups: int, number of groups.
if_act: bool, whether to use an activation.
name: str, name.
use_cudnn: bool, whether to use cudnn.
Returns:
Variable, layers output.
"""
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
return fluid.layers.relu6(bn)
else:
return bn
def shortcut(self, input, data_residual):
"""Build shortcut layer.
Args:
input: Variable, input.
data_residual: Variable, residual layer.
Returns:
Variable, layer output.
"""
return fluid.layers.elementwise_add(input, data_residual)
def squeeze_excitation(self,
input,
num_channels,
reduction_ratio,
name=None):
"""Build squeeze excitation layers.
Args:
input: Variable, input.
num_channels: int, number of channels.
reduction_ratio: float, reduction ratio.
name: str, name.
Returns:
Variable, layers output.
"""
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(
input=pool,
size=num_channels // reduction_ratio,
act='relu',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_sqz_weights'),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
excitation = fluid.layers.fc(
input=squeeze,
size=num_channels,
act='sigmoid',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_exc_weights'),
bias_attr=ParamAttr(name=name + '_exc_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale
def inverted_residual_unit(self,
input,
num_in_filter,
num_filters,
ifshortcut,
ifse,
stride,
filter_size,
expansion_factor,
reduction_ratio=4,
name=None):
"""Build inverted residual unit.
Args:
input(Variable): Theinput.
num_in_filter(int): The number of input filters.
num_filters(int): The number of filters.
ifshortcut(bool): Whether to use shortcut.
stride(int): The stride.
filter_size(int): The filter size.
padding(int): The padding.
expansion_factor(float): Expansion factor.
name(str): The name.
Returns:
Variable, layers output.
"""
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = self.conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name=name + '_expand')
bottleneck_conv = self.conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=int((filter_size - 1) / 2),
num_groups=num_expfilter,
if_act=True,
name=name + '_dwise',
use_cudnn=False)
linear_out = self.conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False,
name=name + '_linear')
out = linear_out
if ifshortcut:
out = self.shortcut(input=input, data_residual=out)
if ifse:
scale = self.squeeze_excitation(
input=linear_out,
num_channels=num_filters,
reduction_ratio=reduction_ratio,
name=name + '_fc')
out = fluid.layers.elementwise_add(x=out, y=scale, act='relu')
return out
def invresi_blocks(self,
input,
in_channel,
expansion,
out_channel,
num_layers,
stride,
filter_size,
shortcut,
squeeze,
name=None):
"""Build inverted residual blocks.
Args:
            input(Variable): The input feature map.
            in_channel(int): The number of input channels.
            expansion(float): Expansion factor.
            out_channel(int): The number of output channels.
num_layers(int): The number of layers.
stride(int): The stride.
filter_size(int): The size of filter.
shortcut(bool): Whether to add shortcut layers.
squeeze(bool): Whether to add squeeze excitation layers.
name(str): The name.
Returns:
Variable, layers output.
"""
first_block = self.inverted_residual_unit(
input=input,
num_in_filter=in_channel,
num_filters=out_channel,
ifshortcut=False,
ifse=squeeze,
stride=stride,
filter_size=filter_size,
expansion_factor=expansion,
name=name + '_1')
last_residual_block = first_block
last_c = out_channel
for i in range(1, num_layers):
last_residual_block = self.inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=out_channel,
ifshortcut=shortcut,
ifse=squeeze,
stride=1,
filter_size=filter_size,
expansion_factor=expansion,
name=name + '_' + str(i + 1))
return last_residual_block
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = ['MobileNet']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class MobileNet():
def __init__(self, name=""):
self.params = train_parameters
self.name = name
def net(self, input, class_dim=1000, scale=1.0):
# conv1: 112x112
input = self.conv_bn_layer(
input,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
stride=2,
padding=1,
name=self.name + "_conv1")
# 56x56
input = self.depthwise_separable(
input,
num_filters1=32,
num_filters2=64,
num_groups=32,
stride=1,
scale=scale,
name=self.name + "_conv2_1")
input = self.depthwise_separable(
input,
num_filters1=64,
num_filters2=128,
num_groups=64,
stride=2,
scale=scale,
name=self.name + "_conv2_2")
# 28x28
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=128,
num_groups=128,
stride=1,
scale=scale,
name=self.name + "_conv3_1")
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=256,
num_groups=128,
stride=2,
scale=scale,
name=self.name + "_conv3_2")
# 14x14
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=256,
num_groups=256,
stride=1,
scale=scale,
name=self.name + "_conv4_1")
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=512,
num_groups=256,
stride=2,
scale=scale,
name=self.name + "_conv4_2")
# 14x14
for i in range(5):
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=512,
num_groups=512,
stride=1,
scale=scale,
name=self.name + "_conv5" + "_" + str(i + 1))
# 7x7
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=1024,
num_groups=512,
stride=2,
scale=scale,
name=self.name + "_conv5_6")
input = self.depthwise_separable(
input,
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
stride=1,
scale=scale,
name=self.name + "_conv6")
input = fluid.layers.pool2d(
input=input,
pool_size=0,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(
input=input,
size=class_dim,
act='softmax',
param_attr=ParamAttr(
initializer=MSRA(), name=self.name + "_fc7_weights"),
bias_attr=ParamAttr(name=self.name + "_fc7_offset"),
name=self.name)
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(
initializer=MSRA(), name=name + "_weights"),
name=name,
bias_attr=False)
bn_name = name + "_bn"
return fluid.layers.batch_norm(
input=conv,
act=act,
name=name,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def depthwise_separable(self,
input,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
name=None):
depthwise_conv = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=int(num_filters1 * scale),
stride=stride,
padding=1,
num_groups=int(num_groups * scale),
use_cudnn=False,
name=name + "_dw")
pointwise_conv = self.conv_bn_layer(
input=depthwise_conv,
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0,
name=name + "_sep")
return pointwise_conv
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import os
import sys
import argparse
import logging
import struct
import six
import numpy as np
import time
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import FakeQAT2MkldnnINT8KernelPass
from paddle.fluid.contrib.slim.quantization import FakeQAT2MkldnnINT8PerfPass
from paddle.fluid import core
logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s')
_logger = logging.getLogger(__name__)
_logger.setLevel(logging.INFO)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=1, help='Batch size.')
parser.add_argument(
'--skip_batch_num',
type=int,
default=0,
help='Number of the first minibatches to skip in performance statistics.'
)
parser.add_argument(
'--debug',
action='store_true',
help='If used, the graph of QAT model is drawn.')
parser.add_argument(
'--qat_model', type=str, default='', help='A path to a QAT model.')
parser.add_argument(
'--qat2',
action='store_true',
help='If used, the QAT model is treated as a second generation model for performance optimization.'
)
parser.add_argument(
'--save_model',
action='store_true',
        help='If used, the QAT model will be saved after all transformations.')
parser.add_argument('--infer_data', type=str, default='', help='Data file.')
parser.add_argument(
'--batch_num',
type=int,
default=1,
help='Number of batches to process. 0 or less means all.')
parser.add_argument(
'--acc_diff_threshold',
type=float,
default=0.01,
help='Accepted accuracy difference threshold.')
test_args, args = parser.parse_known_args(namespace=unittest)
return test_args, sys.argv[:1] + args
class TestQatInt8Comparison(unittest.TestCase):
"""
Test for accuracy comparison of QAT FP32 and INT8 inference.
"""
def _reader_creator(self, data_file='data.bin'):
def reader():
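            # Binary file layout: an int64 image count `num`, then `num`
            # images (3 x 224 x 224 float32 each), then `num` int64 labels.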
with open(data_file, 'rb') as fp:
num = fp.read(8)
num = struct.unpack('q', num)[0]
imgs_offset = 8
img_ch = 3
img_w = 224
img_h = 224
img_pixel_size = 4
img_size = img_ch * img_h * img_w * img_pixel_size
label_size = 8
labels_offset = imgs_offset + num * img_size
step = 0
while step < num:
fp.seek(imgs_offset + img_size * step)
img = fp.read(img_size)
img = struct.unpack_from(
'{}f'.format(img_ch * img_w * img_h), img)
img = np.array(img)
img.shape = (img_ch, img_w, img_h)
fp.seek(labels_offset + label_size * step)
label = fp.read(label_size)
label = struct.unpack('q', label)[0]
yield img, int(label)
step += 1
return reader
def _get_batch_accuracy(self, batch_output=None, labels=None):
total = 0
correct = 0
correct_5 = 0
for n, result in enumerate(batch_output):
index = result.argsort()
top_1_index = index[-1]
top_5_index = index[-5:]
total += 1
if top_1_index == labels[n]:
correct += 1
if labels[n] in top_5_index:
correct_5 += 1
acc1 = float(correct) / float(total)
acc5 = float(correct_5) / float(total)
return acc1, acc5
def _prepare_for_fp32_mkldnn(self, graph):
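        # Rewrite every depthwise_conv2d op as an equivalent conv2d op (same
        # inputs, weights and attributes) so the FP32 graph can be executed
        # with MKL-DNN kernels.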
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in ['depthwise_conv2d']:
input_var_node = graph._find_node_by_name(
op_node.inputs, op_node.input("Input")[0])
weight_var_node = graph._find_node_by_name(
op_node.inputs, op_node.input("Filter")[0])
output_var_node = graph._find_node_by_name(
graph.all_var_nodes(), op_node.output("Output")[0])
attrs = {
name: op_node.op().attr(name)
for name in op_node.op().attr_names()
}
conv_op_node = graph.create_op_node(
op_type='conv2d',
attrs=attrs,
inputs={
'Input': input_var_node,
'Filter': weight_var_node
},
outputs={'Output': output_var_node})
graph.link_to(input_var_node, conv_op_node)
graph.link_to(weight_var_node, conv_op_node)
graph.link_to(conv_op_node, output_var_node)
graph.safe_remove_nodes(op_node)
return graph
def _predict(self,
test_reader=None,
model_path=None,
batch_size=1,
batch_num=1,
skip_batch_num=0,
transform_to_int8=False):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')):
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
else:
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
model_path, exe, 'model', 'params')
graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
if (self._debug):
graph.draw('.', 'qat_orig', graph.all_op_nodes())
if (transform_to_int8):
if (test_case_args.qat2):
transform_to_mkldnn_int8_pass = FakeQAT2MkldnnINT8PerfPass(
_scope=inference_scope,
_place=place,
_core=core,
_debug=self._debug)
graph = transform_to_mkldnn_int8_pass.apply(graph)
else:
mkldnn_int8_pass = FakeQAT2MkldnnINT8KernelPass(
_scope=inference_scope, _place=place)
graph = mkldnn_int8_pass.apply(graph)
else:
graph = self._prepare_for_fp32_mkldnn(graph)
inference_program = graph.to_program()
dshape = [3, 224, 224]
outputs = []
infer_accs1 = []
infer_accs5 = []
fpses = []
batch_times = []
total_samples = 0
top1 = 0.0
top5 = 0.0
iters = 0
infer_start_time = time.time()
for data in test_reader():
if batch_num > 0 and iters >= batch_num:
break
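                # Reset the counters after the warm-up batches so that the
                # performance statistics exclude them.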
if iters == skip_batch_num:
total_samples = 0
infer_start_time = time.time()
if six.PY2:
images = map(lambda x: x[0].reshape(dshape), data)
if six.PY3:
images = list(map(lambda x: x[0].reshape(dshape), data))
images = np.array(images).astype('float32')
labels = np.array([x[1] for x in data]).astype('int64')
start = time.time()
out = exe.run(inference_program,
feed={feed_target_names[0]: images},
fetch_list=fetch_targets)
                batch_time = (time.time() - start) * 1000  # in milliseconds
outputs.append(out[0])
batch_acc1, batch_acc5 = self._get_batch_accuracy(out[0],
labels)
infer_accs1.append(batch_acc1)
infer_accs5.append(batch_acc5)
samples = len(data)
total_samples += samples
batch_times.append(batch_time)
fps = samples / batch_time * 1000
fpses.append(fps)
iters += 1
appx = ' (warm-up)' if iters <= skip_batch_num else ''
_logger.info('batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, '
'latency: {3:.4f} ms, fps: {4:.2f}'.format(
iters, batch_acc1, batch_acc5, batch_time /
batch_size, fps, appx))
# Postprocess benchmark data
batch_latencies = batch_times[skip_batch_num:]
batch_latency_avg = np.average(batch_latencies)
latency_avg = batch_latency_avg / batch_size
fpses = fpses[skip_batch_num:]
fps_avg = np.average(fpses)
infer_total_time = time.time() - infer_start_time
acc1_avg = np.mean(infer_accs1)
acc5_avg = np.mean(infer_accs5)
_logger.info('Total inference run time: {:.2f} s'.format(
infer_total_time))
if test_case_args.save_model:
with fluid.scope_guard(inference_scope):
fluid.io.save_inference_model(
'transformed_qat_int8_model', feed_target_names,
fetch_targets, exe, inference_program)
return outputs, acc1_avg, acc5_avg, fps_avg, latency_avg
def _summarize_performance(self, fp32_fps, fp32_lat, int8_fps, int8_lat):
_logger.info('--- Performance summary ---')
_logger.info('FP32: avg fps: {0:.2f}, avg latency: {1:.4f} ms'.format(
fp32_fps, fp32_lat))
_logger.info('INT8: avg fps: {0:.2f}, avg latency: {1:.4f} ms'.format(
int8_fps, int8_lat))
def _compare_accuracy(self, fp32_acc1, fp32_acc5, int8_acc1, int8_acc5,
threshold):
_logger.info('--- Accuracy summary ---')
_logger.info(
            'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)'
.format(threshold))
_logger.info(
'FP32: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'.
format(fp32_acc1, fp32_acc5))
_logger.info(
'INT8: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'.
format(int8_acc1, int8_acc5))
assert fp32_acc1 > 0.0
assert int8_acc1 > 0.0
assert fp32_acc1 - int8_acc1 <= threshold
def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn():
return
qat_model_path = test_case_args.qat_model
data_path = test_case_args.infer_data
batch_size = test_case_args.batch_size
batch_num = test_case_args.batch_num
skip_batch_num = test_case_args.skip_batch_num
acc_diff_threshold = test_case_args.acc_diff_threshold
self._debug = test_case_args.debug
_logger.info('QAT FP32 & INT8 prediction run.')
_logger.info('QAT model: {0}'.format(qat_model_path))
_logger.info('Dataset: {0}'.format(data_path))
_logger.info('Batch size: {0}'.format(batch_size))
_logger.info('Batch number: {0}'.format(batch_num))
_logger.info('Accuracy drop threshold: {0}.'.format(acc_diff_threshold))
_logger.info('--- QAT FP32 prediction start ---')
val_reader = paddle.batch(
self._reader_creator(data_path), batch_size=batch_size)
fp32_output, fp32_acc1, fp32_acc5, fp32_fps, fp32_lat = self._predict(
val_reader,
qat_model_path,
batch_size,
batch_num,
skip_batch_num,
transform_to_int8=False)
_logger.info('--- QAT INT8 prediction start ---')
val_reader = paddle.batch(
self._reader_creator(data_path), batch_size=batch_size)
int8_output, int8_acc1, int8_acc5, int8_fps, int8_lat = self._predict(
val_reader,
qat_model_path,
batch_size,
batch_num,
skip_batch_num,
transform_to_int8=True)
self._summarize_performance(fp32_fps, fp32_lat, int8_fps, int8_lat)
self._compare_accuracy(fp32_acc1, fp32_acc5, int8_acc1, int8_acc5,
acc_diff_threshold)
if __name__ == '__main__':
global test_case_args
test_case_args, remaining_args = parse_args()
unittest.main(argv=remaining_args)
#start_epoch(int): The epoch to insert quantization operators. default: 0
#
#end_epoch(int): The epoch to save inference model. default: 0
#
#float_model_save_path(str): The path to save model with float weights.
# None means it doesn't save float model. default: None.
#
#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
# None means it doesn't save mobile model. default: None.
#
#int8_model_save_path(str): The path to save model with int8_t weight.
# None means it doesn't save int8 model. default: None.
#
#activation_bits(int): quantization bit number for activation. default: 8.
#
#weight_bits(int): quantization bit number for weights. The bias is not quantized.
# default: 8.
#
#activation_quantize_type(str): quantization type for activation,
# now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
# If 'abs_max' is used, the quantization scale is calculated
# dynamically at each step in both training and testing. If
# 'range_abs_max' is used, a static quantization scale is calculated
# during training and used in inference.
#
#save_in_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
#
#save_out_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
version: 1.0
strategies:
quantization_strategy:
class: 'QuantizationStrategy'
start_epoch: 0
end_epoch: 0
float_model_save_path: './output/float'
mobile_model_save_path: './output/mobile'
int8_model_save_path: './output/int8'
weight_bits: 8
activation_bits: 8
weight_quantize_type: 'abs_max'
activation_quantize_type: 'abs_max'
save_in_nodes: ['image']
save_out_nodes: ['quan.tmp_2']
compressor:
epoch: 1
checkpoint_path: './checkpoints_quan/'
strategies:
- quantization_strategy
#start_epoch(int): The epoch to insert quantization operators. default: 0
#
#end_epoch(int): The epoch to save inference model. default: 0
#
#float_model_save_path(str): The path to save model with float weights.
# None means it doesn't save float model. default: None.
#
#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
# None means it doesn't save mobile model. default: None.
#
#int8_model_save_path(str): The path to save model with int8_t weight.
# None means it doesn't save int8 model. default: None.
#
#activation_bits(int): quantization bit number for activation. default: 8.
#
#weight_bits(int): quantization bit number for weights. The bias is not quantized.
# default: 8.
#
#activation_quantize_type(str): quantization type for activation,
# now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
# If 'abs_max' is used, the quantization scale is calculated
# dynamically at each step in both training and testing. If
# 'range_abs_max' is used, a static quantization scale is calculated
# during training and used in inference.
#
#save_in_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
#
#save_out_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
version: 1.0
strategies:
quantization_strategy:
class: 'QuantizationStrategy'
start_epoch: 0
end_epoch: 0
float_model_save_path: './output/float'
mobile_model_save_path: './output/mobile'
int8_model_save_path: './output/int8'
weight_bits: 8
activation_bits: 8
weight_quantize_type: 'abs_max'
activation_quantize_type: 'abs_max'
save_in_nodes: ['image']
save_out_nodes: ['quan.tmp_2']
compressor:
epoch: 2
checkpoint_path: './checkpoints_quan/'
strategies:
- quantization_strategy
#start_epoch(int): The epoch to insert quantization operators. default: 0
#
#end_epoch(int): The epoch to save inference model. default: 0
#
#float_model_save_path(str): The path to save model with float weights.
# None means it doesn't save float model. default: None.
#
#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
# None means it doesn't save mobile model. default: None.
#
#int8_model_save_path(str): The path to save model with int8_t weight.
# None means it doesn't save int8 model. default: None.
#
#activation_bits(int): quantization bit number for activation. default: 8.
#
#weight_bits(int): quantization bit number for weights. The bias is not quantized.
# default: 8.
#
#activation_quantize_type(str): quantization type for activation,
# now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
# If 'abs_max' is used, the quantization scale is calculated
# dynamically at each step in both training and testing. If
# 'range_abs_max' is used, a static quantization scale is calculated
# during training and used in inference.
#
#save_in_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
#
#save_out_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
version: 1.0
strategies:
quantization_strategy:
class: 'QuantizationStrategy'
start_epoch: 0
end_epoch: 0
float_model_save_path: './output/float'
mobile_model_save_path: './output/mobile'
int8_model_save_path: './output/int8'
weight_bits: 8
activation_bits: 8
weight_quantize_type: 'abs_max'
activation_quantize_type: 'abs_max'
save_in_nodes: ['image']
save_out_nodes: ['quan.tmp_2']
compressor:
epoch: 2
checkpoint_path: './checkpoints_quan_2/'
strategies:
- quantization_strategy
#int8_model_save_path(str): int8_model_save_path is used to save an int8 ProgramDesc with
# fp32 weights, which is used for MKL-DNN int8 inference. For post-training quantization,
# MKLDNNPostTrainingQuantStrategy currently only supports converting an fp32 ProgramDesc
# with fp32 weights to an int8 ProgramDesc with fp32 weights. The saved
# int8 ProgramDesc with fp32 weights can only be executed with MKL-DNN enabled.
# None means it doesn't save int8 ProgramDesc with fp32 weights. default: None.
#
#fp32_model_path(str): fp32_model_path is used to load an original fp32 ProgramDesc with fp32 weights.
# None means it doesn't have an fp32 ProgramDesc with fp32 weights. default: None.
#
#cpu_math_library_num_threads(int): The number of cpu math library threads which is used on
# MKLDNNPostTrainingQuantStrategy. 1 means it only uses one cpu math library
# thread. default: 1
# Note: Here we set cpu_math_library_num_threads to 4, which is the maximum
# number of cpu math library threads on the CI machine.
#
version: 1.0
strategies:
mkldnn_post_training_strategy:
class: 'MKLDNNPostTrainingQuantStrategy'
int8_model_save_path: 'OUTPUT_PATH'
fp32_model_path: 'MODEL_PATH'
cpu_math_library_num_threads: 4
compressor:
epoch: 0
checkpoint_path: ''
strategies:
- mkldnn_post_training_strategy
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import os
import sys
import argparse
import logging
import struct
import six
import numpy as np
import time
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import FakeQAT2MkldnnINT8PerfPass
from paddle.fluid import core
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--qat_model_path', type=str, default='', help='A path to a QAT model.')
parser.add_argument(
'--fp32_model_save_path',
type=str,
default='',
help='Saved optimized fp32 model')
parser.add_argument(
'--int8_model_save_path',
type=str,
default='',
help='Saved optimized and quantized INT8 model')
test_args, args = parser.parse_known_args(namespace=unittest)
return test_args, sys.argv[:1] + args
def transform_and_save_model(original_path, save_path, save_type):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
if os.path.exists(os.path.join(original_path, '__model__')):
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(original_path, exe)
else:
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(original_path, exe,
'model', 'params')
transform_to_mkldnn_int8_pass = FakeQAT2MkldnnINT8PerfPass(
_scope=inference_scope, _place=place, _core=core)
graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
if save_type == 'FP32':
graph = transform_to_mkldnn_int8_pass.apply_fp32(graph)
elif save_type == 'INT8':
graph = transform_to_mkldnn_int8_pass.apply(graph)
inference_program = graph.to_program()
with fluid.scope_guard(inference_scope):
fluid.io.save_inference_model(save_path, feed_target_names,
fetch_targets, exe, inference_program)
print("Success! Transformed QAT_{0} model can be found at {1}\n".format(
save_type, save_path))
if __name__ == '__main__':
global test_args
test_args, remaining_args = parse_args()
if test_args.fp32_model_save_path:
transform_and_save_model(test_args.qat_model_path,
test_args.fp32_model_save_path, 'FP32')
if test_args.int8_model_save_path:
transform_and_save_model(test_args.qat_model_path,
test_args.int8_model_save_path, 'INT8')
# PaddleSlim Post-training quantization (MKL-DNN INT8)
This document describes how to use [PaddleSlim](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) to convert an FP32 ProgramDesc with FP32 weights into an INT8 ProgramDesc with FP32 weights for GoogleNet, MobileNet-V1, MobileNet-V2, ResNet-101, ResNet-50, VGG16 and VGG19. It explains how to enable MKL-DNN INT8 calibration in PaddleSlim and reports the accuracy results for all seven models.
## 0. Prerequisite
You need to install the PaddlePaddle Python package, version 1.5 or later: `pip install paddlepaddle==1.5`.
## 1. How to generate INT8 ProgramDesc with FP32 weights
Refer to section 1.2 of the [PaddleSlim](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) usage doc for details on how to use the PaddleSlim Compressor. For PaddleSlim post-training quantization with MKL-DNN INT8, there are two differences.
* Differences in `paddle.fluid.contrib.slim.Compressor` arguments
The only input required by PaddleSlim post-training quantization with MKL-DNN INT8 is the reader of the warmup dataset, so set the other parameters of `paddle.fluid.contrib.slim.Compressor` to None, [] or ''.
```python
com_pass = Compressor(
place=None, # not required, set to None
scope=None, # not required, set to None
train_program=None, # not required, set to None
train_reader=None, # not required, set to None
train_feed_list=[], # not required, set to []
train_fetch_list=[], # not required, set to []
eval_program=None, # not required, set to None
eval_reader=reader, # required, the reader of warmup dataset
eval_feed_list=[], # not required, set to []
eval_fetch_list=[], # not required, set to []
teacher_programs=[], # not required, set to []
checkpoint_path='', # not required, set to ''
train_optimizer=None, # not required, set to None
distiller_optimizer=None # not required, set to None
)
```
* Differences in yaml config
An example yaml config is listed below; for more details, refer to [config_mkldnn_int8.yaml](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml), which is used in the unit tests.
```yaml
version: 1.0
strategies:
mkldnn_post_training_strategy:
class: 'MKLDNNPostTrainingQuantStrategy' # required, class name of MKL-DNN INT8 Post-training quantization strategy
int8_model_save_path: 'OUTPUT_PATH' # required, int8 ProgramDesc with fp32 weights
fp32_model_path: 'MODEL_PATH' # required, fp32 ProgramDesc with fp32 weights
cpu_math_library_num_threads: 1 # required, The number of cpu math library threads
compressor:
epoch: 0 # not required, set to 0
checkpoint_path: '' # not required, set to ''
strategies:
- mkldnn_post_training_strategy
```
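With the `Compressor` constructed as above and the yaml config saved to disk, running the strategy is a config-and-run call. Below is a minimal sketch; the file name `mkldnn_int8.yaml` is a hypothetical path you choose yourself:
```python
# A minimal sketch, assuming `com_pass` was constructed as shown above and
# the yaml config was saved as 'mkldnn_int8.yaml' (hypothetical path).
com_pass.config('mkldnn_int8.yaml')
com_pass.run()  # writes the INT8 ProgramDesc with FP32 weights to int8_model_save_path
```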
## 2. How to run INT8 ProgramDesc with fp32 weights
You can load the INT8 ProgramDesc with FP32 weights via the load_inference_model [API](https://github.com/PaddlePaddle/Paddle/blob/8b50ad80ff6934512d3959947ac1e71ea3fb9ea3/python/paddle/fluid/io.py#L991) and run INT8 inference in the same way as [FP32](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/eval.py "FP32").
```python
[infer_program, feed_dict, fetch_targets] = fluid.io.load_inference_model(model_path, exe)
```
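A minimal CPU inference sketch built on the loaded program might look like the following; `model_path` and the dummy input batch are assumptions for illustration, not part of the original example:
```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
# `model_path` points at the saved INT8 ProgramDesc (assumed path).
[infer_program, feed_target_names, fetch_targets] = \
    fluid.io.load_inference_model(model_path, exe)
# A dummy float32 batch; replace with real preprocessed images.
images = np.random.random([1, 3, 224, 224]).astype('float32')
out = exe.run(infer_program,
              feed={feed_target_names[0]: images},
              fetch_list=fetch_targets)
```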
## 3. Result
We provide the accuracy results measured on an Intel(R) Xeon(R) Gold 6271.
>**I. Top-1 Accuracy on Intel(R) Xeon(R) Gold 6271**
>**Dataset: ILSVRC2012 Validation dataset**
| Model | FP32 Accuracy | INT8 Accuracy | Accuracy Diff(FP32-INT8) |
| :----------: | :-------------: | :------------: | :--------------: |
| GoogleNet | 70.50% | 69.81% | 0.69% |
| MobileNet-V1 | 70.78% | 70.42% | 0.36% |
| MobileNet-V2 | 71.90% | 71.35% | 0.55% |
| ResNet-101 | 77.50% | 77.42% | 0.08% |
| ResNet-50 | 76.63% | 76.52% | 0.11% |
| VGG16 | 72.08% | 72.03% | 0.05% |
| VGG19 | 72.57% | 72.55% | 0.02% |
Notes:
* MKL-DNN and MKL are required.
## 4. How to reproduce the results
Three steps reproduce the accuracy results above; we take the GoogleNet benchmark as an example:
* ### Prepare dataset
You can run the following commands to download and preprocess the ILSVRC2012 Validation dataset.
```bash
cd /PATH/TO/PADDLE
python ./paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
```
The preprocessed ILSVRC2012 Validation dataset will then be saved by default to `~/.cache/paddle/dataset/int8/download/int8_full_val.bin`.
* ### Prepare model
You can run the following commands to download the GoogleNet model.
```bash
mkdir -p /PATH/TO/DOWNLOAD/MODEL/
cd /PATH/TO/DOWNLOAD/MODEL/
export MODEL_NAME=GoogleNet
wget http://paddle-inference-dist.bj.bcebos.com/int8/${MODEL_NAME}_int8_model.tar.gz
mkdir -p ${MODEL_NAME}
tar -xvf ${MODEL_NAME}_int8_model.tar.gz -C ${MODEL_NAME}
```
To download and verify all seven models, set `MODEL_NAME` to one of the following values on the command line:
```text
MODEL_NAME=GoogleNet, mobilenetv1, mobilenet_v2, Res101, resnet50, VGG16, VGG19
```
* ### Commands to reproduce benchmark
You can run `test_mkldnn_int8_quantization_strategy.py` with the following arguments to reproduce the accuracy result on GoogleNet.
``` bash
cd /PATH/TO/PADDLE/python/paddle/fluid/contrib/slim/tests/
python ./test_mkldnn_int8_quantization_strategy.py --infer_model /PATH/TO/DOWNLOAD/MODEL/${MODEL_NAME}/model --infer_data ~/.cache/paddle/dataset/int8/download/int8_full_val.bin --warmup_batch_size 100 --batch_size 1
```
Notes:
* The above command may take several hours in the prediction stage (including both INT8 and FP32 prediction), since all 50,000 images in `int8_full_val.bin` need to be predicted.
* Running the above command with the environment variable `FLAGS_use_mkldnn=true` makes the FP32 part of the test run with MKL-DNN (the INT8 part uses MKL-DNN either way).
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestFilterPruning(unittest.TestCase):
def test_compression(self):
"""
Model: mobilenet_v1
data: mnist
step1: Training one epoch
step2: pruning flops
step3: fine-tune one epoch
step4: check top1_acc.
"""
if not fluid.core.is_compiled_with_cuda():
return
class_dim = 10
image_shape = [1, 28, 28]
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = MobileNet("auto_pruning").net(input=image, class_dim=class_dim)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
val_program = fluid.default_main_program().clone(for_test=False)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
regularization=fluid.regularizer.L2Decay(4e-5))
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
val_feed_list = [('img', image.name), ('label', label.name)]
val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
acc_top5.name)]
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128)
train_feed_list = [('img', image.name), ('label', label.name)]
train_fetch_list = [('loss', avg_cost.name)]
com_pass = Compressor(
place,
fluid.global_scope(),
fluid.default_main_program(),
train_reader=train_reader,
train_feed_list=train_feed_list,
train_fetch_list=train_fetch_list,
eval_program=val_program,
eval_reader=val_reader,
eval_feed_list=val_feed_list,
eval_fetch_list=val_fetch_list,
train_optimizer=optimizer)
com_pass.config('./auto_pruning/compress.yaml')
eval_graph = com_pass.run()
if __name__ == '__main__':
unittest.main()
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import paddle
import unittest
import os
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestCompressor(unittest.TestCase):
def test_eval_func(self):
class_dim = 10
image_shape = [1, 28, 28]
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = fluid.layers.fc(input=image, size=class_dim)
out = fluid.layers.softmax(out)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
val_program = fluid.default_main_program().clone(for_test=False)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
regularization=fluid.regularizer.L2Decay(4e-5))
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128)
train_feed_list = [('img', image.name), ('label', label.name)]
train_fetch_list = [('loss', avg_cost.name)]
eval_feed_list = [('img', image.name), ('label', label.name)]
eval_fetch_list = [('acc_top1', acc_top1.name)]
def eval_func(program, scope):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(
feed_list=[image.name, label.name],
place=place,
program=program)
results = []
for data in val_reader():
result = exe.run(program=program,
scope=scope,
fetch_list=[acc_top1.name],
feed=feeder.feed(data))
results.append(np.array(result))
result = np.mean(results)
return result
com_pass = Compressor(
place,
fluid.global_scope(),
fluid.default_main_program(),
train_reader=train_reader,
train_feed_list=train_feed_list,
train_fetch_list=train_fetch_list,
eval_program=val_program,
eval_feed_list=eval_feed_list,
eval_fetch_list=eval_fetch_list,
eval_func={"score": eval_func},
prune_infer_model=[[image.name], [out.name]],
train_optimizer=optimizer)
com_pass.config('./configs/compress.yaml')
com_pass.run()
self.assertTrue('score' in com_pass.context.eval_results)
self.assertTrue(float(com_pass.context.eval_results['score'][0]) > 0.9)
self.assertTrue(os.path.exists("./checkpoints/0/eval_model/__model__"))
self.assertTrue(
os.path.exists("./checkpoints/0/eval_model/__model__.infer"))
self.assertTrue(os.path.exists("./checkpoints/0/eval_model/__params__"))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.contrib.slim.core import ConfigFactory
import unittest
class TestFactory(unittest.TestCase):
def test_parse_pruning(self):
factory = ConfigFactory('./configs/filter_pruning.yaml')
pruner_1 = factory.instance('pruner_1')
self.assertEquals(pruner_1.pruning_axis['*'], 0)
self.assertEquals(pruner_1.criterions['*'], 'l1_norm')
strategy = factory.instance('sensitive_pruning_strategy')
pruner_1 = strategy.pruner
self.assertEquals(pruner_1.criterions['*'], 'l1_norm')
self.assertEquals(strategy.start_epoch, 0)
self.assertEquals(strategy.sensitivities_file,
'mobilenet_acc_top1_sensitive.data')
if __name__ == '__main__':
unittest.main()
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import paddle
import unittest
import paddle.fluid as fluid
import numpy as np
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestFilterPruning(unittest.TestCase):
def test_compression(self):
"""
Model: mobilenet_v1
data: mnist
step1: Training one epoch
step2: pruning flops
step3: fine-tune one epoch
step4: check top1_acc.
"""
if not fluid.core.is_compiled_with_cuda():
return
class_dim = 10
image_shape = [1, 28, 28]
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = MobileNet().net(input=image, class_dim=class_dim)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
val_program = fluid.default_main_program().clone(for_test=False)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
regularization=fluid.regularizer.L2Decay(4e-5))
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
val_feed_list = [('img', image.name), ('label', label.name)]
val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
acc_top5.name)]
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128)
train_feed_list = [('img', image.name), ('label', label.name)]
train_fetch_list = [('loss', avg_cost.name)]
com_pass = Compressor(
place,
fluid.global_scope(),
fluid.default_main_program(),
train_reader=train_reader,
train_feed_list=train_feed_list,
train_fetch_list=train_fetch_list,
eval_program=val_program,
eval_reader=val_reader,
eval_feed_list=val_feed_list,
eval_fetch_list=val_fetch_list,
train_optimizer=optimizer)
com_pass.config('./filter_pruning/compress.yaml')
eval_graph = com_pass.run()
self.assertTrue(
abs((com_pass.context.eval_results['acc_top1'][-1] - 0.969) / 0.969)
< 0.02)
def test_uniform_restore_from_checkpoint(self):
np.random.seed(0)
self.uniform_restore_from_checkpoint(
"./filter_pruning/uniform_restore_0.yaml")
acc_0 = self.uniform_restore_from_checkpoint(
"./filter_pruning/uniform_restore_1.yaml")
np.random.seed(0)
acc_1 = self.uniform_restore_from_checkpoint(
"./filter_pruning/uniform_restore.yaml")
self.assertTrue(abs((acc_0 - acc_1) / acc_1) < 0.001)
def uniform_restore_from_checkpoint(self, config_file):
class_dim = 10
image_shape = [1, 28, 28]
train_program = fluid.Program()
startup_program = fluid.Program()
train_program.random_seed = 10
startup_program.random_seed = 10
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
out = fluid.layers.conv2d(image, 4, 1)
out = fluid.layers.fc(out, size=class_dim)
out = fluid.layers.softmax(out)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
val_program = train_program.clone(for_test=False)
optimizer = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
regularization=fluid.regularizer.L2Decay(4e-5))
place = fluid.CPUPlace()
scope = fluid.Scope()
exe = fluid.Executor(place)
exe.run(startup_program, scope=scope)
val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
val_feed_list = [('img', image.name), ('label', label.name)]
val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
acc_top5.name)]
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128)
train_feed_list = [('img', image.name), ('label', label.name)]
train_fetch_list = [('loss', avg_cost.name)]
com_pass = Compressor(
place,
scope,
train_program,
train_reader=train_reader,
train_feed_list=train_feed_list,
train_fetch_list=train_fetch_list,
eval_program=val_program,
eval_reader=val_reader,
eval_feed_list=val_feed_list,
eval_fetch_list=val_fetch_list,
train_optimizer=optimizer)
com_pass.config(config_file)
eval_graph = com_pass.run()
return com_pass.context.eval_results['acc_top1'][-1]
if __name__ == '__main__':
unittest.main()
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
from __future__ import print_function
import os
import six
import numpy as np
import unittest
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
def conv_block():
img = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
return [img, label], avg_loss
class TestGraph(unittest.TestCase):
def graph_apis(self, use_cuda=False, for_ci=True):
main = fluid.Program()
startup = fluid.Program()
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
feeds, loss = conv_block()
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
graph = IrGraph(core.Graph(main.desc), for_test=False)
backup_graph = graph.clone()
self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes()))
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
origin_binary = fluid.CompiledProgram(graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
backup_binary = fluid.CompiledProgram(
backup_graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup)
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
def _train(binary):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[loss.name])
if not for_ci:
print('{}: {}'.format('loss', loss_v))
_train(origin_binary)
_train(backup_binary)
checkponit_dir = "checkpoint_gpu" if use_cuda else "checkpoint_cpu"
def _set_zero(var_name, scope, place):
var = scope.find_var(var_name).get_tensor()
var_array = np.zeros(var._get_dims()).astype("float32")
var.set(var_array, place)
sum_before = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor(
)))
        fluid.io._save_persistable_nodes(exe, checkpoint_dir, graph)
_set_zero('conv2d_1.w_0', fluid.global_scope(), place)
set_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor(
)))
self.assertEqual(set_after, 0)
        fluid.io._load_persistable_nodes(exe, checkpoint_dir, graph)
sum_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor(
)))
self.assertEqual(sum_before, sum_after)
marked_nodes = set()
for op in graph.all_op_nodes():
if op.name().find('conv2d') > -1:
marked_nodes.add(op)
if not for_ci:
graph.draw('.', 'residual', marked_nodes)
backup_marked_nodes = set()
for op in backup_graph.all_op_nodes():
if op.name().find('conv2d') > -1:
backup_marked_nodes.add(op)
backup_graph.draw('./origin', 'backup', backup_marked_nodes)
self.assertFalse(graph.has_circle())
self.assertEqual(graph.graph_num(), 1)
nodes = graph.topology_sort()
self.assertEqual(len(nodes), len(graph.all_op_nodes()))
nodes_map = graph.build_adjacency_list()
self.assertEqual(len(nodes_map), len(graph.all_op_nodes()))
nodes_num = len(graph.all_nodes())
graph.safe_remove_nodes(marked_nodes)
self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))
def test_graph_apis_cpu(self):
self.graph_apis(use_cuda=False, for_ci=True)
def test_graph_apis_cuda(self):
if fluid.core.is_compiled_with_cuda():
self.graph_apis(use_cuda=True, for_ci=True)
if __name__ == '__main__':
unittest.main()
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
from __future__ import print_function
import unittest
import paddle.fluid as fluid
import six
import numpy as np
from paddle.fluid.contrib.slim.graph import GraphWrapper
from paddle.fluid import core
import os
os.environ['CPU_NUM'] = str(4)
def residual_block(num):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 8, 8], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    data.stop_gradient = False
hidden = data
for _ in six.moves.xrange(num):
conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
fc = fluid.layers.fc(input=hidden, size=10)
loss = fluid.layers.cross_entropy(input=fc, label=label)
loss = fluid.layers.mean(loss)
return data, label, loss
class TestGraphWrapper(unittest.TestCase):
def build_program(self):
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
place = fluid.CUDAPlace(0)
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
image, label, self.loss = residual_block(2)
eval_program = main.clone()
opt = fluid.optimizer.SGD(learning_rate=0.001)
opt.minimize(self.loss)
self.scope = core.Scope()
exe = fluid.Executor(place)
exe.run(startup, scope=self.scope)
self.eval_graph = GraphWrapper(
program=eval_program,
in_nodes={'image': image.name,
'label': label.name},
out_nodes={'loss': self.loss.name})
self.train_graph = GraphWrapper(
program=main,
in_nodes={'image': image.name,
'label': label.name},
out_nodes={'loss': self.loss.name})
def test_all_parameters(self):
self.build_program()
self.assertEquals(len(self.train_graph.all_parameters()), 24)
def test_all_vars(self):
self.build_program()
# self.assertEquals(len(self.train_graph.vars()), 90)
# activation inplace has been disabled in python side
# which may produce more variable in program_desc
# update 90 => 94
# delete three useless RAW variables in Conv2D
# update 94 => 91
self.assertEquals(len(self.train_graph.vars()), 91)
def test_numel_params(self):
self.build_program()
self.assertEquals(self.train_graph.numel_params(), 13258)
def test_compile(self):
self.build_program()
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
self.train_graph.compile()
exe.run(self.train_graph.compiled_graph,
scope=self.scope,
feed={
'image':
np.random.randint(0, 40, [16, 1, 8, 8]).astype('float32'),
'label': np.random.randint(0, 10, [16, 1]).astype('int64')
})
def test_pre_and_next_ops(self):
self.build_program()
for op in self.train_graph.ops():
for next_op in self.train_graph.next_ops(op):
self.assertTrue(op in self.train_graph.pre_ops(next_op))
def test_get_optimize_graph(self):
self.build_program()
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
place = fluid.CUDAPlace(0)
opt = fluid.optimizer.SGD(learning_rate=0.001)
train_graph = self.eval_graph.get_optimize_graph(
opt, place, self.scope, no_grad_var_names=['image'])
self.assertEquals(len(self.train_graph.ops()), len(train_graph.ops()))
exe = fluid.Executor(place)
train_graph.compile()
image = np.random.randint(0, 225, [16, 1, 8, 8]).astype('float32')
label = np.random.randint(0, 10, [16, 1]).astype('int64')
exe.run(train_graph.compiled_graph,
scope=self.scope,
feed={'image': image,
'label': label})
def test_get_optimize_graph_without_loss(self):
self.build_program()
self.eval_graph.out_nodes = {}
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
place = fluid.CUDAPlace(0)
opt = fluid.optimizer.SGD(learning_rate=0.001)
train_graph = self.eval_graph.get_optimize_graph(
opt, place, self.scope, no_grad_var_names=['image'])
self.assertEquals(train_graph, None)
def test_flops(self):
self.build_program()
self.assertEquals(self.train_graph.flops(), 354624)
def test_merge(self):
self.build_program()
self.train_graph.merge(self.eval_graph)
self.assertEquals(len(self.train_graph.ops()), 72)
if __name__ == '__main__':
unittest.main()
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
"""
Test LightNAS.
"""
import sys
import unittest
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.core import Compressor
sys.path.append("./light_nas")
from light_nas_space import LightNASSpace
class TestLightNAS(unittest.TestCase):
"""
Test LightNAS.
"""
def test_compression(self):
"""
Test LightNAS.
"""
# Update compress.yaml
lines = list()
fid = open('./light_nas/compress.yaml')
for line in fid:
if 'target_latency' in line:
lines.append(' target_latency: 0\n')
else:
lines.append(line)
fid.close()
fid = open('./light_nas/compress.yaml', 'w')
for line in lines:
fid.write(line)
fid.close()
# Begin test
if not fluid.core.is_compiled_with_cuda():
return
space = LightNASSpace()
startup_prog, train_prog, test_prog, train_metrics, test_metrics, train_reader, test_reader = space.create_net(
)
train_cost, train_acc1, train_acc5, global_lr = train_metrics
test_cost, test_acc1, test_acc5 = test_metrics
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup_prog)
val_fetch_list = [('acc_top1', test_acc1.name),
('acc_top5', test_acc5.name)]
train_fetch_list = [('loss', train_cost.name)]
com_pass = Compressor(
place,
fluid.global_scope(),
train_prog,
train_reader=train_reader,
train_feed_list=None,
train_fetch_list=train_fetch_list,
eval_program=test_prog,
eval_reader=test_reader,
eval_feed_list=None,
eval_fetch_list=val_fetch_list,
train_optimizer=None,
search_space=space)
com_pass.config('./light_nas/compress.yaml')
eval_graph = com_pass.run()
def test_compression_with_target_latency(self):
"""
Test LightNAS with target_latency.
"""
# Update compress.yaml
lines = list()
fid = open('./light_nas/compress.yaml')
for line in fid:
if 'target_latency' in line:
lines.append(' target_latency: 1\n')
else:
lines.append(line)
fid.close()
fid = open('./light_nas/compress.yaml', 'w')
for line in lines:
fid.write(line)
fid.close()
# Begin test
if not fluid.core.is_compiled_with_cuda():
return
space = LightNASSpace()
startup_prog, train_prog, test_prog, train_metrics, test_metrics, train_reader, test_reader = space.create_net(
)
train_cost, train_acc1, train_acc5, global_lr = train_metrics
test_cost, test_acc1, test_acc5 = test_metrics
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup_prog)
val_fetch_list = [('acc_top1', test_acc1.name),
('acc_top5', test_acc5.name)]
train_fetch_list = [('loss', train_cost.name)]
com_pass = Compressor(
place,
fluid.global_scope(),
train_prog,
train_reader=train_reader,
train_feed_list=None,
train_fetch_list=train_fetch_list,
eval_program=test_prog,
eval_reader=test_reader,
eval_feed_list=None,
eval_fetch_list=val_fetch_list,
train_optimizer=None,
search_space=space)
com_pass.config('./light_nas/compress.yaml')
eval_graph = com_pass.run()
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import sys
import argparse
import shutil
import logging
import struct
import six
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=1, help='batch size')
parser.add_argument(
'--infer_model',
type=str,
default='',
help='infer_model is used to load an original fp32 ProgramDesc with fp32 weights'
)
parser.add_argument('--infer_data', type=str, default='', help='data file')
parser.add_argument(
'--int8_model_save_path',
type=str,
default='./output',
        help='int8_model_save_path is used to save an int8 ProgramDesc with fp32 weights')
parser.add_argument(
'--warmup_batch_size',
type=int,
default=100,
help='batch size for quantization warmup')
parser.add_argument(
'--accuracy_diff_threshold',
type=float,
default=0.01,
help='accepted accuracy drop threshold.')
test_args, args = parser.parse_known_args(namespace=unittest)
return test_args, sys.argv[:1] + args
class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
"""
Test API of Post Training quantization strategy for int8 with MKL-DNN.
"""
def _reader_creator(self, data_file='data.bin', cycle=False):
def reader():
with open(data_file, 'rb') as fp:
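                # Assumed binary layout, implied by the offsets below: an int64
                # sample count, then `num` float32 CHW images, then `num` int64 labels.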
num = fp.read(8)
num = struct.unpack('q', num)[0]
imgs_offset = 8
img_ch = 3
img_w = 224
img_h = 224
img_pixel_size = 4
img_size = img_ch * img_h * img_w * img_pixel_size
label_size = 8
labels_offset = imgs_offset + num * img_size
step = 0
while step < num:
fp.seek(imgs_offset + img_size * step)
img = fp.read(img_size)
img = struct.unpack_from(
'{}f'.format(img_ch * img_w * img_h), img)
img = np.array(img)
img.shape = (img_ch, img_w, img_h)
fp.seek(labels_offset + label_size * step)
label = fp.read(label_size)
label = struct.unpack('q', label)[0]
yield img, int(label)
step += 1
if cycle and step == num:
step = 0
return reader
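    # Usage sketch (hypothetical file name): wrap the creator with paddle.batch
    # to obtain mini-batches, e.g.
    #   reader = paddle.batch(self._reader_creator('data.bin'), batch_size=50)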
def _update_config_file(self, fp32_model_path, output_path):
config_path = './quantization/config_mkldnn_int8.yaml'
new_config_path = './quantization/temp.yaml'
shutil.copy(config_path, new_config_path)
with open(new_config_path, 'r+') as fp:
data = fp.read()
data = data.replace('MODEL_PATH', fp32_model_path)
data = data.replace('OUTPUT_PATH', output_path)
with open(new_config_path, 'w') as fp:
fp.write(data)
return new_config_path
def _transform_depthwise_conv(self, graph):
        '''
        Transform depthwise_conv2d ops into conv2d (used with MKL-DNN only).
        '''
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in ['depthwise_conv2d']:
input_var_node = graph._find_node_by_name(
op_node.inputs, op_node.input("Input")[0])
weight_var_node = graph._find_node_by_name(
op_node.inputs, op_node.input("Filter")[0])
output_var_node = graph._find_node_by_name(
graph.all_var_nodes(), op_node.output("Output")[0])
attrs = {
name: op_node.op().attr(name)
for name in op_node.op().attr_names()
}
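                # Rebuild the op as a plain conv2d with identical attributes,
                # then rewire the input/filter/output edges to the new node.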
conv_op_node = graph.create_op_node(
op_type='conv2d',
attrs=attrs,
inputs={
'Input': input_var_node,
'Filter': weight_var_node
},
outputs={'Output': output_var_node})
graph.link_to(input_var_node, conv_op_node)
graph.link_to(weight_var_node, conv_op_node)
graph.link_to(conv_op_node, output_var_node)
graph.safe_remove_nodes(op_node)
return graph
def _predict(self, test_reader=None, model_path=None):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')):
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
else:
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
model_path, exe, 'model', 'params')
use_mkldnn = fluid.core.globals()["FLAGS_use_mkldnn"]
            if use_mkldnn:
graph = IrGraph(
core.Graph(inference_program.desc), for_test=True)
graph = self._transform_depthwise_conv(graph)
inference_program = graph.to_program()
dshape = [3, 224, 224]
top1 = 0.0
top5 = 0.0
total_samples = 0
for batch_id, data in enumerate(test_reader()):
if six.PY2:
images = map(lambda x: x[0].reshape(dshape), data)
if six.PY3:
images = list(map(lambda x: x[0].reshape(dshape), data))
images = np.array(images).astype('float32')
labels = np.array([x[1] for x in data]).astype("int64")
labels = labels.reshape([-1, 1])
fluid.core.set_num_threads(int(os.environ['CPU_NUM_THREADS']))
out = exe.run(inference_program,
feed={
feed_target_names[0]: images,
feed_target_names[1]: labels
},
fetch_list=fetch_targets)
fluid.core.set_num_threads(1)
top1 += np.sum(out[1]) * len(data)
top5 += np.sum(out[2]) * len(data)
total_samples += len(data)
if (batch_id + 1) % 100 == 0:
_logger.info('{} images have been predicted'.format(
total_samples))
return top1 / total_samples, top5 / total_samples
def _warmup(self, reader=None, config_path=''):
com_pass = Compressor(
place=None,
scope=None,
train_program=None,
train_reader=None,
train_feed_list=[],
train_fetch_list=[],
eval_program=None,
eval_reader=reader,
eval_feed_list=[],
eval_fetch_list=[],
teacher_programs=[],
checkpoint_path='',
train_optimizer=None,
distiller_optimizer=None)
com_pass.config(config_path)
com_pass.run()
def _compare_accuracy(self, fp32_acc1, int8_acc1, threshold):
_logger.info('--- Accuracy summary ---')
        _logger.info(
            'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)'
            .format(threshold))
_logger.info('FP32: avg top1 accuracy: {0:.4f}'.format(fp32_acc1))
_logger.info('INT8: avg top1 accuracy: {0:.4f}'.format(int8_acc1))
assert fp32_acc1 > 0.0
assert int8_acc1 > 0.0
assert fp32_acc1 - int8_acc1 <= threshold
def test_compression(self):
if not fluid.core.is_compiled_with_mkldnn():
return
int8_model_path = test_case_args.int8_model_save_path
data_path = test_case_args.infer_data
fp32_model_path = test_case_args.infer_model
batch_size = test_case_args.batch_size
warmup_batch_size = test_case_args.warmup_batch_size
accuracy_diff_threshold = test_case_args.accuracy_diff_threshold
_logger.info(
'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'
.format(batch_size, warmup_batch_size))
        # Warmup dataset: only the first batch of data is used.
warmup_reader = paddle.batch(
self._reader_creator(data_path, False),
batch_size=warmup_batch_size)
config_path = self._update_config_file(fp32_model_path, int8_model_path)
self._warmup(warmup_reader, config_path)
_logger.info('--- INT8 prediction start ---')
val_reader = paddle.batch(
self._reader_creator(data_path, False), batch_size=batch_size)
int8_model_result = self._predict(val_reader, int8_model_path)
_logger.info('--- FP32 prediction start ---')
val_reader = paddle.batch(
self._reader_creator(data_path, False), batch_size=batch_size)
fp32_model_result = self._predict(val_reader, fp32_model_path)
self._compare_accuracy(fp32_model_result[0], int8_model_result[0],
accuracy_diff_threshold)
if __name__ == '__main__':
global test_case_args
test_case_args, remaining_args = parse_args()
unittest.main(argv=remaining_args)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import time
import sys
import random
import math
import functools
import contextlib
import numpy as np
from PIL import Image, ImageEnhance
import paddle
import paddle.fluid as fluid
from paddle.dataset.common import download
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
random.seed(0)
np.random.seed(0)
DATA_DIM = 224
THREAD = 1
BUF_SIZE = 102400
DATA_DIR = 'data/ILSVRC2012'
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
    if center:
        w_start = (width - size) // 2
        h_start = (height - size) // 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def process_image(sample, mode, color_jitter, rotate):
img_path = sample[0]
img = Image.open(img_path)
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
return img, sample[1]
def _reader_creator(file_list,
mode,
shuffle=False,
color_jitter=False,
rotate=False,
data_dir=DATA_DIR):
def reader():
with open(file_list) as flist:
full_lines = [line.strip() for line in flist]
if shuffle:
np.random.shuffle(full_lines)
lines = full_lines
for line in lines:
img_path, label = line.split()
img_path = os.path.join(data_dir, img_path)
if not os.path.exists(img_path):
continue
yield img_path, int(label)
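    # xmap_readers maps process_image over the path reader asynchronously,
    # using THREAD worker threads and a buffer of BUF_SIZE samples.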
mapper = functools.partial(
process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def val(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'val_list.txt')
return _reader_creator(file_list, 'val', shuffle=False, data_dir=data_dir)
class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self):
self.int8_download = 'int8/download'
self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
self.int8_download)
self.data_cache_folder = ''
data_urls = []
data_md5s = []
if os.environ.get('DATASET') == 'full':
data_urls.append(
'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa'
)
data_md5s.append('60f6525b0e1d127f345641d75d41f0a8')
data_urls.append(
'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
)
data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
self.data_cache_folder = self.download_data(data_urls, data_md5s,
"full_data", False)
else:
data_urls.append(
'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
)
data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d')
self.data_cache_folder = self.download_data(data_urls, data_md5s,
"small_data", False)
# reader/decorator.py requires the relative path to the data folder
if not os.path.exists("./data/ILSVRC2012"):
cmd = 'rm -rf {0} && ln -s {1} {0}'.format("data",
self.data_cache_folder)
os.system(cmd)
self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50
self.sample_iterations = 50 if os.environ.get(
'DATASET') == 'full' else 1
self.infer_iterations = 50000 if os.environ.get(
'DATASET') == 'full' else 1
self.timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
self.int8_model = os.path.join(os.getcwd(),
"post_training_" + self.timestamp)
def tearDown(self):
try:
os.system("rm -rf {}".format(self.int8_model))
except Exception as e:
print("Failed to delete {} due to {}".format(self.int8_model,
str(e)))
def cache_unzipping(self, target_folder, zip_path):
if not os.path.exists(target_folder):
cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder,
zip_path)
os.system(cmd)
def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
data_cache_folder = os.path.join(self.cache_folder, folder_name)
zip_path = ''
if os.environ.get('DATASET') == 'full':
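            # The full ImageNet val archive is distributed as split parts:
            # download each part, then concatenate them into one tarball below.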
file_names = []
for i in range(0, len(data_urls)):
download(data_urls[i], self.int8_download, data_md5s[i])
file_names.append(data_urls[i].split('/')[-1])
zip_path = os.path.join(self.cache_folder,
'full_imagenet_val.tar.gz')
if not os.path.exists(zip_path):
cat_command = 'cat'
for file_name in file_names:
cat_command += ' ' + os.path.join(self.cache_folder,
file_name)
cat_command += ' > ' + zip_path
os.system(cat_command)
if os.environ.get('DATASET') != 'full' or is_model:
download(data_urls[0], self.int8_download, data_md5s[0])
file_name = data_urls[0].split('/')[-1]
zip_path = os.path.join(self.cache_folder, file_name)
print('Data is downloaded at {0}'.format(zip_path))
self.cache_unzipping(data_cache_folder, zip_path)
return data_cache_folder
def download_model(self):
pass
def run_program(self, model_path, batch_size, infer_iterations):
image_shape = [3, 224, 224]
place = fluid.CPUPlace()
exe = fluid.Executor(place)
[infer_program, feed_dict, fetch_targets] = \
fluid.io.load_inference_model(model_path, exe)
val_reader = paddle.batch(val(), batch_size)
iterations = infer_iterations
test_info = []
cnt = 0
periods = []
for batch_id, data in enumerate(val_reader()):
image = np.array(
[x[0].reshape(image_shape) for x in data]).astype("float32")
label = np.array([x[1] for x in data]).astype("int64")
label = label.reshape([-1, 1])
t1 = time.time()
_, acc1, _ = exe.run(
infer_program,
feed={feed_dict[0]: image,
feed_dict[1]: label},
fetch_list=fetch_targets)
t2 = time.time()
period = t2 - t1
periods.append(period)
test_info.append(np.mean(acc1) * len(data))
cnt += len(data)
if (batch_id + 1) % 100 == 0:
print("{0} images,".format(batch_id + 1))
sys.stdout.flush()
if (batch_id + 1) == iterations:
break
throughput = cnt / np.sum(periods)
latency = np.average(periods)
acc1 = np.sum(test_info) / cnt
return (throughput, latency, acc1)
def generate_quantized_model(self,
model_path,
algo="KL",
is_full_quantize=False):
try:
os.system("mkdir " + self.int8_model)
except Exception as e:
print("Failed to create {} due to {}".format(self.int8_model,
str(e)))
sys.exit(-1)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.global_scope()
val_reader = val()
quantizable_op_type = [
"conv2d", "depthwise_conv2d", "mul", "pool2d", "elementwise_add"
]
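        # Calibrate on samples from val_reader, insert quant/dequant ops for
        # the listed op types, and save the resulting INT8 model below.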
ptq = PostTrainingQuantization(
executor=exe,
sample_generator=val_reader,
model_dir=model_path,
algo=algo,
quantizable_op_type=quantizable_op_type,
is_full_quantize=is_full_quantize)
ptq.quantize()
ptq.save_quantized_model(self.int8_model)
def run_test(self, model, algo, data_urls, data_md5s):
infer_iterations = self.infer_iterations
batch_size = self.batch_size
sample_iterations = self.sample_iterations
model_cache_folder = self.download_data(data_urls, data_md5s, model)
print("Start FP32 inference for {0} on {1} images ...".format(
model, infer_iterations * batch_size))
(fp32_throughput, fp32_latency, fp32_acc1) = self.run_program(
model_cache_folder + "/model", batch_size, infer_iterations)
print("Start INT8 post training quantization for {0} on {1} images ...".
format(model, sample_iterations * batch_size))
self.generate_quantized_model(
model_cache_folder + "/model", algo=algo, is_full_quantize=True)
print("Start INT8 inference for {0} on {1} images ...".format(
model, infer_iterations * batch_size))
(int8_throughput, int8_latency, int8_acc1) = self.run_program(
self.int8_model, batch_size, infer_iterations)
print(
"FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
format(model, batch_size, fp32_throughput, fp32_latency, fp32_acc1))
print(
"INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
format(model, batch_size, int8_throughput, int8_latency, int8_acc1))
sys.stdout.flush()
delta_value = fp32_acc1 - int8_acc1
self.assertLess(delta_value, 0.025)
class TestPostTrainingForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_mobilenetv1(self):
model = "MobileNet-V1"
algo = "KL"
data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
self.run_test(model, algo, data_urls, data_md5s)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import unittest
from test_post_training_quantization_mobilenetv1 import TestPostTrainingQuantization
class TestPostTrainingForResnet50(TestPostTrainingQuantization):
def test_post_training_resnet50(self):
model = "ResNet-50"
algo = "direct"
data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz'
]
data_md5s = ['4a5194524823d9b76da6e738e1367881']
self.run_test(model, algo, data_urls, data_md5s)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import six
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import FakeQAT2MkldnnINT8KernelPass
from paddle.fluid import core
os.environ["CPU_NUM"] = "1"
def conv_net(img, label):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
return avg_loss
class TestMKLDNNTransformBasedFreezePass(unittest.TestCase):
def setUp(self):
self.quantizable_op_and_inputs = {
'conv2d': ['Input', 'Filter'],
'depthwise_conv2d': ['Input', 'Filter'],
'mul': ['X', 'Y']
}
def check_program(self, program):
for block in program.blocks:
for op in block.ops:
if op.type in self.quantizable_op_and_inputs:
for arg_name in op.output_arg_names:
# Check quantizable op's output is linked to
# fake_dequantize's output
self.assertTrue(arg_name.endswith('.dequantized'))
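    # Element-wise check: True where x has no fractional part.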
def isinteger(self, x):
return np.equal(np.mod(x, 1), 0)
def build_program(self, main, startup, is_test, seed):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
loss = conv_net(img, label)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
return [img, label], loss
def mkldnn_based_freeze_graph(self,
use_cuda,
seed,
activation_quant_type,
weight_quant_type='abs_max',
qat_perf=False,
for_ci=False):
random.seed(0)
np.random.seed(0)
main = fluid.Program()
startup = fluid.Program()
test_program = fluid.Program()
feeds, loss = self.build_program(main, startup, False, seed)
self.build_program(test_program, startup, True, seed)
test_program = test_program.clone(for_test=True)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
# Apply the QAT QuantizationTransformPass
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quant_type)
transform_pass.apply(main_graph)
transform_pass.apply(test_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
quantized_test_program = test_graph.to_program()
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=batch_size)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        # Train the model to obtain the weight values.
with fluid.scope_guard(scope):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[loss])
        # Freeze the graph for inference; fc/conv weights are still float at this point.
freeze_pass = QuantizationFreezePass(
scope=scope, place=place, weight_quantize_type=weight_quant_type)
freeze_pass.apply(test_graph)
# Transform quantized graph for MKL-DNN INT8 inference
mkldnn_int8_pass = FakeQAT2MkldnnINT8KernelPass(
_scope=scope, _place=place)
mkldnn_int8_pass.apply(test_graph)
dev_name = '_cpu_'
if not for_ci:
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_mkldnn' + dev_name +
activation_quant_type + '_' + weight_quant_type,
marked_nodes)
mkldnn_program = test_graph.to_program()
# Check the transformation weights of conv2d and mul
conv_w_mkldnn = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
mul_w_mkldnn = np.array(scope.find_var('fc_0.w_0').get_tensor())
# Check if weights are still integer
self.assertFalse(self.isinteger(np.sum(conv_w_mkldnn)))
self.assertFalse(self.isinteger(np.sum(mul_w_mkldnn)))
# Check if the conv2d output and mul output are correctly linked to fake_dequantize's
# output
self.check_program(mkldnn_program)
        if not for_ci:
            print('{}: {}'.format('conv_w_mkldnn' + dev_name + activation_quant_type
                                  + '_' + weight_quant_type, np.sum(conv_w_mkldnn)))
            print('{}: {}'.format('mul_w_mkldnn' + dev_name + activation_quant_type
                                  + '_' + weight_quant_type, np.sum(mul_w_mkldnn)))
def test_mkldnn_graph_cpu_static(self):
with fluid.unique_name.guard():
self.mkldnn_based_freeze_graph(
False,
seed=2,
activation_quant_type='range_abs_max',
weight_quant_type='abs_max',
for_ci=True)
self.mkldnn_based_freeze_graph(
False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='abs_max',
for_ci=True)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import six
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass
from paddle.fluid import core
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
def linear_fc(num):
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in six.moves.xrange(num):
hidden = fluid.layers.fc(hidden, size=128, act='relu')
loss = fluid.layers.cross_entropy(input=hidden, label=label)
loss = fluid.layers.mean(loss)
return loss
def residual_block(num, quant_skip_pattern=None):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in six.moves.xrange(num):
conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
if quant_skip_pattern:
with fluid.name_scope(quant_skip_pattern):
pool = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='avg', pool_stride=2)
else:
pool = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='avg', pool_stride=2)
fc = fluid.layers.fc(input=pool, size=10)
loss = fluid.layers.cross_entropy(input=fc, label=label)
loss = fluid.layers.mean(loss)
return loss
def conv_net(img, label, quant_skip_pattern):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
pool_type='max',
act="relu")
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
pool_type='avg',
act="relu")
hidden = fluid.layers.fc(input=conv_pool_2, size=100, act='relu')
with fluid.name_scope(quant_skip_pattern):
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
return avg_loss
class TestQuantizationTransformPass(unittest.TestCase):
def setUp(self):
self.quantizable_op_and_inputs = {
'conv2d': ['Input', 'Filter'],
'depthwise_conv2d': ['Input', 'Filter'],
'mul': ['X', 'Y']
}
self.quantizable_grad_op_inputs = {
'conv2d_grad': ['Input', 'Filter'],
'depthwise_conv2d_grad': ['Input', 'Filter'],
'mul_grad': ['X', 'Y']
}
def check_program(self, program):
quantized_ops = set()
for block in program.blocks:
for op in block.ops:
# check forward
if op.type in self.quantizable_op_and_inputs:
for arg_name in op.input_arg_names:
self.assertTrue(
arg_name.endswith('.quantized.dequantized'))
quantized_ops.add(arg_name)
for op in block.ops:
# check backward
if op.type in self.quantizable_grad_op_inputs:
for pname in self.quantizable_grad_op_inputs[op.type]:
arg_name = op.input(pname)[0]
self.assertTrue(
arg_name.endswith('.quantized.dequantized'))
self.assertTrue(arg_name in quantized_ops)
def linear_fc_quant(self,
activation_quant_type,
weight_quantize_type,
for_ci=True):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = linear_fc(3)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
place = fluid.CPUPlace()
graph = IrGraph(core.Graph(main.desc), for_test=False)
transform_pass = QuantizationTransformPass(
scope=fluid.global_scope(),
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type)
transform_pass.apply(graph)
if not for_ci:
marked_nodes = set()
for op in graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
graph.draw('.', 'quantize_fc_' + activation_quant_type,
marked_nodes)
program = graph.to_program()
self.check_program(program)
val_graph = IrGraph(core.Graph(program.desc), for_test=False)
if not for_ci:
val_marked_nodes = set()
for op in val_graph.all_op_nodes():
if op.name().find('quantize') > -1:
val_marked_nodes.add(op)
val_graph.draw('.', 'val_fc_' + activation_quant_type,
val_marked_nodes)
def test_linear_fc_quant_abs_max(self):
self.linear_fc_quant('abs_max', 'abs_max', for_ci=True)
def test_linear_fc_quant_range_abs_max(self):
self.linear_fc_quant('range_abs_max', 'abs_max', for_ci=True)
def test_linear_fc_quant_moving_average_abs_max(self):
self.linear_fc_quant(
'moving_average_abs_max', 'channel_wise_abs_max', for_ci=True)
def residual_block_quant(self,
activation_quant_type,
weight_quantize_type,
for_ci=True):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = residual_block(2)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
place = fluid.CPUPlace()
graph = IrGraph(core.Graph(main.desc), for_test=False)
transform_pass = QuantizationTransformPass(
scope=fluid.global_scope(),
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type)
transform_pass.apply(graph)
if not for_ci:
marked_nodes = set()
for op in graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
graph.draw('.', 'quantize_residual_' + activation_quant_type,
marked_nodes)
program = graph.to_program()
self.check_program(program)
val_graph = IrGraph(core.Graph(program.desc), for_test=False)
if not for_ci:
val_marked_nodes = set()
for op in val_graph.all_op_nodes():
if op.name().find('quantize') > -1:
val_marked_nodes.add(op)
val_graph.draw('.', 'val_residual_' + activation_quant_type,
val_marked_nodes)
def test_residual_block_abs_max(self):
self.residual_block_quant('abs_max', 'abs_max', for_ci=True)
def test_residual_block_range_abs_max(self):
self.residual_block_quant('range_abs_max', 'abs_max', for_ci=True)
def test_residual_block_moving_average_abs_max(self):
self.residual_block_quant(
'moving_average_abs_max', 'channel_wise_abs_max', for_ci=True)
class TestQuantizationFreezePass(unittest.TestCase):
def freeze_graph(self,
use_cuda,
seed,
activation_quant_type,
weight_quant_type='abs_max',
for_ci=True,
quant_skip_pattern='skip_quant'):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
loss = conv_net(img, label, quant_skip_pattern)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
return [img, label], loss
random.seed(0)
np.random.seed(0)
main = fluid.Program()
startup = fluid.Program()
test_program = fluid.Program()
feeds, loss = build_program(main, startup, False)
build_program(test_program, startup, True)
test_program = test_program.clone(for_test=True)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quant_type,
skip_pattern=quant_skip_pattern)
transform_pass.apply(main_graph)
transform_pass.apply(test_graph)
dev_name = '_gpu_' if use_cuda else '_cpu_'
if not for_ci:
marked_nodes = set()
for op in main_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
main_graph.draw('.', 'main' + dev_name + activation_quant_type + '_'
+ weight_quant_type, marked_nodes)
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test' + dev_name + activation_quant_type + '_'
+ weight_quant_type, marked_nodes)
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
quantized_test_program = test_graph.to_program()
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=batch_size)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.scope_guard(scope):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[loss])
if not for_ci:
print('{}: {}'.format('loss' + dev_name +
activation_quant_type + '_' +
weight_quant_type, loss_v))
test_data = next(test_reader())
with fluid.program_guard(quantized_test_program):
w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
quantized_test_program)
# Testing
with fluid.scope_guard(scope):
test_loss1, w_quant = exe.run(program=quantized_test_program,
feed=feeder.feed(test_data),
fetch_list=[loss, w_var])
        # Freeze the graph for inference; fc/conv weights are still float at this point.
freeze_pass = QuantizationFreezePass(
scope=scope, place=place, weight_quantize_type=weight_quant_type)
freeze_pass.apply(test_graph)
if not for_ci:
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_freeze' + dev_name +
activation_quant_type + '_' + weight_quant_type,
marked_nodes)
server_program = test_graph.to_program()
with fluid.scope_guard(scope):
test_loss2, = exe.run(program=server_program,
feed=feeder.feed(test_data),
fetch_list=[loss])
self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
if not for_ci:
print(
'{}: {}'.format('test_loss1' + dev_name + activation_quant_type
+ '_' + weight_quant_type, test_loss1))
print(
'{}: {}'.format('test_loss2' + dev_name + activation_quant_type
+ '_' + weight_quant_type, test_loss2))
w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # This check may fail due to limited floating-point precision:
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
if not for_ci:
print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type
+ '_' + weight_quant_type, np.sum(w_freeze)))
print('{}: {}'.format('w_quant' + dev_name + activation_quant_type +
'_' + weight_quant_type, np.sum(w_quant)))
# Convert parameter to 8-bit.
convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
convert_int8_pass.apply(test_graph)
if not for_ci:
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_int8' + dev_name + activation_quant_type
+ '_' + weight_quant_type, marked_nodes)
server_program_int8 = test_graph.to_program()
# Save the 8-bit parameter and model file.
with fluid.scope_guard(scope):
fluid.io.save_inference_model(
'server_int8' + dev_name + activation_quant_type + '_' +
weight_quant_type, ['image', 'label'], [loss], exe,
server_program_int8)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[infer, feed, fetch] = fluid.io.load_inference_model(
'server_int8' + dev_name + activation_quant_type + '_' +
weight_quant_type, exe)
# Check the loaded 8-bit weight.
w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
self.assertEqual(w_8bit.dtype, np.int8)
self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
if not for_ci:
print('{}: {}'.format('w_8bit' + dev_name + activation_quant_type +
'_' + weight_quant_type, np.sum(w_8bit)))
print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type
+ '_' + weight_quant_type, np.sum(w_freeze)))
mobile_pass = TransformForMobilePass()
mobile_pass.apply(test_graph)
if not for_ci:
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_mobile' + dev_name +
activation_quant_type + '_' + weight_quant_type,
marked_nodes)
mobile_program = test_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model(
'mobile_int8' + dev_name + activation_quant_type + '_' +
weight_quant_type, ['image', 'label'], [loss], exe,
mobile_program)
def test_freeze_graph_cuda_dynamic(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.freeze_graph(
True,
seed=1,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True)
with fluid.unique_name.guard():
self.freeze_graph(
True,
seed=1,
activation_quant_type='abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
def test_freeze_graph_cpu_dynamic(self):
with fluid.unique_name.guard():
self.freeze_graph(
False,
seed=2,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True)
self.freeze_graph(
False,
seed=2,
activation_quant_type='abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
def test_freeze_graph_cuda_static(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.freeze_graph(
True,
seed=1,
activation_quant_type='range_abs_max',
weight_quant_type='abs_max',
for_ci=True)
self.freeze_graph(
True,
seed=1,
activation_quant_type='moving_average_abs_max',
weight_quant_type='abs_max',
for_ci=True)
self.freeze_graph(
True,
seed=1,
activation_quant_type='range_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
self.freeze_graph(
True,
seed=1,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
def test_freeze_graph_cpu_static(self):
with fluid.unique_name.guard():
self.freeze_graph(
False,
seed=2,
activation_quant_type='range_abs_max',
weight_quant_type='abs_max',
for_ci=True)
self.freeze_graph(
False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='abs_max',
for_ci=True)
self.freeze_graph(
False,
seed=2,
activation_quant_type='range_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
self.freeze_graph(
False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
def quant_dequant_residual_block(num, quant_skip_pattern=None):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in six.moves.xrange(num):
conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
if isinstance(quant_skip_pattern, str):
with fluid.name_scope(quant_skip_pattern):
pool1 = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='avg', pool_stride=2)
pool2 = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='max', pool_stride=2)
pool_add = fluid.layers.elementwise_add(
x=pool1, y=pool2, act='relu')
elif isinstance(quant_skip_pattern, list):
assert len(
quant_skip_pattern
) > 1, 'test config error: the len of quant_skip_pattern list should be greater than 1.'
with fluid.name_scope(quant_skip_pattern[0]):
pool1 = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='avg', pool_stride=2)
pool2 = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='max', pool_stride=2)
with fluid.name_scope(quant_skip_pattern[1]):
pool_add = fluid.layers.elementwise_add(
x=pool1, y=pool2, act='relu')
else:
pool1 = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='avg', pool_stride=2)
pool2 = fluid.layers.pool2d(
input=hidden, pool_size=2, pool_type='max', pool_stride=2)
pool_add = fluid.layers.elementwise_add(x=pool1, y=pool2, act='relu')
fc = fluid.layers.fc(input=pool_add, size=10)
loss = fluid.layers.cross_entropy(input=fc, label=label)
loss = fluid.layers.mean(loss)
return loss
class TestAddQuantDequantPass(unittest.TestCase):
def setUp(self):
self._target_ops = {'elementwise_add', 'pool2d'}
self._target_grad_ops = {'elementwise_add_grad', 'pool2d_grad'}
def check_graph(self, graph, skip_pattern=None):
ops = graph.all_op_nodes()
for op_node in ops:
if op_node.name() in self._target_ops:
user_skipped = False
if isinstance(skip_pattern, list):
user_skipped = op_node.op().has_attr("op_namescope") and \
any(pattern in op_node.op().attr("op_namescope") for pattern in skip_pattern)
elif isinstance(skip_pattern, str):
user_skipped = op_node.op().has_attr("op_namescope") and \
op_node.op().attr("op_namescope").find(skip_pattern) != -1
if user_skipped:
continue
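                # Only ops whose inputs are all non-persistable variables are
                # expected to have been wrapped with quant_dequant inputs.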
in_nodes_all_not_persistable = True
for input_name in op_node.input_arg_names():
in_node = graph._find_node_by_name(op_node.inputs,
input_name)
in_nodes_all_not_persistable = (
in_nodes_all_not_persistable and
not in_node.persistable())
if not in_nodes_all_not_persistable:
continue
input_names = op_node.input_arg_names()
for input_name in input_names:
self.assertTrue(input_name.endswith('.quant_dequant'))
def residual_block_quant(self, skip_pattern=None, for_ci=True):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = quant_dequant_residual_block(2, skip_pattern)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
place = fluid.CPUPlace()
graph = IrGraph(core.Graph(main.desc), for_test=False)
add_quant_dequant_pass = AddQuantDequantPass(
scope=fluid.global_scope(), place=place, skip_pattern=skip_pattern)
add_quant_dequant_pass.apply(graph)
if not for_ci:
marked_nodes = set()
for op in graph.all_op_nodes():
if op.name().find('quant') > -1:
marked_nodes.add(op)
graph.draw('.', 'add_quant_dequant_graph', marked_nodes)
self.check_graph(graph, skip_pattern)
program = graph.to_program()
val_graph = IrGraph(core.Graph(program.desc), for_test=False)
if not for_ci:
val_marked_nodes = set()
for op in val_graph.all_op_nodes():
if op.name().find('quant') > -1:
val_marked_nodes.add(op)
val_graph.draw('.', 'val_add_quant_dequant_graph', val_marked_nodes)
def test_residual_block(self):
self.residual_block_quant(skip_pattern=None, for_ci=True)
def test_residual_block_skip_pattern(self):
self.residual_block_quant(skip_pattern='skip_quant', for_ci=True)
    def test_residual_block_skip_pattern_list(self):
self.residual_block_quant(
skip_pattern=['skip_quant1', 'skip_quant2'], for_ci=True)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import random
import numpy as np
import six
import paddle.fluid as fluid
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ScaleForTrainingPass
from paddle.fluid.contrib.slim.quantization import ScaleForInferencePass
from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass
from paddle.fluid import core
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
def residual_block(img, label, num=1):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)
hidden = img
for _ in six.moves.xrange(num):
conv = conv_bn_layer(hidden, 20, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 20, 1, 1, 0, act=None)
hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
fc = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=fc, label=label)
loss = fluid.layers.mean(loss)
return loss
class TestQuantizationScalePass(unittest.TestCase):
def quantization_scale(self,
use_cuda,
seed,
activation_quant_type,
weight_quant_type='abs_max',
for_ci=False):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
loss = residual_block(img, label, 1)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.0001)
opt.minimize(loss)
return [img, label], loss
random.seed(0)
np.random.seed(0)
main = fluid.Program()
startup = fluid.Program()
test_program = fluid.Program()
feeds, loss = build_program(main, startup, False)
build_program(test_program, startup, True)
test_program = test_program.clone(for_test=True)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quant_type)
transform_pass.apply(main_graph)
transform_pass.apply(test_graph)
add_quant_dequant_pass = AddQuantDequantPass(scope=scope, place=place)
add_quant_dequant_pass.apply(main_graph)
add_quant_dequant_pass.apply(test_graph)
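        # Record activation scales during training; ScaleForInferencePass
        # below writes them into the test graph for inference.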
scale_training_pass = ScaleForTrainingPass(scope=scope, place=place)
scale_training_pass.apply(main_graph)
dev_name = '_gpu' if use_cuda else '_cpu'
if not for_ci:
marked_nodes = set()
for op in main_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
main_graph.draw('.', 'main_scale' + dev_name, marked_nodes)
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_scale' + dev_name, marked_nodes)
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=batch_size)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.scope_guard(scope):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[loss])
if not for_ci:
print('{}: {}'.format('loss' + dev_name, loss_v))
scale_inference_pass = ScaleForInferencePass(scope=scope)
scale_inference_pass.apply(test_graph)
        # Freeze the graph for inference; fc/conv weights are still float at this point.
freeze_pass = QuantizationFreezePass(
scope=scope, place=place, weight_quantize_type=weight_quant_type)
freeze_pass.apply(test_graph)
server_program = test_graph.to_program()
if not for_ci:
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'quant_scale' + dev_name, marked_nodes)
with open('quant_scale_model' + dev_name + '.txt', 'w') as f:
f.write(str(server_program))
with fluid.scope_guard(scope):
fluid.io.save_inference_model('quant_scale_model' + dev_name,
['image', 'label'], [loss], exe,
server_program)
def test_quant_scale_cuda(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.quantization_scale(
True,
seed=1,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
def test_quant_scale_cpu(self):
with fluid.unique_name.guard():
self.quantization_scale(
False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestQuantizationStrategy(unittest.TestCase):
"""
Test API of quantization strategy.
"""
def test_compression(self):
self.quan("./quantization/compress.yaml")
self.quan("./quantization/compress_1.yaml")
def quan(self, config_file):
if not fluid.core.is_compiled_with_cuda():
return
class_dim = 10
image_shape = [1, 28, 28]
train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
out = MobileNet(name='quan').net(input=image,
class_dim=class_dim)
print("out: {}".format(out.name))
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
val_program = train_program.clone(for_test=False)
optimizer = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
regularization=fluid.regularizer.L2Decay(4e-5))
scope = fluid.Scope()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup_program, scope=scope)
val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
val_feed_list = [('img', image.name), ('label', label.name)]
val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
acc_top5.name)]
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128)
train_feed_list = [('img', image.name), ('label', label.name)]
train_fetch_list = [('loss', avg_cost.name)]
com_pass = Compressor(
place,
scope,
train_program,
train_reader=train_reader,
train_feed_list=train_feed_list,
train_fetch_list=train_fetch_list,
eval_program=val_program,
eval_reader=val_reader,
eval_feed_list=val_feed_list,
eval_fetch_list=val_fetch_list,
train_optimizer=optimizer)
com_pass.config(config_file)
eval_graph = com_pass.run()
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestReader(unittest.TestCase):
"""
Test API of quantization strategy.
"""
def set_train_reader(self, image, label, place):
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128)
return train_reader
def set_val_reader(self, image, label, place):
val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
return val_reader
def set_feed_list(self, image, label):
return [('img', image.name), ('label', label.name)]
def quan(self, config_file):
if not fluid.core.is_compiled_with_cuda():
return
class_dim = 10
image_shape = [1, 28, 28]
train_program = fluid.Program()
startup_program = fluid.Program()
val_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
out = MobileNet(name='quan').net(input=image,
class_dim=class_dim)
print("out: {}".format(out.name))
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=0.01,
regularization=fluid.regularizer.L2Decay(4e-5))
val_program = train_program.clone(for_test=False)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup_program)
val_reader = self.set_val_reader(image, label, place)
val_feed_list = self.set_feed_list(image, label)
val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
acc_top5.name)]
train_reader = self.set_train_reader(image, label, place)
train_feed_list = self.set_feed_list(image, label)
train_fetch_list = [('loss', avg_cost.name)]
com_pass = Compressor(
place,
fluid.global_scope(),
train_program,
train_reader=train_reader,
train_feed_list=train_feed_list,
train_fetch_list=train_fetch_list,
eval_program=val_program,
eval_reader=val_reader,
eval_feed_list=val_feed_list,
eval_fetch_list=val_fetch_list,
train_optimizer=optimizer)
com_pass.config(config_file)
eval_graph = com_pass.run()
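# Variant of TestReader that feeds the Compressor through fluid.io.DataLoader
# instead of plain paddle.batch readers.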
class TestReader1(TestReader):
def set_train_reader(self, image, label, place):
loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label], capacity=16, iterable=True)
loader.set_sample_generator(
paddle.dataset.mnist.train(), batch_size=128, places=place)
return loader
def set_val_reader(self, image, label, place):
loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label], capacity=16, iterable=True)
loader.set_sample_generator(
paddle.dataset.mnist.test(), batch_size=128, places=place)
return loader
def test_compression(self):
self.quan("./quantization/compress_2.yaml")
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestDistillationStrategy(unittest.TestCase):
"""
Test API of distillation strategy.
"""
def test_compression(self):
if not fluid.core.is_compiled_with_cuda():
return
class_dim = 10
image_shape = [1, 28, 28]
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
image.stop_gradient = False
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = MobileNet(name="student").net(input=image, class_dim=class_dim)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
val_program = fluid.default_main_program().clone(for_test=False)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=fluid.layers.piecewise_decay(
boundaries=[5, 10], values=[0.01, 0.001, 0.0001]),
regularization=fluid.regularizer.L2Decay(4e-5))
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
val_feed_list = [('img', image.name), ('label', label.name)]
val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
acc_top5.name)]
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128)
train_feed_list = [('img', image.name), ('label', label.name)]
train_fetch_list = [('loss', avg_cost.name)]
# define teacher program
teacher_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(teacher_program, startup_program):
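            # clone the student's input variable into the teacher program so
            # both networks read the same feed data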
img = teacher_program.global_block()._clone_variable(
image, force_persistable=False)
predict = MobileNet(name="teacher").net(input=img,
class_dim=class_dim)
exe.run(startup_program)
com_pass = Compressor(
place,
fluid.global_scope(),
fluid.default_main_program(),
train_reader=train_reader,
train_feed_list=train_feed_list,
train_fetch_list=train_fetch_list,
eval_program=val_program,
eval_reader=val_reader,
eval_feed_list=val_feed_list,
eval_fetch_list=val_fetch_list,
teacher_programs=[teacher_program.clone(for_test=True)],
train_optimizer=optimizer,
distiller_optimizer=optimizer)
com_pass.config('./distillation/compress.yaml')
eval_graph = com_pass.run()
if __name__ == '__main__':
unittest.main()
File mode changed from 100644 to 100755 (repeated for many files)
......@@ -12,5 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .plot import Ploter
__all__ = ['dump_config', 'Ploter']
File mode changed from 100644 to 100755
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
class PlotData(object):
def __init__(self):
self.step = []
self.value = []
def append(self, step, value):
self.step.append(step)
self.value.append(value)
def reset(self):
self.step = []
self.value = []
class Ploter(object):
"""
Plot input data in a 2D graph
Args:
title: assign the title of input data.
step: x_axis of the data.
value: y_axis of the data.
"""
def __init__(self, *args):
self.__args__ = args
self.__plot_data__ = {}
for title in args:
self.__plot_data__[title] = PlotData()
        # Demos in notebooks use Ploter to plot figures, but when we convert
        # the .ipynb files to .py files for testing, importing matplotlib can
        # make the script crash. Set `export DISABLE_PLOT=True` to skip
        # importing these libs.
self.__disable_plot__ = os.environ.get("DISABLE_PLOT")
if not self.__plot_is_disabled__():
import matplotlib.pyplot as plt
from IPython import display
self.plt = plt
self.display = display
def __plot_is_disabled__(self):
return self.__disable_plot__ == "True"
def append(self, title, step, value):
"""
Feed data
Args:
title: assign the group data to this subtitle.
step: the x_axis of data.
value: the y_axis of data.
Examples:
.. code-block:: python
plot_curve = Ploter("Curve 1","Curve 2")
plot_curve.append(title="Curve 1",step=1,value=1)
"""
assert isinstance(title, six.string_types)
assert title in self.__plot_data__
data = self.__plot_data__[title]
assert isinstance(data, PlotData)
data.append(step, value)
def plot(self, path=None):
"""
Plot data in a 2D graph
Args:
            path: store the figure to this file path. Default: None.
Examples:
.. code-block:: python
                plot_curve = Ploter("Curve 1", "Curve 2")
                plot_curve.plot()
"""
if self.__plot_is_disabled__():
return
titles = []
for title in self.__args__:
data = self.__plot_data__[title]
assert isinstance(data, PlotData)
if len(data.step) > 0:
titles.append(title)
self.plt.plot(data.step, data.value)
self.plt.legend(titles, loc='upper left')
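        # with no path, refresh the current notebook cell output in place;
        # otherwise save the figure to the given file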
if path is None:
self.display.clear_output(wait=True)
self.display.display(self.plt.gcf())
else:
self.plt.savefig(path)
self.plt.gcf().clear()
def reset(self):
for key in self.__plot_data__:
data = self.__plot_data__[key]
assert isinstance(data, PlotData)
data.reset()
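A minimal usage sketch for the Ploter class above (assumes matplotlib and
IPython are importable and DISABLE_PLOT is unset; the curve title and the
cost values are arbitrary placeholders chosen for illustration):

    from paddle.utils.plot import Ploter

    plot = Ploter("train cost")                # one PlotData bucket per title
    for step, cost in enumerate([1.2, 0.8, 0.5]):
        plot.append("train cost", step, cost)  # feed (x, y) points
    plot.plot("cost.png")                      # with a path, the figure is
                                               # saved instead of shown inline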
#!/usr/bin/python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Plot training and testing curve from paddle log.
It takes input from a file or stdin, and output to a file or stdout.
Note: must have numpy and matplotlib installed in order to use this tool.
usage: Plot training and testing curves from paddle log file.
[-h] [-i INPUT] [-o OUTPUT] [--format FORMAT] [key [key ...]]
positional arguments:
key keys of scores to plot, the default will be AvgCost
optional arguments:
-h, --help show this help message and exit
-i INPUT, --input INPUT
input filename of paddle log, default will be standard
input
-o OUTPUT, --output OUTPUT
output filename of figure, default will be standard
output
--format FORMAT figure format(png|pdf|ps|eps|svg)
The keys must be given in the same order as they appear in the paddle output.
For example, paddle.INFO contains the following log line:
I0406 21:26:21.325584 3832 Trainer.cpp:601] Pass=0 Batch=7771 AvgCost=0.624935 Eval: error=0.260972
To use this script to generate plot for AvgCost, error:
python plotcurve.py -i paddle.INFO -o figure.png AvgCost error
"""
import six
import sys
import matplotlib
# The following line is added immediately after `import matplotlib` and
# before importing pyplot, to ensure plotting works even under remote
# login (i.e. a headless display).
matplotlib.use('Agg')
from matplotlib import cm
import matplotlib.pyplot as pyplot
import numpy
import argparse
import re
import os
def plot_paddle_curve(keys, inputfile, outputfile, format='png',
show_fig=False):
"""Plot curves from paddle log and save to outputfile.
:param keys: a list of strings to be plotted, e.g. AvgCost
:param inputfile: a file object for input
    :param outputfile: a file object for output
    :param format: output image format, e.g. 'png'
    :param show_fig: whether to display the figure interactively
    :return: None
"""
pass_pattern = r"Pass=([0-9]*)"
test_pattern = r"Test samples=([0-9]*)"
if not keys:
keys = ['AvgCost']
for k in keys:
pass_pattern += r".*?%s=([0-9e\-\.]*)" % k
test_pattern += r".*?%s=([0-9e\-\.]*)" % k
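    # e.g. with keys=['AvgCost'], pass_pattern becomes
    # r"Pass=([0-9]*).*?AvgCost=([0-9e\-\.]*)" and matches log lines like
    # "Pass=0 Batch=7771 AvgCost=0.624935"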
data = []
test_data = []
compiled_pattern = re.compile(pass_pattern)
compiled_test_pattern = re.compile(test_pattern)
for line in inputfile:
found = compiled_pattern.search(line)
found_test = compiled_test_pattern.search(line)
if found:
data.append([float(x) for x in found.groups()])
if found_test:
test_data.append([float(x) for x in found_test.groups()])
x = numpy.array(data)
x_test = numpy.array(test_data)
if x.shape[0] <= 0:
sys.stderr.write("No data to plot. Exiting!\n")
return
m = len(keys) + 1
for i in six.moves.xrange(1, m):
pyplot.plot(
x[:, 0],
x[:, i],
color=cm.jet(1.0 * (i - 1) / (2 * m)),
label=keys[i - 1])
if (x_test.shape[0] > 0):
pyplot.plot(
                x_test[:, 0],  # use the test curve's own x-axis
x_test[:, i],
color=cm.jet(1.0 - 1.0 * (i - 1) / (2 * m)),
label="Test " + keys[i - 1])
pyplot.xlabel('number of epoch')
pyplot.legend(loc='best')
if show_fig:
pyplot.show()
    pyplot.savefig(outputfile, bbox_inches='tight', format=format)
pyplot.clf()
def main(argv):
"""
main method of plotting curves.
"""
cmdparser = argparse.ArgumentParser(
"Plot training and testing curves from paddle log file.")
cmdparser.add_argument(
'key', nargs='*', help='keys of scores to plot, the default is AvgCost')
cmdparser.add_argument(
'-i',
'--input',
help='input filename of paddle log, '
'default will be standard input')
cmdparser.add_argument(
'-o',
'--output',
help='output filename of figure, '
'default will be standard output')
cmdparser.add_argument('--format', help='figure format(png|pdf|ps|eps|svg)')
args = cmdparser.parse_args(argv)
keys = args.key
if args.input:
inputfile = open(args.input)
else:
inputfile = sys.stdin
format = args.format
if args.output:
outputfile = open(args.output, 'wb')
if not format:
            format = os.path.splitext(args.output)[1].lstrip('.')
if not format:
format = 'png'
else:
outputfile = sys.stdout
    plot_paddle_curve(keys, inputfile, outputfile, format)
    # only close streams opened by this script, not stdin/stdout
    if args.input:
        inputfile.close()
    if args.output:
        outputfile.close()
if __name__ == "__main__":
main(sys.argv[1:])
File mode changed from 100644 to 100755 (4 files)
requests>=2.20.0
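# Lines with a `; python_version...` suffix use PEP 508 environment markers:
# pip applies the requirement only when the running interpreter matches.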
numpy>=1.12, <=1.16.4 ; python_version<"3.5"
numpy>=1.12 ; python_version>="3.5"
protobuf>=3.1.0
matplotlib<=2.2.4 ; python_version<"3.6"
scipy>=0.19.0, <=1.2.1 ; python_version<"3.5"
nltk>=3.2.2, <=3.4 ; python_version<"3.5"
matplotlib ; python_version>="3.6"
scipy ; python_version>="3.5"
nltk ; python_version>="3.5"
rarfile
Pillow
graphviz
six
funcsigs
pyyaml
decorator
prettytable
objgraph
\ No newline at end of file
......@@ -120,15 +120,7 @@ packages=['paddle',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.reader',
'paddle.fluid.contrib.slim',
'paddle.fluid.contrib.slim.core',
'paddle.fluid.contrib.slim.graph',
'paddle.fluid.contrib.slim.prune',
'paddle.fluid.contrib.slim.quantization',
'paddle.fluid.contrib.slim.distillation',
'paddle.fluid.contrib.slim.nas',
'paddle.fluid.contrib.slim.searcher',
'paddle.fluid.contrib.utils',
'paddle.fluid.contrib.extend_optimizer',
'paddle.fluid.contrib.mixed_precision',
'paddle.fluid.contrib.layers',
......