未验证 提交 5d29a5bf 编写于 作者: G Guanghua Yu 提交者: GitHub

fix unittest in post training quantization (#49257)

上级 11c7f570
...@@ -77,13 +77,14 @@ def process_image(sample, mode, color_jitter, rotate): ...@@ -77,13 +77,14 @@ def process_image(sample, mode, color_jitter, rotate):
return img, sample[1] return img, sample[1]
def _reader_creator(file_list, def _reader_creator(
mode, file_list,
shuffle=False, mode,
color_jitter=False, shuffle=False,
rotate=False, color_jitter=False,
data_dir=DATA_DIR): rotate=False,
data_dir=DATA_DIR,
):
def reader(): def reader():
with open(file_list) as flist: with open(file_list) as flist:
full_lines = [line.strip() for line in flist] full_lines = [line.strip() for line in flist]
...@@ -98,10 +99,9 @@ def _reader_creator(file_list, ...@@ -98,10 +99,9 @@ def _reader_creator(file_list,
continue continue
yield img_path, int(label) yield img_path, int(label)
mapper = functools.partial(process_image, mapper = functools.partial(
mode=mode, process_image, mode=mode, color_jitter=color_jitter, rotate=rotate
color_jitter=color_jitter, )
rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
...@@ -112,11 +112,11 @@ def val(data_dir=DATA_DIR): ...@@ -112,11 +112,11 @@ def val(data_dir=DATA_DIR):
class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self): def setUp(self):
self.int8_download = 'int8/download' self.int8_download = 'int8/download'
self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + self.cache_folder = os.path.expanduser(
self.int8_download) '~/.cache/paddle/dataset/' + self.int8_download
)
self.data_cache_folder = '' self.data_cache_folder = ''
data_urls = [] data_urls = []
data_md5s = [] data_md5s = []
...@@ -129,31 +129,34 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -129,31 +129,34 @@ class TestPostTrainingQuantization(unittest.TestCase):
'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab' 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
) )
data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5') data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
self.data_cache_folder = self.download_data(data_urls, data_md5s, self.data_cache_folder = self.download_data(
"full_data", False) data_urls, data_md5s, "full_data", False
)
else: else:
data_urls.append( data_urls.append(
'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz' 'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
) )
data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d') data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d')
self.data_cache_folder = self.download_data(data_urls, data_md5s, self.data_cache_folder = self.download_data(
"small_data", False) data_urls, data_md5s, "small_data", False
)
# reader/decorator.py requires the relative path to the data folder # reader/decorator.py requires the relative path to the data folder
if not os.path.exists("./data/ILSVRC2012"): if not os.path.exists("./data/ILSVRC2012"):
cmd = 'rm -rf {0} && ln -s {1} {0}'.format("data", cmd = 'rm -rf {0} && ln -s {1} {0}'.format(
self.data_cache_folder) "data", self.data_cache_folder
)
os.system(cmd) os.system(cmd)
self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50 self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50
self.sample_iterations = 50 if os.environ.get( self.infer_iterations = (
'DATASET') == 'full' else 2 50000 if os.environ.get('DATASET') == 'full' else 2
self.infer_iterations = 50000 if os.environ.get( )
'DATASET') == 'full' else 2
self.root_path = tempfile.TemporaryDirectory() self.root_path = tempfile.TemporaryDirectory()
self.int8_model = os.path.join(self.root_path.name, self.int8_model = os.path.join(
"post_training_quantization") self.root_path.name, "post_training_quantization"
)
def tearDown(self): def tearDown(self):
self.root_path.cleanup() self.root_path.cleanup()
...@@ -161,7 +164,8 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -161,7 +164,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
def cache_unzipping(self, target_folder, zip_path): def cache_unzipping(self, target_folder, zip_path):
if not os.path.exists(target_folder): if not os.path.exists(target_folder):
cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(
target_folder, zip_path) target_folder, zip_path
)
os.system(cmd) os.system(cmd)
def download_data(self, data_urls, data_md5s, folder_name, is_model=True): def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
...@@ -173,13 +177,15 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -173,13 +177,15 @@ class TestPostTrainingQuantization(unittest.TestCase):
download(data_urls[i], self.int8_download, data_md5s[i]) download(data_urls[i], self.int8_download, data_md5s[i])
file_names.append(data_urls[i].split('/')[-1]) file_names.append(data_urls[i].split('/')[-1])
zip_path = os.path.join(self.cache_folder, zip_path = os.path.join(
'full_imagenet_val.tar.gz') self.cache_folder, 'full_imagenet_val.tar.gz'
)
if not os.path.exists(zip_path): if not os.path.exists(zip_path):
cat_command = 'cat' cat_command = 'cat'
for file_name in file_names: for file_name in file_names:
cat_command += ' ' + os.path.join(self.cache_folder, cat_command += ' ' + os.path.join(
file_name) self.cache_folder, file_name
)
cat_command += ' > ' + zip_path cat_command += ' > ' + zip_path
os.system(cat_command) os.system(cat_command)
...@@ -199,8 +205,16 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -199,8 +205,16 @@ class TestPostTrainingQuantization(unittest.TestCase):
image_shape = [3, 224, 224] image_shape = [3, 224, 224]
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
[infer_program, feed_dict, fetch_targets] = \ [
fluid.io.load_inference_model(model_path, exe) infer_program,
feed_dict,
fetch_targets,
] = fluid.io.load_inference_model(
model_path,
exe,
model_filename="inference.pdmodel",
params_filename="inference.pdiparams",
)
val_reader = paddle.batch(val(), batch_size) val_reader = paddle.batch(val(), batch_size)
iterations = infer_iterations iterations = infer_iterations
...@@ -208,23 +222,28 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -208,23 +222,28 @@ class TestPostTrainingQuantization(unittest.TestCase):
cnt = 0 cnt = 0
periods = [] periods = []
for batch_id, data in enumerate(val_reader()): for batch_id, data in enumerate(val_reader()):
image = np.array([x[0].reshape(image_shape) image = np.array([x[0].reshape(image_shape) for x in data]).astype(
for x in data]).astype("float32") "float32"
)
label = np.array([x[1] for x in data]).astype("int64") label = np.array([x[1] for x in data]).astype("int64")
label = label.reshape([-1, 1]) label = label.reshape([-1, 1])
t1 = time.time() t1 = time.time()
_, acc1, _ = exe.run(infer_program, pred = exe.run(
feed={ infer_program,
feed_dict[0]: image, feed={feed_dict[0]: image},
feed_dict[1]: label fetch_list=fetch_targets,
}, )
fetch_list=fetch_targets)
t2 = time.time() t2 = time.time()
period = t2 - t1 period = t2 - t1
periods.append(period) periods.append(period)
test_info.append(np.mean(acc1) * len(data)) pred = np.array(pred[0])
sort_array = pred.argsort(axis=1)
top_1_pred = sort_array[:, -1:][:, ::-1]
top_1 = np.mean(label == top_1_pred)
test_info.append(np.mean(top_1) * len(data))
cnt += len(data) cnt += len(data)
if (batch_id + 1) % 100 == 0: if (batch_id + 1) % 100 == 0:
...@@ -238,22 +257,25 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -238,22 +257,25 @@ class TestPostTrainingQuantization(unittest.TestCase):
acc1 = np.sum(test_info) / cnt acc1 = np.sum(test_info) / cnt
return (throughput, latency, acc1) return (throughput, latency, acc1)
def generate_quantized_model(self, def generate_quantized_model(
model_path, self,
quantizable_op_type, model_path,
batch_size, quantizable_op_type,
algo="KL", batch_size,
round_type="round", algo="KL",
is_full_quantize=False, round_type="round",
is_use_cache_file=False, is_full_quantize=False,
is_optimize_model=False, is_use_cache_file=False,
batch_nums=10, is_optimize_model=False,
onnx_format=False): batch_nums=10,
onnx_format=False,
):
try: try:
os.system("mkdir " + self.int8_model) os.system("mkdir " + self.int8_model)
except Exception as e: except Exception as e:
print("Failed to create {} due to {}".format( print(
self.int8_model, str(e))) "Failed to create {} due to {}".format(self.int8_model, str(e))
)
sys.exit(-1) sys.exit(-1)
place = fluid.CPUPlace() place = fluid.CPUPlace()
...@@ -261,70 +283,98 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -261,70 +283,98 @@ class TestPostTrainingQuantization(unittest.TestCase):
scope = fluid.global_scope() scope = fluid.global_scope()
val_reader = val() val_reader = val()
ptq = PostTrainingQuantization(executor=exe, ptq = PostTrainingQuantization(
sample_generator=val_reader, executor=exe,
model_dir=model_path, sample_generator=val_reader,
batch_size=batch_size, model_dir=model_path,
batch_nums=batch_nums, model_filename="inference.pdmodel",
algo=algo, params_filename="inference.pdiparams",
quantizable_op_type=quantizable_op_type, batch_size=batch_size,
round_type=round_type, batch_nums=batch_nums,
is_full_quantize=is_full_quantize, algo=algo,
optimize_model=is_optimize_model, quantizable_op_type=quantizable_op_type,
onnx_format=onnx_format, round_type=round_type,
is_use_cache_file=is_use_cache_file) is_full_quantize=is_full_quantize,
optimize_model=is_optimize_model,
onnx_format=onnx_format,
is_use_cache_file=is_use_cache_file,
)
ptq.quantize() ptq.quantize()
ptq.save_quantized_model(self.int8_model) ptq.save_quantized_model(
self.int8_model,
def run_test(self, model_filename="inference.pdmodel",
model, params_filename="inference.pdiparams",
algo, )
round_type,
data_urls, def run_test(
data_md5s, self,
quantizable_op_type, model,
is_full_quantize, algo,
is_use_cache_file, round_type,
is_optimize_model, data_urls,
diff_threshold, data_md5s,
onnx_format=False, quantizable_op_type,
batch_nums=10): is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
onnx_format=False,
batch_nums=10,
):
infer_iterations = self.infer_iterations infer_iterations = self.infer_iterations
batch_size = self.batch_size batch_size = self.batch_size
sample_iterations = self.sample_iterations
model_cache_folder = self.download_data(data_urls, data_md5s, model) model_cache_folder = self.download_data(data_urls, data_md5s, model)
print("Start FP32 inference for {0} on {1} images ...".format( print(
model, infer_iterations * batch_size)) "Start FP32 inference for {0} on {1} images ...".format(
model, infer_iterations * batch_size
)
)
(fp32_throughput, fp32_latency, fp32_acc1) = self.run_program( (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program(
os.path.join(model_cache_folder, "model"), batch_size, os.path.join(model_cache_folder, "MobileNetV1_infer"),
infer_iterations) batch_size,
infer_iterations,
print("Start INT8 post training quantization for {0} on {1} images ...". )
format(model, sample_iterations * batch_size))
self.generate_quantized_model(os.path.join(model_cache_folder, "model"), print(
quantizable_op_type, batch_size, "Start INT8 post training quantization for {0} on {1} images ...".format(
sample_iterations, algo, round_type, model, batch_nums * batch_size
is_full_quantize, is_use_cache_file, )
is_optimize_model, batch_nums, )
onnx_format) self.generate_quantized_model(
os.path.join(model_cache_folder, "MobileNetV1_infer"),
print("Start INT8 inference for {0} on {1} images ...".format( quantizable_op_type,
model, infer_iterations * batch_size)) batch_size,
(int8_throughput, int8_latency, algo,
int8_acc1) = self.run_program(self.int8_model, batch_size, round_type,
infer_iterations) is_full_quantize,
is_use_cache_file,
is_optimize_model,
batch_nums,
onnx_format,
)
print(
"Start INT8 inference for {0} on {1} images ...".format(
model, infer_iterations * batch_size
)
)
(int8_throughput, int8_latency, int8_acc1) = self.run_program(
self.int8_model, batch_size, infer_iterations
)
print("---Post training quantization of {} method---".format(algo)) print("---Post training quantization of {} method---".format(algo))
print( print(
"FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}." "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.".format(
.format(model, batch_size, fp32_throughput, fp32_latency, model, batch_size, fp32_throughput, fp32_latency, fp32_acc1
fp32_acc1)) )
)
print( print(
"INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n" "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n".format(
.format(model, batch_size, int8_throughput, int8_latency, model, batch_size, int8_throughput, int8_latency, int8_acc1
int8_acc1)) )
)
sys.stdout.flush() sys.stdout.flush()
delta_value = fp32_acc1 - int8_acc1 delta_value = fp32_acc1 - int8_acc1
...@@ -332,15 +382,14 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -332,15 +382,14 @@ class TestPostTrainingQuantization(unittest.TestCase):
class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_kl_mobilenetv1(self): def test_post_training_kl_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "KL" algo = "KL"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -351,21 +400,30 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization): ...@@ -351,21 +400,30 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
is_use_cache_file = False is_use_cache_file = False
is_optimize_model = True is_optimize_model = True
diff_threshold = 0.025 diff_threshold = 0.025
self.run_test(model, algo, round_type, data_urls, data_md5s, batch_nums = 3
quantizable_op_type, is_full_quantize, is_use_cache_file, self.run_test(
is_optimize_model, diff_threshold) model,
algo,
round_type,
data_urls,
data_md5s,
quantizable_op_type,
is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
)
class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_avg_mobilenetv1(self): def test_post_training_avg_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "avg" algo = "avg"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -375,21 +433,29 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): ...@@ -375,21 +433,29 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
is_use_cache_file = False is_use_cache_file = False
is_optimize_model = True is_optimize_model = True
diff_threshold = 0.025 diff_threshold = 0.025
self.run_test(model, algo, round_type, data_urls, data_md5s, self.run_test(
quantizable_op_type, is_full_quantize, is_use_cache_file, model,
is_optimize_model, diff_threshold) algo,
round_type,
data_urls,
data_md5s,
quantizable_op_type,
is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
)
class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_hist_mobilenetv1(self): def test_post_training_hist_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "hist" algo = "hist"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -400,29 +466,30 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): ...@@ -400,29 +466,30 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
is_optimize_model = True is_optimize_model = True
diff_threshold = 0.03 diff_threshold = 0.03
batch_nums = 3 batch_nums = 3
self.run_test(model, self.run_test(
algo, model,
round_type, algo,
data_urls, round_type,
data_md5s, data_urls,
quantizable_op_type, data_md5s,
is_full_quantize, quantizable_op_type,
is_use_cache_file, is_full_quantize,
is_optimize_model, is_use_cache_file,
diff_threshold, is_optimize_model,
batch_nums=batch_nums) diff_threshold,
batch_nums=batch_nums,
)
class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_abs_max_mobilenetv1(self): def test_post_training_abs_max_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "abs_max" algo = "abs_max"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"mul", "mul",
...@@ -432,21 +499,29 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): ...@@ -432,21 +499,29 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
is_optimize_model = False is_optimize_model = False
# The accuracy diff of post-training quantization (abs_max) may be bigger # The accuracy diff of post-training quantization (abs_max) may be bigger
diff_threshold = 0.05 diff_threshold = 0.05
self.run_test(model, algo, round_type, data_urls, data_md5s, self.run_test(
quantizable_op_type, is_full_quantize, is_use_cache_file, model,
is_optimize_model, diff_threshold) algo,
round_type,
data_urls,
data_md5s,
quantizable_op_type,
is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
)
class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_onnx_format_mobilenetv1(self): def test_post_training_onnx_format_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "emd" algo = "emd"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -458,18 +533,20 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization): ...@@ -458,18 +533,20 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
onnx_format = True onnx_format = True
diff_threshold = 0.05 diff_threshold = 0.05
batch_nums = 3 batch_nums = 3
self.run_test(model, self.run_test(
algo, model,
round_type, algo,
data_urls, round_type,
data_md5s, data_urls,
quantizable_op_type, data_md5s,
is_full_quantize, quantizable_op_type,
is_use_cache_file, is_full_quantize,
is_optimize_model, is_use_cache_file,
diff_threshold, is_optimize_model,
onnx_format=onnx_format, diff_threshold,
batch_nums=batch_nums) onnx_format=onnx_format,
batch_nums=batch_nums,
)
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册