提交 accb132f 编写于 作者: Sylwester Fraczek 提交者: Tao Luo

fix slim int8 mkldnn multithreading issue (#18009)

上级 5c3cbb58
...@@ -44,6 +44,7 @@ limitations under the License. */ ...@@ -44,6 +44,7 @@ limitations under the License. */
#include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h" #include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/init.h"
...@@ -164,6 +165,8 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -164,6 +165,8 @@ PYBIND11_MODULE(core_noavx, m) {
BindException(&m); BindException(&m);
m.def("set_num_threads", &platform::SetNumThreads);
m.def( m.def(
"_append_python_callable_object_and_return_id", "_append_python_callable_object_and_return_id",
[](py::object py_obj) -> size_t { [](py::object py_obj) -> size_t {
...@@ -283,8 +286,8 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -283,8 +286,8 @@ PYBIND11_MODULE(core_noavx, m) {
LoD is short for Level of Details and is usually used for varied sequence LoD is short for Level of Details and is usually used for varied sequence
length. You can skip the following comment if you don't need optional LoD. length. You can skip the following comment if you don't need optional LoD.
For example, a LoDTensor X can look like the example below. It contains For example, a LoDTensor X can look like the example below. It contains
2 sequences. The first has length 2 and the second has length 3, as 2 sequences. The first has length 2 and the second has length 3, as
described by x.lod. described by x.lod.
The first tensor dimension 5=2+3 is calculated from LoD if it's available. The first tensor dimension 5=2+3 is calculated from LoD if it's available.
...@@ -292,7 +295,7 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -292,7 +295,7 @@ PYBIND11_MODULE(core_noavx, m) {
columns, hence [5, 2]. columns, hence [5, 2].
x.lod = [[2, 3]] x.lod = [[2, 3]]
x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
x.shape = [5, 2] x.shape = [5, 2]
...@@ -1002,7 +1005,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -1002,7 +1005,7 @@ All parameter, weight, gradient are variables in Paddle.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
arr = fluid.LoDTensorArray() arr = fluid.LoDTensorArray()
...@@ -1482,14 +1485,14 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -1482,14 +1485,14 @@ All parameter, weight, gradient are variables in Paddle.
"memory_optimize", "memory_optimize",
[](const BuildStrategy &self) { return self.memory_optimize_; }, [](const BuildStrategy &self) { return self.memory_optimize_; },
[](BuildStrategy &self, bool b) { self.memory_optimize_ = b; }, [](BuildStrategy &self, bool b) { self.memory_optimize_ = b; },
R"DOC(The type is BOOL, memory opitimize aims to save total memory R"DOC(The type is BOOL, memory opitimize aims to save total memory
consumption, set to True to enable it. consumption, set to True to enable it.
Memory Optimize is our experimental feature, some variables Memory Optimize is our experimental feature, some variables
may be reused/removed by optimize strategy. If you need to may be reused/removed by optimize strategy. If you need to
fetch some variable values when using this feature, please fetch some variable values when using this feature, please
set the persistable property of the variables to True. set the persistable property of the variables to True.
Default False)DOC") Default False)DOC")
.def_property( .def_property(
"is_distribution", "is_distribution",
......
...@@ -3,7 +3,7 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") ...@@ -3,7 +3,7 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
function(inference_analysis_python_api_int8_test target model_dir data_dir filename) function(inference_analysis_python_api_int8_test target model_dir data_dir filename)
py_test(${target} SRCS ${filename} py_test(${target} SRCS ${filename}
ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} ENVS CPU_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
ARGS --infer_model ${model_dir}/model ARGS --infer_model ${model_dir}/model
--infer_data ${data_dir}/data.bin --infer_data ${data_dir}/data.bin
--int8_model_save_path int8_models/${target} --int8_model_save_path int8_models/${target}
......
...@@ -84,8 +84,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase): ...@@ -84,8 +84,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
while step < num: while step < num:
fp.seek(imgs_offset + img_size * step) fp.seek(imgs_offset + img_size * step)
img = fp.read(img_size) img = fp.read(img_size)
img = struct.unpack_from('{}f'.format(img_ch * img_w * img = struct.unpack_from(
img_h), img) '{}f'.format(img_ch * img_w * img_h), img)
img = np.array(img) img = np.array(img)
img.shape = (img_ch, img_w, img_h) img.shape = (img_ch, img_w, img_h)
fp.seek(labels_offset + label_size * step) fp.seek(labels_offset + label_size * step)
...@@ -137,12 +137,14 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase): ...@@ -137,12 +137,14 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
images = np.array(images).astype('float32') images = np.array(images).astype('float32')
labels = np.array([x[1] for x in data]).astype("int64") labels = np.array([x[1] for x in data]).astype("int64")
labels = labels.reshape([-1, 1]) labels = labels.reshape([-1, 1])
fluid.core.set_num_threads(int(os.environ['CPU_NUM_THREADS']))
out = exe.run(inference_program, out = exe.run(inference_program,
feed={ feed={
feed_target_names[0]: images, feed_target_names[0]: images,
feed_target_names[1]: labels feed_target_names[1]: labels
}, },
fetch_list=fetch_targets) fetch_list=fetch_targets)
fluid.core.set_num_threads(1)
top1 += np.sum(out[1]) * len(data) top1 += np.sum(out[1]) * len(data)
top5 += np.sum(out[2]) * len(data) top5 += np.sum(out[2]) * len(data)
total_samples += len(data) total_samples += len(data)
...@@ -183,8 +185,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase): ...@@ -183,8 +185,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
accuracy_diff_threshold = test_case_args.accuracy_diff_threshold accuracy_diff_threshold = test_case_args.accuracy_diff_threshold
_logger.info( _logger.info(
'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'. 'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'
format(batch_size, warmup_batch_size)) .format(batch_size, warmup_batch_size))
#warmup dataset, only use the first batch data #warmup dataset, only use the first batch data
warmup_reader = paddle.batch( warmup_reader = paddle.batch(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册