From 51bac34750e996b68e78fc562fc2a71a8e1ea39b Mon Sep 17 00:00:00 2001
From: Sylwester Fraczek
Date: Sat, 15 Jun 2019 17:01:22 +0200
Subject: [PATCH] [cherry-pick to release/1.5] slim threading fix (#18119)

* fix multithreading issue

test=develop

* review fixes

test=develop

* review fix: omp->cpu, inference_api.cc->pybind.cc

test=release/1.5
---
 paddle/fluid/pybind/pybind.cc                 | 19 +++++++++++--------
 .../fluid/contrib/slim/tests/CMakeLists.txt   |  2 +-
 .../test_mkldnn_int8_quantization_strategy.py | 10 ++++++----
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 1f9c5a679b5..b0030d010f9 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -44,6 +44,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/py_func_op.h"
 #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
+#include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/init.h"
@@ -164,6 +165,8 @@ PYBIND11_MODULE(core_noavx, m) {
 
   BindException(&m);
 
+  m.def("set_num_threads", &platform::SetNumThreads);
+
   m.def(
       "_append_python_callable_object_and_return_id",
       [](py::object py_obj) -> size_t {
@@ -283,8 +286,8 @@ PYBIND11_MODULE(core_noavx, m) {
     LoD is short for Level of Details and is usually used for varied sequence
     length. You can skip the following comment if you don't need optional LoD.
 
-    For example, a LoDTensor X can look like the example below. It contains 
-    2 sequences. The first has length 2 and the second has length 3, as 
+    For example, a LoDTensor X can look like the example below. It contains
+    2 sequences. The first has length 2 and the second has length 3, as
     described by x.lod.
 
     The first tensor dimension 5=2+3 is calculated from LoD if it's available.
@@ -292,7 +295,7 @@
     columns, hence [5, 2].
 
       x.lod  = [[2, 3]]
-      
+
       x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
 
       x.shape = [5, 2]
@@ -1002,7 +1005,7 @@ All parameter, weight, gradient are variables in Paddle.
 
           Examples:
             .. code-block:: python
-              
+
               import paddle.fluid as fluid
 
               arr = fluid.LoDTensorArray()
@@ -1482,14 +1485,14 @@ All parameter, weight, gradient are variables in Paddle.
           "memory_optimize",
           [](const BuildStrategy &self) { return self.memory_optimize_; },
           [](BuildStrategy &self, bool b) { self.memory_optimize_ = b; },
-          R"DOC(The type is BOOL, memory opitimize aims to save total memory 
+          R"DOC(The type is BOOL, memory opitimize aims to save total memory
           consumption, set to True to enable it.
-          
-          Memory Optimize is our experimental feature, some variables 
+
+          Memory Optimize is our experimental feature, some variables
           may be reused/removed by optimize strategy. If you need to
           fetch some variable values when using this feature, please
           set the persistable property of the variables to True.
-          
+
           Default False)DOC")
       .def_property(
           "is_distribution",
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index 23607d5052c..e61e93da3f0 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -3,7 +3,7 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 
 function(inference_analysis_python_api_int8_test target model_dir data_dir filename)
     py_test(${target} SRCS ${filename}
-            ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
+            ENVS CPU_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
             ARGS --infer_model ${model_dir}/model
                  --infer_data ${data_dir}/data.bin
                  --int8_model_save_path int8_models/${target}
diff --git a/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py b/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
index 01c8f893faf..f1ebb8ae72f 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
@@ -84,8 +84,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
             while step < num:
                 fp.seek(imgs_offset + img_size * step)
                 img = fp.read(img_size)
-                img = struct.unpack_from('{}f'.format(img_ch * img_w *
-                                                      img_h), img)
+                img = struct.unpack_from(
+                    '{}f'.format(img_ch * img_w * img_h), img)
                 img = np.array(img)
                 img.shape = (img_ch, img_w, img_h)
                 fp.seek(labels_offset + label_size * step)
@@ -137,12 +137,14 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
             images = np.array(images).astype('float32')
             labels = np.array([x[1] for x in data]).astype("int64")
             labels = labels.reshape([-1, 1])
+            fluid.core.set_num_threads(int(os.environ['CPU_NUM_THREADS']))
             out = exe.run(inference_program,
                           feed={
                               feed_target_names[0]: images,
                               feed_target_names[1]: labels
                           },
                           fetch_list=fetch_targets)
+            fluid.core.set_num_threads(1)
             top1 += np.sum(out[1]) * len(data)
             top5 += np.sum(out[2]) * len(data)
             total_samples += len(data)
@@ -183,8 +185,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
         accuracy_diff_threshold = test_case_args.accuracy_diff_threshold
 
         _logger.info(
-            'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'.
-            format(batch_size, warmup_batch_size))
+            'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'
+            .format(batch_size, warmup_batch_size))
 
         #warmup dataset, only use the first batch data
         warmup_reader = paddle.batch(
-- 
GitLab
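
A minimal usage sketch of the set_num_threads binding this patch exposes,
mirroring the updated test: pin the intra-op thread count from the
CPU_NUM_THREADS environment variable (which the CMake rule now sets on CI)
before inference, then restore single-threaded execution afterwards. The
fallback value of 1 when the variable is unset is an assumption for local
runs, not something this patch defines.

    import os

    import paddle.fluid as fluid

    # CPU_NUM_THREADS comes from the updated CMake rule on CI; fall back to
    # a single thread locally (assumed default, not part of the patch).
    num_threads = int(os.environ.get('CPU_NUM_THREADS', '1'))

    fluid.core.set_num_threads(num_threads)  # binding added by this patch
    # ... run inference here, e.g. out = exe.run(...) ...
    fluid.core.set_num_threads(1)  # restore single-threaded execution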