Commit 51bac347 authored by Sylwester Fraczek, committed by Tao Luo

[cherry-pick to release/1.5] slim threading fix (#18119)

* fix multithreading issue

test=develop

* review fixes

test=develop

* review fix: omp->cpu, inference_api.cc->pybind.cc

test=release/1.5
Parent 31ef8c1c
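
The patch exposes platform::SetNumThreads to Python as fluid.core.set_num_threads and brackets the inference run in the quantization test with it, taking the thread count from the CPU_NUM_THREADS environment variable that the CMake change below now sets. A minimal sketch of that usage pattern (the model path, input shape, and the fallback default of 1 are illustrative assumptions, not part of the patch):

    import os
    import numpy as np
    import paddle.fluid as fluid

    # CI provides CPU_NUM_THREADS via py_test ENVS; falling back to 1 outside
    # CI is an assumption here (the test itself indexes os.environ directly).
    num_threads = int(os.environ.get('CPU_NUM_THREADS', '1'))

    exe = fluid.Executor(fluid.CPUPlace())
    # 'model' and the 1x3x224x224 input are hypothetical stand-ins.
    [program, feed_names, fetch_targets] = fluid.io.load_inference_model('model', exe)
    images = np.random.random((1, 3, 224, 224)).astype('float32')

    fluid.core.set_num_threads(num_threads)  # widen to the allotted threads
    out = exe.run(program, feed={feed_names[0]: images}, fetch_list=fetch_targets)
    fluid.core.set_num_threads(1)            # restore the single-threaded default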
@@ -44,6 +44,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/py_func_op.h"
 #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
+#include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/init.h"
@@ -164,6 +165,8 @@ PYBIND11_MODULE(core_noavx, m) {
   BindException(&m);

+  m.def("set_num_threads", &platform::SetNumThreads);
+
   m.def(
       "_append_python_callable_object_and_return_id",
       [](py::object py_obj) -> size_t {
@@ -283,8 +286,8 @@ PYBIND11_MODULE(core_noavx, m) {
         LoD is short for Level of Details and is usually used for varied sequence
         length. You can skip the following comment if you don't need optional LoD.

-        For example, a LoDTensor X can look like the example below. It contains
-        2 sequences. The first has length 2 and the second has length 3, as
+        For example, a LoDTensor X can look like the example below. It contains
+        2 sequences. The first has length 2 and the second has length 3, as
         described by x.lod.

         The first tensor dimension 5=2+3 is calculated from LoD if it's available.
@@ -292,7 +295,7 @@ PYBIND11_MODULE(core_noavx, m) {
         columns, hence [5, 2].

         x.lod = [[2, 3]]
         x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
         x.shape = [5, 2]
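
The LoDTensor described in the docstring above can be built directly from Python; a minimal sketch that reproduces the example X (assuming the 1.5-era fluid API, where set_recursive_sequence_lengths takes the length-based LoD):

    import numpy as np
    import paddle.fluid as fluid

    x = fluid.LoDTensor()
    x.set(np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], dtype='float32'),
          fluid.CPUPlace())
    x.set_recursive_sequence_lengths([[2, 3]])  # two sequences: lengths 2 and 3
    # x now has shape [5, 2]; the first dimension 5 = 2 + 3 follows from the LoD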
@@ -1002,7 +1005,7 @@ All parameter, weight, gradient are variables in Paddle.
         Examples:
             .. code-block:: python

               import paddle.fluid as fluid

               arr = fluid.LoDTensorArray()
@@ -1482,14 +1485,14 @@ All parameter, weight, gradient are variables in Paddle.
           "memory_optimize",
           [](const BuildStrategy &self) { return self.memory_optimize_; },
           [](BuildStrategy &self, bool b) { self.memory_optimize_ = b; },
-          R"DOC(The type is BOOL, memory opitimize aims to save total memory
+          R"DOC(The type is BOOL, memory opitimize aims to save total memory
                 consumption, set to True to enable it.

-                Memory Optimize is our experimental feature, some variables
+                Memory Optimize is our experimental feature, some variables
                 may be reused/removed by optimize strategy. If you need to
                 fetch some variable values when using this feature, please
                 set the persistable property of the variables to True.

                 Default False)DOC")
       .def_property(
           "is_distribution",
@@ -3,7 +3,7 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 function(inference_analysis_python_api_int8_test target model_dir data_dir filename)
     py_test(${target} SRCS ${filename}
-            ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
+            ENVS CPU_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
             ARGS --infer_model ${model_dir}/model
                  --infer_data ${data_dir}/data.bin
                  --int8_model_save_path int8_models/${target}
@@ -84,8 +84,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
             while step < num:
                 fp.seek(imgs_offset + img_size * step)
                 img = fp.read(img_size)
-                img = struct.unpack_from('{}f'.format(img_ch * img_w *
-                                                      img_h), img)
+                img = struct.unpack_from(
+                    '{}f'.format(img_ch * img_w * img_h), img)
                 img = np.array(img)
                 img.shape = (img_ch, img_w, img_h)
                 fp.seek(labels_offset + label_size * step)
@@ -137,12 +137,14 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
             images = np.array(images).astype('float32')
             labels = np.array([x[1] for x in data]).astype("int64")
             labels = labels.reshape([-1, 1])
+            fluid.core.set_num_threads(int(os.environ['CPU_NUM_THREADS']))
             out = exe.run(inference_program,
                           feed={
                               feed_target_names[0]: images,
                               feed_target_names[1]: labels
                           },
                           fetch_list=fetch_targets)
+            fluid.core.set_num_threads(1)
             top1 += np.sum(out[1]) * len(data)
             top5 += np.sum(out[2]) * len(data)
             total_samples += len(data)
@@ -183,8 +185,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
         accuracy_diff_threshold = test_case_args.accuracy_diff_threshold
         _logger.info(
-            'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'.
-            format(batch_size, warmup_batch_size))
+            'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'
+            .format(batch_size, warmup_batch_size))

         #warmup dataset, only use the first batch data
         warmup_reader = paddle.batch(