Commit 6181acca authored by wangliu

add interface to dynamically set the OMP thread num

Parent 8747dd62
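In short, thread-count control moves from Executor onto the public PaddleMobile facade. Below is a minimal usage sketch distilled from the updated test at the end of this diff; the include path and the g_googlenet / GetInput / g_test_image_1x3x224x224 fixtures are assumptions taken from the repo's test headers, not part of this commit.

    // Minimal sketch of the new interface; fixture names are assumptions.
    #include <cstdint>
    #include <vector>
    #include "io/paddle_mobile.h"  // assumed header location

    int main() {
      paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
      // Forwards to omp_set_num_threads when built with _OPENMP defined.
      paddle_mobile.SetThreadNum(4);
      if (paddle_mobile.Load(g_googlenet, /*optimize=*/true)) {
        std::vector<float> input;
        std::vector<int64_t> dims{1, 3, 224, 224};
        GetInput<float>(g_test_image_1x3x224x224, &input, dims);
        auto result = paddle_mobile.Predict(input, dims);
      }
      return 0;
    }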
@@ -26,9 +26,6 @@ limitations under the License. */
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
 #include "framework/tensor.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif  // _OPENMP
 #ifdef PADDLE_EXECUTOR_MULTITHREAD
 #include <queue>
 #include <utility>
@@ -407,14 +404,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
   return result_vector;
 }

-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::SetThreadNum(int num) {
-#ifdef _OPENMP
-  //  omp_set_dynamic(0);
-  omp_set_num_threads(num);
-#endif
-}
-
 template class Executor<CPU, Precision::FP32>;
 template class Executor<FPGA, Precision::FP32>;
 template class Executor<GPU_MALI, Precision::FP32>;
......
@@ -58,8 +58,6 @@ class Executor {
   std::vector<Ptype> Predict(const std::vector<Ptype> &input,
                              const std::vector<int64_t> &dims);

-  void SetThreadNum(int num);
-
 protected:
   Executor() = default;
   void InitMemory();
......
@@ -16,6 +16,14 @@ limitations under the License. */
 namespace paddle_mobile {

+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
+#ifdef _OPENMP
+  //  omp_set_dynamic(0);
+  omp_set_num_threads(num);
+#endif
+};
+
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
                                   int batch_size) {
@@ -81,7 +89,9 @@ PaddleMobile<Dtype, P>::~PaddleMobile() {
 }

 template class PaddleMobile<CPU, Precision::FP32>;
 template class PaddleMobile<FPGA, Precision::FP32>;
 template class PaddleMobile<GPU_MALI, Precision::FP32>;

 }  // namespace paddle_mobile
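The implementation simply forwards to OpenMP: omp_set_num_threads sets the team size requested for subsequent parallel regions, while the commented-out omp_set_dynamic(0) would additionally stop the runtime from choosing a smaller team on its own. A self-contained sketch of that behavior, independent of paddle_mobile:

    // Standalone OpenMP demo; compile with -fopenmp.
    #include <cstdio>
    #ifdef _OPENMP
    #include <omp.h>
    #endif

    int main() {
    #ifdef _OPENMP
      omp_set_dynamic(0);      // forbid the runtime from shrinking the team
      omp_set_num_threads(4);  // request 4 threads for later parallel regions
    #pragma omp parallel
      {
    #pragma omp master
        // Typically prints 4, given at least 4 hardware threads.
        printf("team size: %d\n", omp_get_num_threads());
      }
      printf("max threads: %d\n", omp_get_max_threads());
    #else
      printf("built without OpenMP; running single-threaded\n");
    #endif
      return 0;
    }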
@@ -17,6 +17,9 @@ limitations under the License. */
 #include <memory>
 #include <string>
 #include <vector>
+#ifdef _OPENMP
+#include <omp.h>
+#endif  // _OPENMP

 #include "common/types.h"
 #include "framework/tensor.h"
@@ -44,6 +47,7 @@ class PaddleMobile {
    * */
   bool Load(const std::string &model_path, const std::string &para_path,
             bool optimize = false, int batch_size = 1);
+  void SetThreadNum(int num);

   /*
    * @b to predict
......
@@ -17,26 +17,21 @@ limitations under the License. */
 #include "../test_include.h"

 int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   bool optimize = true;
   auto time1 = time();
-  //  auto program = loader.Load(g_googlenet, optimize);
-  auto program = loader.Load(g_googlenet_combine + "/model",
-                             g_googlenet_combine + "/params", optimize);
-  auto time2 = time();
-  DLOG << "load cost :" << time_diff(time1, time2) << "ms\n";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, optimize);
-  executor.SetThreadNum(4);
-  std::vector<float> input;
-  std::vector<int64_t> dims{1, 3, 224, 224};
-  GetInput<float>(g_test_image_1x3x224x224, &input, dims);
-  auto time3 = time();
-  int count = 1;
-  for (int i = 0; i < count; ++i) {
-    executor.Predict(input, dims);
-  }
-  auto time4 = time();
-  DLOG << "predict cost :" << time_diff(time3, time4) / count << "ms\n";
+  if (paddle_mobile.Load(g_googlenet, optimize)) {
+    auto time2 = time();
+    DLOG << "load cost :" << time_diff(time1, time2) << "ms";
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+    auto time3 = time();
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    auto time4 = time();
+    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
   return 0;
 }
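The time() and time_diff() helpers used above come from the test headers and are not part of this diff; a plausible std::chrono sketch of what they do (names and the millisecond unit are inferred from the DLOG output, so treat this as hypothetical):

    #include <chrono>

    // Hypothetical equivalents of the test helpers; the real definitions
    // live in the repo's test headers.
    using Time = std::chrono::steady_clock::time_point;

    inline Time time() { return std::chrono::steady_clock::now(); }

    inline double time_diff(Time t1, Time t2) {
      // Elapsed time in milliseconds, as logged by the DLOG lines above.
      return std::chrono::duration<double, std::milli>(t2 - t1).count();
    }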