未验证 提交 ff16c478 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #14671 from luotao1/box_coder

Speed up box_coder_op for multi-threaded execution
...@@ -43,6 +43,9 @@ class BoxCoderKernel : public framework::OpKernel<T> { ...@@ -43,6 +43,9 @@ class BoxCoderKernel : public framework::OpKernel<T> {
const T* prior_box_var_data = nullptr; const T* prior_box_var_data = nullptr;
if (prior_box_var) prior_box_var_data = prior_box_var->data<T>(); if (prior_box_var) prior_box_var_data = prior_box_var->data<T>();
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
for (int64_t i = 0; i < row; ++i) { for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) { for (int64_t j = 0; j < col; ++j) {
T prior_box_width = prior_box_data[j * len + 2] - T prior_box_width = prior_box_data[j * len + 2] -
...@@ -96,6 +99,9 @@ class BoxCoderKernel : public framework::OpKernel<T> { ...@@ -96,6 +99,9 @@ class BoxCoderKernel : public framework::OpKernel<T> {
const T* prior_box_var_data = nullptr; const T* prior_box_var_data = nullptr;
if (prior_box_var) prior_box_var_data = prior_box_var->data<T>(); if (prior_box_var) prior_box_var_data = prior_box_var->data<T>();
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
for (int64_t i = 0; i < row; ++i) { for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) { for (int64_t j = 0; j < col; ++j) {
size_t offset = i * col * len + j * len; size_t offset = i * col * len + j * len;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册