From b71abeee1b8cbfe5422441566ec6d6a43b22c4b9 Mon Sep 17 00:00:00 2001
From: Zhang Ting <709968123@qq.com>
Date: Fri, 1 May 2020 22:02:14 +0800
Subject: [PATCH] use 32 bit index to improve activation ops (#24206)

* improve activation ops performance, test=develop

* use 32bit only GPU computation, test=develop
---
 paddle/fluid/operators/activation_op.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index ec3c39097a..b3784ed074 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -37,6 +37,8 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+using framework::To32BitIndex;
+
 enum ActBwdOpFwdDeps {
   kNoDeps = 0x00,  // Do not need any forward input/output
   kDepX = 0x01,    // Only need forward input X
@@ -177,7 +179,14 @@ class ActivationKernel
     for (auto& attr : attrs) {
       *attr.second = context.Attr<float>(attr.first);
     }
-    functor(*place, x, out);
+    // use 32bit index to speed up computation
+    bool use_32bit_index = out.size() < Eigen::NumTraits<int>::highest();
+    bool is_gpu_place = platform::is_gpu_place(context.GetPlace());
+    if (use_32bit_index && is_gpu_place) {
+      functor(*place, To32BitIndex(x), To32BitIndex(out));
+    } else {
+      functor(*place, x, out);
+    }
   }
 };
 
@@ -208,7 +217,15 @@ class ActivationGradKernel
     for (auto& attr : attrs) {
       *attr.second = context.Attr<float>(attr.first);
     }
-    functor(*place, x, out, dout, dx);
+    // use 32bit index to speed up computation
+    bool use_32bit_index = out.size() < Eigen::NumTraits<int>::highest();
+    bool is_gpu_place = platform::is_gpu_place(context.GetPlace());
+    if (use_32bit_index && is_gpu_place) {
+      functor(*place, To32BitIndex(x), To32BitIndex(out), To32BitIndex(dout),
+              To32BitIndex(dx));
+    } else {
+      functor(*place, x, out, dout, dx);
+    }
   }
 };
 
-- 
GitLab