diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index ea6aebd291eee580a307aa112117434fa942005e..600ab14d37aad6b95516c5bd6551d12165596f57 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -51,7 +51,7 @@ math_library(pooling) math_library(selected_rows_functor DEPS selected_rows math_function blas) math_library(sequence2batch) math_library(sequence_padding) -math_library(sequence_pooling DEPS math_function) +math_library(sequence_pooling DEPS math_function jit_kernel_helper) math_library(sequence_scale) math_library(softmax DEPS math_function) diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index 6d491dbf1ed162ef07fda4c07e95cc57108486fd..23dc516933b2e094d3dc74030a35513b9fbef607 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include +#include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_pooling.h" @@ -239,15 +240,33 @@ class SequencePoolFunctor { last_pool(context, input, output); return; } - if (pooltype == "FIRST") { math::FirstSeqPoolFunctor first_pool; first_pool(context, input, output); return; } + auto lod = input.lod()[0]; + if (pooltype == "SUM") { + auto place = context.GetPlace(); + PADDLE_ENFORCE(platform::is_cpu_place(place)); + const T* src = input.data(); + T* dst = output->mutable_data(place); + jit::seq_pool_attr_t attr; + attr.w = input.numel() / input.dims()[0]; + attr.type = jit::SeqPoolType::sum; + auto seqpool = + jit::Get, platform::CPUPlace>( + attr); + for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { + attr.h = static_cast(lod[i + 1] - lod[i]); + seqpool(src, dst, &attr); + dst += attr.w; + src += attr.h * attr.w; + } + return; + } auto& place = *context.eigen_device(); - auto blas = math::GetBlas(context); for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { Tensor in_t = input.Slice(static_cast(lod[i]), static_cast(lod[i + 1])); @@ -258,15 +277,6 @@ class SequencePoolFunctor { auto out_e = EigenVector::Flatten(out_t); if (pooltype == "AVERAGE") { out_e.device(place) = in_e.mean(Eigen::array({{0}})); - } else if (pooltype == "SUM") { - if (h > 0) { - const T* in_data = in_t.data(); - T* out_data = out_t.mutable_data(context.GetPlace()); - blas.VCOPY(w, in_data, out_data); - for (int64_t r = 1; r != h; ++r) { - blas.AXPY(w, 1., in_data + r * w, out_data); - } - } } else if (pooltype == "SQRT") { out_e.device(place) = in_e.sum(Eigen::array({{0}})) / std::sqrt(static_cast(h));