From 4cc7707d281931d254a377e29c5f9fe37a6a993a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 18 Dec 2018 14:05:19 +0000 Subject: [PATCH] add crf_decoding and layer norm intrinsic code --- .../fluid/operators/jit/more/CMakeLists.txt | 4 + .../jit/more/intrinsic/CMakeLists.txt | 9 ++ .../jit/more/intrinsic/crf_decoding.cc | 138 ++++++++++++++++++ .../jit/more/intrinsic/crf_decoding.h | 89 +++++++++++ paddle/fluid/operators/jit/more/more.h | 15 -- 5 files changed, 240 insertions(+), 15 deletions(-) create mode 100644 paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt create mode 100644 paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc create mode 100644 paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h delete mode 100644 paddle/fluid/operators/jit/more/more.h diff --git a/paddle/fluid/operators/jit/more/CMakeLists.txt b/paddle/fluid/operators/jit/more/CMakeLists.txt index 5bb78b930..a740d1a84 100644 --- a/paddle/fluid/operators/jit/more/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/CMakeLists.txt @@ -7,4 +7,8 @@ if(WITH_MKLML) add_subdirectory(mkl) endif() +if(WITH_AVX) + add_subdirectory(intrinsic) +endif() + set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} PARENT_SCOPE) diff --git a/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt b/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt new file mode 100644 index 000000000..c4a501386 --- /dev/null +++ b/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt @@ -0,0 +1,9 @@ + +file(GLOB jit_kernel_cc_intrinsic RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") +cc_library(jit_kernel_intrinsic SRCS ${jit_kernel_cc_intrinsic} DEPS jit_kernel_base) + +set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} jit_kernel_intrinsic PARENT_SCOPE) + +# use intrinsic kernels by name and type +USE_JITKERNEL_MORE(crfdecoding, intrinsic) +USE_JITKERNEL_MORE(layernorm, intrinsic) diff --git a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc new file mode 
100644 index 000000000..016fca386 --- /dev/null +++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc @@ -0,0 +1,138 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h" +#include "paddle/fluid/operators/jit/refer/refer.h" +#include "paddle/fluid/operators/jit/registry.h" +#include "paddle/fluid/platform/cpu_info.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace more { +namespace mkl { + +template <> +void VMul(const float* x, const float* y, float* z, int n) { + platform::dynload::vsMul(n, x, y, z); +} + +template <> +void VMul(const double* x, const double* y, double* z, int n) { + platform::dynload::vdMul(n, x, y, z); +} + +template <> +void VAdd(const float* x, const float* y, float* z, int n) { + platform::dynload::vsAdd(n, x, y, z); +} + +template <> +void VAdd(const double* x, const double* y, double* z, int n) { + platform::dynload::vdAdd(n, x, y, z); +} + +template <> +void VScal(const float* a, const float* x, float* y, int n) { + if (x == y) { + platform::dynload::cblas_sscal(n, *a, y, 1); + } else { + refer::VScal(a, x, y, n); + } +} + +template <> +void VScal(const double* a, const double* x, double* y, int n) { + if (x == y) { + platform::dynload::cblas_dscal(n, *a, y, 1); + } else { + refer::VScal(a, x, y, n); + } +} + +template <> +void VExp(const float* x, float* y, int 
n) { + platform::dynload::vsExp(n, x, y); +} + +template <> +void VExp(const double* x, double* y, int n) { + platform::dynload::vdExp(n, x, y); +} + +// TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512 +template <> +bool VMulKernel::UseMe(int d) const { + return platform::MayIUse(platform::avx512f) && d > 512; +} + +template <> +bool VAddKernel::UseMe(int d) const { + return platform::MayIUse(platform::avx512f) && d > 512; +} + +template <> +bool VScalKernel::UseMe(int d) const { + return platform::MayIUse(platform::avx512f) && d > 512; +} + +template <> +bool VExpKernel::UseMe(int d) const { + return d > 7; +} + +template <> +bool VSigmoidKernel::UseMe(int d) const { + return d > 7; +} + +template <> +bool VTanhKernel::UseMe(int d) const { + return d > 7; +} + +#define AWALYS_USE_ME_WITH_DOUBLE(func) \ + template <> \ + bool func##Kernel::UseMe(int d) const { \ + return true; \ + } + +AWALYS_USE_ME_WITH_DOUBLE(VMul); +AWALYS_USE_ME_WITH_DOUBLE(VAdd); +AWALYS_USE_ME_WITH_DOUBLE(VScal); +AWALYS_USE_ME_WITH_DOUBLE(VExp); +AWALYS_USE_ME_WITH_DOUBLE(VSigmoid); +AWALYS_USE_ME_WITH_DOUBLE(VTanh); + +#undef AWALYS_USE_ME_WITH_DOUBLE +} // namespace mkl +} // namespace more +} // namespace jit +} // namespace operators +} // namespace paddle + +namespace mkl = paddle::operators::jit::more::mkl; + +#define REGISTER_MKL_KERNEL(key, func) \ + REGISTER_JITKERNEL_MORE(key, mkl, mkl::func##Kernel, \ + mkl::func##Kernel) + +REGISTER_MKL_KERNEL(vmul, VMul); +REGISTER_MKL_KERNEL(vadd, VAdd); +REGISTER_MKL_KERNEL(vscal, VScal); +REGISTER_MKL_KERNEL(vexp, VExp); +REGISTER_MKL_KERNEL(vsigmoid, VSigmoid); +REGISTER_MKL_KERNEL(vtanh, VTanh); + +#undef REGISTER_MKL_KERNEL diff --git a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h new file mode 100644 index 000000000..bf209d2f9 --- /dev/null +++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h @@ -0,0 +1,89 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/operators/jit/kernel_base.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace more { +namespace mkl { + +template +void VMul(const T* x, const T* y, T* z, int n); + +template +void VAdd(const T* x, const T* y, T* z, int n); + +template +void VScal(const T* a, const T* x, T* y, int n); + +template +void VExp(const T* x, T* y, int n); + +template +void VSigmoid(const T* x, T* y, int n) { + const T min = SIGMOID_THRESHOLD_MIN; + const T max = SIGMOID_THRESHOLD_MAX; + for (int i = 0; i < n; ++i) { + y[i] = (x[i] < min) ? min : ((x[i] > max) ? 
max : x[i]); + y[i] = static_cast(0) - y[i]; + } + VExp(y, y, n); + for (int i = 0; i < n; ++i) { + y[i] = static_cast(1) / (static_cast(1) + y[i]); + } +} + +template +void VTanh(const T* x, T* y, int n) { + for (int i = 0; i < n; ++i) { + y[i] = static_cast(2) * x[i]; + } + VSigmoid(y, y, n); + for (int i = 0; i < n; ++i) { + y[i] = static_cast(2) * y[i] - static_cast(1); + } +} + +#define DECLARE_MKL_KERNEL(name, tuples) \ + template \ + class name##Kernel : public KernelImpl> { \ + public: \ + name##Kernel() { this->func = name; } \ + bool UseMe(typename tuples::attr_type) const override; \ + } + +// XYZN +DECLARE_MKL_KERNEL(VMul, XYZNTuples); +DECLARE_MKL_KERNEL(VAdd, XYZNTuples); + +// AXYN +DECLARE_MKL_KERNEL(VScal, AXYNTuples); + +// XYN +DECLARE_MKL_KERNEL(VExp, XYNTuples); +DECLARE_MKL_KERNEL(VSigmoid, XYNTuples); +DECLARE_MKL_KERNEL(VTanh, XYNTuples); + +#undef DECLARE_MKL_KERNEL + +} // namespace mkl +} // namespace more +} // namespace jit +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/jit/more/more.h b/paddle/fluid/operators/jit/more/more.h deleted file mode 100644 index ab99fdc05..000000000 --- a/paddle/fluid/operators/jit/more/more.h +++ /dev/null @@ -1,15 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. */ - -#pragma once -- GitLab