add crf_decoding and layer norm intrisic code

4cc7707d · tensor-tang · 10c340c9 · 4cc7707d · 4cc7707d · 4cc7707d
5 changed file
--- a/paddle/fluid/operators/jit/more/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/more/CMakeLists.txt
@@ -7,4 +7,8 @@ if(WITH_MKLML)
    add_subdirectory(mkl)
 endif()
+if(WITH_AVX)
+    add_subdirectory(intrinsic)
+endif()
 set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} PARENT_SCOPE)
--- a/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt
+file(GLOB jit_kernel_cc_intrinsic RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
+cc_library(jit_kernel_intrinsic SRCS ${jit_kernel_cc_intrinsic} DEPS jit_kernel_base)
+set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} jit_kernel_intrinsic PARENT_SCOPE)
+# use mkl kernels by name and type
+USE_JITKERNEL_MORE(crfdecoding, intrinsic)
+USE_JITKERNEL_MORE(layernorm, intrinsic)
--- a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc
+++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+#include "paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h"
+#include "paddle/fluid/operators/jit/refer/refer.h"
+#include "paddle/fluid/operators/jit/registry.h"
+#include "paddle/fluid/platform/cpu_info.h"
+namespace paddle {
+namespace operators {
+namespace jit {
+namespace more {
+namespace mkl {
+template <>
+void VMul<float>(const float* x, const float* y, float* z, int n) {
+  platform::dynload::vsMul(n, x, y, z);
+}
+template <>
+void VMul<double>(const double* x, const double* y, double* z, int n) {
+  platform::dynload::vdMul(n, x, y, z);
+}
+template <>
+void VAdd<float>(const float* x, const float* y, float* z, int n) {
+  platform::dynload::vsAdd(n, x, y, z);
+}
+template <>
+void VAdd<double>(const double* x, const double* y, double* z, int n) {
+  platform::dynload::vdAdd(n, x, y, z);
+}
+template <>
+void VScal<float>(const float* a, const float* x, float* y, int n) {
+  if (x == y) {
+    platform::dynload::cblas_sscal(n, *a, y, 1);
+  } else {
+    refer::VScal<float>(a, x, y, n);
+  }
+}
+template <>
+void VScal<double>(const double* a, const double* x, double* y, int n) {
+  if (x == y) {
+    platform::dynload::cblas_dscal(n, *a, y, 1);
+  } else {
+    refer::VScal<double>(a, x, y, n);
+  }
+}
+template <>
+void VExp<float>(const float* x, float* y, int n) {
+  platform::dynload::vsExp(n, x, y);
+}
+template <>
+void VExp<double>(const double* x, double* y, int n) {
+  platform::dynload::vdExp(n, x, y);
+}
+// TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
+template <>
+bool VMulKernel<float>::UseMe(int d) const {
+  return platform::MayIUse(platform::avx512f) && d > 512;
+}
+template <>
+bool VAddKernel<float>::UseMe(int d) const {
+  return platform::MayIUse(platform::avx512f) && d > 512;
+}
+template <>
+bool VScalKernel<float>::UseMe(int d) const {
+  return platform::MayIUse(platform::avx512f) && d > 512;
+}
+template <>
+bool VExpKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+template <>
+bool VSigmoidKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+template <>
+bool VTanhKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+#define AWALYS_USE_ME_WITH_DOUBLE(func)           \
+  template <>                                     \
+  bool func##Kernel<double>::UseMe(int d) const { \
+    return true;                                  \
+  }
+AWALYS_USE_ME_WITH_DOUBLE(VMul);
+AWALYS_USE_ME_WITH_DOUBLE(VAdd);
+AWALYS_USE_ME_WITH_DOUBLE(VScal);
+AWALYS_USE_ME_WITH_DOUBLE(VExp);
+AWALYS_USE_ME_WITH_DOUBLE(VSigmoid);
+AWALYS_USE_ME_WITH_DOUBLE(VTanh);
+#undef AWALYS_USE_ME_WITH_DOUBLE
+}  // namespace mkl
+}  // namespace more
+}  // namespace jit
+}  // namespace operators
+}  // namespace paddle
+namespace mkl = paddle::operators::jit::more::mkl;
+#define REGISTER_MKL_KERNEL(key, func)                        \
+  REGISTER_JITKERNEL_MORE(key, mkl, mkl::func##Kernel<float>, \
+                          mkl::func##Kernel<double>)
+REGISTER_MKL_KERNEL(vmul, VMul);
+REGISTER_MKL_KERNEL(vadd, VAdd);
+REGISTER_MKL_KERNEL(vscal, VScal);
+REGISTER_MKL_KERNEL(vexp, VExp);
+REGISTER_MKL_KERNEL(vsigmoid, VSigmoid);
+REGISTER_MKL_KERNEL(vtanh, VTanh);
+#undef REGISTER_MKL_KERNEL
--- a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h
+++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+#pragma once
+#include <type_traits>
+#include "paddle/fluid/operators/jit/kernel_base.h"
+namespace paddle {
+namespace operators {
+namespace jit {
+namespace more {
+namespace mkl {
+template <typename T>
+void VMul(const T* x, const T* y, T* z, int n);
+template <typename T>
+void VAdd(const T* x, const T* y, T* z, int n);
+template <typename T>
+void VScal(const T* a, const T* x, T* y, int n);
+template <typename T>
+void VExp(const T* x, T* y, int n);
+template <typename T>
+void VSigmoid(const T* x, T* y, int n) {
+  const T min = SIGMOID_THRESHOLD_MIN;
+  const T max = SIGMOID_THRESHOLD_MAX;
+  for (int i = 0; i < n; ++i) {
+    y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
+    y[i] = static_cast<T>(0) - y[i];
+  }
+  VExp(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(1) / (static_cast<T>(1) + y[i]);
+  }
+}
+template <typename T>
+void VTanh(const T* x, T* y, int n) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * x[i];
+  }
+  VSigmoid(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * y[i] - static_cast<T>(1);
+  }
+}
+#define DECLARE_MKL_KERNEL(name, tuples)                      \
+  template <typename T>                                       \
+  class name##Kernel : public KernelImpl<tuples<T>> {         \
+   public:                                                    \
+    name##Kernel() { this->func = name<T>; }                  \
+    bool UseMe(typename tuples<T>::attr_type) const override; \
+  }
+// XYZN
+DECLARE_MKL_KERNEL(VMul, XYZNTuples);
+DECLARE_MKL_KERNEL(VAdd, XYZNTuples);
+// AXYN
+DECLARE_MKL_KERNEL(VScal, AXYNTuples);
+// XYN
+DECLARE_MKL_KERNEL(VExp, XYNTuples);
+DECLARE_MKL_KERNEL(VSigmoid, XYNTuples);
+DECLARE_MKL_KERNEL(VTanh, XYNTuples);
+#undef DECLARE_MKL_KERNEL
+}  // namespace mkl
+}  // namespace more
+}  // namespace jit
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/fluid/operators/jit/more/more.h
+++ b/paddle/fluid/operators/jit/more/more.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. */
-#pragma once