From 720b55cbcff16832671873e409b5aa41b1cec1ef Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 17 Dec 2018 12:30:18 +0000 Subject: [PATCH] enable crf decoding and layer norm refer code --- paddle/fluid/operators/crf_decoding_op.h | 9 +-- paddle/fluid/operators/jit/helper.cc | 4 + paddle/fluid/operators/jit/kernel_base.h | 19 ++++- .../fluid/operators/jit/refer/CMakeLists.txt | 2 + paddle/fluid/operators/jit/refer/refer.cc | 3 + paddle/fluid/operators/jit/refer/refer.h | 80 +++++++++++++++++++ paddle/fluid/operators/jit/test.cc | 2 +- paddle/fluid/operators/layer_norm_op.h | 14 ++-- 8 files changed, 119 insertions(+), 14 deletions(-) diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index e9d2e84a434..860d71e1fe6 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/jit_kernel.h" +#include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/math/math_function.h" namespace paddle { @@ -82,10 +82,9 @@ class CRFDecodingOpKernel : public framework::OpKernel { Tensor track; int* track_value = track.mutable_data(emission_dims, platform::CPUPlace()); - const auto& ker = math::jitkernel::KernelPool::Instance() - .template Get>( - static_cast(tag_num)); - ker->Compute(static_cast(seq_len), x, w, alpha_value, track_value); + auto ker = jit::Get( + tag_num); + ker(static_cast(seq_len), x, w, alpha_value, track_value, tag_num); T max_score = -std::numeric_limits::max(); int max_i = 0; for (size_t i = 0; i < tag_num; ++i) { diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index 0543b0743c0..a0ff82043fc 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -42,6 +42,8 @@ const char* to_string(KernelType kt) { ONE_CASE(gruh1); ONE_CASE(gruhtpart1); ONE_CASE(gruhtpart2); + ONE_CASE(crfdecoding); + ONE_CASE(layernorm); default: PADDLE_THROW("Not support type: %d", kt); return "NOT JITKernel"; @@ -64,6 +66,8 @@ KernelType to_kerneltype(const std::string& act) { } else if (lower == "tanh" || lower == "vtanh") { return vtanh; } + PADDLE_THROW("Not support type: %s, or forget to add this case", act); + return non_kernel; } diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index f10d9f3fdd6..59531c2f17c 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -37,7 +37,9 @@ typedef enum { lstmc1h1, gruh1, gruhtpart1, - gruhtpart2 + gruhtpart2, + crfdecoding, + layernorm } KernelType; template @@ -109,6 +111,21 @@ struct GRUTuples { typedef void (*func_type)(gru_t*, const gru_attr_t*); }; +template +struct CRFDecodingTuples { + typedef T data_type; + typedef int attr_type; + typedef void (*func_type)(const int, const T*, const T*, T*, int*, int); +}; + +template +struct LayerNormTuples { + typedef T data_type; + typedef int attr_type; + typedef void (*func_type)(T*, T*, T*, T*, const T*, const T*, int, + const float, int); +}; + // Just for adding to kernel pool without template class Kernel { public: diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index 78d1cb8f9a7..f3a0e9b11f6 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -23,3 +23,5 @@ USE_JITKERNEL_REFER(lstmc1h1) USE_JITKERNEL_REFER(gruh1) USE_JITKERNEL_REFER(gruhtpart1) USE_JITKERNEL_REFER(gruhtpart2) +USE_JITKERNEL_REFER(crfdecoding) +USE_JITKERNEL_REFER(layernorm) diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index c99174a66f3..00daa0d4786 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -42,4 +42,7 @@ REGISTER_REFER_KERNEL(gruh1, GRUH1); REGISTER_REFER_KERNEL(gruhtpart1, GRUHtPart1); REGISTER_REFER_KERNEL(gruhtpart2, GRUHtPart2); +REGISTER_REFER_KERNEL(crfdecoding, CRFDecoding); +REGISTER_REFER_KERNEL(layernorm, LayerNorm); + #undef REGISTER_REFER_KERNEL diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index a9a6ffbccd8..5780ea05bdf 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -13,6 +13,9 @@ * limitations under the License. */ #pragma once + +#include +#include #include "paddle/fluid/operators/jit/helper.h" #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/platform/enforce.h" @@ -242,6 +245,80 @@ void GRUHtPart2(gru_t* step, const gru_attr_t* attr) { } } +template +void CRFDecoding(const int seq_len, const T* x, const T* w, T* alpha, + int* track, int right) { + constexpr int state_trans_base_idx = 2; + for (int i = 0; i < right; ++i) { + alpha[i] = w[i] + x[i]; + } + for (int k = 1; k < seq_len; ++k) { + for (int i = 0; i < right; ++i) { + T max_score = -std::numeric_limits::max(); + int max_j = 0; + for (int j = 0; j < right; ++j) { + T score = alpha[(k - 1) * right + j] + + w[(j + state_trans_base_idx) * right + i]; + if (score > max_score) { + max_score = score; + max_j = j; + } + } + alpha[k * right + i] = max_score + x[k * right + i]; + track[k * right + i] = max_j; + } + } +} + +template +void LayerNorm(T* x, T* out, T* mean, T* var, const T* scale, const T* bias, + int height, const float epsilon, int right) { + // get mean + for (int i = 0; i < height; i++) { + T sum = 0.0; + int offset = i * right; + for (int j = 0; j < right; j++) { + sum += x[offset + j]; + } + mean[i] = sum / right; + } + + // get variance + for (int i = 0; i < height; i++) { + T sum = 0.0; + int offset = i * right; + for (int j = 0; j < right; j++) { + sum += (x[offset + j] - mean[i]) * (x[offset + j] - mean[i]); + } + var[i] = sum / right; + } + + for (int i = 0; i < height; i++) { + int offset = i * right; + T sqrt_var = std::sqrt(var[i] + (T)epsilon); + for (int j = 0; j < right; j++) { + out[offset + j] = (x[offset + j] - mean[i]) / sqrt_var; + } + } + if (scale) { + for (int i = 0; i < height; i++) { + int offset = i * right; + for (int j = 0; j < right; j++) { + out[offset + j] *= scale[j]; + } + } + } + + if (bias) { + for (int i = 0; i < height; i++) { + int offset = i * right; + for (int j = 0; j < right; j++) { + out[offset + j] += bias[j]; + } + } + } +} + #define DECLARE_REFER_KERNEL(name, tuples) \ template \ class name##Kernel : public ReferKernel> { \ @@ -275,6 +352,9 @@ DECLARE_REFER_KERNEL(GRUH1, GRUTuples); DECLARE_REFER_KERNEL(GRUHtPart1, GRUTuples); DECLARE_REFER_KERNEL(GRUHtPart2, GRUTuples); +DECLARE_REFER_KERNEL(CRFDecoding, CRFDecodingTuples); +DECLARE_REFER_KERNEL(LayerNorm, LayerNormTuples); + #undef DECLARE_REFER_KERNEL } // namespace refer diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 36f8eb6e7b6..85eadea7516 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -515,7 +515,7 @@ TEST(JITKernel, gruhtpart2) { TestGRUKernel(); } -// TODO(TJ): refine the tests template +// TODO(yihua/TJ): add crf decoding and layer norm unit tests TEST(JITKernel, pool) { // TODO(TJ): add some test diff --git a/paddle/fluid/operators/layer_norm_op.h b/paddle/fluid/operators/layer_norm_op.h index 78d20ddf5fd..bb00ed47293 100644 --- a/paddle/fluid/operators/layer_norm_op.h +++ b/paddle/fluid/operators/layer_norm_op.h @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/blas.h" #if !defined(PADDLE_WITH_CUDA) && !defined(_WIN32) && !defined(__APPLE__) && \ !defined(__OSX__) -#include "paddle/fluid/operators/math/jit_kernel.h" +#include "paddle/fluid/operators/jit/kernels.h" #endif #include "paddle/fluid/operators/math/math_function.h" @@ -229,12 +229,12 @@ class LayerNormKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(scale->numel(), right); PADDLE_ENFORCE_EQ(bias->numel(), right); - const auto& ker = math::jitkernel::KernelPool::Instance() - .template Get>( - static_cast(right)); - ker->Compute(x.data(), out.data(), mean->data(), var->data(), - scale->data(), bias->data(), static_cast(left), - static_cast(epsilon)); + auto ker = + jit::Get( + right); + ker(x.data(), out.data(), mean->data(), var->data(), + scale->data(), bias->data(), static_cast(left), + static_cast(epsilon), right); #endif } }; -- GitLab