fix build

6648995f · tensor-tang · 74292f41 · 6648995f · 6648995f · 6648995f
7 changed files
--- a/paddle/fluid/operators/crf_decoding_op.h
+++ b/paddle/fluid/operators/crf_decoding_op.h
@@ -82,8 +82,8 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
    Tensor track;
    int* track_value =
        track.mutable_data<int>(emission_dims, platform::CPUPlace());
-    auto ker = jit::Get<jit::crfdecoding, jit::CRFDecoding, platform::CPUPlace>(
+    auto ker = jit::Get<jit::crfdecoding, jit::CRFDecodingTuples<T>,
-        tag_num);
+                        platform::CPUPlace>(tag_num);
    ker(static_cast<int>(seq_len), x, w, alpha_value, track_value, tag_num);
    T max_score = -std::numeric_limits<T>::max();
    int max_i = 0;

--- a/paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
@@ -108,7 +108,7 @@ class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
        constexpr int simd_width = 16;
        int C = c / simd_width;
-        auto multiply = jit::Get<jit::nchw16cmulnc, jit::NCHW16CMulNCTuples,
+        auto multiply = jit::Get<jit::nchw16cmulnc, jit::NCHW16CMulNCTuples<T>,
                                 platform::CPUPlace>(0);
 #pragma omp parallel for collapse(2)
        for (int ni = 0; ni < n; ni++) {

--- a/paddle/fluid/operators/fused/fusion_gru_op.cc
+++ b/paddle/fluid/operators/fused/fusion_gru_op.cc
@@ -183,29 +183,29 @@ class FusionGRUKernel : public framework::OpKernel<T> {
  const int total_T = x_dims[0];           \
  const int D3 = wh_dims[1]
-#define INIT_OTHER_DEFINES                                                 \
+#define INIT_OTHER_DEFINES                                                    \
-  auto* h0 = ctx.Input<Tensor>("H0");                                      \
+  auto* h0 = ctx.Input<Tensor>("H0");                                         \
-  auto* wx = ctx.Input<Tensor>("WeightX");                                 \
+  auto* wx = ctx.Input<Tensor>("WeightX");                                    \
-  auto* bias = ctx.Input<Tensor>("Bias");                                  \
+  auto* bias = ctx.Input<Tensor>("Bias");                                     \
-  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");                      \
+  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");                         \
-  bool is_reverse = ctx.Attr<bool>("is_reverse");                          \
+  bool is_reverse = ctx.Attr<bool>("is_reverse");                             \
-  const int M = x_dims[1];                                                 \
+  const int M = x_dims[1];                                                    \
-  const int D = wh_dims[0];                                                \
+  const int D = wh_dims[0];                                                   \
-  const int D2 = D * 2;                                                    \
+  const int D2 = D * 2;                                                       \
-  const jit::gru_attr_t attr(                                              \
+  const jit::gru_attr_t attr(                                                 \
-      D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")),     \
+      D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")),        \
-      jit::to_kerneltype(ctx.Attr<std::string>("activation")));            \
+      jit::to_kerneltype(ctx.Attr<std::string>("activation")));               \
-  jit::gru_t one_step;                                                     \
+  jit::gru_t one_step;                                                        \
-  auto ComputeH1 =                                                         \
+  auto ComputeH1 =                                                            \
-      jit::Get<jit::gruh1, jit::GRUTuples, platform::CPUPlace>(attr);      \
+      jit::Get<jit::gruh1, jit::GRUTuples<T>, platform::CPUPlace>(attr);      \
-  auto ComputeHtPart1 =                                                    \
+  auto ComputeHtPart1 =                                                       \
-      jit::Get<jit::gruhtpart1, jit::GRUTuples, platform::CPUPlace>(attr); \
+      jit::Get<jit::gruhtpart1, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
-  auto ComputeHtPart2 =                                                    \
+  auto ComputeHtPart2 =                                                       \
-      jit::Get<jit::gruhtpart2, jit::GRUTuples, platform::CPUPlace>(attr); \
+      jit::Get<jit::gruhtpart2, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
-  const T* x_data = x->data<T>();                                          \
+  const T* x_data = x->data<T>();                                             \
-  const T* wx_data = wx->data<T>();                                        \
+  const T* wx_data = wx->data<T>();                                           \
-  const T* wh_data = wh->data<T>();                                        \
+  const T* wh_data = wh->data<T>();                                           \
-  auto place = ctx.GetPlace();                                             \
+  auto place = ctx.GetPlace();                                                \
  T* xx_data = xx->mutable_data<T>(place)
  void SeqCompute(const framework::ExecutionContext& ctx) const {

--- a/paddle/fluid/operators/fused/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -236,33 +236,32 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
  const int D = wh_dims[0];                                 \
  const int D4 = wh_dims[1]
-#define INIT_OTHER_DEFINES                                                     \
+#define INIT_OTHER_DEFINES                                                   \
-  const T* x_data = x->data<T>();                                              \
+  const T* x_data = x->data<T>();                                            \
-  const T* wx_data = wx->data<T>();                                            \
+  const T* wx_data = wx->data<T>();                                          \
-  const T* wh_data = wh->data<T>();                                            \
+  const T* wh_data = wh->data<T>();                                          \
-  /* diagonal weight*/                                                         \
+  /* diagonal weight*/                                                       \
-  const T* wp_data = bias->data<T>() + D4;                                     \
+  const T* wp_data = bias->data<T>() + D4;                                   \
-  /* for peephole only*/                                                       \
+  /* for peephole only*/                                                     \
-  T* checked_cell_data = nullptr;                                              \
+  T* checked_cell_data = nullptr;                                            \
-  auto place = ctx.GetPlace();                                                 \
+  auto place = ctx.GetPlace();                                               \
-  if (use_peepholes) {                                                         \
+  if (use_peepholes) {                                                       \
-    /* w_ic * Ct-1, w_fc * Ct-1  ; w_oc * Ct => ih*/                           \
+    /* w_ic * Ct-1, w_fc * Ct-1  ; w_oc * Ct => ih*/                         \
-    auto* checked_cell = ctx.Output<Tensor>("CheckedCell");                    \
+    auto* checked_cell = ctx.Output<Tensor>("CheckedCell");                  \
-    checked_cell_data = checked_cell->mutable_data<T>(place);                  \
+    checked_cell_data = checked_cell->mutable_data<T>(place);                \
-  }                                                                            \
+  }                                                                          \
-  const jit                                                                    \
+  const jit::lstm_attr_t attr(                                               \
-      : lstm_attr_t attr(                                                      \
+      D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")),       \
-            D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")),   \
+      jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")),     \
-            jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")), \
+      jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")),          \
-            jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")),      \
+      use_peepholes);                                                        \
-            use_peepholes);                                                    \
+  jit::lstm_t one_step;                                                      \
-  math::jitkernel::lstm_t one_step;                                            \
+  one_step.wp = wp_data;                                                     \
-  one_step.wp = wp_data;                                                       \
+  one_step.checked = checked_cell_data;                                      \
-  one_step.checked = checked_cell_data;                                        \
+  auto ComputeC1H1 =                                                         \
-  auto ComputeC1H1 =                                                           \
+      jit::Get<jit::lstmc1h1, jit::LSTMTuples<T>, platform::CPUPlace>(attr); \
-      jit::Get<jit::lstmc1h1, jit::LSTMTuples, platform::CPUPlace>(attr);      \
+  auto ComputeCtHt =                                                         \
-  auto ComputeCtHt =                                                           \
+      jit::Get<jit::lstmctht, jit::LSTMTuples<T>, platform::CPUPlace>(attr)
-      jit::Get<jit::lstmctht, jit::LSTMTuples, platform::CPUPlace>(attr)
 // Wh GEMM
 #define GEMM_WH_ADDON(bs, prev, out)                                           \
@@ -434,7 +433,7 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
        one_step.ct_1 = cur_prev_c_data;
        one_step.ct = cur_c_out_data;
        one_step.ht = cur_h_out_data;
-        ComputeC1H1(&one_step, &attr);
+        ComputeCtHt(&one_step, &attr);
        // move one batch
        cur_in_data += D4;

--- a/paddle/fluid/operators/jit/helper.h
+++ b/paddle/fluid/operators/jit/helper.h
@@ -32,7 +32,7 @@ inline typename std::enable_if<
    std::is_same<typename KernelTuples::data_type, float>::value &&
        std::is_same<PlaceType, platform::CPUPlace>::value,
    typename KernelTuples::func_type>::type
-GetJitCode(typename KernelTuples::attr_type attr) {
+GetJitCode(const typename KernelTuples::attr_type& attr) {
  using Func = typename KernelTuples::func_type;
  using Attr = typename KernelTuples::attr_type;
  size_t key = JitCodeKey<Attr>(attr);
@@ -68,7 +68,7 @@ inline typename std::enable_if<
    !std::is_same<typename KernelTuples::data_type, float>::value ||
        !std::is_same<PlaceType, platform::CPUPlace>::value,
    typename KernelTuples::func_type>::type
-GetJitCode(typename KernelTuples::attr_type attr) {
+GetJitCode(const typename KernelTuples::attr_type& attr) {
  return nullptr;
 }
@@ -93,8 +93,8 @@ inline typename KernelTuples::func_type GetRefer() {
 template <KernelType KT, typename KernelTuples,
          typename PlaceType = platform::CPUPlace>
-// TODO(TJ): const & attr
+typename KernelTuples::func_type Get(
-typename KernelTuples::func_type Get(typename KernelTuples::attr_type attr) {
+    const typename KernelTuples::attr_type& attr) {
  auto jitfunc = GetJitCode<KT, KernelTuples, PlaceType>(attr);
  if (jitfunc) {
    return jitfunc;

--- a/paddle/fluid/operators/layer_norm_op.h
+++ b/paddle/fluid/operators/layer_norm_op.h
@@ -230,7 +230,7 @@ class LayerNormKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(bias->numel(), right);
    auto ker =
-        jit::Get<jit::layernorm, jit::LayerNormTuples, platform::CPUPlace>(
+        jit::Get<jit::layernorm, jit::LayerNormTuples<T>, platform::CPUPlace>(
            right);
    ker(x.data<T>(), out.data<T>(), mean->data<T>(), var->data<T>(),
        scale->data<T>(), bias->data<T>(), static_cast<int>(left),

--- a/paddle/fluid/operators/math/fc_compute.h
+++ b/paddle/fluid/operators/math/fc_compute.h
@@ -31,13 +31,14 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
  }
  if (relu) {
    auto compute =
-        jit::Get<jit::vaddrelu, jit::XYZNTuples, platform::CPUPlcace>(N);
+        jit::Get<jit::vaddrelu, jit::XYZNTuples<T>, platform::CPUPlace>(N);
    for (int i = 0; i < M; i++) {
      T* dst = Y + i * N;
      compute(B, dst, dst, N);
    }
  } else {
-    auto compute = jit::Get<jit::vadd, jit::XYZNTuples, platform::CPUPlcace>(N);
+    auto compute =
+        jit::Get<jit::vadd, jit::XYZNTuples<T>, platform::CPUPlace>(N);
 #ifdef PADDLE_WITH_MKLML
 #pragma omp parallel for
 #endif