cache fc kernel

test=develop

cache fc kernel
test=develop
a18c0d42 · tensor-tang · 6e1ee7fb · a18c0d42
隐藏空白更改
内联并排

Showing with 6 additions and 4 deletions

paddle/fluid/operators/math/fc_compute.h paddle/fluid/operators/math/fc_compute.h +6 -4

未找到文件。
--- a/paddle/fluid/operators/math/fc_compute.h
+++ b/paddle/fluid/operators/math/fc_compute.h
@@ -30,15 +30,17 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
    return;
  }
  if (relu) {
-    auto compute =
+    auto compute = jit::KernelFuncs<jit::kVAddRelu, jit::XYZNTuples<T>,
-        jit::Get<jit::kVAddRelu, jit::XYZNTuples<T>, platform::CPUPlace>(N);
+                                    platform::CPUPlace>::Cache()
+                       .At(N);
    for (int i = 0; i < M; i++) {
      T* dst = Y + i * N;
      compute(B, dst, dst, N);
    }
  } else {
-    auto compute =
+    auto compute = jit::KernelFuncs<jit::kVAdd, jit::XYZNTuples<T>,
-        jit::Get<jit::kVAdd, jit::XYZNTuples<T>, platform::CPUPlace>(N);
+                                    platform::CPUPlace>::Cache()
+                       .At(N);
 #ifdef PADDLE_WITH_MKLML
 #pragma omp parallel for
 #endif