diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index 2b5094925c1ab7a7cca5c8f04b5f6c5d9e7e29f1..087f903a8bba9a4bfcd7eaabd7098555442a904e 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -276,6 +276,7 @@ class GRUCPUKernel : public framework::OpKernel { context.Attr("gate_activation")); #ifdef PADDLE_WITH_MKLML + // use MKL packed to speed up GEMM if (FLAGS_paddle_num_threads >= 4) { auto blas = math::GetBlas(dev_ctx); T* packed_gate = blas.GEMM_ALLOC(CblasBMatrix, 1 /*height of C*/,