提交 171a0e2b 编写于 作者: T tensor-tang

add some comment

上级 5377edd2
@@ -276,6 +276,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
 context.Attr<std::string>("gate_activation"));
 #ifdef PADDLE_WITH_MKLML
+// use MKL packed to speedup GEMM
 if (FLAGS_paddle_num_threads >= 4) {
 auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
 T* packed_gate = blas.GEMM_ALLOC(CblasBMatrix, 1 /*height of C*/,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册