提交 d4dda862 作者: GaoWei8 提交者: Yiqun Liu

optimize fc jit (#21878)

test=develop
上级 879e3074
......@@ -61,27 +61,20 @@ class FCFunctor<platform::CPUDeviceContext, T> {
"When bias is NULL, relu can not be true."));
return;
}
if (relu) {
auto compute =
jit::KernelFuncs<jit::VAddReluTuple<T>, platform::CPUPlace>::Cache()
.At(N);
for (int i = 0; i < M; i++) {
T* dst = Y + i * N;
T* src = (padding_weights) ? Y1_data + i * (N + 4) : dst;
compute(B, src, dst, N);
}
} else {
auto compute =
jit::KernelFuncs<jit::VAddTuple<T>, platform::CPUPlace>::Cache().At(
N);
auto compute =
relu
? jit::KernelFuncs<jit::VAddReluTuple<T>,
platform::CPUPlace>::Cache()
.At(N)
: jit::KernelFuncs<jit::VAddTuple<T>, platform::CPUPlace>::Cache()
.At(N);
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#endif
for (int i = 0; i < M; i++) {
T* dst = Y + i * N;
T* src = (padding_weights) ? Y1_data + i * (N + 4) : dst;
compute(B, src, dst, N);
}
for (int i = 0; i < M; i++) {
T* dst = Y + i * N;
T* src = (padding_weights) ? Y1_data + i * (N + 4) : dst;
compute(B, src, dst, N);
}
}
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册