diff --git a/paddle/phi/kernels/cpu/embedding_kernel.cc b/paddle/phi/kernels/cpu/embedding_kernel.cc index 0430f7a005221c0ca95c0719a2a2c91bdb5e0d32..7accdffe1340d703b1c41487fc8fe6e60b8cffd9 100644 --- a/paddle/phi/kernels/cpu/embedding_kernel.cc +++ b/paddle/phi/kernels/cpu/embedding_kernel.cc @@ -48,6 +48,10 @@ struct EmbeddingCPUFunctor { dev_ctx_.template Alloc(out_); auto* output = out_->data(); +#if defined(_OPENMP) && !defined(PADDLE_WITH_CUDA) +#pragma omp parallel for +#endif + for (int64_t i = 0; i < ids_numel; ++i) { if (padding_idx_ != kNoPadding && ids[i] == padding_idx_) { memset(output + i * row_width, 0, row_width * sizeof(T));