Modify the dropout op to multi-thread (#19504)

* Modify the dropout op to multi-thread test=develop
* define parallel test=develop

Modify the dropout op to multi-thread (#19504)
* Modify the dropout op to multi-thread test=develop
* define parallel test=develop
e8405e5c · GaoWei8 · tensor-tang · 2916caa2 · e8405e5c
隐藏空白更改
内联并排

Showing 1 changed file with 12 additions and 5 deletions

paddle/fluid/operators/dropout_op.h paddle/fluid/operators/dropout_op.h +12 -5

未找到文件。
--- a/paddle/fluid/operators/dropout_op.h
+++ b/paddle/fluid/operators/dropout_op.h
@@ -77,13 +77,20 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
        }
      }
    } else {
-      auto X = EigenMatrix<T>::Reshape(*x, 1);
-      auto Y = EigenMatrix<T>::Reshape(*y, 1);
-      auto& place =
-          *context.template device_context<DeviceContext>().eigen_device();
      if (upscale_in_train) {
-        Y.device(place) = X;
+        const auto* X_data = x->data<T>();
+        auto* Y_data = y->mutable_data<T>(context.GetPlace());
+#ifdef PADDLE_WITH_MKLML
+#pragma omp parallel for
+#endif
+        for (int i = 0; i < x->numel(); i++) {
+          Y_data[i] = X_data[i];
+        }
      } else {
+        auto X = EigenMatrix<T>::Reshape(*x, 1);
+        auto Y = EigenMatrix<T>::Reshape(*y, 1);
+        auto& place =
+            *context.template device_context<DeviceContext>().eigen_device();
        Y.device(place) = X * static_cast<T>(1.0f - dropout_prob);
      }
    }