From f1275fb605997cfb18c80d5c655728a83738aaee Mon Sep 17 00:00:00 2001
From: smallv0221 <33639025+smallv0221@users.noreply.github.com>
Date: Thu, 26 Aug 2021 19:28:01 +0800
Subject: [PATCH] Support dropout backward in eval mode (#35122)

* Support dropout backward in eval mode

* add downscale case

* minor fix

* minor fix
---
 paddle/fluid/operators/dropout_op.cc |  4 --
 paddle/fluid/operators/dropout_op.h  | 56 +++++++++++++++-------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc
index 6ee19c939f..37e622e768 100644
--- a/paddle/fluid/operators/dropout_op.cc
+++ b/paddle/fluid/operators/dropout_op.cc
@@ -117,10 +117,6 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_test"), false,
-                      platform::errors::InvalidArgument(
-                          "GradOp is only callable when is_test is false"));
-
     OP_INOUT_CHECK(ctx->HasInput("Mask"), "Input", "Mask", "DropoutGrad");
     OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input",
                    framework::GradVarName("Out"), "DropoutGrad");
diff --git a/paddle/fluid/operators/dropout_op.h b/paddle/fluid/operators/dropout_op.h
index 69c420e2c9..997a7d835a 100644
--- a/paddle/fluid/operators/dropout_op.h
+++ b/paddle/fluid/operators/dropout_op.h
@@ -160,17 +160,12 @@ template <typename DeviceContext, typename T>
 class DropoutGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    PADDLE_ENFORCE_EQ(!context.Attr<bool>("is_test"), true,
-                      platform::errors::PreconditionNotMet(
-                          "GradOp is only callable when is_test is false"));
-
     auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
     auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* mask = context.Input<Tensor>("Mask");
     grad_x->mutable_data<T>(context.GetPlace());
     auto size = grad_x->numel();
 
-    auto M = EigenVector<uint8_t>::Flatten(*mask);
     auto dX = EigenVector<T>::Flatten(*grad_x);
     auto dY = EigenVector<T>::Flatten(*grad_y);
 
@@ -178,32 +178,41 @@ class DropoutGradKernel : public framework::OpKernel<T> {
         *context.template device_context<DeviceContext>().eigen_device();
     auto& dropout_implementation =
         context.Attr<std::string>("dropout_implementation");
-    if (dropout_implementation == "upscale_in_train") {
-      float dropout_prob = context.Attr<float>("dropout_prob");
-      if (dropout_prob == 1.0f) {
-        dX.device(place) = static_cast<T>(0) * dY;
+    if (context.Attr<bool>("is_test") == true) {
+      if (dropout_implementation == "upscale_in_train") {
+        dX.device(place) = static_cast<T>(1) * dY;
       } else {
-        int vec_size = VectorizedSize<T>(grad_y->data<T>());
-        if (platform::is_gpu_place(context.GetPlace()) && vec_size == 4 &&
-            size % 4 == 0) {
+        float dropout_prob = context.Attr<float>("dropout_prob");
+        dX.device(place) = dY * static_cast<T>(1.0f - dropout_prob);
+      }
+    } else {
+      auto M = EigenVector<uint8_t>::Flatten(*mask);
+      if (dropout_implementation == "upscale_in_train") {
+        float dropout_prob = context.Attr<float>("dropout_prob");
+        if (dropout_prob == 1.0f) {
+          dX.device(place) = static_cast<T>(0) * dY;
+        } else {
+          int vec_size = VectorizedSize<T>(grad_y->data<T>());
+          if (platform::is_gpu_place(context.GetPlace()) && vec_size == 4 &&
+              size % 4 == 0) {
 #if defined(__NVCC__) || defined(__HIPCC__)
-          auto factor = static_cast<T>(1.0f / (1.0f - dropout_prob));
-          auto stream = context.cuda_device_context().stream();
-          platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(
-              context.cuda_device_context(), size);
-          DropoutGradCUDAKernel<
-              T, uint8_t,
-              4><<<config.block_per_grid, config.thread_per_block, 0, stream>>>(
-              grad_y->data<T>(), mask->data<uint8_t>(), factor, size,
-              grad_x->data<T>());
+            auto factor = static_cast<T>(1.0f / (1.0f - dropout_prob));
+            auto stream = context.cuda_device_context().stream();
+            platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(
+                context.cuda_device_context(), size);
+            DropoutGradCUDAKernel<T, uint8_t, 4><<<
+                config.block_per_grid, config.thread_per_block, 0, stream>>>(
+                grad_y->data<T>(), mask->data<uint8_t>(), factor, size,
+                grad_x->data<T>());
 #endif
-        } else {
-          dX.device(place) =
-              dY * M.cast<T>() / static_cast<T>(1.0f - dropout_prob);
+          } else {
+            dX.device(place) =
+                dY * M.cast<T>() / static_cast<T>(1.0f - dropout_prob);
+          }
         }
+      } else {
+        dX.device(place) = dY * M.cast<T>();
       }
-    } else {
-      dX.device(place) = dY * M.cast<T>();
     }
   }
 };
-- 
GitLab
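
Note on the semantics this patch implements: in eval mode (is_test == true) no dropout mask exists, so the backward pass must follow from the eval-mode forward pass alone. With upscale_in_train the eval forward is the identity, so the gradient passes through unchanged (dX = dY); with the default downscale-in-infer behavior the eval forward scales by (1 - dropout_prob), so the gradient is scaled by the same factor. Below is a minimal standalone C++ sketch of those two rules, using plain std::vector stand-ins rather than Paddle tensors; the function names are hypothetical, not Paddle APIs.

#include <cstddef>
#include <vector>

// upscale_in_train: eval-mode forward is y = x, so dX = dY.
std::vector<float> DropoutGradEvalUpscale(const std::vector<float>& dy) {
  return dy;  // gradient passes through unchanged
}

// downscale_in_infer: eval-mode forward is y = (1 - p) * x,
// so the gradient is scaled by the same factor.
std::vector<float> DropoutGradEvalDownscale(const std::vector<float>& dy,
                                            float dropout_prob) {
  std::vector<float> dx(dy.size());
  for (std::size_t i = 0; i < dy.size(); ++i) {
    dx[i] = dy[i] * (1.0f - dropout_prob);  // dX = (1 - p) * dY
  }
  return dx;
}

The training path (is_test == false) is unchanged and still multiplies by the saved Mask; that is why the auto M = EigenVector<uint8_t>::Flatten(*mask); line moves inside the else branch, so the mask is only read when the forward pass actually produced one.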