[MLU] fix compute error of dropout op (#45923)

36915474 · qipengh · GitHub · 3576e49c · 36915474 · 36915474
3 changed file
--- a/paddle/fluid/operators/dropout_op_mlu.cc
+++ b/paddle/fluid/operators/dropout_op_mlu.cc
@@ -39,8 +39,17 @@ class DropoutMLUKernel : public framework::OpKernel<T> {
    MLUCnnlTensorDesc x_desc(*x);
    MLUCnnlTensorDesc out_desc(*out);

-    if (!is_test) {
-      // exec dropout op for training only.
+    if (is_test && is_upscale) {
+      // dropout op for inference: out = input.
+      framework::TensorCopy(
+          *x,
+          ctx.GetPlace(),
+          ctx.template device_context<platform::MLUDeviceContext>(),
+          out);
+      return;
+    } else if (!is_test) {
+      // dropout op for training: out = input * mask / ( 1.0 - dropout_prob ) or
+      // out = input * mask.
      int seed_data = 0;
      if (seed_tensor) {
        if (platform::is_mlu_place(seed_tensor->place())) {
@@ -79,50 +88,44 @@ class DropoutMLUKernel : public framework::OpKernel<T> {
      const int device_id = ctx.GetPlace().GetDeviceId();
      auto mlu_gen_random = GetMLURandomGenerator(ctx, device_id, seed_data);

-      const float prob = is_upscale ? dropout_prob : 0.0f;
+      // compute out = input * mask / ( 1.0 - dropout_prob )
      MLUCnnl::FusedDropout(ctx,
                            mlu_gen_random->get(),
                            x_desc.get(),
                            GetBasePtr(x),
-                            prob,
+                            dropout_prob,
                            GetBasePtr(&(mlu_gen_random->get_state())),
                            mask_desc.get(),
                            GetBasePtr(mask),
                            out_desc.get(),
                            GetBasePtr(out));
-    } else {
-      // exec dropout op for inference only.
+
      if (is_upscale) {
-        framework::TensorCopy(
-            *x,
-            ctx.GetPlace(),
-            ctx.template device_context<platform::MLUDeviceContext>(),
-            out);
-      } else {
-        auto scale = static_cast<T>(1.0f - dropout_prob);
+        return;
+      }
+    }
+
+    // In downgrade_in_infer mode, need to multiply (1.0f - dropout_prob).
    Tensor scale_tensor(x->dtype());
+    Tensor bias_tensor(x->dtype());
    scale_tensor.mutable_data<T>({1}, ctx.GetPlace());
+    bias_tensor.mutable_data<T>({1}, ctx.GetPlace());
    MLUCnnlTensorDesc scale_desc(scale_tensor);
-        MLUCnnl::Fill(ctx,
-                      CNNL_POINTER_MODE_HOST,
-                      &scale,
-                      scale_desc.get(),
-                      GetBasePtr(&scale_tensor));
-
-        auto data_type = ToCnnlDataType<T>();
-        MLUCnnlOpTensorDesc op_tensor_desc(
-            CNNL_OP_TENSOR_MUL, data_type, CNNL_NOT_PROPAGATE_NAN);
-        MLUCnnl::OpTensor(ctx,
-                          op_tensor_desc.get(),
-                          x_desc.get(),
-                          GetBasePtr(x),
+    MLUCnnlTensorDesc bias_desc(bias_tensor);
+    FillMLUTensorWithHostValue(
+        ctx, static_cast<T>(1.0f - dropout_prob), &scale_tensor);
+    FillMLUTensorWithHostValue(ctx, static_cast<T>(0.0f), &bias_tensor);
+
+    MLUCnnl::Scale(ctx,
+                   0,
+                   is_test ? x_desc.get() : out_desc.get(),
+                   is_test ? GetBasePtr(x) : GetBasePtr(out),
                   scale_desc.get(),
                   GetBasePtr(&scale_tensor),
+                   bias_desc.get(),
+                   GetBasePtr(&bias_tensor),
                   out_desc.get(),
-                          GetBasePtr(out),
-                          data_type);
-      }
-    }
+                   GetBasePtr(out));
  }
 };


--- a/paddle/fluid/operators/pool_op_mlu.cc
+++ b/paddle/fluid/operators/pool_op_mlu.cc
@@ -141,10 +141,9 @@ class MLUPoolOpKernel : public framework::OpKernel<T> {
          handle, pool_mode, out_w, out_h, &extra_input_size);

      if (extra_input_size > 0) {
-        phi::CPUContext cpu_ctx;
-        framework::Tensor extra_host_tensor =
-            ctx.AllocateTmpTensor<int8_t, phi::CPUContext>(
-                {static_cast<int64_t>(extra_input_size)}, cpu_ctx);
+        framework::Tensor extra_host_tensor;
+        extra_host_tensor.mutable_data<int8_t>(
+            {static_cast<int64_t>(extra_input_size)}, platform::CPUPlace());
        cnnlInitPoolingExtraInput(handle,
                                  pool_desc.get(),
                                  trans_in_x_desc.get(),

--- a/python/paddle/fluid/tests/unittests/mlu/test_dropout_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_dropout_op_mlu.py
@@ -31,24 +31,43 @@ SEED = 2022
 class TestDropoutOp(OpTest):

    def setUp(self):
-        self.op_type = "dropout"
        self.set_mlu()
        self.init_dtype()
-        self.inputs = {'X': np.random.random((32, 64)).astype(self.dtype)}
+        self.init_inputs_shape()
+        self.init_attrs()
+        self.op_type = 'dropout'
+        self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)}
        self.attrs = {
-            'dropout_prob': 0.0,
-            'fix_seed': True,
-            'is_test': False,
-            'dropout_implementation': 'upscale_in_train'
-        }
-        self.outputs = {
-            'Out': self.inputs['X'],
-            'Mask': np.ones((32, 64)).astype('uint8')
+            'dropout_prob': self.dropout_prob,
+            'fix_seed': self.fix_seed,
+            'is_test': self.is_test,
+            'dropout_implementation': self.dropout_implementation
        }

+        out = self.inputs['X'] * (1.0 - self.dropout_prob)
+        if self.is_test == False:
+            mask = None
+            if self.dropout_prob == 0.0:
+                mask = np.ones(self.shape).astype('uint8')
+            elif self.dropout_prob == 1.0:
+                mask = np.zeros(self.shape).astype('uint8')
+            self.outputs = {'Out': out, 'Mask': mask}
+        else:
+            self.outputs = {'Out': out}
+
    def init_dtype(self):
        self.dtype = np.float32

+    def init_inputs_shape(self):
+        self.shape = [32, 64]
+
+    def init_attrs(self):
+        self.__class__.no_need_check_grad = False
+        self.dropout_prob = 0.0
+        self.fix_seed = True
+        self.is_test = False
+        self.dropout_implementation = "upscale_in_train"
+
    def set_mlu(self):
        self.__class__.use_mlu = True
        self.place = paddle.device.MLUPlace(0)
@@ -57,84 +76,111 @@ class TestDropoutOp(OpTest):
        self.check_output_with_place(self.place)

    def test_check_grad_normal(self):
+        if hasattr(self.__class__, "no_need_check_grad"
+                   ) and self.__class__.no_need_check_grad == True:
+            return
+
        self.check_grad_with_place(self.place, ['X'], 'Out')


 class TestDropoutOpInput1d(TestDropoutOp):
-    # change input shape
-    def setUp(self):
-        self.op_type = "dropout"
-        self.set_mlu()
-        self.init_dtype()
-        self.inputs = {'X': np.random.random((3, 62)).astype(self.dtype)}
-        self.attrs = {
-            'dropout_prob': 0.0,
-            'fix_seed': True,
-            'is_test': False,
-            'dropout_implementation': 'upscale_in_train'
-        }
-        self.outputs = {
-            'Out': self.inputs['X'],
-            'Mask': np.ones((3, 62)).astype('uint8')
-        }

-
-class TestDropoutOpInput1d_1(TestDropoutOp):
-    # the input is 1-D
-    def setUp(self):
-        self.op_type = "dropout"
-        self.set_mlu()
-        self.init_dtype()
-        self.inputs = {'X': np.random.random((2000)).astype(self.dtype)}
-        self.attrs = {
-            'dropout_prob': 0.0,
-            'fix_seed': True,
-            'is_test': False,
-            'dropout_implementation': 'upscale_in_train'
-        }
-        self.outputs = {
-            'Out': self.inputs['X'],
-            'Mask': np.ones((2000)).astype('uint8')
-        }
+    def init_inputs_shape(self):
+        self.shape = [2000]


 class TestDropoutOp2(TestDropoutOp):
-    # the dropout_prob is 1.0
-    def setUp(self):
-        self.op_type = "dropout"
-        self.set_mlu()
-        self.init_dtype()
-        self.inputs = {'X': np.random.random((32, 64)).astype(self.dtype)}
-        self.attrs = {
-            'dropout_prob': 1.0,
-            'fix_seed': True,
-            'is_test': False,
-            'dropout_implementation': 'upscale_in_train'
-        }
-        self.outputs = {
-            'Out': np.zeros((32, 64)).astype('float32'),
-            'Mask': np.zeros((32, 64)).astype('uint8')
-        }
+
+    def init_inputs_shape(self):
+        self.shape = [32, 64]
+
+    def init_attrs(self):
+        self.dropout_prob = 1.0
+        self.fix_seed = True
+        self.is_test = False
+        self.dropout_implementation = "upscale_in_train"


 class TestDropoutOp3(TestDropoutOp):
-    # the input dim is 3
+
+    def init_inputs_shape(self):
+        self.shape = [32, 64, 2]
+
+
+class TestDropoutOp4(TestDropoutOp):
+
+    def init_attrs(self):
+        self.__class__.no_need_check_grad = True
+        self.dropout_prob = 0.35
+        self.fix_seed = True
+        self.is_test = True
+        self.dropout_implementation = "downgrade_in_infer"
+
+
+class TestDropoutOp5(TestDropoutOp):
+
+    def init_inputs_shape(self):
+        self.shape = [32, 64, 3]
+
+    def init_attrs(self):
+        self.__class__.no_need_check_grad = True
+        self.dropout_prob = 0.75
+        self.fix_seed = True
+        self.is_test = True
+        self.dropout_implementation = "downgrade_in_infer"
+
+
+class TestDropoutOp6(TestDropoutOp):
+
+    def init_attrs(self):
+        self.__class__.no_need_check_grad = True
+        self.dropout_prob = 0.0
+        self.fix_seed = True
+        self.is_test = False
+        self.dropout_implementation = "downgrade_in_infer"
+
+
+class TestDropoutOpWithSeed(TestDropoutOp):
+    # the seed is a Tensor
    def setUp(self):
        self.op_type = "dropout"
        self.set_mlu()
-        self.init_dtype()
-        self.inputs = {'X': np.random.random((32, 64, 2)).astype(self.dtype)}
+        self.dtype = np.float32
+        self.inputs = {
+            "X": np.random.random((32, 64)).astype(self.dtype),
+            "Seed": np.asarray([125], dtype="int32")
+        }
        self.attrs = {
            'dropout_prob': 0.0,
-            'fix_seed': True,
            'is_test': False,
            'dropout_implementation': 'upscale_in_train'
        }
        self.outputs = {
            'Out': self.inputs['X'],
-            'Mask': np.ones((32, 64, 2)).astype('uint8')
+            'Mask': np.ones((32, 64)).astype('uint8')
        }

+    def set_mlu(self):
+        self.__class__.use_mlu = True
+        self.place = paddle.device.MLUPlace(0)
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad_normal(self):
+        self.check_grad_with_place(self.place, ['X'], 'Out')
+
+
+class TestDropoutOpFp16(TestDropoutOp):
+    # float16
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def set_mlu(self):
+        self.__class__.use_mlu = True
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.no_need_check_grad = True
+

 @skip_check_grad_ci(reason="For inference, check_grad is not required.")
 class TestDropoutOpInference(OpTest):
@@ -179,38 +225,6 @@ class TestDropoutOpInference2(TestDropoutOpInference):
        self.outputs = {'Out': self.inputs['X']}


-class TestDropoutOpWithSeed(TestDropoutOp):
-    # the seed is a Tensor
-    def setUp(self):
-        self.op_type = "dropout"
-        self.set_mlu()
-        self.init_dtype()
-        self.inputs = {
-            "X": np.random.random((32, 64)).astype(self.dtype),
-            "Seed": np.asarray([125], dtype="int32")
-        }
-        self.attrs = {
-            'dropout_prob': 0.0,
-            'is_test': False,
-            'dropout_implementation': 'upscale_in_train'
-        }
-        self.outputs = {
-            'Out': self.inputs['X'],
-            'Mask': np.ones((32, 64)).astype('uint8')
-        }
-
-
-class TestDropoutOpFp16(TestDropoutOp):
-    # float16
-    def init_dtype(self):
-        self.dtype = np.float16
-
-    def set_mlu(self):
-        self.__class__.use_mlu = True
-        self.place = paddle.device.MLUPlace(0)
-        self.__class__.no_need_check_grad = True
-
-
 class TestDropoutAPI(unittest.TestCase):

    def setUp(self):