diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc
index cbfb795d6a23e1837b2a4095a4480c979e6b6d3b..7613b04bccfdc2084decc0b383eec199f7e10991 100644
--- a/paddle/fluid/operators/dropout_op.cc
+++ b/paddle/fluid/operators/dropout_op.cc
@@ -179,8 +179,12 @@ REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,
 REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);
 REGISTER_OP_CPU_KERNEL(
     dropout, ops::CPUDropoutKernel,
-    ops::CPUDropoutKernel);
+    ops::CPUDropoutKernel,
+    ops::CPUDropoutKernel);
 REGISTER_OP_CPU_KERNEL(
     dropout_grad, ops::DropoutGradKernel,
-    ops::DropoutGradKernel);
+    ops::DropoutGradKernel,
+    ops::DropoutGradKernel);
diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu
index 0d5ee41c5c3a262e9ee606f0a7dadab499cb0bf2..f6ddff1d0327d3c7961781f875da69f89df1edec 100644
--- a/paddle/fluid/operators/dropout_op.cu
+++ b/paddle/fluid/operators/dropout_op.cu
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/dropout_impl.cu.h"
 #include "paddle/fluid/operators/dropout_op.h"
+#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/float16.h"
 
 namespace paddle {
@@ -84,8 +85,10 @@ namespace plat = paddle::platform;
 REGISTER_OP_CUDA_KERNEL(
     dropout, ops::GPUDropoutKernel,
     ops::GPUDropoutKernel,
+    ops::GPUDropoutKernel,
     ops::GPUDropoutKernel);
 REGISTER_OP_CUDA_KERNEL(
     dropout_grad, ops::GPUDropoutGradKernel,
     ops::GPUDropoutGradKernel,
+    ops::GPUDropoutGradKernel,
     ops::GPUDropoutGradKernel);
diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index 43da63aae73e1eb89b7b2cc6861a7ea7750d97aa..1ef90ff2b732ee1109d7a050e0d1124b528c7403 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -698,13 +698,14 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
     ops::ReshapeKernel, int, ops::ReshapeKernel,
     uint8_t, ops::ReshapeKernel, int64_t,
     ops::ReshapeKernel, plat::float16,
+    ops::ReshapeKernel, plat::bfloat16,
     ops::ReshapeKernel);
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
     double, ops::ReshapeGradKernel, int,
     ops::ReshapeGradKernel, int64_t,
     ops::ReshapeGradKernel, uint8_t,
     ops::ReshapeGradKernel, plat::float16,
-
+    ops::ReshapeGradKernel, plat::bfloat16,
     ops::ReshapeGradKernel);
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
     ops::ReshapeKernel, int, ops::ReshapeKernel,
@@ -712,13 +713,15 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
     ops::ReshapeKernel, plat::float16,
     ops::ReshapeKernel, bool, ops::ReshapeKernel,
     plat::complex, ops::ReshapeKernel,
-    plat::complex, ops::ReshapeKernel);
+    plat::complex, ops::ReshapeKernel,
+    plat::bfloat16, ops::ReshapeKernel);
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(
     reshape2_grad, float, ops::ReshapeGradKernel, double,
     ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
     ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, plat::float16,
     ops::ReshapeGradKernel, bool, ops::ReshapeGradKernel, plat::complex,
-    ops::ReshapeGradKernel, plat::complex, ops::ReshapeGradKernel);
+    ops::ReshapeGradKernel, plat::complex, ops::ReshapeGradKernel,
+    plat::bfloat16, ops::ReshapeGradKernel);
 
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(
     reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
@@ -727,7 +730,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(
     plat::float16, ops::ReshapeDoubleGradKernel, bool,
     ops::ReshapeDoubleGradKernel, plat::complex,
     ops::ReshapeDoubleGradKernel, plat::complex,
-    ops::ReshapeDoubleGradKernel);
+    ops::ReshapeDoubleGradKernel, plat::bfloat16, ops::ReshapeDoubleGradKernel);
 #endif
 
 #ifdef PADDLE_WITH_XPU
diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc
index 4965e5e156c342be084938d6caf31820ca3428bc..37c79b9898e9d9ac88f43b57522210959c00e725 100644
--- a/paddle/fluid/operators/slice_op.cc
+++ b/paddle/fluid/operators/slice_op.cc
@@ -442,7 +442,9 @@ REGISTER_OP_CPU_KERNEL(
     ops::SliceKernel>,
     ops::SliceKernel>);
+    paddle::platform::complex>,
+    ops::SliceKernel);
 REGISTER_OP_CPU_KERNEL(
     slice_grad, ops::SliceGradKernel,
@@ -453,7 +455,9 @@ REGISTER_OP_CPU_KERNEL(
     ops::SliceGradKernel>,
     ops::SliceGradKernel>);
+    paddle::platform::complex>,
+    ops::SliceGradKernel);
 REGISTER_OP_CUDA_KERNEL(
     slice, ops::SliceKernel,
@@ -463,6 +467,8 @@ REGISTER_OP_CUDA_KERNEL(
     ops::SliceKernel,
     ops::SliceKernel,
+    ops::SliceKernel,
     ops::SliceKernel>,
     ops::SliceKernel,
     ops::SliceGradKernel,
+    ops::SliceGradKernel,
     ops::SliceGradKernel>,
     ops::SliceGradKernel);
 INSTANTIATION(EigenPad, dtype::complex);
 #undef INSTANTIATION
diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py
index cb72248b155fdc548d3e7899baaadb63f8b78f01..f670f7c38097b39723e22d15d152590fa19c607d 100644
--- a/python/paddle/fluid/tests/unittests/test_dropout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py
@@ -17,7 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 import paddle.fluid.core as core
-from op_test import OpTest, skip_check_grad_ci
+from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16
 import paddle
 import paddle.static as static
 import paddle.fluid as fluid
@@ -233,6 +233,27 @@ class TestFP16DropoutOp2(TestFP16DropoutOp):
         self.fix_seed = False
 
 
+class TestBF16DropoutOp(OpTest):
+    def setUp(self):
+        self.op_type = "dropout"
+        self.dtype = np.uint16
+
+        x = np.random.random((32, 64)).astype("float32")
+        self.inputs = {'X': convert_float_to_uint16(x)}
+        self.attrs = {'dropout_prob': 1.0, 'fix_seed': True, 'is_test': False}
+        self.outputs = {
+            'Out':
+            convert_float_to_uint16(np.zeros((32, 64)).astype('float32')),
+            'Mask': np.zeros((32, 64)).astype('uint8')
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out')
+
+
 class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase):
     def test_seed_cpu_place(self):
         paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/test_reshape_op.py b/python/paddle/fluid/tests/unittests/test_reshape_op.py
index a0063738d367ed2d1160c53af39821fdd1446e15..c860d6972fb762f5c9cde1fb17c36bfb415a3f8a 100755
--- a/python/paddle/fluid/tests/unittests/test_reshape_op.py
+++ b/python/paddle/fluid/tests/unittests/test_reshape_op.py
@@ -17,11 +17,12 @@ from __future__ import print_function
 
 import unittest
 import numpy as np
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid import compiler
 from paddle.static import Program, program_guard
+import paddle.fluid.core as core
 
 
 # situation 1: have shape( list, no tensor), no actual shape(Tensor)
@@ -48,6 +49,33 @@ class TestReshapeOp(OpTest):
         self.check_grad(["X"], "Out")
 
 
+class TestReshapeBF16Op(OpTest):
+    def setUp(self):
+        self.init_data()
+        self.op_type = "reshape2"
+        self.dtype = np.uint16
+        x = np.random.random(self.ori_shape).astype("float32")
+        out = x.reshape(self.infered_shape)
+        self.inputs = {"X": convert_float_to_uint16(x)}
+        self.attrs = {"shape": self.new_shape}
+        self.outputs = {
+            "Out": convert_float_to_uint16(out),
+            'XShape': convert_float_to_uint16(
+                np.random.random(self.ori_shape).astype("float32"))
+        }
+
+    def init_data(self):
+        self.ori_shape = (2, 60)
+        self.new_shape = (12, 10)
+        self.infered_shape = (12, 10)
+
+    def test_check_output(self):
+        self.check_output(no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out")
+
+
 class TestReshapeOpDimInfer1(TestReshapeOp):
     def init_data(self):
         self.ori_shape = (5, 25)
diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py
index 57d5453ec968ea07a54bc2436fbacabac542d732..629d61d01b283b501996d2dbad366839ff9d8e68 100644
--- a/python/paddle/fluid/tests/unittests/test_slice_op.py
+++ b/python/paddle/fluid/tests/unittests/test_slice_op.py
@@ -17,7 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 import paddle.fluid.core as core
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import paddle
@@ -484,6 +484,35 @@ class TestFP16_2(OpTest):
             numeric_grad_delta=0.5)
 
 
+class TestBF16(OpTest):
+    def setUp(self):
+        self.op_type = "slice"
+        self.config()
+        self.inputs = {'Input': convert_float_to_uint16(self.input)}
+        self.outputs = {'Out': convert_float_to_uint16(self.out)}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': self.starts,
+            'ends': self.ends,
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.dtype = np.uint16
+        self.input = np.random.random([3, 4, 5, 6]).astype(np.float32)
+        self.starts = [-3, 0, 2]
+        self.ends = [3, 100, -1]
+        self.axes = [0, 1, 3]
+        self.out = self.input[-3:3, 0:100, :, 2:-1]
+        self.infer_flags = [1, 1, 1]
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['Input'], 'Out')
+
+
 # Test python API
 class TestSliceAPI(unittest.TestCase):
     def test_1(self):