Unverified commit e8ac7fc3 authored by zhangbo9674, committed by GitHub

[bf16] add bf16 kernel: dropout & reshape & slice (#39395)

* add dropout

* add reshape

* add slice

* refine slice unittest

* refine slice unittest

* add cpu bf16 kernel
Parent 14ed2f54
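The new unit tests below hand bfloat16 data to these kernels as raw uint16 bit patterns, using the convert_float_to_uint16 helper imported from op_test. As an illustrative sketch only (not part of this commit, and assuming plain truncation rather than the helper's exact rounding behaviour), the conversion keeps the top 16 bits of each float32 value:

import numpy as np

def float32_to_bf16_bits(x):
    # Keep the high 16 bits of each float32 value (truncation; the real
    # convert_float_to_uint16 helper may round, so this is only an approximation).
    x = np.ascontiguousarray(x, dtype=np.float32)
    return (x.view(np.uint32) >> 16).astype(np.uint16)

def bf16_bits_to_float32(bits):
    # Reinterpret each uint16 bf16 pattern as the high half of a float32.
    bits = np.ascontiguousarray(bits, dtype=np.uint16)
    return (bits.astype(np.uint32) << 16).view(np.float32)

x = np.random.random((32, 64)).astype("float32")
bf16 = float32_to_bf16_bits(x)      # the kind of data the tests feed as 'X'
back = bf16_bits_to_float32(bf16)   # round trip loses ~8 mantissa bits
print(abs(x - back).max())          # small but non-zero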
@@ -179,8 +179,12 @@ REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,
REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);
REGISTER_OP_CPU_KERNEL(
dropout, ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, float>,
ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>);
ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>,
ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);
REGISTER_OP_CPU_KERNEL(
dropout_grad,
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>);
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::DropoutGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/dropout_impl.cu.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
@@ -84,8 +85,10 @@ namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
dropout, ops::GPUDropoutKernel<plat::CUDADeviceContext, float>,
ops::GPUDropoutKernel<plat::CUDADeviceContext, plat::float16>,
ops::GPUDropoutKernel<plat::CUDADeviceContext, plat::bfloat16>,
ops::GPUDropoutKernel<plat::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
dropout_grad, ops::GPUDropoutGradKernel<plat::CUDADeviceContext, float>,
ops::GPUDropoutGradKernel<plat::CUDADeviceContext, plat::float16>,
ops::GPUDropoutGradKernel<plat::CUDADeviceContext, plat::bfloat16>,
ops::GPUDropoutGradKernel<plat::CUDADeviceContext, double>);
@@ -698,13 +698,14 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
ops::ReshapeKernel, int, ops::ReshapeKernel,
uint8_t, ops::ReshapeKernel, int64_t,
ops::ReshapeKernel, plat::float16,
ops::ReshapeKernel, plat::bfloat16,
ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
double, ops::ReshapeGradKernel, int,
ops::ReshapeGradKernel, int64_t,
ops::ReshapeGradKernel, uint8_t,
ops::ReshapeGradKernel, plat::float16,
ops::ReshapeGradKernel, plat::bfloat16,
ops::ReshapeGradKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
ops::ReshapeKernel, int, ops::ReshapeKernel,
@@ -712,13 +713,15 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
ops::ReshapeKernel, plat::float16,
ops::ReshapeKernel, bool, ops::ReshapeKernel,
plat::complex<float>, ops::ReshapeKernel,
plat::complex<double>, ops::ReshapeKernel);
plat::complex<double>, ops::ReshapeKernel,
plat::bfloat16, ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(
reshape2_grad, float, ops::ReshapeGradKernel, double,
ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, plat::float16,
ops::ReshapeGradKernel, bool, ops::ReshapeGradKernel, plat::complex<float>,
ops::ReshapeGradKernel, plat::complex<double>, ops::ReshapeGradKernel);
ops::ReshapeGradKernel, plat::complex<double>, ops::ReshapeGradKernel,
plat::bfloat16, ops::ReshapeGradKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(
reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
@@ -727,7 +730,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(
plat::float16, ops::ReshapeDoubleGradKernel, bool,
ops::ReshapeDoubleGradKernel, plat::complex<float>,
ops::ReshapeDoubleGradKernel, plat::complex<double>,
ops::ReshapeDoubleGradKernel);
ops::ReshapeDoubleGradKernel, plat::bfloat16, ops::ReshapeDoubleGradKernel);
#endif
#ifdef PADDLE_WITH_XPU
@@ -442,7 +442,9 @@ REGISTER_OP_CPU_KERNEL(
ops::SliceKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::SliceKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
paddle::platform::complex<double>>,
ops::SliceKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);
REGISTER_OP_CPU_KERNEL(
slice_grad, ops::SliceGradKernel<paddle::platform::CPUDeviceContext, bool>,
@@ -453,7 +455,9 @@ REGISTER_OP_CPU_KERNEL(
ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
paddle::platform::complex<double>>,
ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::bfloat16>);
REGISTER_OP_CUDA_KERNEL(
slice, ops::SliceKernel<paddle::platform::CUDADeviceContext, bool>,
@@ -463,6 +467,8 @@ REGISTER_OP_CUDA_KERNEL(
ops::SliceKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
paddle::platform::bfloat16>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<float>>,
ops::SliceKernel<paddle::platform::CUDADeviceContext,
@@ -476,6 +482,8 @@ REGISTER_OP_CUDA_KERNEL(
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::bfloat16>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<float>>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
@@ -61,6 +62,7 @@ INSTANTIATION(EigenPad, int);
INSTANTIATION(EigenPad, int64_t);
INSTANTIATION(EigenPad, float);
INSTANTIATION(EigenPad, double);
INSTANTIATION(EigenPad, dtype::bfloat16);
INSTANTIATION(EigenPad, dtype::complex<float>);
INSTANTIATION(EigenPad, dtype::complex<double>);
#undef INSTANTIATION
@@ -17,7 +17,7 @@ from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci
from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16
import paddle
import paddle.static as static
import paddle.fluid as fluid
@@ -233,6 +233,27 @@ class TestFP16DropoutOp2(TestFP16DropoutOp):
self.fix_seed = False
class TestBF16DropoutOp(OpTest):
def setUp(self):
self.op_type = "dropout"
self.dtype = np.uint16
x = np.random.random((32, 64)).astype("float32")
self.inputs = {'X': convert_float_to_uint16(x)}
self.attrs = {'dropout_prob': 1.0, 'fix_seed': True, 'is_test': False}
self.outputs = {
'Out':
convert_float_to_uint16(np.zeros((32, 64)).astype('float32')),
'Mask': np.zeros((32, 64)).astype('uint8')
}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X'], 'Out')
class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase):
def test_seed_cpu_place(self):
paddle.enable_static()
@@ -17,11 +17,12 @@ from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
from op_test import OpTest, convert_float_to_uint16
import paddle
import paddle.fluid as fluid
from paddle.fluid import compiler
from paddle.static import Program, program_guard
import paddle.fluid.core as core
# situation 1: have shape( list, no tensor), no actual shape(Tensor)
@@ -48,6 +49,33 @@ class TestReshapeOp(OpTest):
self.check_grad(["X"], "Out")
class TestReshapeBF16Op(OpTest):
def setUp(self):
self.init_data()
self.op_type = "reshape2"
self.dtype = np.uint16
x = np.random.random(self.ori_shape).astype("float32")
out = x.reshape(self.infered_shape)
self.inputs = {"X": convert_float_to_uint16(x)}
self.attrs = {"shape": self.new_shape}
self.outputs = {
"Out": convert_float_to_uint16(out),
'XShape': convert_float_to_uint16(
np.random.random(self.ori_shape).astype("float32"))
}
def init_data(self):
self.ori_shape = (2, 60)
self.new_shape = (12, 10)
self.infered_shape = (12, 10)
def test_check_output(self):
self.check_output(no_check_set=['XShape'])
def test_check_grad(self):
self.check_grad(["X"], "Out")
class TestReshapeOpDimInfer1(TestReshapeOp):
def init_data(self):
self.ori_shape = (5, 25)
@@ -17,7 +17,7 @@ from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
from op_test import OpTest, convert_float_to_uint16
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle
@@ -484,6 +484,35 @@ class TestFP16_2(OpTest):
numeric_grad_delta=0.5)
class TestBF16(OpTest):
def setUp(self):
self.op_type = "slice"
self.config()
self.inputs = {'Input': convert_float_to_uint16(self.input)}
self.outputs = {'Out': convert_float_to_uint16(self.out)}
self.attrs = {
'axes': self.axes,
'starts': self.starts,
'ends': self.ends,
'infer_flags': self.infer_flags
}
def config(self):
self.dtype = np.uint16
self.input = np.random.random([3, 4, 5, 6]).astype(np.float32)
self.starts = [-3, 0, 2]
self.ends = [3, 100, -1]
self.axes = [0, 1, 3]
self.out = self.input[-3:3, 0:100, :, 2:-1]
self.infer_flags = [1, 1, 1]
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['Input'], 'Out')
# Test python API
class TestSliceAPI(unittest.TestCase):
def test_1(self):
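For reference (illustrative only, not part of the commit): the attributes in the TestBF16 slice case above map onto plain numpy basic slicing with axis 2 left untouched, which is exactly how the test builds its expected output.

import numpy as np

inp = np.random.random([3, 4, 5, 6]).astype(np.float32)
# axes=[0, 1, 3], starts=[-3, 0, 2], ends=[3, 100, -1] -> axis 2 untouched;
# the negative start and the out-of-range end 100 clamp the same way numpy does
out = inp[-3:3, 0:100, :, 2:-1]
print(out.shape)  # (3, 4, 5, 3)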