Unverified commit a012d426, authored by mengqingchun02, committed by GitHub

Support fp16 of adam operator in xpu environment (#45292)

* support beam_search operator on xpu. test=kunlun

* support beam_search operator on xpu. test=kunlun

* support beam_search operator on xpu. test=kunlun

* support beam_search operator on xpu. test=kunlun

* support beam_search operator on xpu. test=kunlun

* support fp16 of adam operator in xpu environment. test=kunlun

* support fp16 of adam operator in xpu environment. test=kunlun

* support fp16 of adam operator in xpu environment. test=kunlun
Parent 0a67c2e5
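For context before the diffs: the change registers a float16 XPU kernel for the "adam" op, so Adam updates on Kunlun devices no longer require float32 parameters. Below is a minimal, illustrative sketch of how that path is typically reached from user code, assuming a PaddlePaddle build with XPU (Kunlun) support and that AMP level 'O2' is usable on that device; this snippet is not part of the commit.

# Illustrative sketch only. With AMP level 'O2' the parameters are kept in
# float16, so opt.step() dispatches the "adam" op on fp16 tensors, which is
# the case this commit adds an XPU kernel for.
import paddle

paddle.set_device("xpu")  # select the Kunlun device (assumes an XPU build)

model = paddle.nn.Linear(16, 4)
opt = paddle.optimizer.Adam(learning_rate=1e-3,
                            parameters=model.parameters())
model, opt = paddle.amp.decorate(models=model, optimizers=opt, level="O2")

x = paddle.ones([8, 16], dtype="float32")
with paddle.amp.auto_cast(level="O2"):
    loss = model(x).mean()
loss.backward()
opt.step()        # the Adam update now has an fp16 XPU kernel to land on
opt.clear_grad()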
@@ -34,7 +34,9 @@ XPUOpMap& get_kl2_ops() {
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                     pOpKernelType(vartype::FP16, XPUPlace())})},
      {"adamw", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
-     {"adam", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+     {"adam",
+      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
+                    pOpKernelType(vartype::FP16, XPUPlace())})},
      {"arg_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"argsort",
       XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
......
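The op-list entry above is also what the XPU test utilities consult when generating per-dtype test classes. A short sketch of how the new FP16 entry surfaces on the Python side; the helper names are taken from the test diff below, and the import path is assumed from the layout of Paddle's XPU unit tests.

# Assumed import path, mirroring Paddle's XPU unit-test files.
from xpu.get_test_cover_info import (create_test_class,
                                     get_xpu_op_support_types)

# With the registration above, the returned list is expected to contain
# 'float16' in addition to 'float32', so an fp16 variant of every
# XPUTestAdamOp case gets generated (see the loop at the end of the diff).
support_types = get_xpu_op_support_types('adam')
print(support_types)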
@@ -159,6 +159,57 @@ class XPUTestAdamOp(XPUOpTestWrapper):
             self.learning_rate = 0.001
             self.epsilon = 1e-8
 
+    class TestAdamOp7(TestAdamOp):
+        '''Test Adam Op with float16 accuracy
+        '''
+
+        def setUp(self):
+            self.init_dtype()
+            self.set_xpu()
+            self.op_type = "adam"
+            self.place = paddle.XPUPlace(0)
+            self.set_data()
+            self.set_attrs()
+            self.set_shape()
+            self.set_inputs()
+            self.set_steps()
+            param_out, moment1_out, \
+                moment2_out = adam_step(self.inputs, self.attrs)
+
+            self.outputs = {
+                'Moment1Out':
+                moment1_out,
+                'Moment2Out':
+                moment2_out,
+                'ParamOut':
+                param_out,
+                'Beta1PowOut':
+                np.array([self.beta1_pow]).astype("float16") * self.beta1,
+                'Beta2PowOut':
+                np.array([self.beta2_pow]).astype("float16") * self.beta2
+            }
+
+        def set_inputs(self):
+            param = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
+            grad = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
+            moment1 = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
+            # The second moment is positive
+            moment2 = np.random.random(self.shape).astype(self.dtype)
+
+            self.beta1_pow = self.beta1**10
+            self.beta2_pow = self.beta2**10
+
+            self.inputs = {
+                'Param': param,
+                'Grad': grad,
+                'Moment1': moment1,
+                'Moment2': moment2,
+                'LearningRate':
+                np.array([self.learning_rate]).astype("float16"),
+                'Beta1Pow': np.array([self.beta1_pow]).astype("float16"),
+                'Beta2Pow': np.array([self.beta2_pow]).astype("float16")
+            }
+
     class TestAdamOpMultipleSteps(TestAdamOp2):
         '''Test Adam Operator with supplied attributes
         '''
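The reference helper adam_step that TestAdamOp7 compares against is defined earlier in the test file and sits outside this hunk. As a readability aid, here is a hedged NumPy sketch of the canonical Adam update it is assumed to implement, written against the same input/attribute dictionaries the test builds.

# Sketch of the assumed reference update (not the file's actual adam_step).
import numpy as np

def adam_step_sketch(inputs, attributes):
    param = inputs['Param']
    grad = inputs['Grad']
    moment1 = inputs['Moment1']
    moment2 = inputs['Moment2']
    lr = inputs['LearningRate']
    beta1_pow = inputs['Beta1Pow']
    beta2_pow = inputs['Beta2Pow']
    beta1 = attributes['beta1']
    beta2 = attributes['beta2']
    epsilon = attributes['epsilon']

    # first and second moment estimates
    moment1_out = beta1 * moment1 + (1 - beta1) * grad
    moment2_out = beta2 * moment2 + (1 - beta2) * np.square(grad)
    # bias-corrected step size, then the element-wise parameter update
    lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
    param_out = param - lr_t * (moment1_out / (np.sqrt(moment2_out) + epsilon))
    return param_out, moment1_out, moment2_out

Consistent with this rule, the expected Beta1PowOut/Beta2PowOut in TestAdamOp7 are simply beta1_pow * beta1 and beta2_pow * beta2, i.e. the accumulated beta powers advance by one step.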
@@ -372,6 +423,60 @@ class TestSparseAdamOp(unittest.TestCase):
         self.check_with_place(paddle.XPUPlace(0), False)
 
 
+class TestSparseAdamOp1(TestSparseAdamOp):
+
+    def setup(self, scope, place, lazy_mode):
+        beta1 = 0.78
+        beta2 = 0.836
+        epsilon = 1e-4
+        beta1_pow = np.array([beta1**10]).astype("float16")
+        beta2_pow = np.array([beta2**10]).astype("float16")
+
+        height = 10
+        rows = [0, 4, 7]
+        self.rows = rows
+        row_numel = 12
+        self.row_numel = row_numel
+        self.dense_inputs = {
+            "Param": np.full((height, row_numel), 5.0).astype("float16"),
+            "Moment1": np.full((height, row_numel), 5.0).astype("float16"),
+            "Moment2": np.full((height, row_numel), 5.0).astype("float16"),
+            'Beta1Pow': beta1_pow,
+            'Beta2Pow': beta2_pow,
+            "LearningRate": np.full((1), 2.0).astype("float16")
+        }
+        self.init_output = np.full((height, row_numel), 0.0).astype("float16")
+        self.attrs = {
+            'epsilon': epsilon,
+            'beta1': beta1,
+            'beta2': beta2,
+            'min_row_size_to_use_multithread': 2
+        }
+
+        grad_selected_rows = scope.var('Grad').get_selected_rows()
+        grad_selected_rows.set_height(height)
+        grad_selected_rows.set_rows(rows)
+        np_array = np.ones((len(rows), row_numel)).astype("float16")
+        np_array[0, 0] = 2.0
+        np_array[2, 8] = 4.0
+
+        grad_tensor = grad_selected_rows.get_tensor()
+        grad_tensor.set(np_array, place)
+
+        self.sparse_inputs = ["Grad"]
+
+        param_out, mom1, mom2 = adam_step_sparse(self.dense_inputs, self.attrs,
+                                                 height, rows, row_numel,
+                                                 np_array, lazy_mode)
+        self.outputs = {
+            "ParamOut": param_out,
+            "Moment1Out": mom1,
+            "Moment2Out": mom2,
+            'Beta1PowOut': beta1_pow * beta1,
+            'Beta2PowOut': beta2_pow * beta2
+        }
+
+
 support_types = get_xpu_op_support_types('adam')
 for stype in support_types:
     create_test_class(globals(), XPUTestAdamOp, stype)
......
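Likewise, adam_step_sparse used by TestSparseAdamOp1 is an existing reference helper that this diff does not show. Below is a hedged sketch of the row-wise semantics it is assumed to follow: the gradient arrives as a SelectedRows tensor, so in lazy mode only the listed rows are updated, while in non-lazy mode absent rows behave as a zero gradient. In TestSparseAdamOp1 every dense input, the gradient rows, and the learning rate are float16, so the sparse adam path is exercised end to end in half precision.

# Sketch of the assumed sparse reference update (not the file's actual
# adam_step_sparse); the signature mirrors the call site in the test above.
import numpy as np

def adam_step_sparse_sketch(inputs, attrs, height, rows, row_numel, np_grad,
                            lazy_mode):
    param = inputs['Param']
    lr = inputs['LearningRate']
    m1, m2 = inputs['Moment1'], inputs['Moment2']
    beta1_pow, beta2_pow = inputs['Beta1Pow'], inputs['Beta2Pow']
    beta1, beta2, eps = attrs['beta1'], attrs['beta2'], attrs['epsilon']

    param_out, m1_out, m2_out = np.copy(param), np.copy(m1), np.copy(m2)

    def update_row(i, grad_row):
        m1_out[i] = beta1 * m1[i] + (1 - beta1) * grad_row
        m2_out[i] = beta2 * m2[i] + (1 - beta2) * np.square(grad_row)
        lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
        param_out[i] = param[i] - lr_t * (m1_out[i] /
                                          (np.sqrt(m2_out[i]) + eps))

    if lazy_mode:
        # only the rows present in the SelectedRows gradient are touched
        for idx, i in enumerate(rows):
            update_row(i, np_grad[idx])
    else:
        # dense semantics: rows without a gradient entry act as zero gradient
        grad = np.zeros((height, row_numel), dtype=np_grad.dtype)
        grad[rows] = np_grad
        for i in range(height):
            update_row(i, grad[i])
    return param_out, m1_out, m2_out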