diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index f4f1ff479c72f5722ab739ffae004e8aae277129..e13009cca8aa1a5d2542f2049b7ecfe1e768a246 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so") if(NOT DEFINED XPU_BASE_URL) set(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev") - set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221124") + set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221201") else() set(XPU_BASE_URL "${XPU_BASE_URL}") endif() diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index d1c1c361a9b3ba4dd3f9c85e121fb44bc47d1eb4..835d52bdf4eea074003460cc4a70f8a0b5b46342 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -117,7 +117,8 @@ XPUOpMap& get_kl2_ops() { {"clip_by_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"coalesce_tensor", - XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, {"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, diff --git a/paddle/phi/kernels/xpu/rmsprop_kernel.cc b/paddle/phi/kernels/xpu/rmsprop_kernel.cc index c95076933cd2bdbcd559e33670c78c79b7ef4188..ddf02313701e4b314a9222b4ab2dd1b776c04fc4 100644 --- a/paddle/phi/kernels/xpu/rmsprop_kernel.cc +++ b/paddle/phi/kernels/xpu/rmsprop_kernel.cc @@ -37,12 +37,6 @@ void RmspropDenseKernel(const Context& dev_ctx, DenseTensor* moment_out, DenseTensor* mean_square_out, DenseTensor* mean_grad_out) { - // check input - PADDLE_ENFORCE_EQ(centered, - false, - errors::Unimplemented( - "centered=True is not supported in the xpu kernel of " - "rmsprop. use XPU_BLACK_LIST to disable this op.")); // copy learning_rate to cpu PADDLE_ENFORCE_EQ( learning_rate.dims().size(), @@ -62,23 +56,56 @@ void RmspropDenseKernel(const Context& dev_ctx, dev_ctx.template Alloc(moment_out); dev_ctx.template Alloc(mean_square_out); - // int rmsprop(Context* ctx, const T* g, const T* p, const float* ms, const - // float* mom, T* p_out, float* ms_out, float* mom_out, float epsilon, float - // rho, float momentum, float lr, int n); - int r = xpu::rmsprop(dev_ctx.x_context(), - grad.data(), - param.data(), - mean_square.data(), - moment.data(), - param_out->data(), - mean_square_out->data(), - moment_out->data(), - epsilon, - decay, - momentum, - learning_rate_cpu, - param.numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "rmsprop"); + if (centered) { + dev_ctx.template Alloc(mean_grad_out); + auto mg_tensor = mean_grad.get_ptr(); + if (mg_tensor) { + PADDLE_ENFORCE_EQ( + mg_tensor->Holder(), + mean_grad_out->Holder(), + phi::errors::InvalidArgument( + "MeanGrad and MeanGradOut must be the same Tensor")); + } else { + PADDLE_ENFORCE_EQ( + mg_tensor, + mean_grad_out, + phi::errors::InvalidArgument( + "MeanGrad and MeanGradOut must be the same Tensor")); + } + int r = xpu::rmsprop(dev_ctx.x_context(), + grad.data(), + param.data(), + mean_square.data(), + moment.data(), + param_out->data(), + mean_square_out->data(), + moment_out->data(), + epsilon, + decay, + momentum, + learning_rate_cpu, + param.numel(), + centered, + mg_tensor->data(), + mean_grad_out->data()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "centered rmsprop"); + + } else { + int r = xpu::rmsprop(dev_ctx.x_context(), + grad.data(), + param.data(), + mean_square.data(), + moment.data(), + param_out->data(), + mean_square_out->data(), + moment_out->data(), + epsilon, + decay, + momentum, + learning_rate_cpu, + param.numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "uncentered rmsprop"); + } } } // namespace phi diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py index c2d6ff05870eb09dc287e341b379cc48ca475fd4..2a94fcd60fa732be444ad05f8e12e00d6322979e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py @@ -27,7 +27,9 @@ from xpu.get_test_cover_info import ( ) import paddle +import paddle.fluid as fluid import paddle.fluid.core as core +from paddle.fluid.op import Operator paddle.enable_static() @@ -161,6 +163,185 @@ class XPUTestRMSPropOP(XPUOpTestWrapper): self.momentum = 0.002 +class TestBase(unittest.TestCase): + def setup( + self, place, is_sparse, centered, size, row_num=None, epsilon=1e-6 + ): + np.random.seed(5) # fix seed + + self.scope = fluid.global_scope() + self.place = place + + self.param_name = "param" + self.param = np.random.random(size).astype("float32") + + self.mean_square_name = "mean_square" + self.mean_square = np.random.uniform(low=1, high=2, size=size).astype( + "float32" + ) + + self.mean_grad_name = "mean_grad" + self.mean_grad = np.random.random(size).astype("float32") + + self.lr_name = "lr" + self.learning_rate = np.array([0.01]).astype("float32") + + self.grad_name = "grad" + + self.is_sparse = is_sparse + self.grad = np.random.random(size).astype("float32") + grad_tensor = self.scope.var(self.grad_name).get_tensor() + grad_tensor.set(self.grad, place) + + self.moment_name = "moment" + self.moment = np.random.uniform(low=0, high=1, size=size).astype( + "float32" + ) + + self.epsilon = epsilon + self.decay = 0.9 + self.momentum = 0.1 + self.centered = centered + + self.ms_out = ( + self.decay * self.mean_square + + (1 - self.decay) * self.grad * self.grad + ) + if centered: + self.mg_out = ( + self.decay * self.mean_grad + (1 - self.decay) * self.grad + ) + self.moment_out = ( + self.momentum * self.moment + + self.learning_rate + * self.grad + / np.sqrt(self.ms_out - np.square(self.mg_out) + self.epsilon) + ) + else: + self.moment_out = ( + self.momentum * self.moment + + self.learning_rate + * self.grad + / np.sqrt(self.ms_out + self.epsilon) + ) + + self.param_out = self.param - self.moment_out + + # create and initialize Param Variable + self.param_tensor = self.scope.var(self.param_name).get_tensor() + self.param_tensor.set(self.param, place) + + self.mean_square_tensor = self.scope.var( + self.mean_square_name + ).get_tensor() + self.mean_square_tensor.set(self.mean_square, place) + + lr = self.scope.var(self.lr_name).get_tensor() + lr.set(self.learning_rate, place) + + self.moment_tensor = self.scope.var(self.moment_name).get_tensor() + self.moment_tensor.set(self.moment, place) + + if self.centered: + self.mean_grad_tensor = self.scope.var( + self.mean_grad_name + ).get_tensor() + self.mean_grad_tensor.set(self.mean_grad, place) + + def check(self, actual_t, expect_t, place, out_name, atol=1e-5): + np.testing.assert_allclose( + actual_t, + expect_t, + rtol=1e-05, + atol=atol, + err_msg='Output (' + + out_name + + ') has diff at ' + + str(place) + + '\nExpect ' + + str(expect_t) + + '\n' + + 'But Got' + + str(actual_t), + ) + + +class TestRmspropOp(TestBase): + def check_with_place( + self, place, is_sparse, centered, size, row_num=None, epsilon=1e-6 + ): + self.setup(place, is_sparse, centered, size, row_num, epsilon) + self.run_and_check() + + def run_and_check(self): + grad_name = self.grad_name + + kwargs = { + 'Param': self.param_name, + 'Grad': grad_name, + 'MeanSquare': self.mean_square_name, + 'Moment': self.moment_name, + 'LearningRate': self.lr_name, + 'ParamOut': self.param_name, + 'MeanSquareOut': self.mean_square_name, + 'MomentOut': self.moment_name, + 'epsilon': self.epsilon, + 'decay': self.decay, + 'momentum': self.momentum, + 'centered': self.centered, + } + + if self.centered: + kwargs['MeanGrad'] = self.mean_grad_name + kwargs['MeanGradOut'] = self.mean_grad_name + + rmsprop_op = Operator('rmsprop', **kwargs) + atol = 1e-6 + + rmsprop_op.run(self.scope, self.place) + + self.check( + np.array(self.mean_square_tensor), + self.ms_out, + self.place, + self.mean_square_name, + atol=atol, + ) + self.check( + np.array(self.moment_tensor), + self.moment_out, + self.place, + self.moment_name, + atol=atol, + ) + self.check( + np.array(self.param_tensor), + self.param_out, + self.place, + self.param_name, + atol=atol, + ) + + if self.centered: + self.check( + np.array(self.mean_grad_tensor), + self.mg_out, + self.place, + self.mean_grad_name, + ) + + def test_rmsprop(self): + places = [core.XPUPlace(0)] + + size = (128, 320) + for place in places: + for centered in [False, True]: + with fluid.scope_guard(core.Scope()): + self.check_with_place( + place, is_sparse=False, centered=centered, size=size + ) + + support_types = get_xpu_op_support_types('rmsprop') for stype in support_types: create_test_class(globals(), XPUTestRMSPropOP, stype)