diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index 3189590645e7b1b896dbfcdcd9ab6bb68fe7f0cc..b5a3f0154745b9425c3dfc45a129117238fa80de 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -13,7 +13,7 @@ if(NOT XPU_SDK_ROOT)
   elseif(WITH_SUNWAY)
     SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
   else()
-    SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_02_19.tar.gz" CACHE STRING "" FORCE)
+    SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_02_27.tar.gz" CACHE STRING "" FORCE)
   endif()
 
   SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
diff --git a/paddle/fluid/operators/optimizers/lamb_op_xpu.cc b/paddle/fluid/operators/optimizers/lamb_op_xpu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e7cbe4aa8dd4b36983dca5413ccdcb8ceac63a3c
--- /dev/null
+++ b/paddle/fluid/operators/optimizers/lamb_op_xpu.cc
@@ -0,0 +1,125 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/optimizers/lamb_op.h"
+#include "gflags/gflags.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+#ifdef PADDLE_WITH_XPU
+template <typename DeviceContext, typename T>
+class LambOpXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    using paddle::framework::LoDTensor;
+    const auto* param_var = ctx.InputVar("Param");
+    PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
+                      platform::errors::InvalidArgument(
+                          "The Var(%s)'s type should be LoDTensor, "
+                          "but the received is %s",
+                          ctx.InputNames("Param").front(),
+                          framework::ToTypeName(param_var->Type())));
+
+    using paddle::framework::LoDTensor;
+
+    // inputs
+    T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
+    T weight_decay = static_cast<T>(ctx.Attr<float>("weight_decay"));
+    T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
+    T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
+    auto& param = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Param"), "Input",
+                                  "Param", "Lamb");
+    auto* grad_var = ctx.InputVar("Grad");
+    auto& mom1 = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment1"), "Input",
+                                 "Moment1", "Lamb");
+    auto& mom2 = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment2"), "Input",
+                                 "Moment2", "Lamb");
+    auto& lr = GET_DATA_SAFELY(ctx.Input<LoDTensor>("LearningRate"), "Input",
+                               "LearningRate", "Lamb");
+
+    auto& beta1_pow = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Beta1Pow"), "Input",
+                                      "Beta1Pow", "Lamb");
+    auto& beta2_pow = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Beta2Pow"), "Input",
+                                      "Beta2Pow", "Lamb");
+
+    auto& param_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("ParamOut"),
+                                      "Output", "ParamOut", "Lamb");
+    auto& mom1_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Moment1Out"),
+                                     "Output", "Moment1Out", "Lamb");
+    auto& mom2_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Moment2Out"),
+                                     "Output", "Moment2Out", "Lamb");
+    auto& beta1_pow_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Beta1PowOut"),
+                                          "Output", "Beta1PowOut", "Lamb");
+    auto& beta2_pow_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Beta2PowOut"),
"Output", "Beta2PowOut", "Lamb"); + auto& dev_ctx = ctx.template device_context(); + + if (grad_var->IsType()) { + auto& grad = *ctx.Input("Grad"); + int r = xpu::lamb(dev_ctx.x_context(), grad.template data(), + mom1.template data(), mom2.template data(), + param.template data(), beta1_pow.template data(), + beta2_pow.template data(), beta1, beta2, epsilon, + weight_decay, lr.template data(), + mom1_out.template mutable_data(ctx.GetPlace()), + mom2_out.template mutable_data(ctx.GetPlace()), + param_out.template mutable_data(ctx.GetPlace()), + beta1_pow_out.template mutable_data(ctx.GetPlace()), + beta2_pow_out.template mutable_data(ctx.GetPlace()), + param.numel()); + + if (r == xpu::Error_t::INVALID_PARAM) { + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::InvalidArgument( + "XPU kernel error of LambOp, error message: INVALID_PARAM, " + "please check your input & output.")); + } else if (r == xpu::Error_t::RUNTIME_ERROR) { + PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, + platform::errors::Unavailable( + "XPU kernel error of LambOp, error message: " + "RUNTIME_ERROR, please check whether Baidu " + "Kunlun Card is properly installed.")); + } else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) { + PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, + platform::errors::ResourceExhausted( + "XPU kernel error of LambOp, error " + "message: NO_ENOUGH_WORKSPACE, XPU " + "has no enough memory.")); + } else { + PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, + platform::errors::ResourceExhausted( + "XPU kernel error of LambOp, error " + "message: OTHER " + "XPU API returns error code: %d.", + r)); + } + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Variable type not supported by lamb_op. Expect LoDTensor, " + "but got %s", + framework::ToTypeName(param_var->Type()))); + } + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_XPU_KERNEL( + lamb, ops::LambOpXPUKernel); +#endif diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py new file mode 100644 index 0000000000000000000000000000000000000000..0e1714f1922de1c4f481590d92302f748bf4dc0f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py @@ -0,0 +1,121 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from __future__ import print_function
+import sys
+sys.path.append("..")
+import unittest
+import numpy as np
+from op_test_xpu import XPUOpTest
+from paddle.fluid import core
+from paddle.fluid.op import Operator
+import paddle.fluid as fluid
+import paddle
+
+
+class TestLambOp1(XPUOpTest):
+    def set_attrs(self):
+        self.attrs = {
+            'epsilon': 1e-6,
+            'beta1': 0.9,
+            'beta2': 0.999,
+            'weight_decay': 0.01
+        }
+
+    def setUp(self):
+        '''Test Lamb Op with supplied attributes
+        '''
+        self.op_type = "lamb"
+        param = np.random.uniform(-1, 1, 5000).astype("float32")
+        grad = np.random.uniform(-1, 1, 5000).astype("float32")
+        moment1 = np.random.uniform(-1, 1, 5000).astype("float32")
+        moment2 = np.random.random(5000).astype("float32")
+
+        self.set_attrs()
+        learning_rate = 0.001
+        beta1_pow = self.attrs['beta1']
+        beta2_pow = self.attrs['beta2']
+
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment1': moment1,
+            'Moment2': moment2,
+            'LearningRate': np.array([learning_rate]).astype("float32"),
+            'Beta1Pow': np.array([beta1_pow]).astype("float32"),
+            'Beta2Pow': np.array([beta2_pow]).astype("float32")
+        }
+
+        param_out, moment1_out, moment2_out, \
+            beta1_pow_out, beta2_pow_out = lamb_step(self.inputs, self.attrs)
+
+        self.outputs = {
+            'Moment1Out': moment1_out,
+            'Moment2Out': moment2_out,
+            'ParamOut': param_out,
+            'Beta1PowOut': beta1_pow_out,
+            'Beta2PowOut': beta2_pow_out
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(paddle.XPUPlace(0))
+
+
+def lamb_step(inputs, attributes):
+    '''
+    Simulate one step of the lamb optimizer
+    :param inputs: dict of inputs
+    :param attributes: dict of attributes
+    :return tuple: tuple of output param, moment1, moment2,
+        beta1 power accumulator and beta2 power accumulator
+    '''
+    param = inputs['Param']
+    grad = inputs['Grad']
+    moment1 = inputs['Moment1']
+    moment2 = inputs['Moment2']
+    lr = inputs['LearningRate']
+    beta1_pow = inputs['Beta1Pow']
+    beta2_pow = inputs['Beta2Pow']
+
+    beta1 = attributes['beta1']
+    beta2 = attributes['beta2']
+    epsilon = attributes['epsilon']
+    weight_decay = attributes['weight_decay']
+
+    moment1_out = beta1 * moment1 + (1 - beta1) * grad
+    moment2_out = beta2 * moment2 + (1 - beta2) * np.square(grad)
+
+    moment1_unbiased = moment1_out / (1 - beta1_pow)
+    moment2_unbiased = moment2_out / (1 - beta2_pow)
+
+    r_1 = np.linalg.norm(param)
+    r_2 = np.linalg.norm(moment1_unbiased / (np.sqrt(moment2_unbiased) + epsilon
+                                             ) + weight_decay * param)
+    if r_1 > 0.0 and r_2 > 0.0:
+        lr_t = lr * r_1 / r_2
+    else:
+        lr_t = 1.0
+
+    param_out = param - lr_t * (moment1_unbiased / (
+        np.sqrt(moment2_unbiased) + epsilon) + weight_decay * param)
+
+    beta1_pow_out = beta1_pow * beta1
+    beta2_pow_out = beta2_pow * beta2
+
+    return param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()
diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py
index 872fd857381d024d25b7d15c30c9effa96527b4a..0420a71fdfc85dd1aef400a01580f8df18b9e68b 100644
--- a/tools/static_mode_white_list.py
+++ b/tools/static_mode_white_list.py
@@ -695,4 +695,5 @@ STATIC_MODE_TESTING_LIST = [
     'test_shape_op_xpu',
     'test_slice_op_xpu',
     'test_generate_proposals_v2_op',
+    'test_lamb_op_xpu',
 ]