diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc
index 9fa7ef0b4607dc73d99ab925e0e05c67922aa93c..e53ce8cc67c08269e15a20e2cd2fc57a2c5ace17 100644
--- a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc
+++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc
@@ -82,7 +82,7 @@ class LargeScaleFuseAdamOpMaker : public framework::OpProtoAndCheckerMaker {
 
     AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
     AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");
-
+    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
 
     AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
     AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator");
@@ -150,6 +150,4 @@ REGISTER_OPERATOR(
 
 REGISTER_OP_CPU_KERNEL(
     lookup_sparse_table_fuse_adam,
-    ops::LargeScaleFuseAdamOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::LargeScaleFuseAdamOpKernel<paddle::platform::CPUDeviceContext,
-                                    double>);
+    ops::LargeScaleFuseAdamOpKernel<paddle::platform::CPUDeviceContext, float>);
diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h
index 7e6cb74c83ebcce9fcf46fdb80a7f126c4525225..77dbe4f6072ff41258e8cbef92cbcd55cc2e7699 100644
--- a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h
+++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/distributed/large_scale_kv.h"
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
 
 namespace paddle {
 namespace operators {
@@ -37,8 +38,9 @@ class LargeScaleFuseAdamOpKernel
 
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    const auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate");
+    using paddle::framework::LoDTensor;
+    const auto *learning_rate = ctx.Input<LoDTensor>("LearningRate");
     const auto *grad_var = ctx.InputVar("Grad");
 
     PADDLE_ENFORCE(
@@ -56,8 +58,8 @@ class LargeScaleFuseAdamOpKernel
 
     framework::SelectedRows tmp_grad_merge;
    const framework::SelectedRows *grad_merge_ptr;
-    math::scatter::MergeAdd<DeviceContext, T> merge_func;
-    merge_func(context.template device_context<DeviceContext>(), *in_grad,
+    math::scatter::MergeAdd<platform::CPUDeviceContext, T> merge_func;
+    merge_func(ctx.template device_context<platform::CPUDeviceContext>(), grad,
                &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;
 
@@ -71,8 +73,8 @@ class LargeScaleFuseAdamOpKernel
     auto grad_width = grad_v.dims()[1];
 
     //    auto is_entry = context.Attr<bool>("is_entry");
-    auto tablename = context.Attr<std::string>("tablename");
-    auto value_names = Attr<std::vector<std::string>>("value_names");
+    auto tablename = ctx.Attr<std::string>("tablename");
+    auto value_names = ctx.Attr<std::vector<std::string>>("value_names");
 
     auto *beta1_pow = ctx.Input<LoDTensor>("Beta1Pow");
     auto *beta2_pow = ctx.Input<LoDTensor>("Beta2Pow");
@@ -116,11 +118,11 @@ class LargeScaleFuseAdamOpKernel
     auto &moment_1 = values[1];
     auto &moment_2 = values[2];
 
-    T lr = *lr_;
+    T lr_ = lr[0];
     T beta1_ = beta1_pow->data<T>()[0];
     T beta2_ = beta2_pow->data<T>()[0];
 
-    lr *= sqrt(1 - beta1_) / (1 - beta2_);
+    lr_ *= sqrt(1 - beta1_) / (1 - beta2_);
 
     for (size_t i = 0; i < in_rows.size(); i++) {
       auto *m1_data = moment_1[i]->data();
@@ -131,7 +133,7 @@ class LargeScaleFuseAdamOpKernel
         auto g = grad_v.data<T>()[grad_width * i + x];
         m1_data[x] = beta1_ * m1_data[x] + (1 - beta1_) * g;
         m2_data[x] = beta2_ * m2_data[x] + (1 - beta2_) * g * g;
-        p_data[x] -= lr * (m1_data[x] / (sqrt(m2_data[x]) + epsilon));
+        p_data[x] -= lr_ * (m1_data[x] / (sqrt(m2_data[x]) + epsilon));
       }
     }
   }
diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc
index 78b0c8c1da95f15eb11c3821ff0b60df9cf35aa2..010658b5280d7feeb683112b401dbcaaa265daac 100644
--- a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc
+++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc
@@ -79,7 +79,7 @@ class LargeScaleFuseSGDOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Grad",
              "(SelectedRows) Ids's type should be SelectedRows"
              "THe ids to be looked up in W.");
-
+    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
     AddAttr<bool>("is_entry",
                   "(bool)"
                   "sparse table need entry");
@@ -117,5 +117,4 @@ REGISTER_OPERATOR(
 
 REGISTER_OP_CPU_KERNEL(
     lookup_sparse_table_fuse_sgd,
-    ops::LargeScaleFuseSGDOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::LargeScaleFuseSGDOpKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::LargeScaleFuseSGDOpKernel<paddle::platform::CPUDeviceContext, float>);
diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h
index 8b653b0b724f4e162759fc30a726d87af9efd222..bda4df49f9598a72ce804cab1b7ba5bd69a32ba5 100644
--- a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h
+++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/distributed/large_scale_kv.h"
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
 
 namespace paddle {
 namespace operators {
@@ -56,8 +57,8 @@ class LargeScaleFuseSGDOpKernel
 
     framework::SelectedRows tmp_grad_merge;
     const framework::SelectedRows *grad_merge_ptr;
-    math::scatter::MergeAdd<DeviceContext, T> merge_func;
-    merge_func(context.template device_context<DeviceContext>(), *in_grad,
+    math::scatter::MergeAdd<platform::CPUDeviceContext, T> merge_func;
+    merge_func(ctx.template device_context<platform::CPUDeviceContext>(), grad,
                &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;
 
@@ -71,8 +72,8 @@ class LargeScaleFuseSGDOpKernel
     auto grad_width = grad_v.dims()[1];
 
     //    auto is_entry = context.Attr<bool>("is_entry");
-    auto tablename = context.Attr<std::string>("tablename");
-    auto value_names = Attr<std::vector<std::string>>("value_names");
+    auto tablename = ctx.Attr<std::string>("tablename");
+    auto value_names = ctx.Attr<std::vector<std::string>>("value_names");
 
     std::vector<std::vector<std::vector<float> *>> values;
     std::vector<int64_t> dims;
@@ -88,15 +89,16 @@ class LargeScaleFuseSGDOpKernel
 
     auto &params = values[0];
 
-    auto blas = math::GetBlas<DeviceContext, T>(context);
+    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(ctx);
 
     std::vector<T> grads;
-    framework::TensorToVector(grad_v, context.device_context(), &grads);
+    framework::TensorToVector(grad_v, ctx.device_context(), &grads);
 
-    blas.VMUL(grads, lr[0], grads);
+    blas.SCAL(grads.size(), lr[0], grads.data());
 
     for (int x = 0; x < static_cast<int>(in_rows.size()); ++x) {
-      blas.VSUB(grad_width, params[x], grads.data() + grad_width * x, params);
+      blas.VSUB(grad_width, params[x]->data(), grads.data() + grad_width * x,
+                params[x]->data());
     }
   }
 };
diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py
index 89ae1ee1db1de97b61441cc901bc559d2dfa26fe..179fa4f574f62f52c3790f1bebab7ebdf897d5dd 100644
--- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py
+++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py
@@ -657,13 +657,15 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False):
         if op.type == "sgd":
             grad = main_program.global_block().vars[op.input("Grad")[0]]
+            lr = main_program.global_block().vars[op.input("LearningRate")[0]]
 
             # remove origin optimzier op
             block._remove_op(opt_idx)
             block._insert_op(
                 opt_idx,
                 type="lookup_sparse_table_fuse_sgd",
-                inputs={"Grad": grad},
+                inputs={"Grad": grad,
+                        "LearningRate": lr},
                 attrs={
                     "is_entry": is_entry,
                     "tablename": table_name,
@@ -672,6 +674,7 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False):
 
         elif op.type == "adam":
             grad = main_program.global_block().vars[op.input("Grad")[0]]
+            lr = main_program.global_block().vars[op.input("LearningRate")[0]]
             beta1_pow = main_program.global_block().vars[op.input("Beta1Pow")[
                 0]]
             beta2_pow = main_program.global_block().vars[op.input("Beta2Pow")[
@@ -693,6 +696,7 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False):
                 type="lookup_sparse_table_fuse_adam",
                 inputs={
                     "Grad": grad,
+                    "LearningRate": lr,
                     "Beta1Pow": beta1_pow,
                     "Beta2Pow": beta2_pow
                 },
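
For reference, the following is a minimal NumPy sketch (not part of the patch, all names illustrative) of the row-wise update the repaired lookup_sparse_table_fuse_sgd kernel computes: the merged SelectedRows gradient block is scaled by the scalar learning rate (the blas.SCAL call) and each scaled row is then subtracted from the corresponding looked-up parameter vector (the per-row blas.VSUB call).

    import numpy as np

    def fused_sgd_update(table, grad_rows, grad_values, lr):
        """Sparse SGD step over a SelectedRows-style gradient.

        table       -- dict: row id -> 1-D parameter vector (the looked-up rows)
        grad_rows   -- row ids present in the merged gradient
        grad_values -- 2-D array, one gradient row per entry in grad_rows
        lr          -- scalar learning rate
        """
        scaled = lr * grad_values       # analogue of blas.SCAL over the whole block
        for i, row in enumerate(grad_rows):
            table[row] -= scaled[i]     # analogue of blas.VSUB per table row
        return table

    # Toy usage: two table rows of width 4, lr = 0.5.
    table = {3: np.ones(4), 7: np.full(4, 2.0)}
    fused_sgd_update(table, [3, 7], np.array([[0.1] * 4, [0.2] * 4]), lr=0.5)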