Unverified Commit ca871957 authored by H hong, committed by GitHub

Move meshgrid to phi (#40994)

* move momentum, rmsprop to phi; test=develop

* update

* update

* update

* update

* update; test=develop

* fix xpu npu bugs; test=develop

* fix npu bug; test=develop

* fix windows compile error; test=develop

* fix windows compile error; test=develop

* polish code; test=develop

* fix conflict; test=develop

* add meshgrid;

* update

* polish code

* polish code;

* fix bug

* format; remove useless code

* fix npu bug

* fix bug
Parent e77a947e
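
For context on the title: moving meshgrid to phi means the operator's compute logic is registered as phi kernels rather than through the fluid OpKernel registrations removed below. A rough, hedged sketch of the kernel interface this migration targets (parameter names are illustrative and may differ from the actual phi headers):

template <typename T, typename Context>
void MeshgridKernel(const Context& dev_ctx,
                    const std::vector<const DenseTensor*>& inputs,
                    std::vector<DenseTensor*> outputs);

template <typename T, typename Context>
void MeshgridGradKernel(const Context& dev_ctx,
                        const std::vector<const DenseTensor*>& inputs,
                        const std::vector<const DenseTensor*>& outputs_grad,
                        std::vector<DenseTensor*> inputs_grad);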
......@@ -280,6 +280,46 @@ struct SelectedRowsAddToTensor<platform::CPUDeviceContext, T> {
}
};
template <typename T>
struct SelectedRowsAddToTensor<phi::CPUContext, T> {
void operator()(const phi::CPUContext& context,
const phi::SelectedRows& input1, framework::Tensor* input2) {
if (UNLIKELY(input1.rows().size() == 0)) {
LOG(WARNING) << "input selected rows is empty!";
return;
}
auto in1_height = input1.height();
auto in2_dims = input2->dims();
PADDLE_ENFORCE_EQ(
in1_height, in2_dims[0],
platform::errors::InvalidArgument("The two inputs height must be equal."
"But recieved first input height = "
"[%d], second input height = [%d]",
in1_height, in2_dims[0]));
auto& in1_value = input1.value();
auto& in1_rows = input1.rows();
int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
PADDLE_ENFORCE_EQ(
in1_row_numel, input2->numel() / in1_height,
platform::errors::InvalidArgument(
"The two inputs width must be equal."
"But recieved first input width = [%d], second input width = [%d]",
in1_row_numel, input2->numel() / in1_height));
auto* in1_data = in1_value.data<T>();
auto* input2_data = input2->data<T>();
for (size_t i = 0; i < in1_rows.size(); i++) {
for (int64_t j = 0; j < in1_row_numel; j++) {
input2_data[in1_rows[i] * in1_row_numel + j] +=
in1_data[i * in1_row_numel + j];
}
}
}
};
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, float>;
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, double>;
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, int>;
......@@ -287,6 +327,11 @@ template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, int64_t>;
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext,
platform::bfloat16>;
template struct SelectedRowsAddToTensor<phi::CPUContext, float>;
template struct SelectedRowsAddToTensor<phi::CPUContext, double>;
template struct SelectedRowsAddToTensor<phi::CPUContext, int>;
template struct SelectedRowsAddToTensor<phi::CPUContext, int64_t>;
template struct SelectedRowsAddToTensor<phi::CPUContext, platform::bfloat16>;
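
With these phi::CPUContext specializations in place, a phi-based kernel can reuse the same functor that the fluid kernels use. A minimal hedged usage sketch (variable names such as cpu_ctx, selected_rows_grad, and dense_moment are illustrative, not from this diff):

// Accumulate each selected row of a SelectedRows gradient into the matching
// row of a dense tensor on CPU.
paddle::operators::math::SelectedRowsAddToTensor<phi::CPUContext, float> add_to;
add_to(cpu_ctx, selected_rows_grad, &dense_moment);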
// This is a separate namespace for manipulating SelectedRows typed
// data, e.g. merging duplicated rows, adding two SelectedRows, etc.
//
......
......@@ -174,12 +174,77 @@ struct SelectedRowsAddTensor<platform::CUDADeviceContext, T> {
}
};
template <typename T>
struct SelectedRowsAddTensor<phi::GPUContext, T> {
void operator()(const phi::GPUContext& context,
const phi::SelectedRows& input1,
const framework::Tensor& input2, framework::Tensor* output) {
auto in1_height = input1.height();
auto in2_dims = input2.dims();
auto out_dims = output->dims();
PADDLE_ENFORCE_EQ(
in1_height, in2_dims[0],
platform::errors::InvalidArgument(
"The two inputs height must be equal."
"But recieved first input height = [%d], first input height = [%d]",
in1_height, in2_dims[0]));
PADDLE_ENFORCE_EQ(
in1_height, out_dims[0],
platform::errors::InvalidArgument(
"The input and output height must be equal."
"But recieved input height = [%d], output height = [%d]",
in1_height, out_dims[0]));
auto& in1_value = input1.value();
auto& in1_rows = input1.rows();
int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
PADDLE_ENFORCE_EQ(
in1_row_numel, input2.numel() / in1_height,
platform::errors::InvalidArgument(
"The two inputs width must be equal."
"But recieved first input width = [%d], second input width = [%d]",
in1_row_numel, input2.numel() / in1_height));
PADDLE_ENFORCE_EQ(
in1_row_numel, output->numel() / in1_height,
platform::errors::InvalidArgument(
"The input and output width must be equal."
"But recieved input width = [%d], output width = [%d]",
in1_row_numel, output->numel() / in1_height));
auto* in1_data = in1_value.data<T>();
auto* in2_data = input2.data<T>();
auto* out_data = output->data<T>();
phi::funcs::SetConstant<phi::GPUContext, T> functor;
functor(context, output, static_cast<T>(0));
const int block_size = 256;
dim3 threads(block_size, 1);
dim3 grid(in1_rows.size(), 1);
paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
SelectedRowsAddTensorKernel<
T, block_size><<<grid, threads, 0, context.stream()>>>(
in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), out_data,
in1_row_numel);
auto out_eigen = framework::EigenVector<T>::Flatten(*output);
auto in2_eigen = framework::EigenVector<T>::Flatten(input2);
out_eigen.device(*context.eigen_device()) = out_eigen + in2_eigen;
}
};
template struct SelectedRowsAddTensor<platform::CUDADeviceContext, float>;
template struct SelectedRowsAddTensor<platform::CUDADeviceContext, double>;
template struct SelectedRowsAdd<platform::CUDADeviceContext, platform::float16>;
template struct SelectedRowsAddTensor<platform::CUDADeviceContext,
platform::float16>;
template struct SelectedRowsAddTensor<phi::GPUContext, float>;
template struct SelectedRowsAddTensor<phi::GPUContext, double>;
template struct SelectedRowsAdd<phi::GPUContext, platform::float16>;
template struct SelectedRowsAddTensor<phi::GPUContext, platform::float16>;
template <typename T>
struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
void operator()(const platform::CUDADeviceContext& context,
......@@ -285,12 +350,54 @@ struct SelectedRowsAddToTensor<platform::CUDADeviceContext, T> {
}
};
template <typename T>
struct SelectedRowsAddToTensor<phi::GPUContext, T> {
void operator()(const phi::GPUContext& context,
const phi::SelectedRows& input1, framework::Tensor* input2) {
auto in1_height = input1.height();
auto in2_dims = input2->dims();
PADDLE_ENFORCE_EQ(
in1_height, in2_dims[0],
platform::errors::InvalidArgument("The two inputs height must be equal."
"But recieved first input height = "
"[%d], second input height = [%d]",
in1_height, in2_dims[0]));
auto& in1_value = input1.value();
auto& in1_rows = input1.rows();
int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
PADDLE_ENFORCE_EQ(
in1_row_numel, input2->numel() / in1_height,
platform::errors::InvalidArgument(
"The two inputs width must be equal."
"But recieved first input width = [%d], second input width = [%d]",
in1_row_numel, input2->numel() / in1_height));
auto* in1_data = in1_value.data<T>();
auto* in2_data = input2->data<T>();
const int block_size = 256;
dim3 threads(block_size, 1);
dim3 grid(in1_rows.size(), 1);
paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
SelectedRowsAddToTensorKernel<
T, block_size><<<grid, threads, 0, context.stream()>>>(
in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), in2_data,
in1_row_numel);
}
};
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, float>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, double>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, int>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, int64_t>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext,
platform::float16>;
template struct SelectedRowsAddToTensor<phi::GPUContext, float>;
template struct SelectedRowsAddToTensor<phi::GPUContext, double>;
template struct SelectedRowsAddToTensor<phi::GPUContext, int>;
template struct SelectedRowsAddToTensor<phi::GPUContext, int64_t>;
template struct SelectedRowsAddToTensor<phi::GPUContext, platform::float16>;
namespace scatter {
......
......@@ -12,12 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/meshgrid_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace operators {
......@@ -145,29 +146,3 @@ REGISTER_OPERATOR(meshgrid, ops::MeshgridOp, ops::MeshgridOpMaker,
ops::MeshgridGradOpMaker<paddle::framework::OpDesc>,
ops::MeshgridGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(meshgrid_grad, ops::MeshgridGradOp);
REGISTER_OP_CPU_KERNEL(
meshgrid, ops::MeshgridKernel<paddle::platform::CPUDeviceContext, float>,
ops::MeshgridKernel<paddle::platform::CPUDeviceContext, double>,
ops::MeshgridKernel<paddle::platform::CPUDeviceContext, int>,
ops::MeshgridKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
meshgrid_grad,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, double>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL(
meshgrid, ops::MeshgridKernel<paddle::platform::CUDADeviceContext, float>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, double>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, int>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, bool>);
REGISTER_OP_CUDA_KERNEL(
meshgrid_grad,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
#endif
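
The fluid registrations removed above are superseded by phi kernel registrations. A hedged sketch of the replacement form (the exact file, backend list, and dtype list may differ in the actual commit):

PD_REGISTER_KERNEL(
    meshgrid, CPU, ALL_LAYOUT, phi::MeshgridKernel, float, double, int, int64_t) {}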
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/errors.h"
#define MAX_RANK_SUPPORTED 6
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class MeshgridKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto ins = context.MultiInput<framework::Tensor>("X");
auto rank = ins.size();
switch (rank) {
case 1:
MeshgridForward<1>(context);
break;
case 2:
MeshgridForward<2>(context);
break;
case 3:
MeshgridForward<3>(context);
break;
case 4:
MeshgridForward<4>(context);
break;
case 5:
MeshgridForward<5>(context);
break;
case 6:
MeshgridForward<6>(context);
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Excepted Tensor numbers between 1 and 6, but only received d% .",
rank));
}
}
protected:
template <int Rank>
void MeshgridForward(const framework::ExecutionContext& context) const {
auto ins = context.MultiInput<framework::Tensor>("X");
auto outs = context.MultiOutput<framework::Tensor>("Out");
PADDLE_ENFORCE_EQ(
ins.size() > 1, true,
platform::errors::InvalidArgument(
"Expected at least 2 input tensors, but only received d%.",
ins.size()));
int64_t size = ins.size();
std::vector<int64_t> shape(size);
for (int64_t i = 0; i < size; i++) {
switch (ins[i]->dims().size()) {
case 0:
shape[i] = 1;
break;
case 1:
shape[i] = ins[i]->dims()[0];
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected scalar or 1D tensor in the tensor list but got tensor "
"%d: ",
i));
}
}
for (int64_t i = 0; i < size; i++) {
std::vector<int64_t> view_shape(size, 1);
view_shape[i] = shape[i];
framework::Tensor reshape_ins_tensor;
paddle::framework::TensorCopy(*ins[i], context.GetPlace(),
context.device_context(),
&reshape_ins_tensor);
framework::DDim out_dims_reshape = phi::make_ddim(view_shape);
reshape_ins_tensor.Resize(out_dims_reshape);
framework::DDim out_dims = phi::make_ddim(shape);
Eigen::DSizes<Eigen::DenseIndex, Rank> bcast_dims;
for (int64_t j = 0; j < size; j++) {
bcast_dims[j] = shape[j];
}
bcast_dims[i] = 1;
outs[i]->Resize(out_dims);
auto x = framework::EigenTensor<T, Rank>::From(
static_cast<const framework::Tensor>(reshape_ins_tensor));
outs[i]->mutable_data<T>(context.GetPlace());
auto y = framework::EigenTensor<T, Rank>::From(*outs[i]);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
EigenBroadcast<std::decay_t<decltype(place)>, T, Rank>::Eval(place, y, x,
bcast_dims);
}
}
};
template <typename DeviceContext, typename T>
class MeshgridGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto out_grad =
context.MultiInput<framework::Tensor>(framework::GradVarName("Out"));
int n = out_grad.size();
switch (n) {
case 1:
MeshgridBackward<1>(context);
break;
case 2:
MeshgridBackward<2>(context);
break;
case 3:
MeshgridBackward<3>(context);
break;
case 4:
MeshgridBackward<4>(context);
break;
case 5:
MeshgridBackward<5>(context);
break;
case 6:
MeshgridBackward<6>(context);
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Excepted Tensor numbers between 1 and 6, but only received d% .",
n));
}
}
protected:
template <int Rank>
void MeshgridBackward(const framework::ExecutionContext& context) const {
auto out_grad =
context.MultiInput<framework::Tensor>(framework::GradVarName("Out"));
auto ins = context.MultiInput<framework::Tensor>("X");
auto outs =
context.MultiOutput<framework::Tensor>(framework::GradVarName("X"));
int n = out_grad.size();
auto out_dims = out_grad[0]->dims();
for (int i = 0; i < n; i++) {
outs[i]->mutable_data<T>(context.GetPlace());
auto out_grad_tmp = framework::EigenVector<T>::Flatten(*out_grad[i]);
auto in_grad = framework::EigenVector<T>::Flatten(*outs[i]);
std::vector<int> reduce_dims_vec;
std::vector<int> reshape_dims_vec;
for (int j = 0; j < n; j++) {
reduce_dims_vec.push_back(reshape_dims_vec.size());
if (j == i) {
reshape_dims_vec.push_back(1);
reshape_dims_vec.push_back(out_dims[j]);
} else {
reshape_dims_vec.push_back(out_dims[j]);
reshape_dims_vec.push_back(1);
}
}
Eigen::DSizes<Eigen::DenseIndex, Rank> reduce_dims;
for (int k = 0; k < n; k++) {
reduce_dims[k] = reduce_dims_vec[k];
}
Eigen::DSizes<Eigen::DenseIndex, Rank * 2> reshape_dims;
for (int k = 0; k < n * 2; k++) {
reshape_dims[k] = reshape_dims_vec[k];
}
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
EigenBroadcastGrad<std::decay_t<decltype(place)>, T, Rank>::Eval(
place, in_grad, out_grad_tmp, reduce_dims, reshape_dims);
}
}
};
} // namespace operators
} // namespace paddle
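
To make the broadcast in MeshgridForward and the matching reduction in MeshgridBackward concrete, here is a small self-contained sketch for two inputs of sizes 2 and 3. It is plain C++ independent of Paddle and only illustrates the arithmetic the kernels perform:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> x = {1, 2}, y = {10, 20, 30};
  // Forward: every output has the full shape [2, 3]; input i varies only along dim i.
  std::vector<std::vector<int>> out0(2, std::vector<int>(3)), out1(2, std::vector<int>(3));
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j) {
      out0[i][j] = x[i];  // x broadcast along dim 1
      out1[i][j] = y[j];  // y broadcast along dim 0
    }
  // Backward: the gradient of x sums the upstream gradient over every dim except dim 0.
  std::vector<std::vector<int>> dout0(2, std::vector<int>(3, 1));  // pretend the upstream grad is all ones
  std::vector<int> dx(2, 0);
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j) dx[i] += dout0[i][j];
  std::printf("out0[1][2]=%d out1[1][2]=%d dx[0]=%d\n", out0[1][2], out1[1][2], dx[0]);  // 2 30 3
  return 0;
}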
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/meshgrid_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
......
......@@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/optimizers/adagrad_op.h"
#include <vector>
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......@@ -102,54 +101,8 @@ for numerical stability to avoid the division by zero error.
}
};
namespace {
size_t FindPos(const std::vector<int64_t>& rows, int64_t value) {
return std::find(rows.begin(), rows.end(), value) - rows.begin();
}
} // namespace
template <typename T>
struct SparseAdagradFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& context,
const phi::SelectedRows& grad,
const framework::Tensor& learning_rate, T epsilon,
framework::Tensor* moment, framework::Tensor* param) {
// 1. g_m.rows = set(g.rows)
auto grad_width = grad.value().dims()[1];
math::scatter::MergeAdd<platform::CPUDeviceContext, T> merge_func;
auto grad_merge = merge_func(context, grad);
auto& merge_rows = grad_merge.rows();
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
// 2. m += g_m * g_m
auto grad_square =
SquareSelectedRows<platform::CPUDeviceContext, T>(context, grad_merge);
math::SelectedRowsAddToTensor<platform::CPUDeviceContext, T> functor;
functor(context, grad_square, moment);
// 3. update parameter
auto* lr = learning_rate.data<T>();
auto* param_data = param->data<T>();
auto* moment_data = moment->data<T>();
for (size_t i = 0; i < merge_rows.size(); i++) {
for (int64_t j = 0; j < grad_width; j++) {
param_data[merge_rows[i] * grad_width + j] -=
lr[0] * grad_merge_data[i * grad_width + j] /
(std::sqrt(moment_data[merge_rows[i] * grad_width + j]) + epsilon);
}
}
}
};
template struct SparseAdagradFunctor<platform::CPUDeviceContext, float>;
template struct SparseAdagradFunctor<platform::CPUDeviceContext, double>;
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(adagrad, ops::AdagradOp, ops::AdagradOpMaker);
REGISTER_OP_CPU_KERNEL(
adagrad, ops::AdagradOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::AdagradOpKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
struct SparseAdagradFunctor {
void operator()(const DeviceContext &context, const phi::SelectedRows &grad,
const framework::Tensor &learning_rate, T epsilon,
framework::Tensor *moment, framework::Tensor *param);
};
template <typename DeviceContext, typename T>
phi::SelectedRows SquareSelectedRows(const DeviceContext &context,
const phi::SelectedRows &input) {
phi::SelectedRows out;
out.set_rows(input.rows());
out.set_height(input.height());
out.mutable_value()->mutable_data<T>(input.value().dims(),
context.GetPlace());
auto e_out = framework::EigenVector<T>::Flatten(*(out.mutable_value()));
auto e_in = framework::EigenVector<T>::Flatten(input.value());
e_out.device(*context.eigen_device()) = e_in.square();
return out;
}
template <typename DeviceContext, typename T>
class AdagradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto *param_var = ctx.InputVar("Param");
PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type())));
auto *param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto *moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
param_out_tensor->mutable_data<T>(ctx.GetPlace());
moment_out_tensor->mutable_data<T>(ctx.GetPlace());
T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
auto *grad_var = ctx.InputVar("Grad");
if (grad_var->IsType<framework::LoDTensor>()) {
auto param = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Param"));
auto grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Grad"));
auto moment = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Moment"));
auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate");
auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
auto *place = ctx.template device_context<DeviceContext>().eigen_device();
moment_out.device(*place) = moment + grad * grad;
Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
if (platform::is_cpu_place(ctx.GetPlace())) {
auto *lr = learning_rate->data<T>();
param_out.device(*place) =
param - lr[0] * grad / (moment_out.sqrt() + epsilon);
} else {
auto lr = framework::EigenVector<T>::Flatten(*learning_rate);
param_out.device(*place) =
param -
lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon);
}
} else if (grad_var->IsType<phi::SelectedRows>()) {
auto *param_tensor = ctx.Input<framework::Tensor>("Param");
PADDLE_ENFORCE_EQ(param_tensor, param_out_tensor,
platform::errors::InvalidArgument(
"the input tensor not euqal with output tensor"));
auto *moment_tensor = ctx.Input<framework::Tensor>("Moment");
PADDLE_ENFORCE_EQ(moment_tensor, moment_out_tensor,
platform::errors::InvalidArgument(
"the input moment not eual with output moment"));
SparseAdagradFunctor<DeviceContext, T> functor;
functor(ctx.template device_context<DeviceContext>(),
*ctx.Input<phi::SelectedRows>("Grad"),
*ctx.Input<framework::Tensor>("LearningRate"), epsilon,
moment_out_tensor, param_out_tensor);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Unsupported Variable Type of Grad"));
}
}
};
} // namespace operators
} // namespace paddle
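
For reference, the dense path of AdagradOpKernel above implements the standard Adagrad update. A hedged scalar sketch of a single element's update (plain C++, not the Paddle API):

#include <cmath>
#include <cstdio>

int main() {
  float param = 1.0f, grad = 0.5f, moment = 0.0f;
  const float lr = 0.1f, epsilon = 1e-6f;
  moment += grad * grad;                               // MomentOut = Moment + Grad^2
  param -= lr * grad / (std::sqrt(moment) + epsilon);  // ParamOut = Param - lr * Grad / (sqrt(MomentOut) + eps)
  std::printf("param=%f moment=%f\n", param, moment);  // param ~ 0.9, moment = 0.25
  return 0;
}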
......@@ -17,6 +17,7 @@
#include <memory>
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/phi/kernels/momentum_kernel.h"
#include "paddle/phi/kernels/sgd_kernel.h"
namespace paddle {
......@@ -25,8 +26,7 @@ namespace operators {
template <typename DeviceContext, typename T>
class DGCMomentumKernel : public framework::OpKernel<T> {
public:
DGCMomentumKernel()
: _momentum_op_kernel(new MomentumOpKernel<DeviceContext, T>()) {}
DGCMomentumKernel() {}
void Compute(const framework::ExecutionContext& context) const override {
auto rampup_begin_step = context.Attr<float>("rampup_begin_step");
......@@ -60,15 +60,56 @@ class DGCMomentumKernel : public framework::OpKernel<T> {
VLOG(10) << "current_step:" << *current_step
<< ", rampup_begin_step:" << rampup_begin_step;
const auto* grad_var = context.InputVar("Grad");
if (static_cast<int>(*current_step) < static_cast<int>(rampup_begin_step)) {
VLOG(10) << " so use momentum optimizer";
return _momentum_op_kernel->Compute(context);
auto* learning_rate = context.Input<framework::Tensor>("LearningRate");
bool multi_precision = context.Attr<bool>("multi_precision");
auto* param = context.Input<framework::Tensor>("Param");
auto* velocity = context.Input<framework::Tensor>("Velocity");
auto* param_out = context.Output<framework::Tensor>("ParamOut");
auto* velocity_out = context.Output<framework::Tensor>("VelocityOut");
auto* master_param_out =
context.Output<framework::Tensor>("MasterParamOut");
paddle::optional<const framework::Tensor&> master_param_opt =
paddle::none;
float mu = context.Attr<float>("mu");
bool use_nesterov = context.Attr<bool>("use_nesterov");
std::string regularization_method =
context.Attr<std::string>("regularization_method");
float regularization_coeff = context.Attr<float>("regularization_coeff");
float rescale_grad = context.Attr<float>("rescale_grad");
if (grad_var->IsType<framework::Tensor>()) {
// dense gradient: momentum update via the phi dense kernel
auto* grad = context.Input<framework::Tensor>("Grad");
phi::MomentumDenseKernel<T>(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*param, *grad, *velocity, *learning_rate, master_param_opt, mu,
use_nesterov, regularization_method, regularization_coeff,
multi_precision, rescale_grad, param_out, velocity_out,
master_param_out);
} else {
// dense parameter, sparse (SelectedRows) gradient: momentum update via the phi sparse kernel
auto* grad = context.Input<phi::SelectedRows>("Grad");
phi::MomentumSparseKernel<T>(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*param, *grad, *velocity, *learning_rate, master_param_opt, mu,
use_nesterov, regularization_method, regularization_coeff,
multi_precision, rescale_grad, param_out, velocity_out,
master_param_out);
}
return;
}
VLOG(10) << " so use sgd optimizer";
const auto* param_var = context.InputVar("Param");
const auto* grad_var = context.InputVar("Grad");
auto* learning_rate = context.Input<framework::Tensor>("LearningRate");
bool multi_precision = context.Attr<bool>("multi_precision");
if (param_var->IsType<framework::LoDTensor>()) {
......@@ -125,9 +166,6 @@ class DGCMomentumKernel : public framework::OpKernel<T> {
PADDLE_THROW("gdc not support yet");
}
}
private:
std::unique_ptr<MomentumOpKernel<DeviceContext, T>> _momentum_op_kernel;
};
} // namespace operators
......
......@@ -18,13 +18,16 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/kernels/impl/momentum_kernel_impl.h"
namespace paddle {
namespace operators {
template <typename T>
using MultiPrecisionType = typename details::MPTypeTrait<T>::Type;
template <typename MT, uint32_t kParamNum, bool kHasMasterParams>
struct MergedMomentumMasterParams {
MT *PADDLE_RESTRICT master_params[kParamNum];
......@@ -259,11 +262,11 @@ class MergedMomentumOpKernel : public framework::OpKernel<T> {
#undef PADDLE_LAUNCH_MERGED_MOMENTUM_KERNEL
} else {
for (size_t idx = 0; idx < n; idx++) {
RegularizationType regularization_flag =
phi::RegularizationType regularization_flag =
regularization_methods.size() > 0 &&
regularization_methods[idx] == "l2_decay"
? RegularizationType::kL2DECAY
: RegularizationType::kNONE;
? phi::RegularizationType::kL2DECAY
: phi::RegularizationType::kNONE;
MT regularization_coeff = static_cast<MT>(0.0);
if (regularization_coeffs.size() != 0) {
......@@ -276,7 +279,7 @@ class MergedMomentumOpKernel : public framework::OpKernel<T> {
MT *master_out_data =
multi_precision ? master_params_out[idx]->data<MT>() : nullptr;
if (platform::is_cpu_place(ctx.GetPlace())) {
CPUDenseMomentumFunctor<MT> functor;
phi::CPUDenseMomentumFunctor<MT> functor;
functor(params[idx], grads[idx], velocitys[idx], lr_temp,
static_cast<MT>(mu), use_nesterov, regularization_flag,
regularization_coeff, params_out[idx], velocitys_out[idx]);
......@@ -286,7 +289,7 @@ class MergedMomentumOpKernel : public framework::OpKernel<T> {
static_cast<const DeviceContext &>(ctx.device_context()),
params[idx]->numel());
#define PADDLE_LAUNCH_DENSE_MTMOMENTUM_KERNEL(__nesterov, __reg_type) \
DenseMomentumFunctor<T, MT, __reg_type, __nesterov> functor( \
phi::DenseMomentumFunctor<T, MT, __reg_type, __nesterov> functor( \
params[idx]->data<T>(), grads[idx]->data<T>(), \
velocitys[idx]->data<MT>(), lr_temp->data<MPType>(), master_in_data, \
static_cast<MT>(mu), static_cast<MT>(rescale_grad), \
......@@ -294,26 +297,26 @@ class MergedMomentumOpKernel : public framework::OpKernel<T> {
velocitys_out[idx]->data<MT>(), master_out_data); \
for_range(functor);
if (use_nesterov) {
if (regularization_flag == RegularizationType::kL2DECAY) {
if (regularization_flag == phi::RegularizationType::kL2DECAY) {
PADDLE_LAUNCH_DENSE_MTMOMENTUM_KERNEL(
UseNesterov, RegularizationType::kL2DECAY);
phi::UseNesterov, phi::RegularizationType::kL2DECAY);
VLOG(10)
<< "Launch MergedMomentum gpu kernel use_nesterov kL2DECAY.";
} else {
PADDLE_LAUNCH_DENSE_MTMOMENTUM_KERNEL(UseNesterov,
RegularizationType::kNONE);
PADDLE_LAUNCH_DENSE_MTMOMENTUM_KERNEL(
phi::UseNesterov, phi::RegularizationType::kNONE);
VLOG(10)
<< "Launch MergedMomentum gpu kernel use_nesterov kNONE.";
}
} else {
if (regularization_flag == RegularizationType::kL2DECAY) {
if (regularization_flag == phi::RegularizationType::kL2DECAY) {
PADDLE_LAUNCH_DENSE_MTMOMENTUM_KERNEL(
NoNesterov, RegularizationType::kL2DECAY);
phi::NoNesterov, phi::RegularizationType::kL2DECAY);
VLOG(10)
<< "Launch MergedMomentum gpu kernel no_nesterov kL2DECAY.";
} else {
PADDLE_LAUNCH_DENSE_MTMOMENTUM_KERNEL(NoNesterov,
RegularizationType::kNONE);
PADDLE_LAUNCH_DENSE_MTMOMENTUM_KERNEL(
phi::NoNesterov, phi::RegularizationType::kNONE);
VLOG(10) << "Launch MergedMomentum gpu kernel no_nesterov kNONE.";
}
}
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/operators/optimizers/merged_momentum_op.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/kernels/impl/momentum_kernel_impl.h"
namespace paddle {
namespace operators {
......@@ -118,11 +119,11 @@ class NPUMergedMomentumOpKernel : public framework::OpKernel<T> {
FillNpuTensorWithConstant<T>(&mu_tensor, mu);
for (size_t idx = 0; idx < n; ++idx) {
RegularizationType regularization_flag =
phi::RegularizationType regularization_flag =
regularization_methods.size() > 0 &&
regularization_methods[idx] == "l2_decay"
? RegularizationType::kL2DECAY
: RegularizationType::kNONE;
? phi::RegularizationType::kL2DECAY
: phi::RegularizationType::kNONE;
float regularization_coeff = 0.0;
if (regularization_coeffs.size() != 0) {
regularization_coeff = regularization_coeffs[idx];
......@@ -136,7 +137,7 @@ class NPUMergedMomentumOpKernel : public framework::OpKernel<T> {
auto grad = grads[idx];
Tensor regularized_grad;
if (regularization_flag == RegularizationType::kL2DECAY) {
if (regularization_flag == phi::RegularizationType::kL2DECAY) {
regularized_grad.mutable_data<T>(grad->dims(), ctx.GetPlace());
const auto& runner1 = NpuOpRunner("Muls", {*param}, {regularized_grad},
{{"value", regularization_coeff}});
......
......@@ -108,9 +108,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
ops::MomentumOpInferVarType);
REGISTER_OP_CPU_KERNEL(
momentum, ops::MomentumOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::MomentumOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_VERSION(momentum)
.AddCheckpoint(
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
momentum, ops::MomentumOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::MomentumOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::MomentumOpKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
......@@ -26,44 +26,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using framework::Tensor;
using phi::SelectedRows;
struct NoNesterov;
struct UseNesterov;
namespace details {
template <typename T>
struct CPUDenseUpdater {
template <typename G>
void operator()(const Tensor& param, const Tensor& velocity, const T& mu,
const T& lr, const bool use_nesterov, G&& grad,
Tensor* param_out, Tensor* velocity_out) const {
auto param_out_vec = framework::EigenVector<T>::Flatten(*param_out);
auto velocity_out_vec = framework::EigenVector<T>::Flatten(*velocity_out);
auto param_vec = framework::EigenVector<T>::Flatten(param);
auto velocity_vec = framework::EigenVector<T>::Flatten(velocity);
velocity_out_vec = velocity_vec * mu + grad;
if (use_nesterov) {
param_out_vec = param_vec - (grad + velocity_out_vec * mu) * lr;
} else {
param_out_vec = param_vec - lr * velocity_out_vec;
}
}
};
} // namespace details
template <typename T>
using MultiPrecisionType = typename details::MPTypeTrait<T>::Type;
enum class RegularizationType {
kNONE = 0,
kL1DECAY = 1, // do not need support right now
kL2DECAY = 2,
};
class MomentumOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override;
......@@ -148,460 +110,5 @@ class MomentumOp : public framework::OperatorWithKernel {
}
};
template <typename T>
class CPUDenseMomentumFunctor {
public:
void operator()(const Tensor* param, const Tensor* grad,
const Tensor* velocity, const Tensor* learning_rate,
const T mu, const bool use_nesterov,
const RegularizationType regularization_flag,
const T regularization_coeff, Tensor* param_out,
Tensor* velocity_out) {
auto grad_vec = framework::EigenVector<T>::Flatten(*grad);
auto* lr = learning_rate->data<MultiPrecisionType<T>>();
details::CPUDenseUpdater<T> updater;
if (regularization_flag == RegularizationType::kL2DECAY) {
auto param_vec = framework::EigenVector<T>::Flatten(*param);
updater(*param, *velocity, mu, static_cast<T>(lr[0]), use_nesterov,
param_vec * regularization_coeff + grad_vec, param_out,
velocity_out);
} else {
updater(*param, *velocity, mu, static_cast<T>(lr[0]), use_nesterov,
grad_vec, param_out, velocity_out);
}
}
};
template <typename T, typename MT, RegularizationType kRegType,
typename UpdateMethod>
class DenseMomentumFunctor;
// NOTE(dzh): for performance, avoid if/else inside the kernel; implement GPU
// UseNesterov/NoNesterov as two functors.
template <typename T, typename MT, RegularizationType kRegType>
class DenseMomentumFunctor<T, MT, kRegType, UseNesterov> {
private:
const T* param_;
const T* grad_;
const MT* velocity_;
const MultiPrecisionType<MT>* lr_;
const MT* master_param_;
const MT mu_;
const MT rescale_grad_;
const int64_t num_;
T* param_out_;
MT* velocity_out_;
MT* master_param_out_;
const MT regularization_coeff_;
public:
DenseMomentumFunctor(const T* param, const T* grad, const MT* velocity,
const MultiPrecisionType<MT>* learning_rate,
const MT* master_param, const MT mu,
const MT rescale_grad, const int64_t num,
const MT regularization_coeff, T* param_out,
MT* velocity_out, MT* master_param_out)
: param_(param),
grad_(grad),
velocity_(velocity),
lr_(learning_rate),
master_param_(master_param),
mu_(mu),
rescale_grad_(rescale_grad),
num_(num),
param_out_(param_out),
velocity_out_(velocity_out),
master_param_out_(master_param_out),
regularization_coeff_(regularization_coeff) {}
inline HOSTDEVICE void operator()(size_t i) const {
// keep memory accesses in registers
const MT param =
master_param_ ? master_param_[i] : static_cast<MT>(param_[i]);
MT grad = static_cast<MT>(grad_[i]) * rescale_grad_;
const MT lr = static_cast<MT>(lr_[0]);
const MT velocity = velocity_[i];
if (kRegType == RegularizationType::kL2DECAY) {
grad += regularization_coeff_ * param;
}
MT velocity_out = velocity * mu_ + grad;
MT param_out = param - (grad + velocity_out * mu_) * lr;
// write registers back to memory
velocity_out_[i] = velocity_out;
param_out_[i] = static_cast<T>(param_out);
if (master_param_out_) {
master_param_out_[i] = param_out;
}
}
};
template <typename T, typename MT, RegularizationType kRegType>
class DenseMomentumFunctor<T, MT, kRegType, NoNesterov> {
private:
const T* param_;
const T* grad_;
const MT* velocity_;
const MultiPrecisionType<MT>* lr_;
const MT* master_param_;
const MT mu_;
const MT rescale_grad_;
const int64_t num_;
T* param_out_;
MT* velocity_out_;
MT* master_param_out_;
const MT regularization_coeff_;
public:
DenseMomentumFunctor(const T* param, const T* grad, const MT* velocity,
const MultiPrecisionType<MT>* learning_rate,
const MT* master_param, const MT mu,
const MT rescale_grad, const int64_t num,
const MT regularization_coeff, T* param_out,
MT* velocity_out, MT* master_param_out)
: param_(param),
grad_(grad),
velocity_(velocity),
lr_(learning_rate),
master_param_(master_param),
mu_(mu),
rescale_grad_(rescale_grad),
num_(num),
param_out_(param_out),
velocity_out_(velocity_out),
master_param_out_(master_param_out),
regularization_coeff_(regularization_coeff) {}
inline HOSTDEVICE void operator()(size_t i) const {
// keep memory accesses in registers
const MT param =
master_param_ ? master_param_[i] : static_cast<MT>(param_[i]);
MT grad = static_cast<MT>(grad_[i]) * rescale_grad_;
const MT lr = static_cast<MT>(lr_[0]);
const MT velocity = velocity_[i];
if (kRegType == RegularizationType::kL2DECAY) {
grad += regularization_coeff_ * param;
}
MT velocity_out = velocity * mu_ + grad;
MT param_out = param - lr * velocity_out;
// write registers back to memory
velocity_out_[i] = velocity_out;
param_out_[i] = static_cast<T>(param_out);
if (master_param_out_) {
master_param_out_[i] = param_out;
}
}
};
template <typename T, typename MT, typename UpdateMethod>
class SparseMomentumFunctor;
template <typename T, typename MT>
class SparseMomentumFunctor<T, MT, UseNesterov> {
private:
const T* param_;
const T* grad_;
const MT* velocity_;
const MultiPrecisionType<MT>* lr_;
const MT* master_param_;
const MT mu_;
const MT rescale_grad_;
const int64_t* rows_;
const int64_t row_numel_;
const int64_t row_height_;
T* param_out_;
MT* velocity_out_;
MT* master_param_out_;
const RegularizationType regularization_flag_;
const MT regularization_coeff_;
public:
SparseMomentumFunctor(const T* param, const T* grad, const MT* velocity,
const MultiPrecisionType<MT>* lr,
const MT* master_param, const MT mu,
const MT rescale_grad, const int64_t* rows,
int64_t row_numel, int64_t row_height,
const RegularizationType regularization_flag,
const MT regularization_coeff, T* param_out,
MT* velocity_out, MT* master_param_out)
: param_(param),
grad_(grad),
velocity_(velocity),
lr_(lr),
master_param_(master_param),
mu_(mu),
rescale_grad_(rescale_grad),
rows_(rows),
row_numel_(row_numel),
row_height_(row_height),
param_out_(param_out),
velocity_out_(velocity_out),
master_param_out_(master_param_out),
regularization_flag_(regularization_flag),
regularization_coeff_(regularization_coeff) {}
inline HOSTDEVICE void operator()(size_t i) {
auto row_idx =
phi::funcs::BinarySearch<int64_t>(rows_, row_height_, i / row_numel_);
MT grad =
row_idx >= 0
? static_cast<MT>(grad_[row_idx * row_numel_ + i % row_numel_]) *
rescale_grad_
: static_cast<MT>(0);
// keep memory accesses in registers
const MT param =
master_param_ ? master_param_[i] : static_cast<MT>(param_[i]);
const MT lr = static_cast<MT>(lr_[0]);
const MT velocity = velocity_[i];
grad = regularization_flag_ == RegularizationType::kL2DECAY
? grad + regularization_coeff_ * param
: grad;
MT velocity_out = velocity * mu_ + grad;
MT param_out = param - (grad + velocity_out * mu_) * lr;
// write registers back to memory
velocity_out_[i] = velocity_out;
param_out_[i] = static_cast<T>(param_out);
if (master_param_out_) {
master_param_out_[i] = param_out;
}
}
};
template <typename T, typename MT>
class SparseMomentumFunctor<T, MT, NoNesterov> {
private:
const T* param_;
const T* grad_;
const MT* velocity_;
const MultiPrecisionType<MT>* lr_;
const MT* master_param_;
const MT mu_;
const MT rescale_grad_;
const int64_t* rows_;
const int64_t row_numel_;
const int64_t row_height_;
T* param_out_;
MT* velocity_out_;
MT* master_param_out_;
const RegularizationType regularization_flag_;
const MT regularization_coeff_;
public:
SparseMomentumFunctor(const T* param, const T* grad, const MT* velocity,
const MultiPrecisionType<MT>* lr,
const MT* master_param, const MT mu,
const MT rescale_grad, const int64_t* rows,
int64_t row_numel, int64_t row_height,
const RegularizationType regularization_flag,
const MT regularization_coeff, T* param_out,
MT* velocity_out, MT* master_param_out)
: param_(param),
grad_(grad),
velocity_(velocity),
lr_(lr),
master_param_(master_param),
mu_(mu),
rescale_grad_(rescale_grad),
rows_(rows),
row_numel_(row_numel),
row_height_(row_height),
param_out_(param_out),
velocity_out_(velocity_out),
master_param_out_(master_param_out),
regularization_flag_(regularization_flag),
regularization_coeff_(regularization_coeff) {}
inline HOSTDEVICE void operator()(size_t i) {
auto row_idx =
phi::funcs::BinarySearch<int64_t>(rows_, row_height_, i / row_numel_);
MT grad =
row_idx >= 0
? static_cast<MT>(grad_[row_idx * row_numel_ + i % row_numel_]) *
rescale_grad_
: static_cast<MT>(0);
// keep memory accesses in registers
const MT param =
master_param_ ? master_param_[i] : static_cast<MT>(param_[i]);
const MT lr = static_cast<MT>(lr_[0]);
const MT velocity = velocity_[i];
grad = regularization_flag_ == RegularizationType::kL2DECAY
? grad + regularization_coeff_ * param
: grad;
MT velocity_out = velocity * mu_ + grad;
MT param_out = param - velocity_out * lr;
// write registers back to memory
velocity_out_[i] = velocity_out;
param_out_[i] = static_cast<T>(param_out);
if (master_param_out_) {
master_param_out_[i] = param_out;
}
}
};
template <typename DeviceContext, typename T>
class MomentumOpKernel : public framework::OpKernel<T> {
using MPDType = MultiPrecisionType<T>;
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const bool multi_precision = ctx.Attr<bool>("multi_precision");
if (multi_precision) {
InnerCompute<MPDType>(ctx, multi_precision);
} else {
InnerCompute<T>(ctx, multi_precision);
}
}
private:
template <typename MT>
void InnerCompute(const framework::ExecutionContext& ctx,
const bool multi_precision) const {
std::string regularization_method =
ctx.Attr<std::string>("regularization_method");
MT regularization_coeff =
static_cast<MT>(ctx.Attr<float>("regularization_coeff"));
RegularizationType regularization_flag{
RegularizationType::kNONE}; // disable regularization
if (regularization_method == "l2_decay") {
regularization_flag = RegularizationType::kL2DECAY;
}
MT mu = static_cast<MT>(ctx.Attr<float>("mu"));
MT rescale_grad = static_cast<MT>(ctx.Attr<float>("rescale_grad"));
bool use_nesterov = ctx.Attr<bool>("use_nesterov");
auto learning_rate = ctx.Input<framework::Tensor>("LearningRate");
auto param = ctx.Input<framework::Tensor>("Param");
auto param_out = ctx.Output<framework::Tensor>("ParamOut");
auto velocity = ctx.Input<framework::Tensor>("Velocity");
auto velocity_out = ctx.Output<framework::Tensor>("VelocityOut");
const framework::Tensor* master_param = nullptr;
framework::Tensor* master_param_out = nullptr;
if (multi_precision) {
bool has_master =
ctx.HasInput("MasterParam") && ctx.HasOutput("MasterParamOut");
PADDLE_ENFORCE_EQ(has_master, true,
platform::errors::InvalidArgument(
"The Input(MasterParam) and Output(MasterParamOut) "
"should not be null when "
"the attr `multi_precision` is true"));
master_param = ctx.Input<framework::Tensor>("MasterParam");
master_param_out = ctx.Output<framework::Tensor>("MasterParamOut");
}
param_out->mutable_data<T>(ctx.GetPlace());
velocity_out->mutable_data<MT>(ctx.GetPlace());
const MT* master_in_data =
multi_precision ? master_param->data<MT>() : nullptr;
MT* master_out_data =
multi_precision ? master_param_out->mutable_data<MT>(ctx.GetPlace())
: nullptr;
auto* grad_var = ctx.InputVar("Grad");
if (grad_var->IsType<framework::LoDTensor>()) {
auto grad = ctx.Input<framework::Tensor>("Grad");
if (platform::is_cpu_place(ctx.GetPlace())) {
CPUDenseMomentumFunctor<MT> functor;
functor(param, grad, velocity, learning_rate, mu, use_nesterov,
regularization_flag, regularization_coeff, param_out,
velocity_out);
} else if (platform::is_gpu_place(ctx.GetPlace())) {
platform::ForRange<DeviceContext> for_range(
static_cast<const DeviceContext&>(ctx.device_context()),
param->numel());
#define PADDLE_LAUNCH_DENSE_MOMENTUM_KERNEL(__nesterov, __reg_type) \
DenseMomentumFunctor<T, MT, __reg_type, __nesterov> functor( \
param->data<T>(), grad->data<T>(), velocity->data<MT>(), \
learning_rate->data<MPDType>(), master_in_data, mu, rescale_grad, \
param->numel(), regularization_coeff, \
param_out->mutable_data<T>(ctx.GetPlace()), \
velocity_out->mutable_data<MT>(ctx.GetPlace()), master_out_data); \
for_range(functor);
if (use_nesterov) {
if (regularization_flag == RegularizationType::kL2DECAY) {
PADDLE_LAUNCH_DENSE_MOMENTUM_KERNEL(UseNesterov,
RegularizationType::kL2DECAY);
} else {
PADDLE_LAUNCH_DENSE_MOMENTUM_KERNEL(UseNesterov,
RegularizationType::kNONE);
}
} else {
if (regularization_flag == RegularizationType::kL2DECAY) {
PADDLE_LAUNCH_DENSE_MOMENTUM_KERNEL(NoNesterov,
RegularizationType::kL2DECAY);
} else {
PADDLE_LAUNCH_DENSE_MOMENTUM_KERNEL(NoNesterov,
RegularizationType::kNONE);
}
}
}
} else if (grad_var->IsType<phi::SelectedRows>()) {
// sparse update of an embedding with SelectedRows
auto grad = ctx.Input<phi::SelectedRows>("Grad");
// sparse update may be empty.
if (grad->rows().size() == 0) {
VLOG(3) << "Grad SelectedRows contains no data!";
return;
}
phi::SelectedRows tmp_merged_grad;
phi::SelectedRows* merged_grad = &tmp_merged_grad;
math::scatter::MergeAdd<DeviceContext, T> merge_func;
merge_func(ctx.template device_context<DeviceContext>(), *grad,
merged_grad);
auto* grad_merge_rows = merged_grad->mutable_rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
int64_t row_numel =
merged_grad->value().numel() / merged_grad->rows().size();
platform::ForRange<DeviceContext> for_range(
static_cast<const DeviceContext&>(ctx.device_context()),
param->numel());
if (use_nesterov) {
SparseMomentumFunctor<T, MT, UseNesterov> functor(
param->data<T>(), merged_grad->value().data<T>(),
velocity->data<MT>(), learning_rate->data<MPDType>(),
master_in_data, mu, rescale_grad, rows, row_numel,
static_cast<int64_t>(merged_grad->rows().size()),
regularization_flag, regularization_coeff,
param_out->mutable_data<T>(ctx.GetPlace()),
velocity_out->mutable_data<MT>(ctx.GetPlace()), master_out_data);
for_range(functor);
} else {
SparseMomentumFunctor<T, MT, NoNesterov> functor(
param->data<T>(), merged_grad->value().data<T>(),
velocity->data<MT>(), learning_rate->data<MPDType>(),
master_in_data, mu, rescale_grad, rows, row_numel,
static_cast<int64_t>(merged_grad->rows().size()),
regularization_flag, regularization_coeff,
param_out->mutable_data<T>(ctx.GetPlace()),
velocity_out->mutable_data<MT>(ctx.GetPlace()), master_out_data);
for_range(functor);
}
} else {
PADDLE_ENFORCE_EQ(false, true,
platform::errors::PermissionDenied(
"Unsupported Variable Type of Grad "
"in MomentumOp. Excepted LodTensor "
"or SelectedRows, But received [%s]",
paddle::framework::ToTypeName(grad_var->Type())));
}
}
};
} // namespace operators
} // namespace paddle
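
The four momentum functors above share one core update; only the Nesterov correction and the gradient source differ. A hedged scalar sketch (plain C++) of the two dense variants, assuming L2 decay folds into the gradient as grad += coeff * param:

#include <cstdio>

int main() {
  const float mu = 0.9f, lr = 0.1f, coeff = 0.01f;
  float param = 1.0f, velocity = 0.0f, grad = 0.5f;
  grad += coeff * param;                                      // RegularizationType::kL2DECAY
  float velocity_out = velocity * mu + grad;                  // shared velocity update
  float plain = param - lr * velocity_out;                    // NoNesterov
  float nesterov = param - (grad + velocity_out * mu) * lr;   // UseNesterov
  std::printf("velocity_out=%f plain=%f nesterov=%f\n", velocity_out, plain, nesterov);
  return 0;
}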
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/optimizers/sgd_op.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/kernels/impl/momentum_kernel_impl.h"
namespace paddle {
namespace operators {
......@@ -28,10 +29,10 @@ class NPUMomentumOpKernel : public framework::OpKernel<T> {
std::string regularization_method =
ctx.Attr<std::string>("regularization_method");
auto regularization_coeff = ctx.Attr<float>("regularization_coeff");
RegularizationType regularization_flag{
RegularizationType::kNONE}; // disable regularization
phi::RegularizationType regularization_flag{
phi::RegularizationType::kNONE}; // disable regularization
if (regularization_method == "l2_decay") {
regularization_flag = RegularizationType::kL2DECAY;
regularization_flag = phi::RegularizationType::kL2DECAY;
}
T mu = static_cast<T>(ctx.Attr<float>("mu"));
......@@ -55,7 +56,7 @@ class NPUMomentumOpKernel : public framework::OpKernel<T> {
FillNpuTensorWithConstant<T>(&mu_tensor, mu);
Tensor regularized_grad;
if (regularization_flag == RegularizationType::kL2DECAY) {
if (regularization_flag == phi::RegularizationType::kL2DECAY) {
regularized_grad.mutable_data<T>(grad->dims(), ctx.GetPlace());
const auto& runner1 = NpuOpRunner("Muls", {*param}, {regularized_grad},
{{"value", regularization_coeff}});
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/optimizers/rmsprop_op.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
......@@ -170,6 +170,3 @@ http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(rmsprop, ops::RmspropOp, ops::RmspropOpMaker);
REGISTER_OP_CPU_KERNEL(
rmsprop, ops::RmspropOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::RmspropOpKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/optimizers/rmsprop_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
rmsprop, ops::RmspropOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::RmspropOpKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <math.h>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
namespace paddle {
namespace operators {
template <typename T>
struct DenseRmspropGradFunctor {
inline explicit DenseRmspropGradFunctor(const T *grad) : grad_(grad) {}
HOSTDEVICE inline T operator()(int64_t idx) const { return grad_[idx]; }
const T *grad_;
};
template <typename T>
struct SparseRmspropGradFunctor {
inline SparseRmspropGradFunctor(const T *grad, const int64_t *rows,
int64_t row_numel, int64_t row_count)
: grad_(grad),
rows_(rows),
row_numel_(row_numel),
row_count_(row_count) {}
HOSTDEVICE inline T operator()(int64_t idx) const {
auto row_idx =
phi::funcs::BinarySearch(rows_, row_count_, idx / row_numel_);
return row_idx >= 0 ? grad_[row_idx * row_numel_ + idx % row_numel_] : 0;
}
const T *grad_;
const int64_t *rows_;
int64_t row_numel_;
int64_t row_count_;
};
template <typename T, typename GradFunctor>
struct UncenteredRmspropFunctor {
UncenteredRmspropFunctor(T *param, T *ms, T *mom, const T *lr, T rho,
T epsilon, T momentum,
const GradFunctor &grad_functor)
: param_(param),
ms_(ms),
mom_(mom),
lr_(lr),
rho_(rho),
epsilon_(epsilon),
momentum_(momentum),
grad_functor_(grad_functor) {}
HOSTDEVICE inline void operator()(int64_t idx) const {
T g = grad_functor_(idx);
T ms_out = rho_ * ms_[idx] + (1 - rho_) * g * g;
T mom_out = momentum_ * mom_[idx] + lr_[0] * g / sqrt(ms_out + epsilon_);
param_[idx] -= mom_out;
ms_[idx] = ms_out;
mom_[idx] = mom_out;
}
T *param_;
T *ms_;
T *mom_;
const T *lr_;
T rho_;
T epsilon_;
T momentum_;
GradFunctor grad_functor_;
};
template <typename T, typename GradFunctor>
struct CenteredRmspropFunctor {
CenteredRmspropFunctor(T *param, T *ms, T *mom, T *mean_grad, const T *lr,
T rho, T epsilon, T momentum,
const GradFunctor &grad_functor)
: param_(param),
ms_(ms),
mom_(mom),
mean_grad_(mean_grad),
lr_(lr),
rho_(rho),
epsilon_(epsilon),
momentum_(momentum),
grad_functor_(grad_functor) {}
HOSTDEVICE inline void operator()(int64_t idx) const {
T g = grad_functor_(idx);
T ms_out = rho_ * ms_[idx] + (1 - rho_) * g * g;
T mg_out = rho_ * mean_grad_[idx] + (1 - rho_) * g;
T mom_out = momentum_ * mom_[idx] +
lr_[0] * g / sqrt(ms_out - mg_out * mg_out + epsilon_);
param_[idx] -= mom_out;
ms_[idx] = ms_out;
mom_[idx] = mom_out;
mean_grad_[idx] = mg_out;
}
T *param_;
T *ms_;
T *mom_;
T *mean_grad_;
const T *lr_;
T rho_;
T epsilon_;
T momentum_;
GradFunctor grad_functor_;
};
template <typename DeviceContext, typename T>
class RmspropOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
using LoDTensor = framework::LoDTensor;
auto *grad_var = ctx.InputVar("Grad");
auto *param_out = ctx.Output<LoDTensor>("ParamOut");
auto *moment_out = ctx.Output<LoDTensor>("MomentOut");
auto *mean_square_out = ctx.Output<LoDTensor>("MeanSquareOut");
auto epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
auto rho = static_cast<T>(ctx.Attr<float>("decay"));
auto momentum = static_cast<T>(ctx.Attr<float>("momentum"));
bool centered = ctx.Attr<bool>("centered");
auto &p_tensor = *ctx.Input<LoDTensor>("Param");
auto &ms_tensor = *ctx.Input<LoDTensor>("MeanSquare");
auto &lr_tensor = *ctx.Input<LoDTensor>("LearningRate");
auto &mom_tensor = *ctx.Input<LoDTensor>("Moment");
PADDLE_ENFORCE_EQ(p_tensor.IsSharedBufferWith(*param_out), true,
platform::errors::InvalidArgument(
"Param and ParamOut must be the same Tensor"));
PADDLE_ENFORCE_EQ(mom_tensor.IsSharedBufferWith(*moment_out), true,
platform::errors::InvalidArgument(
"Moment and MomentOut must be the same Tensor"));
PADDLE_ENFORCE_EQ(
ms_tensor.IsSharedBufferWith(*mean_square_out), true,
platform::errors::InvalidArgument(
"MeanSquare and MeanSquareOut must be the same Tensor"));
auto &dev_ctx = ctx.template device_context<DeviceContext>();
size_t limit = static_cast<size_t>(ms_tensor.numel());
if (grad_var->IsType<LoDTensor>()) {
auto &grad_tensor = grad_var->Get<LoDTensor>();
if (std::is_same<DeviceContext, platform::CPUDeviceContext>::value) {
auto &place =
*ctx.template device_context<DeviceContext>().eigen_device();
auto lr_value = lr_tensor.data<T>()[0];
auto p = framework::EigenVector<T>::Flatten(p_tensor);
auto ms = framework::EigenVector<T>::Flatten(ms_tensor);
auto g = framework::EigenVector<T>::Flatten(grad_tensor);
auto mom = framework::EigenVector<T>::Flatten(mom_tensor);
auto p_out = framework::EigenVector<T>::Flatten(*param_out);
auto mom_out = framework::EigenVector<T>::Flatten(*moment_out);
auto ms_out = framework::EigenVector<T>::Flatten(*mean_square_out);
ms_out.device(place) = rho * ms + (1 - rho) * g * g;
if (centered) {
auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
auto mg = framework::EigenVector<T>::Flatten(mg_tensor);
auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
PADDLE_ENFORCE_EQ(
&mg_tensor, mean_grad_out,
platform::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
auto mg_out = framework::EigenVector<T>::Flatten(*mean_grad_out);
mg_out.device(place) = rho * mg + (1 - rho) * g;
mom_out.device(place) =
momentum * mom +
lr_value * g / (ms_out - mg_out.square() + epsilon).sqrt();
} else {
mom_out.device(place) =
momentum * mom + lr_value * g / (ms_out + epsilon).sqrt();
}
p_out.device(place) = p - mom_out;
} else {
DenseRmspropGradFunctor<T> grad_func(grad_tensor.data<T>());
platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
if (centered) {
auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
PADDLE_ENFORCE_EQ(
&mg_tensor, mean_grad_out,
platform::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
for_range(CenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>(
param_out->mutable_data<T>(ctx.GetPlace()),
mean_square_out->mutable_data<T>(ctx.GetPlace()),
moment_out->mutable_data<T>(ctx.GetPlace()),
mean_grad_out->mutable_data<T>(ctx.GetPlace()),
lr_tensor.data<T>(), rho, epsilon, momentum, grad_func));
} else {
for_range(UncenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>(
param_out->mutable_data<T>(ctx.GetPlace()),
mean_square_out->mutable_data<T>(ctx.GetPlace()),
moment_out->mutable_data<T>(ctx.GetPlace()), lr_tensor.data<T>(),
rho, epsilon, momentum, grad_func));
}
}
} else if (grad_var->IsType<phi::SelectedRows>()) {
auto &grad = grad_var->Get<phi::SelectedRows>();
phi::SelectedRows tmp_merged_grad;
phi::SelectedRows *merged_grad = &tmp_merged_grad;
math::scatter::MergeAdd<DeviceContext, T> merge_func;
merge_func(dev_ctx, grad, merged_grad);
platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
auto &grad_merge_rows = merged_grad->rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
&grad_merge_rows);
const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto &merged_tensor = merged_grad->value();
int64_t row_count = merged_grad->rows().size();
int64_t row_numel = merged_tensor.numel() / row_count;
SparseRmspropGradFunctor<T> grad_func(merged_tensor.data<T>(), rows,
row_numel, row_count);
if (centered) {
auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
PADDLE_ENFORCE_EQ(
&mg_tensor, mean_grad_out,
platform::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
for_range(CenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>(
param_out->mutable_data<T>(ctx.GetPlace()),
mean_square_out->mutable_data<T>(ctx.GetPlace()),
moment_out->mutable_data<T>(ctx.GetPlace()),
mean_grad_out->mutable_data<T>(ctx.GetPlace()), lr_tensor.data<T>(),
rho, epsilon, momentum, grad_func));
} else {
for_range(UncenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>(
param_out->mutable_data<T>(ctx.GetPlace()),
mean_square_out->mutable_data<T>(ctx.GetPlace()),
moment_out->mutable_data<T>(ctx.GetPlace()), lr_tensor.data<T>(),
rho, epsilon, momentum, grad_func));
}
} else {
PADDLE_ENFORCE_EQ(false, true,
platform::errors::PermissionDenied(
"Unsupported Variable Type of Grad "
"in RmspropOp. Excepted LodTensor "
"or SelectedRows, But received [%s]",
paddle::framework::ToTypeName(grad_var->Type())));
}
}
};
} // namespace operators
} // namespace paddle
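For reference, both branches of the kernel above (the Eigen expressions on CPU and the ForRange functors elsewhere) evaluate the same element-wise RMSProp rule. A minimal standalone sketch of one uncentered update step, with made-up values that are not part of this patch:

#include <cmath>
#include <cstdio>

// One scalar RMSProp step, mirroring UncenteredRmspropFunctor:
//   ms  <- rho * ms + (1 - rho) * g * g
//   mom <- momentum * mom + lr * g / sqrt(ms + epsilon)
//   p   <- p - mom
int main() {
  double p = 1.0, ms = 0.0, mom = 0.0;  // parameter, mean square, moment
  const double g = 0.5, lr = 0.1;       // gradient, learning rate (illustrative)
  const double rho = 0.9, momentum = 0.0, epsilon = 1e-10;
  ms = rho * ms + (1 - rho) * g * g;
  mom = momentum * mom + lr * g / std::sqrt(ms + epsilon);
  p -= mom;
  std::printf("p=%f ms=%f mom=%f\n", p, ms, mom);
  return 0;
}

The centered variant additionally tracks a mean gradient mg <- rho * mg + (1 - rho) * g and divides by sqrt(ms - mg^2 + epsilon) instead.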
......@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/optimizers/rmsprop_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/optimizers/rmsprop_op.h"
#include <gflags/gflags.h>
#include <iostream>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
......
......@@ -11,7 +11,7 @@ set_property(GLOBAL PROPERTY PHI_KERNELS "")
# [ 1. Common kernel compilation dependencies ]
set(COMMON_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils custom_kernel)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor selected_rows_functor )
# remove this dep after removing fluid deps on tensor creation
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace phi {
template <typename T, typename Context>
void AdagradDenseKernel(const Context& dev_ctx,
const DenseTensor& param,
const DenseTensor& grad,
const DenseTensor& moment,
const DenseTensor& learning_rate,
float epsilon,
DenseTensor* param_out,
DenseTensor* moment_out);
template <typename T, typename Context>
void AdagradSparseKernel(const Context& dev_ctx,
const DenseTensor& param,
const SelectedRows& grad,
const DenseTensor& moment,
const DenseTensor& learning_rate,
float epsilon,
DenseTensor* param_out,
DenseTensor* moment_out);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace phi {
namespace {
size_t FindPos(const std::vector<int64_t>& rows, int64_t value) {
return std::find(rows.begin(), rows.end(), value) - rows.begin();
}
} // namespace
template <typename T>
struct SparseAdagradFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext& context,
const phi::SelectedRows& grad,
const DenseTensor& learning_rate,
T epsilon,
DenseTensor* moment,
DenseTensor* param) {
// 1. g_m.rows = set(g.rows)
auto grad_width = grad.value().dims()[1];
paddle::operators::math::scatter::MergeAdd<phi::CPUContext, T> merge_func;
auto grad_merge = merge_func(context, grad);
auto& merge_rows = grad_merge.rows();
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
// 2. m += g_m * g_m
auto grad_square =
SquareSelectedRows<phi::CPUContext, T>(context, grad_merge);
paddle::operators::math::SelectedRowsAddToTensor<phi::CPUContext, T>
functor;
functor(context, grad_square, moment);
// 3. update parameter
auto* lr = learning_rate.data<T>();
auto* param_data = param->data<T>();
auto* moment_data = moment->data<T>();
for (size_t i = 0; i < merge_rows.size(); i++) {
for (int64_t j = 0; j < grad_width; j++) {
param_data[merge_rows[i] * grad_width + j] -=
lr[0] * grad_merge_data[i * grad_width + j] /
(std::sqrt(moment_data[merge_rows[i] * grad_width + j]) + epsilon);
}
}
}
};
template struct SparseAdagradFunctor<phi::CPUContext, float>;
template struct SparseAdagradFunctor<phi::CPUContext, double>;
} // namespace phi
PD_REGISTER_KERNEL(
adagrad, CPU, ALL_LAYOUT, phi::AdagradDenseKernel, float, double) {}
PD_REGISTER_KERNEL(adagrad_dense_param_sparse_grad,
CPU,
ALL_LAYOUT,
phi::AdagradSparseKernel,
float,
double) {}
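The sparse code paths above (RMSProp earlier and SparseAdagradFunctor here) first run scatter::MergeAdd so that duplicate row indices in the SelectedRows gradient are summed into a single row before the update is applied. A rough illustration of that merge using plain STL containers rather than the real SelectedRows API:

#include <cstdio>
#include <map>
#include <vector>

int main() {
  // A SelectedRows-style gradient: row indices may repeat (row_numel = 2).
  std::vector<long long> rows = {3, 1, 3};
  std::vector<std::vector<double>> values = {{1, 1}, {2, 2}, {4, 4}};
  // MergeAdd: sum all values that share a row index.
  std::map<long long, std::vector<double>> merged;
  for (size_t i = 0; i < rows.size(); ++i) {
    auto& dst = merged[rows[i]];
    if (dst.empty()) dst.assign(values[i].size(), 0.0);
    for (size_t j = 0; j < values[i].size(); ++j) dst[j] += values[i][j];
  }
  for (const auto& kv : merged) {
    std::printf("row %lld: [%g, %g]\n", kv.first, kv.second[0], kv.second[1]);
  }
  return 0;  // prints: row 1: [2, 2]  and  row 3: [5, 5]
}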
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/meshgrid_grad_kernel.h"
#include "paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
PD_REGISTER_KERNEL(meshgrid_grad,
CPU,
ALL_LAYOUT,
phi::MeshgridGradKernel,
float,
double,
int,
int64_t) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/meshgrid_kernel.h"
#include "paddle/phi/kernels/impl/meshgrid_kernel_impl.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
PD_REGISTER_KERNEL(meshgrid,
CPU,
ALL_LAYOUT,
phi::MeshgridKernel,
float,
double,
int,
int64_t) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/momentum_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/momentum_kernel_impl.h"
PD_REGISTER_KERNEL(
momentum, CPU, ALL_LAYOUT, phi::MomentumDenseKernel, float, double) {}
PD_REGISTER_KERNEL(momentum_dense_param_sparse_grad,
CPU,
ALL_LAYOUT,
phi::MomentumSparseKernel,
float,
double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/rmsprop_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/rmsprop_kernel_impl.h"
PD_REGISTER_KERNEL(
rmsprop, CPU, ALL_LAYOUT, phi::RmspropDenseKernel, float, double) {}
PD_REGISTER_KERNEL(rmsprop_dense_param_sparse_grad,
CPU,
ALL_LAYOUT,
phi::RmspropSparseKernel,
float,
double) {}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/adagrad_kernel.h"
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/optimizers/adagrad_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace paddle {
namespace operators {
namespace {
namespace phi {
template <typename T, int block_size>
__global__ void MergeGradKernel(const T* grad, const int64_t* grad_rows,
T* grad_merge, const int64_t* grad_merge_rows,
__global__ void MergeGradKernel(const T* grad,
const int64_t* grad_rows,
T* grad_merge,
const int64_t* grad_merge_rows,
size_t grad_merge_rows_size,
int64_t row_numel) {
const int ty = blockIdx.y;
......@@ -48,9 +52,12 @@ __global__ void MergeGradKernel(const T* grad, const int64_t* grad_rows,
}
template <typename T, int block_size>
__global__ void SparseAdagradFunctorKernel(const T* grad, const int64_t* rows,
const T* learning_rate, T* param,
T* moment, int64_t row_numel,
__global__ void SparseAdagradFunctorKernel(const T* grad,
const int64_t* rows,
const T* learning_rate,
T* param,
T* moment,
int64_t row_numel,
T epsilon) {
const int ty = blockIdx.y;
int tid = threadIdx.x;
......@@ -67,25 +74,27 @@ __global__ void SparseAdagradFunctorKernel(const T* grad, const int64_t* rows,
(sqrt(moment[index]) + epsilon));
}
}
} // namespace
template <typename T>
struct SparseAdagradFunctor<platform::CUDADeviceContext, T> {
void operator()(const platform::CUDADeviceContext& context,
struct SparseAdagradFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext& context,
const phi::SelectedRows& grad,
const framework::Tensor& learning_rate, T epsilon,
framework::Tensor* moment, framework::Tensor* param) {
const DenseTensor& learning_rate,
T epsilon,
DenseTensor* moment,
DenseTensor* param) {
// 1. g_m.rows = set(g.rows)
auto grad_width = grad.value().dims()[1];
math::scatter::MergeAdd<platform::CUDADeviceContext, T> merge_func;
paddle::operators::math::scatter::MergeAdd<phi::GPUContext, T> merge_func;
auto grad_merge = merge_func(context, grad);
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
framework::Vector<int64_t> merge_rows(grad_merge.rows());
paddle::framework::Vector<int64_t> merge_rows(grad_merge.rows());
// 2. m += g_m * g_m
auto grad_square =
SquareSelectedRows<platform::CUDADeviceContext, T>(context, grad_merge);
SquareSelectedRows<phi::GPUContext, T>(context, grad_merge);
math::SelectedRowsAddToTensor<platform::CUDADeviceContext, T> functor;
paddle::operators::math::SelectedRowsAddToTensor<phi::GPUContext, T>
functor;
functor(context, grad_square, moment);
// 3. update parameter
......@@ -98,22 +107,33 @@ struct SparseAdagradFunctor<platform::CUDADeviceContext, T> {
dim3 grid2(1, merge_rows.size());
paddle::framework::MixVector<int64_t> mixv_merge_rows(&merge_rows);
SparseAdagradFunctorKernel<
T, 256><<<grid2, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
grad_merge_data, mixv_merge_rows.CUDAMutableData(context.GetPlace()),
lr, param_data, moment_data, grad_width, epsilon);
T,
256><<<grid2,
threads,
0,
reinterpret_cast<const phi::GPUContext&>(context).stream()>>>(
grad_merge_data,
mixv_merge_rows.CUDAMutableData(context.GetPlace()),
lr,
param_data,
moment_data,
grad_width,
epsilon);
mixv_merge_rows.CopyToCPU();
}
};
template struct SparseAdagradFunctor<platform::CUDADeviceContext, float>;
template struct SparseAdagradFunctor<platform::CUDADeviceContext, double>;
template struct SparseAdagradFunctor<phi::GPUContext, float>;
template struct SparseAdagradFunctor<phi::GPUContext, double>;
} // namespace phi
} // namespace operators
} // namespace paddle
PD_REGISTER_KERNEL(
adagrad, GPU, ALL_LAYOUT, phi::AdagradDenseKernel, float, double) {}
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
adagrad, ops::AdagradOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::AdagradOpKernel<paddle::platform::CUDADeviceContext, double>);
PD_REGISTER_KERNEL(adagrad_dense_param_sparse_grad,
GPU,
ALL_LAYOUT,
phi::AdagradSparseKernel,
float,
double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/meshgrid_grad_kernel.h"
#include "paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
PD_REGISTER_KERNEL(meshgrid_grad,
GPU,
ALL_LAYOUT,
phi::MeshgridGradKernel,
float,
double,
int,
int64_t) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/meshgrid_kernel.h"
#include "paddle/phi/kernels/impl/meshgrid_kernel_impl.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
PD_REGISTER_KERNEL(meshgrid,
GPU,
ALL_LAYOUT,
phi::MeshgridKernel,
float,
double,
int,
int64_t) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/momentum_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/momentum_kernel_impl.h"
PD_REGISTER_KERNEL(momentum,
GPU,
ALL_LAYOUT,
phi::MomentumDenseKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(momentum_dense_param_sparse_grad,
GPU,
ALL_LAYOUT,
phi::MomentumSparseKernel,
float,
double,
phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/rmsprop_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/rmsprop_kernel_impl.h"
PD_REGISTER_KERNEL(
rmsprop, GPU, ALL_LAYOUT, phi::RmspropDenseKernel, float, double) {}
PD_REGISTER_KERNEL(rmsprop_dense_param_sparse_grad,
GPU,
ALL_LAYOUT,
phi::RmspropSparseKernel,
float,
double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
template <typename DeviceContext, typename T>
struct SparseAdagradFunctor {
void operator()(const DeviceContext& context,
const phi::SelectedRows& grad,
const DenseTensor& learning_rate,
T epsilon,
DenseTensor* moment,
DenseTensor* param);
};
template <typename DeviceContext, typename T>
phi::SelectedRows SquareSelectedRows(const DeviceContext& context,
const phi::SelectedRows& input) {
phi::SelectedRows out;
out.set_rows(input.rows());
out.set_height(input.height());
out.mutable_value()->Resize(input.value().dims());
context.template Alloc<T>(out.mutable_value());
auto e_out = EigenVector<T>::Flatten(*(out.mutable_value()));
auto e_in = EigenVector<T>::Flatten(input.value());
e_out.device(*context.eigen_device()) = e_in.square();
return out;
}
template <typename T, typename Context>
void AdagradDenseKernel(const Context& ctx,
const DenseTensor& param_t,
const DenseTensor& grad_t,
const DenseTensor& moment_t,
const DenseTensor& learning_rate,
float epsilon_t,
DenseTensor* param_out_tensor,
DenseTensor* moment_out_tensor) {
ctx.template Alloc<T>(param_out_tensor);
ctx.template Alloc<T>(moment_out_tensor);
T epsilon = static_cast<T>(epsilon_t);
auto param = EigenVector<T>::Flatten(param_t);
auto grad = EigenVector<T>::Flatten(grad_t);
auto moment = EigenVector<T>::Flatten(moment_t);
auto param_out = EigenVector<T>::Flatten(*param_out_tensor);
auto moment_out = EigenVector<T>::Flatten(*moment_out_tensor);
auto place = *ctx.eigen_device();
moment_out.device(place) = moment + grad * grad;
Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
if (paddle::platform::is_cpu_place(ctx.GetPlace())) {
auto* lr = learning_rate.data<T>();
param_out.device(place) =
param - lr[0] * grad / (moment_out.sqrt() + epsilon);
} else {
auto lr = EigenVector<T>::Flatten(learning_rate);
param_out.device(place) =
param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon);
}
}
template <typename T, typename Context>
void AdagradSparseKernel(const Context& ctx,
const DenseTensor& param_t,
const SelectedRows& grad_t,
const DenseTensor& moment_t,
const DenseTensor& learning_rate,
float epsilon_t,
DenseTensor* param_out,
DenseTensor* moment_out) {
auto* param_out_tensor = param_out;
auto* moment_out_tensor = moment_out;
ctx.template Alloc<T>(param_out_tensor);
ctx.template Alloc<T>(moment_out_tensor);
T epsilon = static_cast<T>(epsilon_t);
auto* param_tensor = &param_t;
PADDLE_ENFORCE_EQ(param_tensor,
param_out_tensor,
phi::errors::InvalidArgument(
"the input tensor not euqal with output tensor"));
auto* moment_tensor = &moment_t;
PADDLE_ENFORCE_EQ(moment_tensor,
moment_out_tensor,
phi::errors::InvalidArgument(
"the input moment not eual with output moment"));
SparseAdagradFunctor<Context, T> functor;
functor(
ctx, grad_t, learning_rate, epsilon, moment_out_tensor, param_out_tensor);
}
} // namespace phi
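AdagradDenseKernel above is the textbook Adagrad rule applied element-wise; a scalar sketch of what the Eigen expressions compute, with illustrative values only:

#include <cmath>
#include <cstdio>

// Dense Adagrad, as in AdagradDenseKernel:
//   moment <- moment + g * g
//   param  <- param - lr * g / (sqrt(moment) + epsilon)
int main() {
  double param = 1.0, moment = 0.0;
  const double g = 0.5, lr = 0.1, epsilon = 1e-6;
  moment += g * g;
  param -= lr * g / (std::sqrt(moment) + epsilon);
  std::printf("param=%f moment=%f\n", param, moment);
  return 0;
}

The sparse variant performs the same update, but only on the rows produced by MergeAdd, via SquareSelectedRows and SelectedRowsAddToTensor.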
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/meshgrid_grad_kernel.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
namespace phi {
template <typename T, typename Context, int Rank>
void MeshgridBackward(const Context& ctx,
const std::vector<const DenseTensor*>& ins,
const std::vector<const DenseTensor*>& out_grad,
std::vector<DenseTensor*> outs) {
int n = out_grad.size();
auto out_dims = out_grad[0]->dims();
for (int i = 0; i < n; i++) {
ctx.template Alloc<T>(outs[i]);
auto out_grad_tmp = EigenVector<T>::Flatten(*out_grad[i]);
auto in_grad = EigenVector<T>::Flatten(*outs[i]);
std::vector<int> reduce_dims_vec;
std::vector<int> reshape_dims_vec;
for (int j = 0; j < n; j++) {
reduce_dims_vec.push_back(reshape_dims_vec.size());
if (j == i) {
reshape_dims_vec.push_back(1);
reshape_dims_vec.push_back(out_dims[j]);
} else {
reshape_dims_vec.push_back(out_dims[j]);
reshape_dims_vec.push_back(1);
}
}
Eigen::DSizes<Eigen::DenseIndex, Rank> reduce_dims;
for (int k = 0; k < n; k++) {
reduce_dims[k] = reduce_dims_vec[k];
}
Eigen::DSizes<Eigen::DenseIndex, Rank * 2> reshape_dims;
for (int k = 0; k < n * 2; k++) {
reshape_dims[k] = reshape_dims_vec[k];
}
auto& place = *ctx.eigen_device();
funcs::EigenBroadcastGrad<std::decay_t<decltype(place)>, T, Rank>::Eval(
place, in_grad, out_grad_tmp, reduce_dims, reshape_dims);
}
}
template <typename T, typename Context>
void MeshgridGradKernel(const Context& ctx,
const std::vector<const DenseTensor*>& inputs,
const std::vector<const DenseTensor*>& outputs_grad,
std::vector<DenseTensor*> inputs_grad) {
int n = outputs_grad.size();
switch (n) {
case 1:
MeshgridBackward<T, Context, 1>(ctx, inputs, outputs_grad, inputs_grad);
break;
case 2:
MeshgridBackward<T, Context, 2>(ctx, inputs, outputs_grad, inputs_grad);
break;
case 3:
MeshgridBackward<T, Context, 3>(ctx, inputs, outputs_grad, inputs_grad);
break;
case 4:
MeshgridBackward<T, Context, 4>(ctx, inputs, outputs_grad, inputs_grad);
break;
case 5:
MeshgridBackward<T, Context, 5>(ctx, inputs, outputs_grad, inputs_grad);
break;
case 6:
MeshgridBackward<T, Context, 6>(ctx, inputs, outputs_grad, inputs_grad);
break;
default:
PADDLE_THROW(phi::errors::InvalidArgument(
"Excepted Tensor numbers between 1 and 6, but only received d% .",
n));
}
}
} // namespace phi
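In MeshgridBackward, the gradient with respect to the i-th 1-D input is the broadcast output gradient summed over every axis except axis i; the reshape_dims/reduce_dims pair built above encodes exactly that for Eigen. The same reduction for two inputs, written with plain loops as an illustrative sketch:

#include <cstdio>
#include <vector>

int main() {
  // meshgrid produced outputs of shape [N, M] from inputs x (len N) and y (len M).
  const int N = 2, M = 3;
  std::vector<double> dout(N * M, 1.0);  // upstream gradient, all ones for simplicity
  std::vector<double> dx(N, 0.0);        // dx[i] = sum_j dout[i][j]
  std::vector<double> dy(M, 0.0);        // dy[j] = sum_i dout[i][j]
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j) {
      dx[i] += dout[i * M + j];
      dy[j] += dout[i * M + j];
    }
  }
  std::printf("dx = [%g, %g], dy = [%g, %g, %g]\n", dx[0], dx[1], dy[0], dy[1], dy[2]);
  return 0;  // dx = [3, 3], dy = [2, 2, 2]
}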
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/meshgrid_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
namespace phi {
template <typename T, typename Context, int Rank>
void MeshgridForward(const Context& ctx,
const std::vector<const DenseTensor*>& ins,
std::vector<DenseTensor*> outs) {
PADDLE_ENFORCE_EQ(
ins.size() > 1,
true,
phi::errors::InvalidArgument(
"Expected at least 2 input tensors, but only received d%.",
ins.size()));
int64_t size = ins.size();
std::vector<int64_t> shape(size);
for (int64_t i = 0; i < size; i++) {
switch (ins[i]->dims().size()) {
case 0:
shape[i] = 1;
break;
case 1:
shape[i] = ins[i]->dims()[0];
break;
default:
PADDLE_THROW(phi::errors::InvalidArgument(
"Expected scalar or 1D tensor in the tensor list but got tensor "
"%d: ",
i));
}
}
for (int64_t i = 0; i < size; i++) {
std::vector<int64_t> view_shape(size, 1);
view_shape[i] = shape[i];
DenseTensor reshape_ins_tensor;
paddle::framework::TensorCopy(
*ins[i], ctx.GetPlace(), ctx, &reshape_ins_tensor);
DDim out_dims_reshape = phi::make_ddim(view_shape);
reshape_ins_tensor.Resize(out_dims_reshape);
DDim out_dims = phi::make_ddim(shape);
Eigen::DSizes<Eigen::DenseIndex, Rank> bcast_dims;
for (int64_t j = 0; j < size; j++) {
bcast_dims[j] = shape[j];
}
bcast_dims[i] = 1;
outs[i]->Resize(out_dims);
auto x = EigenTensor<T, Rank>::From(
static_cast<const DenseTensor>(reshape_ins_tensor));
ctx.template Alloc<T>(outs[i]);
auto y = EigenTensor<T, Rank>::From(*outs[i]);
auto& place = *ctx.eigen_device();
funcs::EigenBroadcast<std::decay_t<decltype(place)>, T, Rank>::Eval(
place, y, x, bcast_dims);
}
}
template <typename T, typename Context>
void MeshgridKernel(const Context& ctx,
const std::vector<const DenseTensor*>& inputs,
std::vector<DenseTensor*> outputs) {
int rank = inputs.size();
switch (rank) {
case 1:
MeshgridForward<T, Context, 1>(ctx, inputs, outputs);
break;
case 2:
MeshgridForward<T, Context, 2>(ctx, inputs, outputs);
break;
case 3:
MeshgridForward<T, Context, 3>(ctx, inputs, outputs);
break;
case 4:
MeshgridForward<T, Context, 4>(ctx, inputs, outputs);
break;
case 5:
MeshgridForward<T, Context, 5>(ctx, inputs, outputs);
break;
case 6:
MeshgridForward<T, Context, 6>(ctx, inputs, outputs);
break;
default:
PADDLE_THROW(phi::errors::InvalidArgument(
"Excepted Tensor numbers between 1 and 6, but only received d% .",
rank));
}
}
} // namespace phi
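MeshgridForward reshapes the i-th input so its length sits on axis i (all other axes are 1) and broadcasts it to the full grid shape. For two 1-D inputs the result is equivalent to the nested loops below, shown only to illustrate the broadcast semantics:

#include <cstdio>
#include <vector>

int main() {
  std::vector<double> x = {1, 2};        // shape [2]
  std::vector<double> y = {10, 20, 30};  // shape [3]
  const int N = static_cast<int>(x.size()), M = static_cast<int>(y.size());
  // meshgrid yields two [N, M] tensors: x broadcast along axis 1, y along axis 0.
  std::vector<double> X(N * M), Y(N * M);
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j) {
      X[i * M + j] = x[i];
      Y[i * M + j] = y[j];
    }
  }
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j) std::printf("(%g,%g) ", X[i * M + j], Y[i * M + j]);
    std::printf("\n");
  }
  return 0;
}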
This diff has been collapsed.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include "paddle/phi/kernels/rmsprop_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/for_range.h"
namespace phi {
template <typename T>
struct DenseRmspropGradFunctor {
inline explicit DenseRmspropGradFunctor(const T *grad) : grad_(grad) {}
HOSTDEVICE inline T operator()(int64_t idx) const { return grad_[idx]; }
const T *grad_;
};
template <typename T>
struct SparseRmspropGradFunctor {
inline SparseRmspropGradFunctor(const T *grad,
const int64_t *rows,
int64_t row_numel,
int64_t row_count)
: grad_(grad),
rows_(rows),
row_numel_(row_numel),
row_count_(row_count) {}
HOSTDEVICE inline T operator()(int64_t idx) const {
auto row_idx =
phi::funcs::BinarySearch(rows_, row_count_, idx / row_numel_);
return row_idx >= 0 ? grad_[row_idx * row_numel_ + idx % row_numel_] : 0;
}
const T *grad_;
const int64_t *rows_;
int64_t row_numel_;
int64_t row_count_;
};
template <typename T, typename GradFunctor>
struct UncenteredRmspropFunctor {
UncenteredRmspropFunctor(T *param,
T *ms,
T *mom,
const T *lr,
T rho,
T epsilon,
T momentum,
const GradFunctor &grad_functor)
: param_(param),
ms_(ms),
mom_(mom),
lr_(lr),
rho_(rho),
epsilon_(epsilon),
momentum_(momentum),
grad_functor_(grad_functor) {}
HOSTDEVICE inline void operator()(int64_t idx) const {
T g = grad_functor_(idx);
T ms_out = rho_ * ms_[idx] + (1 - rho_) * g * g;
T mom_out = momentum_ * mom_[idx] + lr_[0] * g / sqrt(ms_out + epsilon_);
param_[idx] -= mom_out;
ms_[idx] = ms_out;
mom_[idx] = mom_out;
}
T *param_;
T *ms_;
T *mom_;
const T *lr_;
T rho_;
T epsilon_;
T momentum_;
GradFunctor grad_functor_;
};
template <typename T, typename GradFunctor>
struct CenteredRmspropFunctor {
CenteredRmspropFunctor(T *param,
T *ms,
T *mom,
T *mean_grad,
const T *lr,
T rho,
T epsilon,
T momentum,
const GradFunctor &grad_functor)
: param_(param),
ms_(ms),
mom_(mom),
mean_grad_(mean_grad),
lr_(lr),
rho_(rho),
epsilon_(epsilon),
momentum_(momentum),
grad_functor_(grad_functor) {}
HOSTDEVICE inline void operator()(int64_t idx) const {
T g = grad_functor_(idx);
T ms_out = rho_ * ms_[idx] + (1 - rho_) * g * g;
T mg_out = rho_ * mean_grad_[idx] + (1 - rho_) * g;
T mom_out = momentum_ * mom_[idx] +
lr_[0] * g / sqrt(ms_out - mg_out * mg_out + epsilon_);
param_[idx] -= mom_out;
ms_[idx] = ms_out;
mom_[idx] = mom_out;
mean_grad_[idx] = mg_out;
}
T *param_;
T *ms_;
T *mom_;
T *mean_grad_;
const T *lr_;
T rho_;
T epsilon_;
T momentum_;
GradFunctor grad_functor_;
};
template <typename T, typename Context>
void RmspropDenseKernel(const Context &ctx,
const DenseTensor &param,
const DenseTensor &mean_square,
const DenseTensor &grad,
const DenseTensor &moment,
const DenseTensor &learning_rate,
paddle::optional<const DenseTensor &> mean_grad_opt,
float epsilon_t,
float decay_t,
float momentum_t,
bool centered,
DenseTensor *param_out,
DenseTensor *moment_out,
DenseTensor *mean_square_out,
DenseTensor *mean_grad_out) {
auto epsilon = static_cast<T>(epsilon_t);
auto rho = static_cast<T>(decay_t);
auto momentum = static_cast<T>(momentum_t);
auto &p_tensor = param;
auto &ms_tensor = mean_square;
auto &lr_tensor = learning_rate;
auto &mom_tensor = moment;
PADDLE_ENFORCE_EQ(p_tensor.IsSharedBufferWith(*param_out),
true,
phi::errors::InvalidArgument(
"Param and ParamOut must be the same Tensor"));
PADDLE_ENFORCE_EQ(mom_tensor.IsSharedBufferWith(*moment_out),
true,
phi::errors::InvalidArgument(
"Moment and MomentOut must be the same Tensor"));
PADDLE_ENFORCE_EQ(
ms_tensor.IsSharedBufferWith(*mean_square_out),
true,
phi::errors::InvalidArgument(
"MeanSquare and MeanSquareOut must be the same Tensor"));
size_t limit = static_cast<size_t>(ms_tensor.numel());
auto &grad_tensor = grad;
if (paddle::platform::is_cpu_place(ctx.GetPlace())) {
auto &place = *ctx.eigen_device();
auto lr_value = lr_tensor.data<T>()[0];
auto p = EigenVector<T>::Flatten(p_tensor);
auto ms = EigenVector<T>::Flatten(ms_tensor);
auto g = EigenVector<T>::Flatten(grad_tensor);
auto mom = EigenVector<T>::Flatten(mom_tensor);
auto p_out = EigenVector<T>::Flatten(*param_out);
auto mom_out = EigenVector<T>::Flatten(*moment_out);
auto ms_out = EigenVector<T>::Flatten(*mean_square_out);
ms_out.device(place) = rho * ms + (1 - rho) * g * g;
if (centered) {
auto mg_tensor = mean_grad_opt.get_ptr();
auto mg = EigenVector<T>::Flatten(*mg_tensor);
PADDLE_ENFORCE_EQ(
mg_tensor,
mean_grad_out,
phi::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
auto mg_out = EigenVector<T>::Flatten(*mean_grad_out);
mg_out.device(place) = rho * mg + (1 - rho) * g;
mom_out.device(place) =
momentum * mom +
lr_value * g / (ms_out - mg_out.square() + epsilon).sqrt();
} else {
mom_out.device(place) =
momentum * mom + lr_value * g / (ms_out + epsilon).sqrt();
}
p_out.device(place) = p - mom_out;
} else {
DenseRmspropGradFunctor<T> grad_func(grad_tensor.data<T>());
funcs::ForRange<Context> for_range(ctx, limit);
if (centered) {
auto mg_tensor = mean_grad_opt.get_ptr();
PADDLE_ENFORCE_EQ(
mg_tensor,
mean_grad_out,
phi::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
for_range(CenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>(
ctx.template Alloc<T>(param_out),
ctx.template Alloc<T>(mean_square_out),
ctx.template Alloc<T>(moment_out),
ctx.template Alloc<T>(mean_grad_out),
lr_tensor.data<T>(),
rho,
epsilon,
momentum,
grad_func));
} else {
for_range(UncenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>(
ctx.template Alloc<T>(param_out),
ctx.template Alloc<T>(mean_square_out),
ctx.template Alloc<T>(moment_out),
lr_tensor.data<T>(),
rho,
epsilon,
momentum,
grad_func));
}
}
}
template <typename T, typename Context>
void RmspropSparseKernel(const Context &ctx,
const DenseTensor &param,
const DenseTensor &mean_square,
const SelectedRows &grad,
const DenseTensor &moment,
const DenseTensor &learning_rate,
paddle::optional<const DenseTensor &> mean_grad_opt,
float epsilon_t,
float decay_t,
float momentum_t,
bool centered,
DenseTensor *param_out,
DenseTensor *moment_out,
DenseTensor *mean_square_out,
DenseTensor *mean_grad_out) {
auto epsilon = static_cast<T>(epsilon_t);
auto rho = static_cast<T>(decay_t);
auto momentum = static_cast<T>(momentum_t);
auto &p_tensor = param;
auto &ms_tensor = mean_square;
auto &lr_tensor = learning_rate;
auto &mom_tensor = moment;
PADDLE_ENFORCE_EQ(p_tensor.IsSharedBufferWith(*param_out),
true,
phi::errors::InvalidArgument(
"Param and ParamOut must be the same Tensor"));
PADDLE_ENFORCE_EQ(mom_tensor.IsSharedBufferWith(*moment_out),
true,
phi::errors::InvalidArgument(
"Moment and MomentOut must be the same Tensor"));
PADDLE_ENFORCE_EQ(
ms_tensor.IsSharedBufferWith(*mean_square_out),
true,
phi::errors::InvalidArgument(
"MeanSquare and MeanSquareOut must be the same Tensor"));
size_t limit = static_cast<size_t>(ms_tensor.numel());
phi::SelectedRows tmp_merged_grad;
phi::SelectedRows *merged_grad = &tmp_merged_grad;
paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
merge_func(ctx, grad, merged_grad);
funcs::ForRange<Context> for_range(ctx, limit);
auto &grad_merge_rows = merged_grad->rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto &merged_tensor = merged_grad->value();
int64_t row_count = merged_grad->rows().size();
int64_t row_numel = merged_tensor.numel() / row_count;
SparseRmspropGradFunctor<T> grad_func(
merged_tensor.data<T>(), rows, row_numel, row_count);
if (centered) {
auto mg_tensor = mean_grad_opt.get_ptr();
PADDLE_ENFORCE_EQ(mg_tensor,
mean_grad_out,
phi::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
for_range(CenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>(
ctx.template Alloc<T>(param_out),
ctx.template Alloc<T>(mean_square_out),
ctx.template Alloc<T>(moment_out),
ctx.template Alloc<T>(mean_grad_out),
lr_tensor.data<T>(),
rho,
epsilon,
momentum,
grad_func));
} else {
for_range(UncenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>(
ctx.template Alloc<T>(param_out),
ctx.template Alloc<T>(mean_square_out),
ctx.template Alloc<T>(moment_out),
lr_tensor.data<T>(),
rho,
epsilon,
momentum,
grad_func));
}
}
} // namespace phi
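SparseRmspropGradFunctor above reads the merged SelectedRows gradient as if it were dense: for a flat element index it binary-searches the row list (assumed sorted after MergeAdd) and returns zero when the row is absent. A minimal sketch of that lookup using std::lower_bound in place of phi::funcs::BinarySearch, with hypothetical data:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Merged sparse gradient: only rows 1 and 3 exist; each row holds 2 values.
  const std::vector<long long> rows = {1, 3};     // sorted row indices
  const std::vector<double> grad = {2, 2, 5, 5};  // row-major values, row_numel = 2
  const long long row_numel = 2;
  auto dense_grad_at = [&](long long idx) -> double {
    long long row = idx / row_numel;
    auto it = std::lower_bound(rows.begin(), rows.end(), row);
    if (it == rows.end() || *it != row) return 0.0;  // absent row -> zero gradient
    long long row_idx = it - rows.begin();
    return grad[row_idx * row_numel + idx % row_numel];
  };
  for (long long idx = 0; idx < 8; ++idx) std::printf("%g ", dense_grad_at(idx));
  std::printf("\n");  // 0 0 2 2 0 0 5 5
  return 0;
}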
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void MeshgridGradKernel(const Context& ctx,
const std::vector<const DenseTensor*>& inputs,
const std::vector<const DenseTensor*>& outputs_grad,
std::vector<DenseTensor*> inputs_grad);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void MeshgridKernel(const Context& ctx,
const std::vector<const DenseTensor*>& inputs,
std::vector<DenseTensor*> outputs);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace phi {
template <typename T, typename Context>
void MomentumDenseKernel(const Context& dev_ctx,
const DenseTensor& param,
const DenseTensor& grad,
const DenseTensor& velocity,
const DenseTensor& learning_rate,
paddle::optional<const DenseTensor&> master_param,
float mu,
bool use_nesterov,
const std::string& regularization_method,
float regularization_coeff,
bool multi_precision,
float rescale_grad,
DenseTensor* param_out,
DenseTensor* velocity_out,
DenseTensor* master_param_out);
template <typename T, typename Context>
void MomentumSparseKernel(const Context& dev_ctx,
const DenseTensor& param,
const SelectedRows& grad,
const DenseTensor& velocity,
const DenseTensor& learning_rate,
paddle::optional<const DenseTensor&> master_param,
float mu,
bool use_nesterov,
const std::string& regularization_method,
float regularization_coeff,
bool multi_precision,
float rescale_grad,
DenseTensor* param_out,
DenseTensor* velocity_out,
DenseTensor* master_param_out);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace phi {
template <typename T, typename Context>
void RmspropDenseKernel(const Context& dev_ctx,
const DenseTensor& param,
const DenseTensor& mean_square,
const DenseTensor& grad,
const DenseTensor& moment,
const DenseTensor& learning_rate,
paddle::optional<const DenseTensor&> mean_grad,
float epsilon,
float decay,
float momentum,
bool centered,
DenseTensor* param_out,
DenseTensor* moment_out,
DenseTensor* mean_square_out,
DenseTensor* mean_grad_out);
template <typename T, typename Context>
void RmspropSparseKernel(const Context& dev_ctx,
const DenseTensor& param,
const DenseTensor& mean_square,
const SelectedRows& grad,
const DenseTensor& moment,
const DenseTensor& learning_rate,
paddle::optional<const DenseTensor&> mean_grad,
float epsilon,
float decay,
float momentum,
bool centered,
DenseTensor* param_out,
DenseTensor* moment_out,
DenseTensor* mean_square_out,
DenseTensor* mean_grad_out);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature AdagradOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("Grad")) {
return KernelSignature("adagrad",
{"Param", "Grad", "Moment", "LearningRate"},
{"epsilon"},
{"ParamOut", "MomentOut"});
} else if (ctx.IsSelectedRowsInput("Grad")) {
return KernelSignature("adagrad_dense_param_sparse_grad",
{"Param", "Grad", "Moment", "LearningRate"},
{"epsilon"},
{"ParamOut", "MomentOut"});
}
return KernelSignature("unregistered", {}, {}, {});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(adagrad, phi::AdagradOpArgumentMapping);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature MeshgridOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("meshgrid", {"X"}, {}, {"Out"});
}
KernelSignature MeshgridGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"meshgrid_grad", {"X", GradVarName("Out")}, {}, {GradVarName("X")});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(meshgrid, phi::MeshgridOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(meshgrid_grad, phi::MeshgridGradOpArgumentMapping);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature MomentumOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("Grad")) {
return KernelSignature(
"momentum",
{"Param", "Grad", "Velocity", "LearningRate", "MasterParam"},
{"mu",
"use_nesterov",
"regularization_method",
"regularization_coeff",
"multi_precision",
"rescale_grad"},
{"ParamOut", "VelocityOut", "MasterParamOut"});
} else if (ctx.IsSelectedRowsInput("Grad")) {
return KernelSignature(
"momentum_dense_param_sparse_grad",
{"Param", "Grad", "Velocity", "LearningRate", "MasterParam"},
{"mu",
"use_nesterov",
"regularization_method",
"regularization_coeff",
"multi_precision",
"rescale_grad"},
{"ParamOut", "VelocityOut", "MasterParamOut"});
}
return KernelSignature("unregistered", {}, {}, {});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(momentum, phi::MomentumOpArgumentMapping);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature RmspropOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("Grad")) {
return KernelSignature(
"rmsprop",
{"Param", "MeanSquare", "Grad", "Moment", "LearningRate", "MeanGrad"},
{"epsilon", "decay", "momentum", "centered"},
{"ParamOut", "MomentOut", "MeanSquareOut", "MeanGradOut"});
} else if (ctx.IsSelectedRowsInput("Grad")) {
return KernelSignature(
"rmsprop_dense_param_sparse_grad",
{"Param", "MeanSquare", "Grad", "Moment", "LearningRate", "MeanGrad"},
{"epsilon", "decay", "momentum", "centered"},
{"ParamOut", "MomentOut", "MeanSquareOut", "MeanGradOut"});
}
return KernelSignature("unregistered", {}, {}, {});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(rmsprop, phi::RmspropOpArgumentMapping);
......@@ -20,6 +20,7 @@ import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest
import math
import paddle
class TestAdagradOp1(OpTest):
......@@ -189,4 +190,5 @@ class TestSparseAdagradOp(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -258,6 +258,7 @@ class TestMergedMomentum(unittest.TestCase):
def setUp(self):
paddle.enable_static()
self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]]
self.seed = 10
def gen_rand_data(self, shapes, dtype):
......@@ -391,4 +392,5 @@ class TestMergedMomentum2(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -84,7 +84,6 @@ class TestMeshgridOp3(unittest.TestCase):
feed={'x': input_1,
'y': input_2},
fetch_list=[grid_x, grid_y])
assert np.array_equal(res_1, out_1)
assert np.array_equal(res_2, out_2)
......@@ -180,4 +179,5 @@ class TestMeshgridOp8(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -872,6 +872,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase):
place=place, use_amp=use_amp, use_multi_tensor=True)
output2, params2 = self._momentum_optimize_dygraph(
place=place, use_amp=use_amp, use_multi_tensor=False)
self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True)
for idx in range(len(params1)):
self.assertEqual(
......@@ -991,4 +992,5 @@ class TestMultiTensorMomentumStatic(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -316,4 +316,5 @@ class TestRMSPropV2Group(TestRMSPropV2):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()