Commit 26aac8d8 authored by phlrain

update

Parent 5b5941c7
@@ -17,7 +17,7 @@
 #include <memory>
 #include "paddle/fluid/operators/optimizers/momentum_op.h"
-#include "paddle/fluid/operators/optimizers/sgd_op.h"
+#include "paddle/phi/kernels/sgd_kernel.h"
 namespace paddle {
 namespace operators {
...@@ -26,8 +26,7 @@ template <typename DeviceContext, typename T> ...@@ -26,8 +26,7 @@ template <typename DeviceContext, typename T>
class DGCMomentumKernel : public framework::OpKernel<T> { class DGCMomentumKernel : public framework::OpKernel<T> {
public: public:
DGCMomentumKernel() DGCMomentumKernel()
: _momentum_op_kernel(new MomentumOpKernel<DeviceContext, T>()), : _momentum_op_kernel(new MomentumOpKernel<DeviceContext, T>()) {}
_sgd_op_kernel(new SGDOpKernel<DeviceContext, T>()) {}
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto rampup_begin_step = context.Attr<float>("rampup_begin_step"); auto rampup_begin_step = context.Attr<float>("rampup_begin_step");
...@@ -67,12 +66,68 @@ class DGCMomentumKernel : public framework::OpKernel<T> { ...@@ -67,12 +66,68 @@ class DGCMomentumKernel : public framework::OpKernel<T> {
} }
VLOG(10) << " so use sgd optimizer"; VLOG(10) << " so use sgd optimizer";
return _sgd_op_kernel->Compute(context);
const auto* param_var = context.InputVar("Param");
const auto* grad_var = context.InputVar("Grad");
auto* learning_rate = context.Input<framework::Tensor>("LearningRate");
bool multi_precision = context.Attr<bool>("multi_precision");
if (param_var->IsType<framework::LoDTensor>()) {
auto* param = context.Input<framework::Tensor>("Param");
auto* param_out = context.Output<framework::Tensor>("ParamOut");
auto* master_param_out =
context.Output<framework::Tensor>("MasterParamOut");
paddle::optional<const framework::Tensor&> master_param_opt =
paddle::none;
if (multi_precision) {
auto* master_param = context.Input<framework::Tensor>("MasterParam");
master_param_opt = *master_param;
}
if (grad_var->IsType<framework::Tensor>()) {
// sgd_dense
auto* grad = context.Input<framework::Tensor>("Grad");
phi::SGDDenseKernel<T>(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*param, *learning_rate, *grad, master_param_opt, multi_precision,
param_out, master_param_out);
} else {
// sgd dense param sparse grad
auto* grad = context.Input<phi::SelectedRows>("Grad");
phi::SGDDenseParamSparseGradKernel<T>(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*param, *learning_rate, *grad, master_param_opt, multi_precision,
param_out, master_param_out);
}
} else if (param_var->IsType<phi::SelectedRows>() &&
grad_var->IsType<phi::SelectedRows>() &&
platform::is_cpu_place(context.GetPlace())) {
// sgd sparse param sparse grad
auto* param = context.Input<phi::SelectedRows>("Param");
auto* param_out = context.Output<phi::SelectedRows>("ParamOut");
auto* master_param_out =
context.Output<phi::SelectedRows>("MasterParamOut");
paddle::optional<const phi::SelectedRows&> master_param_opt =
paddle::none;
if (multi_precision) {
auto* master_param = context.Input<phi::SelectedRows>("MasterParam");
master_param_opt = *master_param;
}
auto* grad = context.Input<phi::SelectedRows>("Grad");
phi::SGDSparseParamSparseGradKernel<T>(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*param, *learning_rate, *grad, master_param_opt, multi_precision,
param_out, master_param_out);
} else {
PADDLE_THROW("gdc not support yet");
}
} }
private: private:
std::unique_ptr<MomentumOpKernel<DeviceContext, T>> _momentum_op_kernel; std::unique_ptr<MomentumOpKernel<DeviceContext, T>> _momentum_op_kernel;
std::unique_ptr<SGDOpKernel<DeviceContext, T>> _sgd_op_kernel;
}; };
} // namespace operators } // namespace operators
......
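Before this change the class held a second `_sgd_op_kernel` member and delegated to `SGDOpKernel::Compute`; now it calls the new functional phi SGD kernels directly, choosing the variant from the runtime types of `Param` and `Grad`. The sketch below is not part of the commit: the helper name `RunSgdViaPhi` and the `multi_precision=false` path are my assumptions, and it only condenses the dispatch pattern, reusing the same `ConvertToPhiContext` cast the kernel above uses.

// Sketch of the fluid-to-phi dispatch pattern shown above, reduced to the
// two dense-parameter branches. Assumes the same headers as the file above.
template <typename DeviceContext, typename T>
void RunSgdViaPhi(const framework::ExecutionContext& context) {
  const auto& dev_ctx = context.template device_context<DeviceContext>();
  // Fluid device contexts are cast to their phi counterparts
  // (CPUContext/GPUContext) before the functional kernel is called.
  const auto& phi_ctx =
      static_cast<const typename framework::ConvertToPhiContext<
          DeviceContext>::TYPE&>(dev_ctx);

  const auto* grad_var = context.InputVar("Grad");
  auto* param = context.Input<framework::Tensor>("Param");
  auto* lr = context.Input<framework::Tensor>("LearningRate");
  auto* param_out = context.Output<framework::Tensor>("ParamOut");

  if (grad_var->IsType<framework::Tensor>()) {
    // dense Param, dense Grad -> the "sgd" kernel
    phi::SGDDenseKernel<T>(phi_ctx, *param, *lr,
                           *context.Input<framework::Tensor>("Grad"),
                           paddle::none, /*multi_precision=*/false,
                           param_out,
                           // assumes master outputs are only touched when
                           // multi_precision is true, as the GPU kernel in
                           // this diff does
                           /*master_param_out=*/nullptr);
  } else {
    // dense Param, SelectedRows Grad -> "sgd_dense_param_sparse_grad"
    phi::SGDDenseParamSparseGradKernel<T>(
        phi_ctx, *param, *lr, *context.Input<phi::SelectedRows>("Grad"),
        paddle::none, /*multi_precision=*/false, param_out,
        /*master_param_out=*/nullptr);
  }
}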
@@ -166,8 +166,3 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
     ops::SGDOpInferVarType);
-REGISTER_OP_CPU_KERNEL(
-    sgd, ops::SGDOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::SGDOpKernel<paddle::platform::CPUDeviceContext,
-                     paddle::platform::bfloat16>,
-    ops::SGDOpKernel<paddle::platform::CPUDeviceContext, double>);
@@ -166,10 +166,3 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
 };
 } // namespace operators
 } // namespace paddle
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-REGISTER_OP_CUDA_KERNEL(
-    sgd, ops::SGDOpKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::SGDOpKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::SGDOpKernel<paddle::platform::CUDADeviceContext, plat::float16>);
@@ -221,6 +221,7 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
   PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(DenseTensor);
   PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(DenseTensor);
+  PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SelectedRows);
   PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(DenseTensor);
 #ifndef PADDLE_WITH_CUSTOM_KERNEL
   PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(SelectedRows);
......
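The single added line above extends phi's kernel-call helper so that a kernel parameter of type `paddle::optional<const SelectedRows&>` can be unpacked from the runtime kernel context; that is what lets the new sgd kernels take their optional `MasterParam` as SelectedRows. The toy signature below is hypothetical and only illustrates the kind of parameter this enables (the kernel name and the `paddle/utils/optional.h` include path are my assumptions).

#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/utils/optional.h"

namespace phi {

// Without PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SelectedRows),
// the KernelImpl machinery above would have no specialization able to feed
// the optional SelectedRows argument below from the KernelContext.
template <typename T, typename Context>
void ToyOptionalRowsKernel(const Context& dev_ctx,
                           const SelectedRows& param,
                           paddle::optional<const SelectedRows&> master_param,
                           SelectedRows* param_out) {
  if (master_param) {
    // The optional behaves like a pointer; dereference only after checking.
    const DenseTensor& master_value = master_param->value();
    (void)master_value;
  }
}

}  // namespace phi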
@@ -14,6 +14,8 @@
 #include "paddle/phi/kernels/sgd_kernel.h"
 #include "paddle/fluid/operators/jit/kernels.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 namespace phi {
@@ -112,11 +114,11 @@ void sgd_dense_param_sparse_grad_impl<phi::dtype::bfloat16>(
 }
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDDenseKernel(const Context& dev_ctx,
                const DenseTensor& param,
                const DenseTensor& learning_rate,
                const DenseTensor& grad,
-               const DenseTensor& master_param,
+               paddle::optional<const DenseTensor&> master_param,
                bool multi_precision,
                DenseTensor* param_out,
                DenseTensor* master_param_out) {
@@ -125,11 +127,12 @@ void SGDKernel(const Context& dev_ctx,
 }
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDDenseParamSparseGradKernel(
+    const Context& dev_ctx,
     const DenseTensor& param,
     const DenseTensor& learning_rate,
     const SelectedRows& grad,
-    const DenseTensor& master_param,
+    paddle::optional<const DenseTensor&> master_param,
     bool multi_precision,
     DenseTensor* param_out,
     DenseTensor* master_param_out) {
@@ -138,11 +141,12 @@ void SGDKernel(const Context& dev_ctx,
 }
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDSparseParamSparseGradKernel(
+    const Context& dev_ctx,
     const SelectedRows& param,
     const DenseTensor& learning_rate,
     const SelectedRows& grad,
-    const SelectedRows& master_param,
+    paddle::optional<const SelectedRows&> master_param,
     bool multi_precision,
     SelectedRows* param_out,
     SelectedRows* master_param_out) {
@@ -183,3 +187,27 @@ void SGDKernel(const Context& dev_ctx,
 }
 } // namespace phi
+PD_REGISTER_KERNEL(sgd,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::SGDDenseKernel,
+                   phi::dtype::bfloat16,
+                   float,
+                   double) {}
+PD_REGISTER_KERNEL(sgd_dense_param_sparse_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::SGDDenseParamSparseGradKernel,
+                   phi::dtype::bfloat16,
+                   float,
+                   double) {}
+PD_REGISTER_KERNEL(sgd_sparse_param_sparse_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::SGDSparseParamSparseGradKernel,
+                   phi::dtype::bfloat16,
+                   float,
+                   double) {}
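Each PD_REGISTER_KERNEL call instantiates the kernel template for every listed dtype and records it in phi's kernel registry under the given name, backend, and layout, so `sgd`, `sgd_dense_param_sparse_grad`, and `sgd_sparse_param_sparse_grad` become three separately looked-up kernels. As a rough sketch of mine (registry bookkeeping omitted, helper name assumed), the float32 entry of the first registration amounts to the instantiation `SGDDenseKernel<float, phi::CPUContext>`, which could be called directly like this:

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/sgd_kernel.h"

// Direct call to the instantiation registered under
// {"sgd", CPU, ALL_LAYOUT, float32}; in normal execution the framework
// resolves this through the registry instead.
void RunDenseSgdOnCpu(const phi::CPUContext& dev_ctx,
                      const phi::DenseTensor& param,
                      const phi::DenseTensor& learning_rate,
                      const phi::DenseTensor& grad,
                      phi::DenseTensor* param_out) {
  phi::SGDDenseKernel<float, phi::CPUContext>(
      dev_ctx, param, learning_rate, grad,
      paddle::none,               // no MasterParam supplied
      /*multi_precision=*/false,
      param_out,
      /*master_param_out=*/nullptr);  // assumed unused when not multi_precision
}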
@@ -18,6 +18,9 @@
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_helper.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
 namespace phi {
 template <typename T, typename MT>
@@ -61,14 +64,15 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows,
 }
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDDenseKernel(const Context& dev_ctx,
                const DenseTensor& param,
                const DenseTensor& learning_rate,
                const DenseTensor& grad,
-               const DenseTensor& master_param,
+               paddle::optional<const DenseTensor&> master_param,
                bool multi_precision,
                DenseTensor* param_out,
                DenseTensor* master_param_out) {
+  LOG(ERROR) << "run here";
   using MPDType = typename paddle::operators::details::MPTypeTrait<T>::Type;
   // do check here
   // if (multi_precision) {
@@ -77,7 +81,7 @@ void SGDKernel(const Context& dev_ctx,
   // }
   const MPDType* master_in_data =
-      multi_precision ? master_param.data<MPDType>() : nullptr;
+      multi_precision ? master_param->data<MPDType>() : nullptr;
   MPDType* master_out_data =
       multi_precision
           ? master_param_out->mutable_data<MPDType>(dev_ctx.GetPlace())
@@ -91,17 +95,18 @@ void SGDKernel(const Context& dev_ctx,
       grad.data<T>(),
       learning_rate.data<T>(),
       param.numel(),
-      param_out->mutable_data<T>(ctx.GetPlace()),
+      param_out->mutable_data<T>(dev_ctx.GetPlace()),
       master_in_data,
       master_out_data);
 }
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDDenseParamSparseGradKernel(
+    const Context& dev_ctx,
     const DenseTensor& param,
     const DenseTensor& learning_rate,
     const SelectedRows& grad,
-    const DenseTensor& master_param,
+    paddle::optional<const DenseTensor&> master_param,
     bool multi_precision,
     DenseTensor* param_out,
     DenseTensor* master_param_out) {
@@ -113,7 +118,7 @@ void SGDKernel(const Context& dev_ctx,
   // }
   const MPDType* master_in_data =
-      multi_precision ? master_param.data<MPDType>() : nullptr;
+      multi_precision ? master_param->data<MPDType>() : nullptr;
   MPDType* master_out_data =
       multi_precision
           ? master_param_out->mutable_data<MPDType>(dev_ctx.GetPlace())
@@ -155,7 +160,7 @@ void SGDKernel(const Context& dev_ctx,
   int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
   int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
   paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
-  SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx..stream()>>>(
+  SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx.stream()>>>(
       in_data,
      mixv_in_rows.CUDAData(dev_ctx.GetPlace()),
      learning_rate.data<T>(),
@@ -164,4 +169,41 @@ void SGDKernel(const Context& dev_ctx,
      in_rows.size());
 }
+template <typename T, typename Context>
+void SGDSparseParamSparseGradKernel(
+    const Context& dev_ctx,
+    const SelectedRows& param,
+    const DenseTensor& learning_rate,
+    const SelectedRows& grad,
+    paddle::optional<const SelectedRows&> master_param,
+    bool multi_precision,
+    SelectedRows* param_out,
+    SelectedRows* master_param_out) {
+  PADDLE_THROW("not impl");
+}
 } // namespace phi
+PD_REGISTER_KERNEL(sgd,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SGDDenseKernel,
+                   phi::dtype::float16,
+                   float,
+                   double) {}
+PD_REGISTER_KERNEL(sgd_dense_param_sparse_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SGDDenseParamSparseGradKernel,
+                   phi::dtype::float16,
+                   float,
+                   double) {}
+PD_REGISTER_KERNEL(sgd_sparse_param_sparse_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SGDSparseParamSparseGradKernel,
+                   phi::dtype::float16,
+                   float,
+                   double) {}
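Note that the dtype lists differ by backend: the CPU registrations include `bfloat16` while the GPU ones include `float16`, and the GPU `SGDSparseParamSparseGradKernel` added here only throws. That is why callers keep the sparse-parameter path CPU-only, as the DGC momentum kernel earlier in this diff does with its `is_cpu_place` check. The helper below is a small sketch of mine, not part of the commit, that mirrors the same guard:

#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/selected_rows.h"

// Mirrors the condition used by DGCMomentumKernel before it calls
// phi::SGDSparseParamSparseGradKernel: sparse param + sparse grad is only
// taken on CPU, since the GPU variant is not implemented in this commit.
bool CanUseSparseParamSgd(const paddle::platform::Place& place,
                          const paddle::framework::Variable& param_var,
                          const paddle::framework::Variable& grad_var) {
  return param_var.IsType<phi::SelectedRows>() &&
         grad_var.IsType<phi::SelectedRows>() &&
         paddle::platform::is_cpu_place(place);
}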
@@ -20,31 +20,33 @@
 namespace phi {
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDDenseKernel(const Context& dev_ctx,
                const DenseTensor& param,
                const DenseTensor& learning_rate,
                const DenseTensor& grad,
-               const DenseTensor& master_param,
+               paddle::optional<const DenseTensor&> master_param,
                bool multi_precision,
                DenseTensor* param_out,
                DenseTensor* master_param_out);
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDDenseParamSparseGradKernel(
+    const Context& dev_ctx,
     const DenseTensor& param,
     const DenseTensor& learning_rate,
     const SelectedRows& grad,
-    const DenseTensor& master_param,
+    paddle::optional<const DenseTensor&> master_param,
     bool multi_precision,
     DenseTensor* param_out,
     DenseTensor* master_param_out);
 template <typename T, typename Context>
-void SGDKernel(const Context& dev_ctx,
+void SGDSparseParamSparseGradKernel(
+    const Context& dev_ctx,
     const SelectedRows& param,
     const DenseTensor& learning_rate,
     const SelectedRows& grad,
-    const SelectedRows& master_param,
+    paddle::optional<const SelectedRows&> master_param,
     bool multi_precision,
     SelectedRows* param_out,
     SelectedRows* master_param_out);
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature SGDOpArgumentMapping(const ArgumentMappingContext& ctx) {
LOG(ERROR) << "11";
if (ctx.IsDenseTensorInput("Grad")) {
LOG(ERROR) << "dense";
return KernelSignature("sgd",
{"Param", "LearningRate", "Grad", "MasterParam"},
{"multi_precision"},
{"ParamOut", "MasterParamOut"});
} else if (ctx.IsSelectedRowsInput("Grad")) {
if (ctx.IsDenseTensorInput("Param")) {
return KernelSignature("sgd_dense_param_sparse_grad",
{"Param", "LearningRate", "Grad", "MasterParam"},
{"multi_precision"},
{"ParamOut", "MasterParamOut"});
} else {
return KernelSignature("sgd_sparse_param_sparse_grad",
{"Param", "LearningRate", "Grad", "MasterParam"},
{"multi_precision"},
{"ParamOut", "MasterParamOut"});
}
}
return KernelSignature("unregistered", {}, {}, {});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(sgd, phi::SGDOpArgumentMapping);
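The mapping above is what ties the fluid `sgd` op to the three phi kernels: the framework asks SGDOpArgumentMapping for a KernelSignature, and the name lists are positional, lining up with the kernel parameters declared in sgd_kernel.h (inputs Param/LearningRate/Grad/MasterParam, attribute multi_precision, outputs ParamOut/MasterParamOut). The standalone sketch below is mine; it reduces the selection rule to two flags (the real code also falls back to an "unregistered" signature when Grad is neither type).

#include <string>

// Same decision tree as SGDOpArgumentMapping, with the ArgumentMappingContext
// queries replaced by two booleans so the three kernel names are easy to see.
std::string SelectSgdKernelName(bool grad_is_dense_tensor,
                                bool param_is_dense_tensor) {
  if (grad_is_dense_tensor) {
    return "sgd";  // dense Param, dense Grad -> SGDDenseKernel
  }
  if (param_is_dense_tensor) {
    // dense Param, SelectedRows Grad -> SGDDenseParamSparseGradKernel
    return "sgd_dense_param_sparse_grad";
  }
  // SelectedRows Param and Grad -> SGDSparseParamSparseGradKernel
  return "sgd_sparse_param_sparse_grad";
}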
...@@ -24,374 +24,366 @@ import paddle ...@@ -24,374 +24,366 @@ import paddle
paddle.enable_static() paddle.enable_static()
# class TestSGDOp(OpTest):
class TestSGDOp(OpTest): # def setUp(self):
def setUp(self): # self.op_type = "sgd"
self.op_type = "sgd" # self.conf()
self.conf() # w = np.random.random((self.h, self.w)).astype("float32")
w = np.random.random((self.h, self.w)).astype("float32") # g = np.random.random((self.h, self.w)).astype("float32")
g = np.random.random((self.h, self.w)).astype("float32") # lr = np.array([0.1]).astype("float32")
lr = np.array([0.1]).astype("float32")
# self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr} # self.outputs = {'ParamOut': w - lr * g}
self.outputs = {'ParamOut': w - lr * g}
# def conf(self):
def conf(self): # self.h = 102
self.h = 102 # self.w = 105
self.w = 105
# def test_check_output(self):
def test_check_output(self): # self.check_output()
self.check_output()
# class TestSGDOpCase8X(TestSGDOp):
# def conf(self):
class TestSGDOpCase8X(TestSGDOp): # self.h = 10
def conf(self): # self.w = 64
self.h = 10
self.w = 64 # class TestSparseSGDOp(unittest.TestCase):
# def check_with_place(self, place):
# scope = core.Scope()
class TestSparseSGDOp(unittest.TestCase):
def check_with_place(self, place): # # create and initialize Grad Variable
scope = core.Scope() # height = 10
# rows = [0, 4, 7]
# create and initialize Grad Variable # self.conf()
height = 10
rows = [0, 4, 7] # grad_selected_rows = scope.var('Grad').get_selected_rows()
self.conf() # grad_selected_rows.set_height(height)
# grad_selected_rows.set_rows(rows)
grad_selected_rows = scope.var('Grad').get_selected_rows() # np_array = np.ones((len(rows), self.row_numel)).astype("float32")
grad_selected_rows.set_height(height) # np_array[0, 0] = 2.0
grad_selected_rows.set_rows(rows) # np_array[2, 8] = 4.0
np_array = np.ones((len(rows), self.row_numel)).astype("float32")
np_array[0, 0] = 2.0 # grad_tensor = grad_selected_rows.get_tensor()
np_array[2, 8] = 4.0 # grad_tensor.set(np_array, place)
grad_tensor = grad_selected_rows.get_tensor() # # create and initialize Param Variable
grad_tensor.set(np_array, place) # param = scope.var('Param').get_tensor()
# param_array = np.full((height, self.row_numel), 5.0).astype("float32")
# create and initialize Param Variable # param.set(param_array, place)
param = scope.var('Param').get_tensor()
param_array = np.full((height, self.row_numel), 5.0).astype("float32") # # create and initialize LeraningRate Variable
param.set(param_array, place) # lr = scope.var('LearningRate').get_tensor()
# lr_array = np.full((1), 2.0).astype("float32")
# create and initialize LeraningRate Variable # lr.set(lr_array, place)
lr = scope.var('LearningRate').get_tensor()
lr_array = np.full((1), 2.0).astype("float32") # # create and run sgd operator
lr.set(lr_array, place) # sgd_op = Operator(
# "sgd",
# create and run sgd operator # Param='Param',
sgd_op = Operator( # Grad='Grad',
"sgd", # ParamOut='Param',
Param='Param', # LearningRate='LearningRate')
Grad='Grad', # sgd_op.run(scope, place)
ParamOut='Param',
LearningRate='LearningRate') # # get and compare result
sgd_op.run(scope, place) # result_array = np.array(param)
# get and compare result # # rows[0] = 0, 5.0 - 2.0 * 2.0
result_array = np.array(param) # self.assertAlmostEqual(1.0, result_array[rows[0], 0])
# # rows[0] = 0, 5.0 - 2.0 * 1.0
# rows[0] = 0, 5.0 - 2.0 * 2.0 # self.assertAlmostEqual(3.0, result_array[rows[0], 2])
self.assertAlmostEqual(1.0, result_array[rows[0], 0]) # # 5.0 - 2.0 * 0.0
# rows[0] = 0, 5.0 - 2.0 * 1.0 # self.assertAlmostEqual(5.0, result_array[1, 0])
self.assertAlmostEqual(3.0, result_array[rows[0], 2]) # # rows[1] = 4, 5.0 - 2.0 * 1.0
# 5.0 - 2.0 * 0.0 # self.assertAlmostEqual(3.0, result_array[rows[1], 10])
self.assertAlmostEqual(5.0, result_array[1, 0]) # # 5.0 - 2.0 * 0.0
# rows[1] = 4, 5.0 - 2.0 * 1.0 # self.assertAlmostEqual(5.0, result_array[5, 8])
self.assertAlmostEqual(3.0, result_array[rows[1], 10]) # # rows[2] = 7, 5.0 - 2.0 * 1.0
# 5.0 - 2.0 * 0.0 # self.assertAlmostEqual(3.0, result_array[rows[2], 1])
self.assertAlmostEqual(5.0, result_array[5, 8]) # # rows[2] = 7, 5.0 - 2.0 * 4.0
# rows[2] = 7, 5.0 - 2.0 * 1.0 # self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
self.assertAlmostEqual(3.0, result_array[rows[2], 1])
# rows[2] = 7, 5.0 - 2.0 * 4.0 # def test_sparse_sgd(self):
self.assertAlmostEqual(-3.0, result_array[rows[2], 8]) # places = [core.CPUPlace()]
# if core.is_compiled_with_cuda():
def test_sparse_sgd(self): # places.append(core.CUDAPlace(0))
places = [core.CPUPlace()] # for place in places:
if core.is_compiled_with_cuda(): # self.check_with_place(place)
places.append(core.CUDAPlace(0))
for place in places: # def conf(self):
self.check_with_place(place) # self.row_numel = 12
def conf(self): # class TestSparseSGDOpCase8X(TestSparseSGDOp):
self.row_numel = 12 # def conf(self):
# self.row_numel = 16
class TestSparseSGDOpCase8X(TestSparseSGDOp): # class TestSGDOpOptimizeSelectedRows(unittest.TestCase):
def conf(self): # def check_with_place(self, place):
self.row_numel = 16 # scope = core.Scope()
# row_width = 12
class TestSGDOpOptimizeSelectedRows(unittest.TestCase): # # create and initialize Grad Variable
def check_with_place(self, place): # grad_height = 10
scope = core.Scope() # grad_rows = [0, 4, 7]
row_width = 12 # grad_selected_rows = scope.var('Grad').get_selected_rows()
# create and initialize Grad Variable # grad_selected_rows.set_height(grad_height)
grad_height = 10 # grad_selected_rows.set_rows(grad_rows)
grad_rows = [0, 4, 7] # grad_array = np.ones((len(grad_rows), row_width)).astype("float32")
# grad_array[0, 0] = 2.0
grad_selected_rows = scope.var('Grad').get_selected_rows() # grad_array[2, 8] = 4.0
grad_selected_rows.set_height(grad_height)
grad_selected_rows.set_rows(grad_rows) # grad_tensor = grad_selected_rows.get_tensor()
grad_array = np.ones((len(grad_rows), row_width)).astype("float32") # grad_tensor.set(grad_array, place)
grad_array[0, 0] = 2.0
grad_array[2, 8] = 4.0 # # create and initialize Param Variable
# # create and initialize W Variable
grad_tensor = grad_selected_rows.get_tensor() # param_rows = [0, 1, 2, 3, 4, 5, 6, 7]
grad_tensor.set(grad_array, place)
# # init Param
# create and initialize Param Variable # w_selected_rows = scope.var('Param').get_selected_rows()
# create and initialize W Variable # w_selected_rows.set_height(len(param_rows))
param_rows = [0, 1, 2, 3, 4, 5, 6, 7] # w_selected_rows.set_rows(param_rows)
# w_selected_rows.sync_index()
# init Param # w_array = np.ones((len(param_rows), row_width)).astype("float32")
w_selected_rows = scope.var('Param').get_selected_rows() # for i in range(len(param_rows)):
w_selected_rows.set_height(len(param_rows)) # w_array[i] *= i
w_selected_rows.set_rows(param_rows) # w_tensor = w_selected_rows.get_tensor()
w_selected_rows.sync_index() # w_tensor.set(w_array, place)
w_array = np.ones((len(param_rows), row_width)).astype("float32")
for i in range(len(param_rows)): # w_before_optimize = np.array(w_tensor)
w_array[i] *= i
w_tensor = w_selected_rows.get_tensor() # # create and initialize LeraningRate Variable
w_tensor.set(w_array, place) # lr_value = 0.1
# lr = scope.var('LearningRate').get_tensor()
w_before_optimize = np.array(w_tensor) # lr_array = np.full((1), lr_value).astype("float32")
# lr.set(lr_array, place)
# create and initialize LeraningRate Variable
lr_value = 0.1 # # optimize with Python
lr = scope.var('LearningRate').get_tensor() # w_after_optimize = np.copy(w_before_optimize)
lr_array = np.full((1), lr_value).astype("float32") # for index, id in enumerate(grad_rows):
lr.set(lr_array, place) # w_after_optimize[id] = w_before_optimize[
# id] - lr_value * grad_array[index]
# optimize with Python
w_after_optimize = np.copy(w_before_optimize) # # create and run sgd operator
for index, id in enumerate(grad_rows): # sgd_op = Operator(
w_after_optimize[id] = w_before_optimize[ # "sgd",
id] - lr_value * grad_array[index] # Param='Param',
# Grad='Grad',
# create and run sgd operator # ParamOut='Param',
sgd_op = Operator( # LearningRate='LearningRate')
"sgd", # sgd_op.run(scope, place)
Param='Param',
Grad='Grad', # # get and compare result
ParamOut='Param', # result_array = np.array(w_tensor)
LearningRate='LearningRate') # assert (result_array == w_after_optimize).all()
sgd_op.run(scope, place)
# def test_sparse_parameter_sgd(self):
# get and compare result # places = [core.CPUPlace()]
result_array = np.array(w_tensor) # # do not support GPU kernel currently
assert (result_array == w_after_optimize).all() # for place in places:
# self.check_with_place(place)
def test_sparse_parameter_sgd(self):
places = [core.CPUPlace()] # class TestSGDOpWithLargeInput(unittest.TestCase):
# do not support GPU kernel currently # def runTest(self):
for place in places: # paddle.enable_static()
self.check_with_place(place) # data = fluid.layers.fill_constant(shape=[1], value=128, dtype='int64')
# label = fluid.layers.fill_constant(
# shape=[1, 150], value=0.5, dtype='float32')
class TestSGDOpWithLargeInput(unittest.TestCase): # emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32')
def runTest(self): # out = fluid.layers.l2_normalize(x=emb, axis=-1)
paddle.enable_static()
data = fluid.layers.fill_constant(shape=[1], value=128, dtype='int64') # cost = fluid.layers.square_error_cost(input=out, label=label)
label = fluid.layers.fill_constant( # avg_cost = fluid.layers.mean(cost)
shape=[1, 150], value=0.5, dtype='float32') # sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32') # sgd_optimizer.minimize(avg_cost)
out = fluid.layers.l2_normalize(x=emb, axis=-1)
# place = fluid.CPUPlace()
cost = fluid.layers.square_error_cost(input=out, label=label) # exe = fluid.Executor(place)
avg_cost = fluid.layers.mean(cost) # exe.run(fluid.default_startup_program())
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) # compiled_prog = fluid.compiler.CompiledProgram(
sgd_optimizer.minimize(avg_cost) # fluid.default_main_program())
# result = exe.run(compiled_prog, fetch_list=[avg_cost])
place = fluid.CPUPlace()
exe = fluid.Executor(place) # class TestSGDV2(unittest.TestCase):
exe.run(fluid.default_startup_program()) # def test_sgd_dygraph(self):
compiled_prog = fluid.compiler.CompiledProgram( # paddle.disable_static()
fluid.default_main_program()) # value = np.arange(26).reshape(2, 13).astype("float32")
result = exe.run(compiled_prog, fetch_list=[avg_cost]) # a = paddle.to_tensor(value)
# linear = paddle.nn.Linear(13, 5)
# # This can be any optimizer supported by dygraph.
class TestSGDV2(unittest.TestCase): # adam = paddle.optimizer.SGD(learning_rate=0.01,
def test_sgd_dygraph(self): # parameters=linear.parameters(),
paddle.disable_static() # weight_decay=0.01)
value = np.arange(26).reshape(2, 13).astype("float32") # out = linear(a)
a = paddle.to_tensor(value) # out.backward()
linear = paddle.nn.Linear(13, 5) # adam.step()
# This can be any optimizer supported by dygraph. # adam.clear_gradients()
adam = paddle.optimizer.SGD(learning_rate=0.01,
parameters=linear.parameters(), # def test_sgd(self):
weight_decay=0.01) # paddle.enable_static()
out = linear(a)
out.backward() # def check_sgd_optimizer(optimizer_attr):
adam.step() # init_program = paddle.static.Program()
adam.clear_gradients() # program = paddle.static.Program()
# block = program.global_block()
def test_sgd(self): # mul_x = block.create_parameter(
paddle.enable_static() # dtype="float32",
# shape=[5, 10],
def check_sgd_optimizer(optimizer_attr): # lod_level=0,
init_program = paddle.static.Program() # name="mul.x",
program = paddle.static.Program() # optimize_attr=optimizer_attr)
block = program.global_block() # mul_y = block.create_var(
mul_x = block.create_parameter( # dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
dtype="float32", # mul_out = block.create_var(
shape=[5, 10], # dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
lod_level=0, # mean_out = block.create_var(
name="mul.x", # dtype="float32", shape=[1], lod_level=0, name="mean.out")
optimize_attr=optimizer_attr) # block.append_op(
mul_y = block.create_var( # type="mul",
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") # inputs={"X": mul_x,
mul_out = block.create_var( # "Y": mul_y},
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") # outputs={"Out": mul_out},
mean_out = block.create_var( # attrs={"x_num_col_dims": 1})
dtype="float32", shape=[1], lod_level=0, name="mean.out") # block.append_op(
block.append_op( # type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
type="mul", # sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.01)
inputs={"X": mul_x, # opts, _ = sgd_optimizer.minimize(mean_out, init_program)
"Y": mul_y}, # return opts
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) # opts = check_sgd_optimizer({'learning_rate': 1.1})
block.append_op( # self.assertEqual(len(opts), 2)
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) # self.assertEqual([op.type for op in opts], ["scale", "sgd"])
sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.01)
opts, _ = sgd_optimizer.minimize(mean_out, init_program) # opts = check_sgd_optimizer({'learning_rate': 1.0})
return opts # self.assertEqual(len(opts), 1)
# self.assertEqual([op.type for op in opts], ["sgd"])
opts = check_sgd_optimizer({'learning_rate': 1.1})
self.assertEqual(len(opts), 2) # def test_raise_error(self):
self.assertEqual([op.type for op in opts], ["scale", "sgd"]) # self.assertRaises(ValueError, paddle.optimizer.SGD, learning_rate=None)
opts = check_sgd_optimizer({'learning_rate': 1.0}) # def test_sgd_group_dygraph(self):
self.assertEqual(len(opts), 1) # paddle.disable_static()
self.assertEqual([op.type for op in opts], ["sgd"]) # value = np.arange(26).reshape(2, 13).astype("float32")
# a = paddle.to_tensor(value)
def test_raise_error(self): # linear_1 = paddle.nn.Linear(13, 5)
self.assertRaises(ValueError, paddle.optimizer.SGD, learning_rate=None) # linear_2 = paddle.nn.Linear(5, 3)
# # This can be any optimizer supported by dygraph.
def test_sgd_group_dygraph(self): # adam = paddle.optimizer.SGD(learning_rate=0.01,
paddle.disable_static() # parameters=[{
value = np.arange(26).reshape(2, 13).astype("float32") # 'params': linear_1.parameters()
a = paddle.to_tensor(value) # }, {
linear_1 = paddle.nn.Linear(13, 5) # 'params': linear_2.parameters(),
linear_2 = paddle.nn.Linear(5, 3) # 'weight_decay': 0.001,
# This can be any optimizer supported by dygraph. # 'learning_rate': 0.1
adam = paddle.optimizer.SGD(learning_rate=0.01, # }],
parameters=[{ # weight_decay=0.01)
'params': linear_1.parameters() # out = linear_1(a)
}, { # out = linear_2(out)
'params': linear_2.parameters(), # out.backward()
'weight_decay': 0.001, # adam.step()
'learning_rate': 0.1 # adam.clear_gradients()
}],
weight_decay=0.01) # class TestSGDMultiPrecision2_0(unittest.TestCase):
out = linear_1(a) # def dygraph_sgd_mp(self, mp):
out = linear_2(out) # paddle.disable_static()
out.backward() # paddle.seed(10)
adam.step() # paddle.set_device('gpu')
adam.clear_gradients() # input = paddle.randn((2, 2))
# model = paddle.nn.Linear(2, 2)
# optimizer = paddle.optimizer.SGD(parameters=model.parameters(),
class TestSGDMultiPrecision2_0(unittest.TestCase): # multi_precision=mp)
def dygraph_sgd_mp(self, mp): # if mp == True:
paddle.disable_static() # model = paddle.amp.decorate(models=model, level='O2')
paddle.seed(10) # scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
paddle.set_device('gpu')
input = paddle.randn((2, 2)) # for idx in range(5):
model = paddle.nn.Linear(2, 2) # if mp == True:
optimizer = paddle.optimizer.SGD(parameters=model.parameters(), # with paddle.amp.auto_cast(level='O2'):
multi_precision=mp) # output = model(input)
if mp == True: # loss = paddle.mean(output)
model = paddle.amp.decorate(models=model, level='O2') # scaled = scaler.scale(loss)
scaler = paddle.amp.GradScaler(init_loss_scaling=1024) # scaled.backward()
# scaler.minimize(optimizer, scaled)
for idx in range(5): # optimizer.clear_grad()
if mp == True: # else:
with paddle.amp.auto_cast(level='O2'): # output = model(input)
output = model(input) # loss = paddle.mean(output)
loss = paddle.mean(output) # optimizer.step()
scaled = scaler.scale(loss) # optimizer.clear_grad()
scaled.backward()
scaler.minimize(optimizer, scaled) # return output, model.parameters()
optimizer.clear_grad()
else: # def static_sgd_mp(self, mp):
output = model(input) # paddle.enable_static()
loss = paddle.mean(output) # paddle.seed(10)
optimizer.step() # np.random.seed(10)
optimizer.clear_grad() # exe = paddle.static.Executor('gpu')
# train_program = paddle.static.Program()
return output, model.parameters() # startup_program = paddle.static.Program()
# optimizer = paddle.optimizer.SGD(multi_precision=mp)
def static_sgd_mp(self, mp):
paddle.enable_static() # if mp:
paddle.seed(10) # optimizer = paddle.static.amp.decorate(
np.random.seed(10) # optimizer,
exe = paddle.static.Executor('gpu') # init_loss_scaling=128.0,
train_program = paddle.static.Program() # use_dynamic_loss_scaling=True,
startup_program = paddle.static.Program() # use_pure_fp16=True,
optimizer = paddle.optimizer.SGD(multi_precision=mp) # use_fp16_guard=False)
# with paddle.static.program_guard(train_program, startup_program):
if mp: # if mp:
optimizer = paddle.static.amp.decorate( # data = paddle.static.data(
optimizer, # shape=[2, 2], name='X', dtype='float16')
init_loss_scaling=128.0, # else:
use_dynamic_loss_scaling=True, # data = paddle.static.data(
use_pure_fp16=True, # shape=[2, 2], name='X', dtype='float32')
use_fp16_guard=False) # hidden = paddle.static.nn.fc(x=data, size=10)
with paddle.static.program_guard(train_program, startup_program): # loss = paddle.fluid.layers.mean(hidden)
if mp: # optimizer.minimize(loss)
data = paddle.static.data( # exe.run(startup_program)
shape=[2, 2], name='X', dtype='float16')
else: # if mp:
data = paddle.static.data( # optimizer.amp_init(place='gpu', scope=paddle.static.global_scope())
shape=[2, 2], name='X', dtype='float32') # x = np.random.random(size=(2, 2)).astype('float16')
hidden = paddle.static.nn.fc(x=data, size=10) # else:
loss = paddle.fluid.layers.mean(hidden) # x = np.random.random(size=(2, 2)).astype('float32')
optimizer.minimize(loss) # out = []
exe.run(startup_program) # for idx in range(5):
# loss_data, = exe.run(train_program,
if mp: # feed={"X": x},
optimizer.amp_init(place='gpu', scope=paddle.static.global_scope()) # fetch_list=[loss.name])
x = np.random.random(size=(2, 2)).astype('float16') # out.append(loss_data)
else: # return out
x = np.random.random(size=(2, 2)).astype('float32')
out = [] # def test_main(self):
for idx in range(5): # if not paddle.is_compiled_with_cuda():
loss_data, = exe.run(train_program, # return
feed={"X": x}, # "Test dygraph mode"
fetch_list=[loss.name]) # output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True)
out.append(loss_data) # output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False)
return out # self.assertEqual(
# np.allclose(
def test_main(self): # output1_dy.astype('float32').numpy(),
if not paddle.is_compiled_with_cuda(): # output2_dy.astype('float32').numpy(),
return # atol=1e-01),
"Test dygraph mode" # True)
output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True) # for idx in range(len(params1_dy)):
output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False) # self.assertEqual(
self.assertEqual( # np.allclose(
np.allclose( # params1_dy[idx].astype('float32').numpy(),
output1_dy.astype('float32').numpy(), # params2_dy[idx].astype('float32').numpy(),
output2_dy.astype('float32').numpy(), # atol=1e-01),
atol=1e-01), # True)
True) # "Test static mode"
for idx in range(len(params1_dy)): # output1_st = self.static_sgd_mp(mp=True)
self.assertEqual( # output2_st = self.static_sgd_mp(mp=False)
np.allclose( # for idx in range(len(output1_st)):
params1_dy[idx].astype('float32').numpy(), # self.assertEqual(
params2_dy[idx].astype('float32').numpy(), # np.allclose(
atol=1e-01), # output1_st[idx].astype('float32'),
True) # output2_st[idx].astype('float32'),
"Test static mode" # atol=1e-01),
output1_st = self.static_sgd_mp(mp=True) # True)
output2_st = self.static_sgd_mp(mp=False)
for idx in range(len(output1_st)):
self.assertEqual(
np.allclose(
output1_st[idx].astype('float32'),
output2_st[idx].astype('float32'),
atol=1e-01),
True)
class TestSGDMultiPrecision1_0(unittest.TestCase): class TestSGDMultiPrecision1_0(unittest.TestCase):
......