提交 943dedec 编写于 作者: P phlrain

add sgd kernel; test=develop

上级 a4bccde0
......@@ -2048,7 +2048,11 @@ void OperatorWithKernel::BuildPhiKernelContext(
// deal with optional here
if ((it == ctx.inputs.end() || it->second.size() == 0) &&
(input_defs[i].type_index ==
std::type_index(typeid(paddle::optional<const phi::DenseTensor&>)))) {
std::type_index(
typeid(paddle::optional<const phi::DenseTensor&>)) ||
input_defs[i].type_index ==
std::type_index(
typeid(paddle::optional<const phi::SelectedRows&>)))) {
pt_kernel_context->EmplaceBackInputWithoutSetRange(nullptr);
auto end_idx = start_idx + 1;
pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx),
......
......@@ -81,6 +81,12 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
default_tensor_layout,
default_key.dtype(),
arg_type);
} else if (arg_type == std::type_index(typeid(
paddle::optional<const SelectedRows&>))) {
args_def->AppendInput(default_key.backend(),
default_tensor_layout,
default_key.dtype(),
arg_type);
} else if (arg_type ==
std::type_index(typeid(const std::vector<DenseTensor>&))) {
args_def->AppendInput(default_key.backend(),
......
......@@ -14,6 +14,7 @@
#include "paddle/phi/kernels/sgd_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_helper.h"
......@@ -72,7 +73,6 @@ void SGDDenseKernel(const Context& dev_ctx,
bool multi_precision,
DenseTensor* param_out,
DenseTensor* master_param_out) {
LOG(ERROR) << "run here";
using MPDType = typename paddle::operators::details::MPTypeTrait<T>::Type;
// do check here
// if (multi_precision) {
......
......@@ -17,9 +17,7 @@
namespace phi {
KernelSignature SGDOpArgumentMapping(const ArgumentMappingContext& ctx) {
LOG(ERROR) << "11";
if (ctx.IsDenseTensorInput("Grad")) {
LOG(ERROR) << "dense";
return KernelSignature("sgd",
{"Param", "LearningRate", "Grad", "MasterParam"},
{"multi_precision"},
......
......@@ -24,366 +24,374 @@ import paddle
paddle.enable_static()
# class TestSGDOp(OpTest):
# def setUp(self):
# self.op_type = "sgd"
# self.conf()
# w = np.random.random((self.h, self.w)).astype("float32")
# g = np.random.random((self.h, self.w)).astype("float32")
# lr = np.array([0.1]).astype("float32")
# self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
# self.outputs = {'ParamOut': w - lr * g}
# def conf(self):
# self.h = 102
# self.w = 105
# def test_check_output(self):
# self.check_output()
# class TestSGDOpCase8X(TestSGDOp):
# def conf(self):
# self.h = 10
# self.w = 64
# class TestSparseSGDOp(unittest.TestCase):
# def check_with_place(self, place):
# scope = core.Scope()
# # create and initialize Grad Variable
# height = 10
# rows = [0, 4, 7]
# self.conf()
# grad_selected_rows = scope.var('Grad').get_selected_rows()
# grad_selected_rows.set_height(height)
# grad_selected_rows.set_rows(rows)
# np_array = np.ones((len(rows), self.row_numel)).astype("float32")
# np_array[0, 0] = 2.0
# np_array[2, 8] = 4.0
# grad_tensor = grad_selected_rows.get_tensor()
# grad_tensor.set(np_array, place)
# # create and initialize Param Variable
# param = scope.var('Param').get_tensor()
# param_array = np.full((height, self.row_numel), 5.0).astype("float32")
# param.set(param_array, place)
# # create and initialize LearningRate Variable
# lr = scope.var('LearningRate').get_tensor()
# lr_array = np.full((1), 2.0).astype("float32")
# lr.set(lr_array, place)
# # create and run sgd operator
# sgd_op = Operator(
# "sgd",
# Param='Param',
# Grad='Grad',
# ParamOut='Param',
# LearningRate='LearningRate')
# sgd_op.run(scope, place)
# # get and compare result
# result_array = np.array(param)
# # rows[0] = 0, 5.0 - 2.0 * 2.0
# self.assertAlmostEqual(1.0, result_array[rows[0], 0])
# # rows[0] = 0, 5.0 - 2.0 * 1.0
# self.assertAlmostEqual(3.0, result_array[rows[0], 2])
# # 5.0 - 2.0 * 0.0
# self.assertAlmostEqual(5.0, result_array[1, 0])
# # rows[1] = 4, 5.0 - 2.0 * 1.0
# self.assertAlmostEqual(3.0, result_array[rows[1], 10])
# # 5.0 - 2.0 * 0.0
# self.assertAlmostEqual(5.0, result_array[5, 8])
# # rows[2] = 7, 5.0 - 2.0 * 1.0
# self.assertAlmostEqual(3.0, result_array[rows[2], 1])
# # rows[2] = 7, 5.0 - 2.0 * 4.0
# self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
# def test_sparse_sgd(self):
# places = [core.CPUPlace()]
# if core.is_compiled_with_cuda():
# places.append(core.CUDAPlace(0))
# for place in places:
# self.check_with_place(place)
# def conf(self):
# self.row_numel = 12
# class TestSparseSGDOpCase8X(TestSparseSGDOp):
# def conf(self):
# self.row_numel = 16
# class TestSGDOpOptimizeSelectedRows(unittest.TestCase):
# def check_with_place(self, place):
# scope = core.Scope()
# row_width = 12
# # create and initialize Grad Variable
# grad_height = 10
# grad_rows = [0, 4, 7]
# grad_selected_rows = scope.var('Grad').get_selected_rows()
# grad_selected_rows.set_height(grad_height)
# grad_selected_rows.set_rows(grad_rows)
# grad_array = np.ones((len(grad_rows), row_width)).astype("float32")
# grad_array[0, 0] = 2.0
# grad_array[2, 8] = 4.0
# grad_tensor = grad_selected_rows.get_tensor()
# grad_tensor.set(grad_array, place)
# # create and initialize Param Variable
# # create and initialize W Variable
# param_rows = [0, 1, 2, 3, 4, 5, 6, 7]
# # init Param
# w_selected_rows = scope.var('Param').get_selected_rows()
# w_selected_rows.set_height(len(param_rows))
# w_selected_rows.set_rows(param_rows)
# w_selected_rows.sync_index()
# w_array = np.ones((len(param_rows), row_width)).astype("float32")
# for i in range(len(param_rows)):
# w_array[i] *= i
# w_tensor = w_selected_rows.get_tensor()
# w_tensor.set(w_array, place)
# w_before_optimize = np.array(w_tensor)
# # create and initialize LearningRate Variable
# lr_value = 0.1
# lr = scope.var('LearningRate').get_tensor()
# lr_array = np.full((1), lr_value).astype("float32")
# lr.set(lr_array, place)
# # optimize with Python
# w_after_optimize = np.copy(w_before_optimize)
# for index, id in enumerate(grad_rows):
# w_after_optimize[id] = w_before_optimize[
# id] - lr_value * grad_array[index]
# # create and run sgd operator
# sgd_op = Operator(
# "sgd",
# Param='Param',
# Grad='Grad',
# ParamOut='Param',
# LearningRate='LearningRate')
# sgd_op.run(scope, place)
# # get and compare result
# result_array = np.array(w_tensor)
# assert (result_array == w_after_optimize).all()
# def test_sparse_parameter_sgd(self):
# places = [core.CPUPlace()]
# # do not support GPU kernel currently
# for place in places:
# self.check_with_place(place)
# class TestSGDOpWithLargeInput(unittest.TestCase):
# def runTest(self):
# paddle.enable_static()
# data = fluid.layers.fill_constant(shape=[1], value=128, dtype='int64')
# label = fluid.layers.fill_constant(
# shape=[1, 150], value=0.5, dtype='float32')
# emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32')
# out = fluid.layers.l2_normalize(x=emb, axis=-1)
# cost = fluid.layers.square_error_cost(input=out, label=label)
# avg_cost = fluid.layers.mean(cost)
# sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
# sgd_optimizer.minimize(avg_cost)
# place = fluid.CPUPlace()
# exe = fluid.Executor(place)
# exe.run(fluid.default_startup_program())
# compiled_prog = fluid.compiler.CompiledProgram(
# fluid.default_main_program())
# result = exe.run(compiled_prog, fetch_list=[avg_cost])
# class TestSGDV2(unittest.TestCase):
# def test_sgd_dygraph(self):
# paddle.disable_static()
# value = np.arange(26).reshape(2, 13).astype("float32")
# a = paddle.to_tensor(value)
# linear = paddle.nn.Linear(13, 5)
# # This can be any optimizer supported by dygraph.
# adam = paddle.optimizer.SGD(learning_rate=0.01,
# parameters=linear.parameters(),
# weight_decay=0.01)
# out = linear(a)
# out.backward()
# adam.step()
# adam.clear_gradients()
# def test_sgd(self):
# paddle.enable_static()
# def check_sgd_optimizer(optimizer_attr):
# init_program = paddle.static.Program()
# program = paddle.static.Program()
# block = program.global_block()
# mul_x = block.create_parameter(
# dtype="float32",
# shape=[5, 10],
# lod_level=0,
# name="mul.x",
# optimize_attr=optimizer_attr)
# mul_y = block.create_var(
# dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
# mul_out = block.create_var(
# dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
# mean_out = block.create_var(
# dtype="float32", shape=[1], lod_level=0, name="mean.out")
# block.append_op(
# type="mul",
# inputs={"X": mul_x,
# "Y": mul_y},
# outputs={"Out": mul_out},
# attrs={"x_num_col_dims": 1})
# block.append_op(
# type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
# sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.01)
# opts, _ = sgd_optimizer.minimize(mean_out, init_program)
# return opts
# opts = check_sgd_optimizer({'learning_rate': 1.1})
# self.assertEqual(len(opts), 2)
# self.assertEqual([op.type for op in opts], ["scale", "sgd"])
# opts = check_sgd_optimizer({'learning_rate': 1.0})
# self.assertEqual(len(opts), 1)
# self.assertEqual([op.type for op in opts], ["sgd"])
# def test_raise_error(self):
# self.assertRaises(ValueError, paddle.optimizer.SGD, learning_rate=None)
# def test_sgd_group_dygraph(self):
# paddle.disable_static()
# value = np.arange(26).reshape(2, 13).astype("float32")
# a = paddle.to_tensor(value)
# linear_1 = paddle.nn.Linear(13, 5)
# linear_2 = paddle.nn.Linear(5, 3)
# # This can be any optimizer supported by dygraph.
# adam = paddle.optimizer.SGD(learning_rate=0.01,
# parameters=[{
# 'params': linear_1.parameters()
# }, {
# 'params': linear_2.parameters(),
# 'weight_decay': 0.001,
# 'learning_rate': 0.1
# }],
# weight_decay=0.01)
# out = linear_1(a)
# out = linear_2(out)
# out.backward()
# adam.step()
# adam.clear_gradients()
# class TestSGDMultiPrecision2_0(unittest.TestCase):
# def dygraph_sgd_mp(self, mp):
# paddle.disable_static()
# paddle.seed(10)
# paddle.set_device('gpu')
# input = paddle.randn((2, 2))
# model = paddle.nn.Linear(2, 2)
# optimizer = paddle.optimizer.SGD(parameters=model.parameters(),
# multi_precision=mp)
# if mp == True:
# model = paddle.amp.decorate(models=model, level='O2')
# scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
# for idx in range(5):
# if mp == True:
# with paddle.amp.auto_cast(level='O2'):
# output = model(input)
# loss = paddle.mean(output)
# scaled = scaler.scale(loss)
# scaled.backward()
# scaler.minimize(optimizer, scaled)
# optimizer.clear_grad()
# else:
# output = model(input)
# loss = paddle.mean(output)
# optimizer.step()
# optimizer.clear_grad()
# return output, model.parameters()
# def static_sgd_mp(self, mp):
# paddle.enable_static()
# paddle.seed(10)
# np.random.seed(10)
# exe = paddle.static.Executor('gpu')
# train_program = paddle.static.Program()
# startup_program = paddle.static.Program()
# optimizer = paddle.optimizer.SGD(multi_precision=mp)
# if mp:
# optimizer = paddle.static.amp.decorate(
# optimizer,
# init_loss_scaling=128.0,
# use_dynamic_loss_scaling=True,
# use_pure_fp16=True,
# use_fp16_guard=False)
# with paddle.static.program_guard(train_program, startup_program):
# if mp:
# data = paddle.static.data(
# shape=[2, 2], name='X', dtype='float16')
# else:
# data = paddle.static.data(
# shape=[2, 2], name='X', dtype='float32')
# hidden = paddle.static.nn.fc(x=data, size=10)
# loss = paddle.fluid.layers.mean(hidden)
# optimizer.minimize(loss)
# exe.run(startup_program)
# if mp:
# optimizer.amp_init(place='gpu', scope=paddle.static.global_scope())
# x = np.random.random(size=(2, 2)).astype('float16')
# else:
# x = np.random.random(size=(2, 2)).astype('float32')
# out = []
# for idx in range(5):
# loss_data, = exe.run(train_program,
# feed={"X": x},
# fetch_list=[loss.name])
# out.append(loss_data)
# return out
# def test_main(self):
# if not paddle.is_compiled_with_cuda():
# return
# "Test dygraph mode"
# output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True)
# output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False)
# self.assertEqual(
# np.allclose(
# output1_dy.astype('float32').numpy(),
# output2_dy.astype('float32').numpy(),
# atol=1e-01),
# True)
# for idx in range(len(params1_dy)):
# self.assertEqual(
# np.allclose(
# params1_dy[idx].astype('float32').numpy(),
# params2_dy[idx].astype('float32').numpy(),
# atol=1e-01),
# True)
# "Test static mode"
# output1_st = self.static_sgd_mp(mp=True)
# output2_st = self.static_sgd_mp(mp=False)
# for idx in range(len(output1_st)):
# self.assertEqual(
# np.allclose(
# output1_st[idx].astype('float32'),
# output2_st[idx].astype('float32'),
# atol=1e-01),
# True)
class TestSGDOp(OpTest):
    """Dense "sgd" operator case: expects ParamOut == Param - LearningRate * Grad.

    Subclasses override ``conf`` to change the (h, w) shape of Param and Grad.
    """

    def setUp(self):
        """Populate op_type, inputs and the NumPy reference output."""
        self.op_type = "sgd"
        self.conf()
        shape = (self.h, self.w)
        # Param is drawn before Grad so the random stream is consumed in a
        # fixed order.
        param = np.random.random(shape).astype("float32")
        grad = np.random.random(shape).astype("float32")
        learning_rate = np.array([0.1]).astype("float32")
        self.inputs = {
            'Param': param,
            'Grad': grad,
            'LearningRate': learning_rate,
        }
        self.outputs = {'ParamOut': param - learning_rate * grad}

    def conf(self):
        """Set the shape used for Param and Grad."""
        self.h, self.w = 102, 105

    def test_check_output(self):
        """Delegate the comparison to the inherited check_output()."""
        self.check_output()
class TestSGDOpCase8X(TestSGDOp):
    """Same as TestSGDOp, but with a 10 x 64 Param/Grad shape."""

    def conf(self):
        """Override the shape used for Param and Grad."""
        self.h, self.w = 10, 64
class TestSparseSGDOp(unittest.TestCase):
    """Runs the "sgd" operator with a SelectedRows Grad and a dense Param.

    Subclasses override ``conf`` to change ``row_numel`` (the row width).
    """

    def check_with_place(self, place):
        """Build the scope on ``place``, run sgd in place on Param, spot-check values."""
        scope = core.Scope()
        self.conf()

        # Grad: SelectedRows with three populated rows out of `table_height`.
        table_height = 10
        touched_rows = [0, 4, 7]
        sparse_grad = scope.var('Grad').get_selected_rows()
        sparse_grad.set_height(table_height)
        sparse_grad.set_rows(touched_rows)
        grad_values = np.ones(
            (len(touched_rows), self.row_numel)).astype("float32")
        grad_values[0, 0] = 2.0
        grad_values[2, 8] = 4.0
        sparse_grad.get_tensor().set(grad_values, place)

        # Param: dense tensor filled with 5.0.
        param = scope.var('Param').get_tensor()
        param.set(
            np.full((table_height, self.row_numel), 5.0).astype("float32"),
            place)

        # LearningRate: single-element tensor holding 2.0.
        learning_rate = scope.var('LearningRate').get_tensor()
        learning_rate.set(np.full((1), 2.0).astype("float32"), place)

        # ParamOut is bound to the same variable as Param, so the update is
        # written back into `param`.
        sgd_op = Operator(
            "sgd",
            Param='Param',
            Grad='Grad',
            ParamOut='Param',
            LearningRate='LearningRate')
        sgd_op.run(scope, place)

        updated = np.array(param)
        # (expected value, row, column); expected = 5.0 - 2.0 * grad.
        expectations = [
            (1.0, touched_rows[0], 0),  # grad 2.0
            (3.0, touched_rows[0], 2),  # grad 1.0
            (5.0, 1, 0),  # row absent from Grad
            (3.0, touched_rows[1], 10),  # grad 1.0
            (5.0, 5, 8),  # row absent from Grad
            (3.0, touched_rows[2], 1),  # grad 1.0
            (-3.0, touched_rows[2], 8),  # grad 4.0
        ]
        for expected, row, col in expectations:
            self.assertAlmostEqual(expected, updated[row, col])

    def test_sparse_sgd(self):
        """Run the check on CPU, and on CUDA device 0 when compiled with CUDA."""
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places += [core.CUDAPlace(0)]
        for target in places:
            self.check_with_place(target)

    def conf(self):
        """Set the width of each Param/Grad row."""
        self.row_numel = 12
class TestSparseSGDOpCase8X(TestSparseSGDOp):
    """Same as TestSparseSGDOp, but with rows that are 16 elements wide."""

    def conf(self):
        """Override the width of each Param/Grad row."""
        self.row_numel = 16
class TestSGDOpOptimizeSelectedRows(unittest.TestCase):
    """Runs the "sgd" operator when both Param and Grad are SelectedRows."""

    def check_with_place(self, place):
        """Run sgd on ``place`` and compare against a NumPy reference update."""
        scope = core.Scope()

        row_width = 12
        # create and initialize Grad Variable
        grad_height = 10
        grad_rows = [0, 4, 7]

        grad_selected_rows = scope.var('Grad').get_selected_rows()
        grad_selected_rows.set_height(grad_height)
        grad_selected_rows.set_rows(grad_rows)
        grad_array = np.ones((len(grad_rows), row_width)).astype("float32")
        grad_array[0, 0] = 2.0
        grad_array[2, 8] = 4.0

        grad_tensor = grad_selected_rows.get_tensor()
        grad_tensor.set(grad_array, place)

        # create and initialize Param Variable (also a SelectedRows)
        param_rows = [0, 1, 2, 3, 4, 5, 6, 7]

        w_selected_rows = scope.var('Param').get_selected_rows()
        w_selected_rows.set_height(len(param_rows))
        w_selected_rows.set_rows(param_rows)
        w_selected_rows.sync_index()
        # Row i of the parameter is filled with the value i.
        w_array = np.ones((len(param_rows), row_width)).astype("float32")
        for i in range(len(param_rows)):
            w_array[i] *= i
        w_tensor = w_selected_rows.get_tensor()
        w_tensor.set(w_array, place)

        w_before_optimize = np.array(w_tensor)

        # create and initialize LearningRate Variable
        lr_value = 0.1
        lr = scope.var('LearningRate').get_tensor()
        lr_array = np.full((1), lr_value).astype("float32")
        lr.set(lr_array, place)

        # Reference update computed with NumPy: only rows listed in
        # grad_rows change.
        w_after_optimize = np.copy(w_before_optimize)
        for index, row in enumerate(grad_rows):
            w_after_optimize[row] = w_before_optimize[
                row] - lr_value * grad_array[index]

        # create and run sgd operator; ParamOut is bound to the Param variable
        sgd_op = Operator(
            "sgd",
            Param='Param',
            Grad='Grad',
            ParamOut='Param',
            LearningRate='LearningRate')
        sgd_op.run(scope, place)

        # get and compare result
        result_array = np.array(w_tensor)
        # A bare `assert` is removed when Python runs with -O and reports
        # nothing about the mismatch; assert_array_equal always runs and
        # prints the differing elements. The comparison stays exact.
        np.testing.assert_array_equal(result_array, w_after_optimize)

    def test_sparse_parameter_sgd(self):
        """Run the check on CPU only."""
        places = [core.CPUPlace()]
        # do not support GPU kernel currently
        for place in places:
            self.check_with_place(place)
class TestSGDOpWithLargeInput(unittest.TestCase):
    """Builds a static program around a 10000000 x 150 embedding and runs it on CPU."""

    def runTest(self):
        """Construct the network, attach an SGD optimizer and execute the program once."""
        paddle.enable_static()

        ids = fluid.layers.fill_constant(shape=[1], value=128, dtype='int64')
        target = fluid.layers.fill_constant(
            shape=[1, 150], value=0.5, dtype='float32')
        embedded = fluid.embedding(
            input=ids, size=(10000000, 150), dtype='float32')
        normalized = fluid.layers.l2_normalize(x=embedded, axis=-1)

        squared_error = fluid.layers.square_error_cost(
            input=normalized, label=target)
        avg_cost = fluid.layers.mean(squared_error)
        fluid.optimizer.SGD(learning_rate=0.001).minimize(avg_cost)

        cpu = fluid.CPUPlace()
        executor = fluid.Executor(cpu)
        executor.run(fluid.default_startup_program())
        compiled_prog = fluid.compiler.CompiledProgram(
            fluid.default_main_program())
        # The fetched value is not inspected; the test only requires the run
        # to complete.
        result = executor.run(compiled_prog, fetch_list=[avg_cost])
class TestSGDV2(unittest.TestCase):
    """Exercises paddle.optimizer.SGD in dygraph and static mode."""

    def test_sgd_dygraph(self):
        """One forward/backward/step cycle on a Linear(13, 5) layer."""
        paddle.disable_static()
        batch = paddle.to_tensor(
            np.arange(26).reshape(2, 13).astype("float32"))
        linear = paddle.nn.Linear(13, 5)
        # This can be any optimizer supported by dygraph.
        sgd = paddle.optimizer.SGD(learning_rate=0.01,
                                   parameters=linear.parameters(),
                                   weight_decay=0.01)
        out = linear(batch)
        out.backward()
        sgd.step()
        sgd.clear_gradients()

    def test_sgd(self):
        """Check which ops minimize() returns for different per-parameter learning rates."""
        paddle.enable_static()

        def check_sgd_optimizer(optimizer_attr):
            # Build a mul -> mean program by hand and return the ops that
            # SGD.minimize reports for it.
            init_program = paddle.static.Program()
            program = paddle.static.Program()
            block = program.global_block()
            mul_x = block.create_parameter(
                dtype="float32",
                shape=[5, 10],
                lod_level=0,
                name="mul.x",
                optimize_attr=optimizer_attr)
            mul_y = block.create_var(
                dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
            mul_out = block.create_var(
                dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
            mean_out = block.create_var(
                dtype="float32", shape=[1], lod_level=0, name="mean.out")
            block.append_op(
                type="mul",
                inputs={"X": mul_x,
                        "Y": mul_y},
                outputs={"Out": mul_out},
                attrs={"x_num_col_dims": 1})
            block.append_op(
                type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
            sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.01)
            opts, _ = sgd_optimizer.minimize(mean_out, init_program)
            return opts

        # (per-parameter learning rate, op types expected from minimize)
        scenarios = [
            (1.1, ["scale", "sgd"]),
            (1.0, ["sgd"]),
        ]
        for param_lr, expected_types in scenarios:
            opts = check_sgd_optimizer({'learning_rate': param_lr})
            self.assertEqual(len(opts), len(expected_types))
            self.assertEqual([op.type for op in opts], expected_types)

    def test_raise_error(self):
        """Constructing SGD with learning_rate=None must raise ValueError."""
        self.assertRaises(ValueError, paddle.optimizer.SGD, learning_rate=None)

    def test_sgd_group_dygraph(self):
        """One step with two parameter groups, the second carrying its own options."""
        paddle.disable_static()
        batch = paddle.to_tensor(
            np.arange(26).reshape(2, 13).astype("float32"))
        linear_1 = paddle.nn.Linear(13, 5)
        linear_2 = paddle.nn.Linear(5, 3)
        # This can be any optimizer supported by dygraph.
        param_groups = [{
            'params': linear_1.parameters()
        }, {
            'params': linear_2.parameters(),
            'weight_decay': 0.001,
            'learning_rate': 0.1
        }]
        sgd = paddle.optimizer.SGD(learning_rate=0.01,
                                   parameters=param_groups,
                                   weight_decay=0.01)
        out = linear_2(linear_1(batch))
        out.backward()
        sgd.step()
        sgd.clear_gradients()
class TestSGDMultiPrecision2_0(unittest.TestCase):
    """Compares SGD with and without multi_precision, in dygraph and static mode."""

    def dygraph_sgd_mp(self, mp):
        """Run five dygraph iterations of a Linear(2, 2) model on GPU.

        Returns the output of the last forward pass and the model parameters.
        """
        paddle.disable_static()
        paddle.seed(10)
        paddle.set_device('gpu')
        features = paddle.randn((2, 2))
        model = paddle.nn.Linear(2, 2)
        optimizer = paddle.optimizer.SGD(parameters=model.parameters(),
                                         multi_precision=mp)
        # Same comparison the loop relies on, evaluated once.
        use_amp = mp == True  # noqa: E712
        if use_amp:
            model = paddle.amp.decorate(models=model, level='O2')
            scaler = paddle.amp.GradScaler(init_loss_scaling=1024)

        for _ in range(5):
            if use_amp:
                with paddle.amp.auto_cast(level='O2'):
                    output = model(features)
                    loss = paddle.mean(output)
                scaled = scaler.scale(loss)
                scaled.backward()
                scaler.minimize(optimizer, scaled)
                optimizer.clear_grad()
            else:
                output = model(features)
                loss = paddle.mean(output)
                # NOTE(review): no backward() is called on this path before
                # optimizer.step() - confirm this is intended.
                optimizer.step()
                optimizer.clear_grad()

        return output, model.parameters()

    def static_sgd_mp(self, mp):
        """Run five static-graph iterations of an fc(size=10) network on GPU.

        Returns the list of fetched loss values, one per iteration.
        """
        paddle.enable_static()
        paddle.seed(10)
        np.random.seed(10)
        exe = paddle.static.Executor('gpu')
        train_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        optimizer = paddle.optimizer.SGD(multi_precision=mp)

        if mp:
            optimizer = paddle.static.amp.decorate(
                optimizer,
                init_loss_scaling=128.0,
                use_dynamic_loss_scaling=True,
                use_pure_fp16=True,
                use_fp16_guard=False)

        # The input placeholder and the fed array share one dtype.
        dtype_name = 'float16' if mp else 'float32'
        with paddle.static.program_guard(train_program, startup_program):
            data = paddle.static.data(
                shape=[2, 2], name='X', dtype=dtype_name)
            hidden = paddle.static.nn.fc(x=data, size=10)
            loss = paddle.fluid.layers.mean(hidden)
            optimizer.minimize(loss)
        exe.run(startup_program)

        if mp:
            optimizer.amp_init(place='gpu', scope=paddle.static.global_scope())
        x = np.random.random(size=(2, 2)).astype(dtype_name)

        losses = []
        for _ in range(5):
            loss_data, = exe.run(train_program,
                                 feed={"X": x},
                                 fetch_list=[loss.name])
            losses.append(loss_data)
        return losses

    def test_main(self):
        """Require the two precisions to agree within atol=1e-01; no-op without CUDA."""
        if not paddle.is_compiled_with_cuda():
            return

        # Test dygraph mode
        output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True)
        output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False)
        self.assertTrue(
            np.allclose(
                output1_dy.astype('float32').numpy(),
                output2_dy.astype('float32').numpy(),
                atol=1e-01))
        for idx, param_mp in enumerate(params1_dy):
            param_fp32 = params2_dy[idx]
            self.assertTrue(
                np.allclose(
                    param_mp.astype('float32').numpy(),
                    param_fp32.astype('float32').numpy(),
                    atol=1e-01))

        # Test static mode
        output1_st = self.static_sgd_mp(mp=True)
        output2_st = self.static_sgd_mp(mp=False)
        for idx, loss_mp in enumerate(output1_st):
            loss_fp32 = output2_st[idx]
            self.assertTrue(
                np.allclose(
                    loss_mp.astype('float32'),
                    loss_fp32.astype('float32'),
                    atol=1e-01))
class TestSGDMultiPrecision1_0(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册