Unverified commit beda7825, authored by Yibing Liu, committed by GitHub

Init mixed precision training interface (#16856)

* Init mixed precision training interface

* Add fp16 test script

test=develop

* All initializers support float16

test=develop

* Code cleanup & add more code annotations

test=develop

* Update API spec

test=develop

* Add usage example in doc

test=develop
Parent 0b07eef1
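For reference, a minimal end-to-end sketch of the interface added here, condensed from the `decorate()` docstring example and the CIFAR-10 test script in this diff. The `network()` function and its layer sizes are illustrative placeholders, not code from this commit:

import paddle.fluid as fluid

def network():
    # Illustrative fp16 forward pass: inputs are cast to float16 before the
    # first layer, mirroring the CIFAR-10 test script in this commit.
    images = fluid.layers.data(name='pixel', shape=[3, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    imgs = fluid.layers.cast(images, "float16")
    hidden = fluid.layers.fc(input=imgs, size=64, act='relu')
    logits = fluid.layers.fc(input=hidden, size=10, act=None)
    cost, _ = fluid.layers.softmax_with_cross_entropy(
        logits, label, return_softmax=True)
    return fluid.layers.mean(cost)

loss = network()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
# decorate() wraps the common optimizer; fp32 master parameters and loss
# scaling are handled by the returned wrapper.
mp_optimizer = fluid.contrib.mixed_precision.decorate(
    optimizer=optimizer, init_loss_scaling=8.0)
scaled_loss, _, _ = mp_optimizer.minimize(loss)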
......@@ -424,6 +424,7 @@ paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'loca
paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a'))
paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a'))
paddle.fluid.contrib.extend_with_decoupled_weight_decay (ArgSpec(args=['base_optimizer'], varargs=None, keywords=None, defaults=None), ('document', 'a1095dfd4ec725747f662d69cd7659d4'))
paddle.fluid.contrib.mixed_precision.decorate (ArgSpec(args=['optimizer', 'init_loss_scaling', 'use_dynamic_loss_scaling'], varargs=None, keywords=None, defaults=(1.0, False)), ('document', '67e9bf14f345b38da169beb1ebb276eb'))
paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
......
......@@ -34,6 +34,8 @@ from . import extend_optimizer
from .extend_optimizer import *
from . import model_stat
from .model_stat import *
from . import mixed_precision
from .mixed_precision import *
__all__ = []
__all__ += decoder.__all__
......@@ -45,3 +47,4 @@ __all__ += reader.__all__
__all__ += slim.__all__
__all__ += utils.__all__
__all__ += extend_optimizer.__all__
__all__ += ['mixed_precision']
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from . import decorator
from .decorator import *
__all__ = decorator.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ... import default_main_program
from ... import default_startup_program
from ... import layers
from ... import unique_name
from . import fp16_utils
from .fp16_utils import create_master_params_grads, master_param_to_train_param
__all__ = ["decorate"]
class OptimizerWithMixedPrecision(object):
"""
Optimizer with mixed-precision (MP) training. This is a wrapper of a common
optimizer, plus support for mixed-precision training. An object of this
class behaves almost the same as the common optimizer, with the methods
`minimize()`, `backward()`, and `apply_gradients()` implemented.
Additionally, it enables MP training automatically, i.e., the creation
and maintenance of master parameters, scaling of the loss, etc.
Args:
optimizer (Optimizer): A common Optimizer object.
init_loss_scaling (float): The initial loss scaling factor.
use_dynamic_loss_scaling (bool): Whether to use dynamic loss scaling.
"""
def __init__(self, optimizer, init_loss_scaling, use_dynamic_loss_scaling):
self._optimizer = optimizer
self._param_grads = None
self._train_program = default_main_program()
self._startup_prog = default_startup_program()
self._loss_scaling = init_loss_scaling
self._use_dynamic_loss_scaling = use_dynamic_loss_scaling
# Ensure the data type of learning rate vars is float32 (same as the
# master parameter dtype)
if isinstance(optimizer._learning_rate, float):
optimizer._learning_rate_map[default_main_program()] = \
layers.create_global_var(
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(optimizer._learning_rate),
dtype='float32',
persistable=True)
def get_loss_scaling(self):
"""Return the real-time loss scaling factor.
"""
return self._loss_scaling
def backward(self,
loss,
startup_program=None,
parameter_list=None,
no_grad_set=None,
callbacks=None):
"""
Backward propagation or automatic differentiation for gradient computation.
Args:
loss (Variable): The loss Variable to minimize.
startup_program (Program|None): The startup Program for initializing
parameters in `parameter_list`.
parameter_list (list|None): A list of Variables to update.
no_grad_set (set|None): A set of Variables that should be ignored.
callbacks (list|None): A list of callables to run when appending the
backward operator for one parameter.
Returns:
A list of (param, grad), which is a tuple of a parameter and its
gradient respectively, and the scaled loss.
"""
scaled_loss = loss * self._loss_scaling
self._param_grads = self._optimizer.backward(
scaled_loss, startup_program, parameter_list, no_grad_set,
callbacks)
master_params_grads = create_master_params_grads(
self._param_grads, self._train_program, self._startup_prog,
self._loss_scaling)
return master_params_grads, scaled_loss
def apply_gradients(self, master_params_grads):
"""
Update the master parameters using their gradients, and cast the updated
values back to the parameters in float16.
Args:
master_params_grads (list): A list of master params and grads.
Returns:
A list of optimize operators.
"""
optimize_ops = self._optimizer.apply_gradients(master_params_grads)
master_param_to_train_param(master_params_grads, self._param_grads,
self._train_program)
return optimize_ops
def minimize(self, loss):
"""
Perform optimization by minimizing the given loss.
Args:
loss (Variable): The loss Variable.
Returns:
The loss scaled by the scaling factor, the list of optimize ops, and a
list of master parameters and gradients.
"""
master_params_grads, scaled_loss = self.backward(loss)
optimize_ops = self.apply_gradients(master_params_grads)
return scaled_loss, optimize_ops, master_params_grads
def decorate(optimizer, init_loss_scaling=1.0, use_dynamic_loss_scaling=False):
"""
Decorate the given optimizer to adapt it to mixed-precision training.
Args:
optimizer(Optimizer): A common Optimizer.
init_loss_scaling(float): The initial loss scaling factor.
use_dynamic_loss_scaling(bool): Whether to use dynamic loss scaling.
Returns:
An optimizer acting like a normal one but with mixed-precision training
enabled.
Examples:
.. code-block:: python
loss = network()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
mp_optimizer = fluid.contrib.mixed_precision.decorate(
optimizer=optimizer, init_loss_scaling=8.0)
scaled_loss, _, _ = mp_optimizer.minimize(loss)
"""
mp_optimizer = OptimizerWithMixedPrecision(optimizer, init_loss_scaling,
use_dynamic_loss_scaling)
return mp_optimizer
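Besides `minimize()`, the wrapper also exposes `backward()` and `apply_gradients()` separately, which leaves room to inspect or post-process the fp32 master gradients between the two steps. A hedged sketch, assuming `loss` and `mp_optimizer` were built as in the usage sketch near the top of this page:

# Split form of minimize(): backward() returns the fp32 master (param, grad)
# pairs plus the scaled loss; apply_gradients() updates the master params and
# casts the results back into the fp16 training params.
master_params_grads, scaled_loss = mp_optimizer.backward(loss)
# (optional) inspect or post-process the fp32 master gradients here
optimize_ops = mp_optimizer.apply_gradients(master_params_grads)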
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from ... import core
from ... import layers
from ... import framework
def append_cast_op(i, o, prog):
"""
Append a cast op in a given Program to cast input `i` to data type `o.dtype`.
Args:
i (Variable): The input Variable.
o (Variable): The output Variable.
prog (Program): The Program to which the cast op is appended.
"""
prog.global_block().append_op(
type="cast",
inputs={"X": i},
outputs={"Out": o},
attrs={"in_dtype": i.dtype,
"out_dtype": o.dtype})
def copy_to_master_param(p, block):
"""
Create a master parameter for the input parameter; the two share the same
attributes except the data type.
Args:
p(Parameter): The input parameter in float16.
block(Block): The block in which the parameter resides.
"""
v = block.vars.get(p.name, None)
if v is None:
raise ValueError("no param name %s found!" % p.name)
new_p = framework.Parameter(
block=block,
shape=v.shape,
dtype=core.VarDesc.VarType.FP32,
type=v.type,
lod_level=v.lod_level,
stop_gradient=p.stop_gradient,
trainable=p.trainable,
optimize_attr=p.optimize_attr,
regularizer=p.regularizer,
gradient_clip_attr=p.gradient_clip_attr,
error_clip=p.error_clip,
name=v.name + ".master")
return new_p
def create_master_params_grads(params_grads, main_prog, startup_prog,
loss_scaling):
"""
Create master parameters and gradients in float32 from params and grads
in float16.
Args:
params_grads (list): A list of (parameter, gradient) tuples in float16.
main_prog (Program): The main program for training.
startup_prog (Program): The startup program to initialize all parameters.
loss_scaling (float): The factor to scale loss and gradients.
Returns:
A list of master parameters and gradients.
"""
master_params_grads = []
with main_prog._backward_role_guard():
for p, g in params_grads:
# create master parameters
master_param = copy_to_master_param(p, main_prog.global_block())
startup_master_param = startup_prog.global_block()._clone_variable(
master_param)
startup_p = startup_prog.global_block().var(p.name)
# fp16 -> fp32
append_cast_op(startup_p, startup_master_param, startup_prog)
# cast fp16 gradients to fp32 before apply gradients
if g.name.find("batch_norm") > -1:
if loss_scaling > 1:
scaled_g = g / float(loss_scaling)
else:
scaled_g = g
master_params_grads.append([p, scaled_g])
continue
master_grad = layers.cast(x=g, dtype="float32")
if loss_scaling > 1:
master_grad = master_grad / float(loss_scaling)
master_params_grads.append([master_param, master_grad])
return master_params_grads
def master_param_to_train_param(master_params_grads, params_grads, main_prog):
"""
Convert master parameters and gradients in float32 back to parameters and
gradients in float16 for forward computation.
Args:
master_params_grads (list): A list of master parameters and gradients in
float32.
params_grads (list): A list of parameters and gradients in float16.
main_prog (Program): The main program for execution.
"""
for idx, m_p_g in enumerate(master_params_grads):
train_p, _ = params_grads[idx]
if train_p.name.find("batch_norm") > -1:
continue
with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]):
# fp32 -> fp16
append_cast_op(m_p_g[0], train_p, main_prog)
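The fp32 master copies and the loss-scaling factor created above address float16's limited precision and range. A plain NumPy illustration of the two failure modes they guard against (not part of this change; the numbers are chosen only for demonstration):

import numpy as np

# 1) A small update is rounded away when the parameter itself is float16 ...
p16 = np.float16(1.0)
grad = np.float16(1e-4)
lr = np.float16(0.001)
print(p16 + lr * grad)                           # 1.0 -- the update vanishes

# ... but survives in a float32 master copy of the same parameter.
p32 = np.float32(1.0)
print(p32 + np.float32(lr) * np.float32(grad))   # 1.0000001

# 2) Loss scaling keeps small gradient values representable in float16:
tiny_grad = 1e-8
print(np.float16(tiny_grad))                     # 0.0 -- underflows
print(np.float16(tiny_grad * 8.0e4))             # ~0.0008 -- representable;
                                                 # divided back out in fp32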
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import contextlib
import math
import sys
import numpy
import unittest
import os
import numpy as np
def resnet_cifar10(input, depth=32):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)
def shortcut(input, ch_in, ch_out, stride):
if ch_in != ch_out:
return conv_bn_layer(input, ch_out, 1, stride, 0, None)
else:
return input
def basicblock(input, ch_in, ch_out, stride):
tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
short = shortcut(input, ch_in, ch_out, stride)
return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
def layer_warp(block_func, input, ch_in, ch_out, count, stride):
tmp = block_func(input, ch_in, ch_out, stride)
for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp
assert (depth - 2) % 6 == 0
n = (depth - 2) // 6
conv1 = conv_bn_layer(
input=input, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
pool = fluid.layers.pool2d(
input=res3, pool_size=8, pool_type='avg', pool_stride=1)
return pool
def vgg16_bn_drop(input):
def conv_block(input, num_filter, groups, dropouts):
return fluid.nets.img_conv_group(
input=input,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act='relu',
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type='max')
conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
bn = fluid.layers.batch_norm(input=fc1, act='relu')
drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
return fc2
def train(net_type, use_cuda, save_dirname, is_local):
classdim = 10
data_shape = [3, 32, 32]
train_program = fluid.Program()
startup_prog = fluid.Program()
train_program.random_seed = 123
startup_prog.random_seed = 456
with fluid.program_guard(train_program, startup_prog):
images = fluid.layers.data(
name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
imgs = fluid.layers.cast(images, "float16")
if net_type == "vgg":
print("train vgg net")
net = vgg16_bn_drop(imgs)
elif net_type == "resnet":
print("train resnet")
net = resnet_cifar10(imgs, 32)
else:
raise ValueError("%s network is not supported" % net_type)
logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
cost, predict = fluid.layers.softmax_with_cross_entropy(
logits, label, return_softmax=True)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label)
# Test program
test_program = train_program.clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
mp_optimizer = fluid.contrib.mixed_precision.decorate(
optimizer=optimizer, init_loss_scaling=8.0)
scaled_loss, _, _ = mp_optimizer.minimize(avg_cost)
BATCH_SIZE = 128
PASS_NUM = 1
# no shuffle for unit test
train_reader = paddle.batch(
paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
def train_loop(main_program):
exe.run(startup_prog)
loss = 0.0
for pass_id in range(PASS_NUM):
for batch_id, data in enumerate(train_reader()):
np_scaled_loss, loss = exe.run(
main_program,
feed=feeder.feed(data),
fetch_list=[scaled_loss, avg_cost])
print(
'PassID {0:1}, BatchID {1:04}, train loss {2:2.4}, scaled train loss {3:2.4}'.
format(pass_id, batch_id + 1,
float(loss), float(np_scaled_loss)))
if (batch_id % 10) == 0:
acc_list = []
avg_loss_list = []
for tid, test_data in enumerate(test_reader()):
loss_t, acc_t = exe.run(program=test_program,
feed=feeder.feed(test_data),
fetch_list=[avg_cost, acc])
if math.isnan(float(loss_t)):
sys.exit("got NaN loss, training failed.")
acc_list.append(float(acc_t))
avg_loss_list.append(float(loss_t))
break # Use 1 segment for speeding up CI
acc_value = numpy.array(acc_list).mean()
avg_loss_value = numpy.array(avg_loss_list).mean()
print(
'PassID {0:1}, BatchID {1:04}, test loss {2:2.2}, acc {3:2.2}'.
format(pass_id, batch_id + 1,
float(avg_loss_value), float(acc_value)))
if acc_value > 0.08: # Low threshold for speeding up CI
fluid.io.save_inference_model(
save_dirname, ["pixel"], [predict],
exe,
main_program=train_program)
return
if is_local:
train_loop(train_program)
else:
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(use_cuda, save_dirname=None):
if save_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
# The input's dimension of conv should be 4-D or 5-D.
# Use normalized image pixels as input data, which should be in the range [0, 1.0].
batch_size = 1
tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
# Use inference_transpiler to speed up inference
inference_transpiler_program = inference_program.clone()
t = fluid.transpiler.InferenceTranspiler()
t.transpile(inference_transpiler_program, place)
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets)
transpiler_results = exe.run(inference_transpiler_program,
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets)
assert len(results[0]) == len(transpiler_results[0])
for i in range(len(results[0])):
np.testing.assert_almost_equal(
results[0][i], transpiler_results[0][i], decimal=4)
print("infer results: ", results[0])
fluid.io.save_inference_model(save_dirname, feed_target_names,
fetch_targets, exe,
inference_transpiler_program)
def main(net_type, use_cuda, is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
# Directory for saving the trained model
save_dirname = "image_classification_" + net_type + ".inference.model"
train(net_type, use_cuda, save_dirname, is_local)
#infer(use_cuda, save_dirname)
class TestImageClassification(unittest.TestCase):
def test_vgg_cuda(self):
with self.scope_prog_guard():
main('vgg', use_cuda=True)
def test_resnet_cuda(self):
with self.scope_prog_guard():
main('resnet', use_cuda=True)
@contextlib.contextmanager
def scope_prog_guard(self):
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
if __name__ == '__main__':
unittest.main()
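One method not exercised by the test above is `get_loss_scaling()`, which returns the loss scaling factor currently held by the wrapper; in the code shown here that is simply the value passed as `init_loss_scaling`, since the `use_dynamic_loss_scaling` flag is stored but not otherwise used yet. A hedged one-liner, assuming `mp_optimizer` from the earlier sketch:

# The same factor multiplies the loss in backward() and divides the fp32
# master gradients before they are applied.
print("loss scaling:", mp_optimizer.get_loss_scaling())   # 8.0 in the sketch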
......@@ -154,17 +154,41 @@ class ConstantInitializer(Initializer):
"""
assert isinstance(var, framework.Variable)
assert isinstance(block, framework.Block)
# to be compatible with fp16 initializers
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
['constant_init', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
persistable=False)
else:
out_dtype = var.dtype
out_var = var
# Initialization Ops should be prepended and not appended
op = block._prepend_op(
type="fill_constant",
outputs={"Out": var},
outputs={"Out": out_var},
attrs={
"shape": var.shape,
"dtype": int(var.dtype),
"dtype": int(out_dtype),
"value": float(self._value),
'force_cpu': self._force_cpu or force_init_on_cpu()
},
stop_gradient=True)
if var.dtype == VarDesc.VarType.FP16:
block.append_op(
type="cast",
inputs={"X": out_var},
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -216,7 +240,8 @@ class UniformInitializer(Initializer):
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(['gaussian_random', 'tmp'])),
name=unique_name.generate(".".join(
['uniform_random', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
......@@ -295,7 +320,8 @@ class NormalInitializer(Initializer):
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(['gaussian_random', 'tmp'])),
name=unique_name.generate(".".join(
['gaussian_random', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
......@@ -375,7 +401,7 @@ class TruncatedNormalInitializer(Initializer):
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
['truncated_gaussian_random', 'tmp'])),
['truncated_gaussian_random', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
......@@ -482,14 +508,28 @@ class XavierInitializer(Initializer):
if self._seed == 0:
self._seed = block.program.random_seed
# to be compatible with fp16 initializers
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
['xavier_init', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
persistable=False)
else:
out_dtype = var.dtype
out_var = var
if self._uniform:
limit = np.sqrt(6.0 / float(fan_in + fan_out))
op = block._prepend_op(
type="uniform_random",
outputs={"Out": var},
outputs={"Out": out_var},
attrs={
"shape": var.shape,
"dtype": int(var.dtype),
"shape": out_var.shape,
"dtype": out_dtype,
"min": -limit,
"max": limit,
"seed": self._seed
......@@ -500,15 +540,24 @@ class XavierInitializer(Initializer):
std = np.sqrt(2.0 / float(fan_in + fan_out))
op = block._prepend_op(
type="gaussian_random",
outputs={"Out": var},
outputs={"Out": out_var},
attrs={
"shape": var.shape,
"dtype": int(var.dtype),
"shape": out_var.shape,
"dtype": out_dtype,
"mean": 0.0,
"std": std,
"seed": self._seed
},
stop_gradient=True)
if var.dtype == VarDesc.VarType.FP16:
block.append_op(
type="cast",
inputs={"X": out_var},
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -583,14 +632,28 @@ class MSRAInitializer(Initializer):
if self._seed == 0:
self._seed = block.program.random_seed
# to be compatible with fp16 initializers
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
['msra_init', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
persistable=False)
else:
out_dtype = var.dtype
out_var = var
if self._uniform:
limit = np.sqrt(6.0 / float(fan_in))
op = block._prepend_op(
type="uniform_random",
outputs={"Out": var},
outputs={"Out": out_var},
attrs={
"shape": var.shape,
"dtype": int(var.dtype),
"shape": out_var.shape,
"dtype": int(out_dtype),
"min": -limit,
"max": limit,
"seed": self._seed
......@@ -601,15 +664,24 @@ class MSRAInitializer(Initializer):
std = np.sqrt(2.0 / float(fan_in))
op = block._prepend_op(
type="gaussian_random",
outputs={"Out": var},
outputs={"Out": out_var},
attrs={
"shape": var.shape,
"dtype": int(var.dtype),
"shape": out_var.shape,
"dtype": int(out_dtype),
"mean": 0.0,
"std": std,
"seed": self._seed
},
stop_gradient=True)
if var.dtype == VarDesc.VarType.FP16:
block.append_op(
type="cast",
inputs={"X": out_var},
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -694,7 +766,21 @@ class BilinearInitializer(Initializer):
weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
weight = np.reshape(weight, shape)
if var.dtype == VarDesc.VarType.FP32:
# to be compatible with fp16 initializers
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
['bilinear_init', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
persistable=False)
else:
out_dtype = var.dtype
out_var = var
if out_dtype == VarDesc.VarType.FP32:
value_name = "fp32_values"
values = [float(v) for v in weight.flat]
else:
......@@ -703,12 +789,21 @@ class BilinearInitializer(Initializer):
raise ValueError("The size of input is too big. ")
op = block.append_op(
type='assign_value',
outputs={'Out': [var]},
outputs={'Out': [out_var]},
attrs={
'dtype': var.dtype,
'dtype': out_dtype,
'shape': list(shape),
value_name: values
})
if var.dtype == VarDesc.VarType.FP16:
block.append_op(
type="cast",
inputs={"X": out_var},
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -746,14 +841,30 @@ class NumpyArrayInitializer(Initializer):
"""
assert isinstance(var, framework.Variable)
assert isinstance(block, framework.Block)
# to be compatible with fp16 initializers
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
np_value = self._value.astype("float32")
out_var = block.create_var(
name=unique_name.generate(".".join(
['numpy_array_init', var.name, 'tmp'])),
shape=var.shape,
dtype=out_dtype,
type=VarDesc.VarType.LOD_TENSOR,
persistable=False)
else:
out_var = var
out_dtype = var.dtype
np_value = self._value
# Initialization Ops should be prepended and not appended
dtype = framework.convert_np_dtype_to_dtype_(self._value.dtype)
if dtype == VarDesc.VarType.FP32:
if out_dtype == VarDesc.VarType.FP32:
value_name = "fp32_values"
values = [float(v) for v in self._value.flat]
elif dtype == VarDesc.VarType.INT32:
values = [float(v) for v in np_value.flat]
elif out_dtype == VarDesc.VarType.INT32:
value_name = "int32_values"
values = [int(v) for v in self._value.flat]
values = [int(v) for v in np_value.flat]
else:
raise ValueError("Unsupported dtype %s", self._value.dtype)
if self._value.size > 1024 * 1024 * 1024:
......@@ -761,13 +872,22 @@ class NumpyArrayInitializer(Initializer):
"saving it to file and 'load_op' to load it")
op = block._prepend_op(
type='assign_value',
outputs={'Out': var},
outputs={'Out': out_var},
attrs={
'dtype': dtype,
'dtype': out_dtype,
'shape': list(self._value.shape),
value_name: values
},
stop_gradient=True)
if var.dtype == VarDesc.VarType.FP16:
block.append_op(
type="cast",
inputs={"X": out_var},
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework.in_dygraph_mode():
var.op = op
return op
......
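Every initializer touched above follows the same fp16-compatibility pattern: when the target parameter is float16, create a temporary float32 variable, run the native init op on it, and append a cast back into the float16 parameter. A condensed sketch of that pattern as a standalone helper; the function name, the temporary-variable suffix, and the `prepend_init_op` callback are illustrative, not part of the diff:

from paddle.fluid import framework, unique_name
from paddle.fluid.core import VarDesc

def init_fp16_param(block, var, prepend_init_op):
    """Illustrative only: the fp16 path shared by the initializers above.

    `prepend_init_op(block, out_var, out_dtype)` is a hypothetical callback
    that prepends the native init op (fill_constant, uniform_random, ...)
    writing into `out_var`, and returns that op.
    """
    if var.dtype == VarDesc.VarType.FP16:
        # run the native init op on a temporary fp32 variable
        out_dtype = VarDesc.VarType.FP32
        out_var = block.create_var(
            name=unique_name.generate(".".join([var.name, 'fp32_tmp'])),
            shape=var.shape,
            dtype=out_dtype,
            type=VarDesc.VarType.LOD_TENSOR,
            persistable=False)
    else:
        out_dtype = var.dtype
        out_var = var
    op = prepend_init_op(block, out_var, out_dtype)
    if var.dtype == VarDesc.VarType.FP16:
        # cast the fp32 result into the real fp16 parameter
        block.append_op(
            type="cast",
            inputs={"X": out_var},
            outputs={"Out": var},
            attrs={"in_dtype": out_var.dtype,
                   "out_dtype": var.dtype})
    if not framework.in_dygraph_mode():
        var.op = op
    return op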
......@@ -19,65 +19,86 @@ import unittest
import paddle.fluid.framework as framework
import paddle.fluid.initializer as initializer
from paddle.fluid.core import VarDesc
DELTA = 0.00001
def check_cast_op(op):
return op.type == 'cast' and \
op.attr('in_dtype') == VarDesc.VarType.FP32 and \
op.attr('out_dtype') == VarDesc.VarType.FP16
class TestConstantInitializer(unittest.TestCase):
def test_constant_initializer_default_value(self):
def test_constant_initializer_default_value(self, dtype="float32"):
"""Test the constant initializer with default value
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.ConstantInitializer())
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'fill_constant')
self.assertAlmostEqual(init_op.attr('value'), 0.0, delta=DELTA)
return block
def test_constant_initializer(self):
def test_constant_initializer(self, dtype="float32"):
"""Test constant initializer with supplied value
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.ConstantInitializer(2.3))
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'fill_constant')
self.assertAlmostEqual(init_op.attr('value'), 2.3, delta=DELTA)
return block
def test_constant_initializer_fp16(self):
"""Test constant initializer with float16
"""
block = self.test_constant_initializer_default_value("float16")
self.assertTrue(check_cast_op(block.ops[1]))
block = self.test_constant_initializer("float16")
self.assertTrue(check_cast_op(block.ops[1]))
class TestUniformInitializer(unittest.TestCase):
def test_uniform_initializer_default_value(self):
def test_uniform_initializer_default_value(self, dtype="float32"):
"""Test the uniform initializer with default value
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.UniformInitializer())
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
self.assertAlmostEqual(init_op.attr('min'), -1.0, delta=DELTA)
self.assertAlmostEqual(init_op.attr('max'), 1.0, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 0)
return block
def test_uniform_initializer_random_seed(self):
"""Test the uniform initializer with manually setting seed
......@@ -103,43 +124,57 @@ class TestUniformInitializer(unittest.TestCase):
init_op1 = block.ops[0]
self.assertEqual(init_op1.attr("seed"), 456)
def test_uniform_initializer(self):
def test_uniform_initializer(self, dtype="float32"):
"""Test uniform initializer with supplied attributes
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.UniformInitializer(-4.2, 3.1, 123))
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
self.assertAlmostEqual(init_op.attr('min'), -4.2, delta=DELTA)
self.assertAlmostEqual(init_op.attr('max'), 3.1, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 123)
return block
def test_uniform_initializer_two_op(self):
def test_uniform_initializer_two_op(self, dtype="float32"):
"""Test uniform initializer with supplied attributes
"""
program = framework.Program()
block = program.global_block()
for i in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.UniformInitializer(-4.2, float(i), 123))
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op0 = block.ops[0]
self.assertEqual(init_op0.type, 'uniform_random')
self.assertAlmostEqual(init_op0.attr('min'), -4.2, delta=DELTA)
self.assertAlmostEqual(init_op0.attr('max'), 0.0, delta=DELTA)
self.assertEqual(init_op0.attr('seed'), 123)
return block
def test_uniform_initializer_fp16(self):
"""Test uniform initializer with float16
"""
block = self.test_uniform_initializer_default_value("float16")
self.assertTrue(check_cast_op(block.ops[1]))
block = self.test_uniform_initializer(dtype="float16")
self.assertTrue(check_cast_op(block.ops[1]))
block = self.test_uniform_initializer_two_op("float16")
self.assertTrue(check_cast_op(block.ops[1]))
class TestNormalInitializer(unittest.TestCase):
......@@ -162,24 +197,32 @@ class TestNormalInitializer(unittest.TestCase):
self.assertAlmostEqual(init_op.attr('std'), 1.0, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 0)
def test_normal_initializer(self):
def test_normal_initializer(self, dtype="float32"):
"""Test normal initializer with supplied attributes
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.NormalInitializer(2.3, 1.9, 123))
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'gaussian_random')
self.assertAlmostEqual(init_op.attr('mean'), 2.3, delta=DELTA)
self.assertAlmostEqual(init_op.attr('std'), 1.9, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 123)
return block
def test_normal_initializer_fp16(self):
"""Test normal initializer with float16
"""
block = self.test_normal_initializer("float16")
self.assertTrue(check_cast_op(block.ops[1]))
class TestXavierInitializer(unittest.TestCase):
......@@ -271,26 +314,34 @@ class TestXavierInitializer(unittest.TestCase):
self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 0)
def test_xavier_initializer_supplied_arguments(self):
def test_xavier_initializer_supplied_arguments(self, dtype="float32"):
"""Test the Xavier initializer with supplied arguments
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.XavierInitializer(
fan_in=12, fan_out=23, seed=134))
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
limit = np.sqrt(6.0 / (12 + 23))
self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 134)
return block
def test_xavier_initializer_fp16(self):
"""Test the Xavier initializer with float16
"""
block = self.test_xavier_initializer_supplied_arguments("float16")
self.assertTrue(check_cast_op(block.ops[1]))
class TestMSRAInitializer(unittest.TestCase):
......@@ -380,54 +431,70 @@ class TestMSRAInitializer(unittest.TestCase):
self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 0)
def test_msra_initializer_supplied_arguments(self):
def test_msra_initializer_supplied_arguments(self, dtype="float32"):
"""Test the MSRA initializer with supplied arguments
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[5, 10],
lod_level=0,
name="param",
initializer=initializer.MSRAInitializer(
fan_in=12, seed=134))
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
limit = np.sqrt(6.0 / 12)
self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 134)
return block
def test_msra_initializer_fp16(self):
"""Test the MSRA initializer with float16
"""
block = self.test_msra_initializer_supplied_arguments("float16")
self.assertTrue(check_cast_op(block.ops[1]))
class TestMSRAInitializer(unittest.TestCase):
def test_bilinear_initializer(self):
class TestBilinearInitializer(unittest.TestCase):
def test_bilinear_initializer(self, dtype="float32"):
"""Test the bilinear initializer with supplied arguments
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
block.create_parameter(
dtype="float32",
dtype=dtype,
shape=[8, 1, 3, 3],
lod_level=0,
name="param",
initializer=initializer.BilinearInitializer())
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'assign_value')
return block
def test_bilinear_initializer_fp16(self):
"""Test the bilinear initializer with supplied arguments
"""
block = self.test_bilinear_initializer("float16")
self.assertTrue(check_cast_op(block.ops[1]))
class TestNumpyArrayInitializer(unittest.TestCase):
def test_numpy_array_initializer(self):
def test_numpy_array_initializer(self, dtype="float32"):
"""Test the numpy array initializer with supplied arguments
"""
import numpy
program = framework.Program()
block = program.global_block()
np_array = numpy.random.random((10000)).astype("float32")
np_array = numpy.random.random((10000)).astype(dtype)
for _ in range(2):
block.create_parameter(
dtype=np_array.dtype,
......@@ -435,10 +502,18 @@ class TestNumpyArrayInitializer(unittest.TestCase):
lod_level=0,
name="param",
initializer=initializer.NumpyArrayInitializer(np_array))
self.assertEqual(len(block.ops), 1)
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'assign_value')
assert (init_op.attr('fp32_values') == np_array).all()
return block
def test_numpy_array_initializer_fp16(self):
"""Test the numpy array initializer with float16
"""
block = self.test_numpy_array_initializer("float16")
self.assertTrue(check_cast_op(block.ops[1]))
if __name__ == '__main__':
......
......@@ -120,6 +120,7 @@ packages=['paddle',
'paddle.fluid.contrib.slim.distillation',
'paddle.fluid.contrib.utils',
'paddle.fluid.contrib.extend_optimizer',
'paddle.fluid.contrib.mixed_precision',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler.details',
'paddle.fluid.incubate',
......