Unverified commit 7d8d4599, authored by liym27, committed by GitHub

control flow: support optimizer called (#21851)

* append optimize op in the grad block of current block if current block is in control flow. test=develop

* add conditional grad op when optimizer used in control flow. test=develop

* add comments and fix typos. test=develop

* fix append_backward to support control flow. test=develop

* add test. test=develop

* fix copy_var_to_parent_block and conditional_block_grad. test=develop

* fix bug: revert to append conditional_block_grad vars to sub grad block. test=develop

* fix bug: revert to assign var to parent block even if var already is in parent block

* fix bug: consider outputs is empty. test=develop

* move _rename_grad_ out. test=develop

* modify code according to reviews from Huihuang. test=develop

* modify code according to reviews from Jinle. test=develop
Parent 12b2b4b1
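For context, a minimal sketch of the user-facing pattern this commit enables, adapted from the test_optimizer_in_case test added below (shapes, optimizers, and the case construct are taken from that test; the snippet itself is only illustrative):

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.optimizer as optimizer

# Two branches, each minimizing its own loss with a different optimizer.
x = fluid.data(name='x', shape=[1, 784], dtype='float32')
y = fluid.data(name='y', shape=[1, 784], dtype='float32')
switch_id = fluid.data(name='switch_id', shape=[1], dtype='int32')
one = layers.fill_constant(shape=[1], dtype='int32', value=1)

adam = optimizer.Adam(learning_rate=0.001)
adagrad = optimizer.Adagrad(learning_rate=0.001)

def fn_1():
    loss = layers.mean(layers.elementwise_mul(x, y), name="f_1_loss")
    # minimize() is called inside a control-flow branch; with this commit,
    # the grad and optimize ops are appended into a grad block of the sub-block.
    adam.minimize(loss)

def fn_2():
    loss = layers.mean(layers.elementwise_mul(x, y), name="f_2_loss")
    adagrad.minimize(loss)

layers.case(pred_fn_pairs=[(switch_id == one, fn_1)], default=fn_2)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
feed_image = np.random.random(size=[1, 784]).astype('float32')
exe.run(fluid.default_main_program(),
        feed={'x': feed_image, 'y': feed_image,
              'switch_id': np.array([1]).astype('int32')},
        fetch_list=[])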
......@@ -1009,6 +1009,17 @@ def _get_stop_gradients_(program):
return no_grad_dict
def _get_son_parent_block_idx_dict(program, current_block_idx):
son_parent_block_idx_dict = collections.OrderedDict()
while current_block_idx >= 0:
parent_block_idx = program.block(current_block_idx).parent_idx
son_parent_block_idx_dict[current_block_idx] = parent_block_idx
current_block_idx = parent_block_idx
return son_parent_block_idx_dict
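A sketch of what this helper returns, assuming a hypothetical program in which block 2 is nested in block 1, which is nested in the root block 0 (the root block's parent_idx is -1):

# Walking child -> parent from the current block up to the root:
#   _get_son_parent_block_idx_dict(program, 2)
#   # => OrderedDict([(2, 1), (1, 0), (0, -1)])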
def append_backward(loss,
parameter_list=None,
no_grad_set=None,
......@@ -1101,37 +1112,76 @@ def append_backward(loss,
isinstance(callbacks, list)
program = loss.block.program
root_block = program.block(0)
current_block_idx = program.current_block_idx
current_block = program.block(current_block_idx)
is_in_control_flow = current_block_idx != 0
# Double grad is not supported in sub-block (control flow)
if not is_in_control_flow:
# _appending_grad_times used for double grad
program._appending_grad_times += 1
if no_grad_set is None:
no_grad_set = set()
no_grad_set = copy.copy(no_grad_set)
no_grad_dict = _get_stop_gradients_(program)
# no_grad_set only contains vars in block 0
# Todo(liym27): support vars in sub block
no_grad_dict[0].update(list(map(_append_grad_suffix_, no_grad_set)))
root_block = program.block(0)
# Currently this is only to support optimizer.minimize
# in a switch branch, which calls append_backward in a sub-block.
# Note: while_loop is also control flow, but it makes no sense to call an optimizer in a while loop.
# Todo: report an error when this is called inside while_loop
if is_in_control_flow:
# create grad block if in switch control flow.
target_grad_block = program._create_block(
parent_idx=current_block.parent_idx)
target_grad_block._set_forward_block_idx(current_block_idx)
# after _create_block, program.current_block changes
else:
target_grad_block = root_block
son_parent_block_idx_dict = _get_son_parent_block_idx_dict(
program, current_block_idx)
block_fwd_op_num_dict = {} # block_id: fwd_op_num
for idx in son_parent_block_idx_dict:
block_fwd_op_num_dict[idx] = program.block(idx).desc.op_size()
fwd_op_num = root_block.desc.op_size()
current_block_idx = program.current_block_idx
grad_to_var = dict()
op_desc = _create_loss_op_desc_(loss)
root_block.desc.append_op().copy_from(op_desc)
target_grad_block.desc.append_op().copy_from(op_desc)
block_no_grad_set = set(map(_strip_grad_suffix_, no_grad_dict[0]))
op_path = _find_op_path_(root_block, [loss], [], block_no_grad_set)
no_grad_vars = _find_no_grad_vars(root_block, op_path, [loss],
for block_idx in son_parent_block_idx_dict:
block = program.block(block_idx)
block_no_grad_set = set(
map(_strip_grad_suffix_, no_grad_dict[block_idx]))
op_path = _find_op_path_(block, [loss], [], block_no_grad_set)
no_grad_vars = _find_no_grad_vars(block, op_path, [loss],
block_no_grad_set)
block_no_grad_set.update(no_grad_vars)
no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set)))
no_grad_dict[block_idx].update(
list(map(_append_grad_suffix_, block_no_grad_set)))
input_grad_names_set = None
# For double backward, input_grad_names is used for filter
# some non-used gradients op.
# For double backward, input_grad_names is used for filtering out
# some unused gradient ops.
# Todo(liym27): need a better design.
# Double grad is not supported in control flow sub-blocks for now.
if not is_in_control_flow:
if program._appending_grad_times > 1:
input_grad_names_set = set([_append_grad_suffix_(loss.name)])
# Todo: support _append_backward_ops_with_checkpoints_ in
# sub-block (control flow)
if checkpoints != None and \
isinstance(checkpoints, list) and \
len(checkpoints) > 0:
......@@ -1147,21 +1197,28 @@ def append_backward(loss,
checkpoints)
else:
_append_backward_ops_(
root_block,
block, # the block where the forward ops are
op_path,
root_block,
target_grad_block,
no_grad_dict,
grad_to_var,
callbacks,
input_grad_names_set=input_grad_names_set)
# Because calc_gradient may be called multiple times,
grad_info_map = dict()
# If in control flow, target_grad_block is a newly created block which only contains grad ops,
# so fwd_op_num is set to 0.
fwd_op_num = block_fwd_op_num_dict[
current_block_idx] if not is_in_control_flow else 0
# Because append_backward may be called multiple times,
# we need rename the internal gradient variables so that they have
# different names.
_rename_grad_(root_block, fwd_op_num, grad_to_var, {})
_rename_grad_(target_grad_block, fwd_op_num, grad_to_var, {})
grad_info_map = dict()
_append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
_append_backward_vars_(target_grad_block, fwd_op_num, grad_to_var,
grad_info_map)
program.current_block_idx = current_block_idx
program._sync_with_cpp()
......@@ -1186,6 +1243,7 @@ def append_backward(loss,
parameters = [param.name for param in params if param.trainable]
params_and_grads = []
op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName()
for param in parameters:
if cpt.to_text(param) not in grad_info_map:
continue
......@@ -1197,16 +1255,20 @@ def append_backward(loss,
# Get the param var from the global block
param_var = program.global_block().var(param)
grad_var = grad_block.var(grad_info[0])
if not is_in_control_flow:
if loss.block.has_var(grad_info[0]):
params_and_grads.append((param_var, grad_var))
else:
params_and_grads.append((param_var, None))
else:
params_and_grads.append((param_var, grad_var))
op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName()
for p, g in params_and_grads:
if g is None:
continue
for op in reversed(program.global_block().ops):
ops = grad_block.ops if is_in_control_flow else program.global_block(
).ops
for op in reversed(ops):
assert isinstance(op, framework.Operator)
if g.name in op.output_arg_names:
g.op = op
......@@ -1228,12 +1290,62 @@ def _as_list(x):
return list(x) if isinstance(x, collections.Sequence) else [x]
def _is_ancestor_block(ancestor_block, block):
prog = block.program
ancestor_idx = ancestor_block.idx
parent_idx = block.parent_idx
while parent_idx != -1:
if parent_idx == ancestor_idx:
return True
parent_idx = prog.block(parent_idx).parent_idx
return False
def _get_output_names(cur_block, targets):
"""
In `cur_block`, get output names those linked to targets.
NOTE:
1. `targets` can be in `cur_block`;
Usually, `targets` is in `cur_block`. However, considering control flow,
2. `targets` may be in sub-block but `cur_block` is an ancestor of `targets[0].block`;
3. `targets` may be in the block which is ancestor of `cur_block`.
"""
block = targets[0].block if targets else cur_block
prog = cur_block.program
if _is_ancestor_block(block, cur_block):
return set()
current_output_names = set([out.name for out in targets])
# if `cur_block` is an ancestor of `targets[0].block`, run while loop
while block.idx != cur_block.idx:
assert block.parent_idx != -1
parent_block = prog.block(block.parent_idx)
parent_block_output_names = set()
for op in reversed(block.ops):
if _some_in_set_(op.desc.output_arg_names(), current_output_names):
for name in op.desc.input_arg_names():
current_output_names.add(name)
if not block.desc.find_var(cpt.to_bytes(name)) \
and parent_block.desc.find_var(cpt.to_bytes(name)):
parent_block_output_names.add(name)
block = parent_block
current_output_names = parent_block_output_names
return current_output_names
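A sketch of case 2 of the docstring above, assuming a loss computed in a conditional sub-block:

# Assumed setup: block 1 holds mul_out = elementwise_mul(x, y) and
# mean_out = mean(mul_out), while x and y are defined in the root block 0.
# _get_output_names(root_block, [mean_out]) walks from block 1 up to block 0:
#   {mean_out} pulls in mul_out (defined in block 1, so it is not lifted);
#   mul_out pulls in x and y (not in block 1, found in block 0, so lifted);
# and the function returns {'x', 'y'} as the output names visible to block 0.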
def _find_no_grad_vars(block, op_path, targets, no_grad_set):
"""
Find the vars which are not used in the program, and
those var belong to no_grad_var.
those vars belong to no_grad_var.
"""
output_names = set([out.name for out in targets])
output_names = _get_output_names(block, targets)
no_grad_var = []
for i, op in reversed(list(enumerate(op_path))):
# If the op has sub_block, it is too complicated to find the correct no_grad_var.
......@@ -1253,7 +1365,7 @@ def _find_op_path_(block, outputs, inputs, no_grad_set):
no_grad_set will also be changed
"""
input_names = set([inp.name for inp in inputs])
output_names = set([out.name for out in outputs])
output_names = _get_output_names(block, outputs)
relevant_op_flags = [True] * len(block.ops)
......
......@@ -2231,6 +2231,14 @@ class Block(object):
"""
self.desc._set_forward_block_idx(idx)
@property
def backward_block_idx(self):
cur_block_idx = self.idx
for block in self.program.blocks:
if block.forward_block_idx == cur_block_idx:
return block.idx
return -1
@property
def idx(self):
return self.desc.id
......
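Regarding the backward_block_idx property added above, a sketch of how the pairing is set up by the append_backward change (a Program `program` and a forward sub-block `fwd_block` are assumed):

grad_block = program._create_block(parent_idx=fwd_block.parent_idx)
grad_block._set_forward_block_idx(fwd_block.idx)
# The optimizer can now recover the grad block from the forward block:
assert fwd_block.backward_block_idx == grad_block.idx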
......@@ -28,6 +28,8 @@ import warnings
import six
from functools import reduce, partial
from ..data_feeder import convert_dtype, check_type_and_dtype
from ... import compat as cpt
from ..backward import _infer_var_data_type_shape_
__all__ = [
'While', 'Switch', 'increment', 'array_write', 'create_array', 'less_than',
......@@ -1799,6 +1801,9 @@ class ConditionalBlock(object):
intermediate.add(out_var_name)
input_set = set([ipt.name for ipt in self.inputs])
# Todo(liym27): Here we assume that all params are in the recursive parent block,
# but when minimize() is called in control flow, some params may be in
# the conditional grad block.
param_list = [
parent_block._var_recursive(each_name) for each_name in params
]
......@@ -1811,7 +1816,7 @@ class ConditionalBlock(object):
step_scope = parent_block.create_var(
type=core.VarDesc.VarType.STEP_SCOPES)
parent_block.append_op(
conditional_block_op = parent_block.append_op(
type='conditional_block',
inputs={
'Cond': self.inputs,
......@@ -1824,6 +1829,90 @@ class ConditionalBlock(object):
'is_scalar_condition': self.is_scalar_condition
})
if self.need_append_conditional_block_grad(inside_block):
self.append_conditional_block_grad(parent_block, inside_block,
conditional_block_op)
def need_append_conditional_block_grad(self, inside_block):
grad_sub_block_idx = inside_block.backward_block_idx
return grad_sub_block_idx != -1
def append_conditional_block_grad(self, parent_block, inside_block,
conditional_block_op):
'''
Append op `conditional_block_grad` manually.
When `optimizer.minimize/append_backward` is called inside Paddle control flow,
the grad ops are appended before the op `conditional_block` itself is appended,
so op `conditional_block_grad` cannot be appended by
`optimizer.minimize/append_backward`. After the op `conditional_block` is appended,
`conditional_block_grad` is appended manually here.
Args:
parent_block (Block): The block that `conditional_block_op` belongs to.
inside_block (Block): The sub block of `conditional_block_op`.
conditional_block_op (Operator): The forward op conditional_block.
'''
grad_sub_block_idx = inside_block.backward_block_idx
grad_sub_block = self.helper.main_program.block(grad_sub_block_idx)
intermediate = set()
params = set()
for each_op in grad_sub_block.ops:
assert isinstance(each_op, Operator)
for iname in each_op.input_names:
for in_var_name in each_op.input(iname):
if in_var_name not in intermediate:
params.add(in_var_name)
for oname in each_op.output_names:
for out_var_name in each_op.output(oname):
intermediate.add(out_var_name)
param_list = []
for inner_input_name in params:
inner_var = parent_block._find_var_recursive(inner_input_name)
if inner_var:
param_list.append(cpt.to_text(inner_var.name))
grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
conditional_block_op.desc,
cpt.to_text(set()), [grad_sub_block.desc])
# append the generated grad op desc to the parent block
op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
backward = core.op_proto_and_checker_maker.OpRole.Backward
new_op_desc = parent_block.desc.append_op()
new_op_desc.copy_from(grad_op_desc[0])
new_op_desc._set_attr(op_role_attr_name, backward)
# set input and output manually
new_op_desc.set_input('Input', param_list)
new_op_desc.set_output('Input@GRAD',
[param + "@GRAD" for param in param_list])
new_vars = set()
for grad_var_name in new_op_desc.output_arg_names():
if grad_sub_block.desc.has_var_recursive(
cpt.to_bytes(grad_var_name)
) or grad_var_name == core.empty_var_name():
continue
grad_sub_block.desc.var(cpt.to_bytes(grad_var_name))
new_vars.add(grad_var_name)
if grad_var_name not in op_grad_to_var:
continue
# infer_shape and infer_type
new_op_desc.infer_var_type(grad_sub_block.desc)
new_op_desc.infer_shape(grad_sub_block.desc)
for arg in new_op_desc.output_arg_names():
if arg in new_vars:
_infer_var_data_type_shape_(arg, grad_sub_block)
self.helper.main_program._sync_with_cpp()
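A rough sketch of the resulting layout when minimize is called inside a case branch (based on the description above; block indices are illustrative):

# Parent block op order (appended in this sequence):
#   ... forward ops ..., conditional_block, conditional_block_grad
# conditional_block's sub_block is the branch block (forward ops + loss);
# conditional_block_grad's sub_block is the grad block created by
# append_backward, which holds the grad ops and the optimize ops.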
def copy_var_to_parent_block(var, layer_helper):
if var is None:
......
......@@ -422,11 +422,22 @@ class Optimizer(object):
# for parameters and extend _finish_update method to add custom ops.
# Always called under program_guard, using the global block as the loss block.
# But if the current block is in control flow, append the optimize ops to the
# grad block of the current block.
global_block = framework.default_main_program().global_block()
start = len(global_block.ops)
target_block = global_block
current_block = framework.default_main_program().current_block()
if current_block.idx != global_block.idx:
assert current_block.backward_block_idx != -1, \
"current block is not global_block, but it doesn't have backward block."
target_block = framework.default_main_program().blocks[
current_block.backward_block_idx]
start = len(target_block.ops)
self.helper = LayerHelper(self.__class__.__name__)
self._create_accumulators(
global_block,
target_block,
[p[0] for p in parameters_and_grads if p[0].trainable])
self._create_global_learning_rate()
......@@ -438,7 +449,7 @@ class Optimizer(object):
with param_and_grad[0].block.program._optimized_guard(
param_and_grad):
if param_and_grad[0].trainable is True:
optimize_op = self._append_optimize_op(global_block,
optimize_op = self._append_optimize_op(target_block,
param_and_grad)
optimize_ops.append(optimize_op)
else:
......@@ -448,16 +459,16 @@ class Optimizer(object):
with param_and_grad[0].block.program._optimized_guard(
param_and_grad), name_scope("optimizer"):
if param_and_grad[0].trainable is True:
optimize_op = self._append_optimize_op(global_block,
optimize_op = self._append_optimize_op(target_block,
param_and_grad)
optimize_ops.append(optimize_op)
# Get custom finish ops for subclasses
# FIXME: Need to fix this once we figure out how to handle dependencies
self._finish_update(global_block, parameters_and_grads)
self._finish_update(target_block, parameters_and_grads)
end = len(global_block.ops)
return global_block._slice_ops(start, end)
end = len(target_block.ops)
return target_block._slice_ops(start, end)
def _process_distribute_lookuptable(self, param_grads):
"""
......@@ -1904,7 +1915,6 @@ class AdamaxOptimizer(Optimizer):
"""Update Beta1 Power accumulator
"""
assert isinstance(block, framework.Block)
main_block = block.program.global_block()
for param, grad in parameters_and_grads:
if grad is None or param.trainable is False:
continue
......@@ -1912,7 +1922,7 @@ class AdamaxOptimizer(Optimizer):
[param, grad]), name_scope('adamx'):
beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
param)
main_block.append_op(
block.append_op(
type="scale",
inputs={"X": beta1_pow_acc},
outputs={"Out": beta1_pow_acc},
......
......@@ -22,6 +22,7 @@ import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid.framework import Program, program_guard
from functools import partial
import paddle.fluid.optimizer as optimizer
class TestAPICase(unittest.TestCase):
......@@ -223,5 +224,52 @@ class TestAPICase_Error(unittest.TestCase):
self.assertRaises(TypeError, type_error_default)
# when an optimizer is used in a case branch
class TestMutiTask(unittest.TestCase):
def test_optimizer_in_case(self):
BATCH_SIZE = 1
INPUT_SIZE = 784
EPOCH_NUM = 2
x = fluid.data(
name='x', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32')
y = fluid.data(
name='y', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32')
switch_id = fluid.data(name='switch_id', shape=[1], dtype='int32')
one = layers.fill_constant(shape=[1], dtype='int32', value=1)
adam = optimizer.Adam(learning_rate=0.001)
adagrad = optimizer.Adagrad(learning_rate=0.001)
def fn_1():
sum = layers.elementwise_mul(x, y)
loss = layers.mean(sum, name="f_1_loss")
adam.minimize(loss)
def fn_2():
sum = layers.elementwise_mul(x, y)
loss = layers.mean(sum, name="f_2_loss")
adagrad.minimize(loss)
layers.case(pred_fn_pairs=[(switch_id == one, fn_1)], default=fn_2)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
for epoch in range(EPOCH_NUM):
np.random.seed(epoch)
feed_image = np.random.random(
size=[BATCH_SIZE, INPUT_SIZE]).astype('float32')
main_program = fluid.default_main_program()
out = exe.run(main_program,
feed={
'x': feed_image,
'y': feed_image,
'switch_id': np.array([epoch]).astype('int32')
},
fetch_list=[])
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.optimizer as optimizer
from paddle.fluid.framework import Program, program_guard
import paddle.fluid.core as core
BATCH_SIZE = 1
INPUT_SIZE = 784
CLASS_NUM = 10
FC_SIZE = 40
EPOCH_NUM = 5
LR = 0.001
SEED = 2020
def static(train_data,
loss_in_switch=True,
use_cuda=False,
use_parallel_exe=False):
startup_program = Program()
main_program = Program()
startup_program.random_seed = SEED
main_program.random_seed = SEED
with program_guard(main_program, startup_program):
def double_fc_net(image):
hidden = layers.fc(
image,
size=FC_SIZE,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5)),
name="hidden")
prediction = layers.fc(
hidden,
size=CLASS_NUM,
act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=1.2)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.8)),
name="prediction")
return hidden, prediction
def fn_1(opt, avg_loss=None, pred=None, label=None):
if avg_loss is None:
loss = layers.cross_entropy(input=pred, label=label)
avg_loss = layers.mean(loss, name='mean_cross_entropy_loss')
opt.minimize(avg_loss)
return avg_loss
def fn_2(opt, avg_loss=None, pred=None, label=None):
if avg_loss is None:
loss = layers.softmax_with_cross_entropy(
logits=pred, label=label)
avg_loss = layers.mean(loss, name='mean_softmax_loss')
opt.minimize(avg_loss)
return avg_loss
image = fluid.data('image', [BATCH_SIZE, INPUT_SIZE], 'float32')
label = fluid.data('label', [BATCH_SIZE, 1], 'int64')
hidden, prediction = double_fc_net(image)
adam = optimizer.Adam(learning_rate=LR)
sgd = optimizer.SGD(learning_rate=LR)
id = fluid.data('id', [1], 'int32')
two = layers.fill_constant([1], 'int32', 2)
mod_two = layers.elementwise_mod(id, two) == 0
if loss_in_switch:
avg_loss = layers.case([(
mod_two, lambda: fn_1(adam, None, prediction, label))],
lambda: fn_2(sgd, None, prediction, label))
else:
loss_1 = layers.cross_entropy(input=prediction, label=label)
avg_loss_1 = layers.mean(loss_1)
loss_2 = layers.softmax_with_cross_entropy(
logits=prediction, label=label)
avg_loss_2 = layers.mean(loss_2)
avg_loss = layers.case([(mod_two, lambda: fn_1(adam, avg_loss_1))],
lambda: fn_2(sgd, avg_loss_2))
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for epoch in range(EPOCH_NUM):
feed_image, feed_label = train_data[epoch]
fetch_list = [hidden, prediction, avg_loss]
feed = {
'image': feed_image,
'label': feed_label,
'id': np.array([epoch]).astype('int32')
}
out = exe.run(main_program, feed=feed, fetch_list=fetch_list)
out_hidden, out_pred, loss = out
return out_hidden, out_pred, loss
class DygraphLayer(fluid.dygraph.Layer):
def __init__(self):
super(DygraphLayer, self).__init__()
self.fc_1 = fluid.dygraph.nn.Linear(
INPUT_SIZE,
FC_SIZE,
act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=0.99)),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=0.5)), )
self.fc_2 = fluid.dygraph.nn.Linear(
FC_SIZE,
CLASS_NUM,
act='softmax',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=1.2)),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=0.8)))
def forward(self, inputs):
hidden = self.fc_1(inputs)
prediction = self.fc_2(hidden)
return hidden, prediction
def dynamic(train_data, use_cuda=False, use_parallel_exe=False):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
with fluid.dygraph.guard(place):
fluid.default_startup_program().random_seed = SEED
fluid.default_main_program().random_seed = SEED
dy_layer = DygraphLayer()
adam = fluid.optimizer.Adam(
learning_rate=LR, parameter_list=dy_layer.parameters())
sgd = fluid.optimizer.SGD(learning_rate=LR,
parameter_list=dy_layer.parameters())
for epoch in range(EPOCH_NUM):
image_data, label = train_data[epoch]
var_input = fluid.dygraph.to_variable(image_data)
var_label = fluid.dygraph.to_variable(label)
hidden, prediction = dy_layer(var_input)
if epoch % 2 == 0:
cross_entropy_loss = layers.cross_entropy(prediction, var_label)
loss = layers.mean(cross_entropy_loss)
loss.backward()
adam.minimize(loss)
else:
softmax_loss = layers.softmax_with_cross_entropy(prediction,
var_label)
loss = layers.mean(softmax_loss)
loss.backward()
sgd.minimize(loss)
dy_layer.clear_gradients()
return hidden.numpy(), prediction.numpy(), loss.numpy()
class TestMultiTask(unittest.TestCase):
'''
Compare results of static graph and dynamic graph.
Todo(liym27): add parallel GPU training.
'''
def random_input(self,
seed,
image_shape=[BATCH_SIZE, INPUT_SIZE],
label_shape=[BATCH_SIZE, 1]):
np.random.seed(seed)
image_np = np.random.random(size=image_shape).astype('float32')
np.random.seed(seed)
label_np = np.random.randint(
low=0, high=CLASS_NUM - 1, size=label_shape).astype('int64')
return image_np, label_np
def init_train_data(self):
self.train_data = []
for epoch in range(EPOCH_NUM):
self.train_data.append(self.random_input(epoch))
def test_optimzier_in_switch(self):
self.init_train_data()
use_cuda = core.is_compiled_with_cuda()
hidden_2, pre_2, loss_2 = dynamic(self.train_data, use_cuda)
for loss_in_switch in [True, False]:
hidden_1, pre_1, loss_1 = static(self.train_data, loss_in_switch,
use_cuda)
self.assertTrue(
np.allclose(hidden_1, hidden_2),
msg='static hidden is {}\ndynamic hidden is {}'.format(
hidden_1, hidden_2))
self.assertTrue(
np.allclose(pre_1, pre_2),
msg='static prediction is {}\ndynamic prediction is {}'.format(
pre_1, pre_2))
self.assertTrue(
np.allclose(loss_1, loss_2),
msg='static loss is {}\ndynamic loss is {}'.format(loss_1,
loss_2))
if __name__ == '__main__':
unittest.main()