Unverified commit 1dcf6a72, authored by Huihuang Zheng and committed by GitHub

Add More Complex Tests and Fix Bugs for Control Flow cond API (#21532)

Add tests that compare the computed gradients against numerical dy/dx to make sure the gradient values calculated by the control flow backward pass are correct. Also fix bugs detected by those tests.
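The tests approximate dy/dx with finite differences and compare it to the gradient fetched from the program. A minimal NumPy sketch of that kind of check (the function `f` here is illustrative; the delta and tolerances mirror the ones used in the tests added below):

```python
import numpy as np

def numerical_grad(f, x, delta=0.005):
    # Forward-difference approximation of df/dx for a scalar-valued f,
    # perturbing one element at a time, like the loop in backward_value_helper.
    grad = np.zeros_like(x)
    fx = f(x)
    for j in range(x.size):
        x_plus = x.copy()
        x_plus.flat[j] += delta
        grad.flat[j] = (f(x_plus) - fx) / delta
    return grad

# Toy check: for f(x) = sum(x ** 2) the analytic gradient is 2 * x.
x = np.random.random([1, 9]).astype(np.float32)
assert np.isclose(numerical_grad(lambda v: (v ** 2).sum(), x),
                  2 * x, atol=0.05, rtol=0.05).all()
```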

Fix bugs:

1. Unlike sum_op, optimizer ops don't accept uninitialized input tensors. But in conditional_block_grad_op, since the conditional_block may not run, the output gradient tensor may stay uninitialized, which makes the optimizer op fail. To fix it, we could either let optimizer ops support uninitialized inputs the way sum_op does, or assign 0 to the gradient when the conditional_block_grad_op doesn't run. There are 10+ optimizer ops, so **to keep it simple, this PR just assigns the output gradient of conditional_block_grad_op to 0** (a runnable sketch of the failing scenario follows this list). Whether optimizer ops can be made to support uninitialized input tensors like sum_op is worth exploring later, because theoretically we could skip the assignment in conditional_block_grad_op and save that work.

2. Infer parameter shapes during append_backward. All of our parameters live in the global block, so when an op_desc infers shapes inside a sub-block, it may not know the shapes of the gradients of parameters whose shape information is only in the global block. I fixed it by inferring each gradient's shape from its forward variable (a schematic of this change also follows this list).
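A minimal sketch of the uninitialized-gradient scenario from point 1, adapted from the tests added in this PR (the layer size, feed values, and variable names are illustrative, not taken from the PR): when `i < 5` is false, only the parameter-free branch runs, so before this fix the fc weights got uninitialized gradients and the SGD op raised an error; with the fix they are filled with zeros and the step succeeds.

```python
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

main_program, startup_program = fluid.Program(), fluid.Program()
with fluid.program_guard(main_program, startup_program):
    img = fluid.data(name='image', shape=[-1, 9], dtype='float32')
    i = fluid.data(name='i', shape=[1], dtype='int32')
    # The false branch has no parameters; when it is taken, the fc weights of the
    # true branch receive no gradient from conditional_block_grad_op.
    loss = layers.cond(i < 5,
                       lambda: layers.mean(layers.fc(img, size=4)),
                       lambda: layers.mean(img))
    fluid.optimizer.SGD(learning_rate=0.1).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_program)
# i = 7 selects the parameter-free branch, exercising the zero-fill path.
exe.run(main_program,
        feed={'i': np.full((1), 7, np.int32),
              'image': np.random.random([4, 9]).astype(np.float32)})
```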
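And a schematic paraphrase of the shape-inference fix from point 2, in plain Python rather than Paddle's actual VarDesc API (the dict-based `var_table` and the variable names are stand-ins for the block's variable lookup, not real Paddle objects):

```python
# Schematic only: mirrors what the renamed _infer_var_data_type_shape_ helper does.
def infer_grad_dtype_shape(grad_name, var_table, default_dtype='float32'):
    fwd_name = grad_name.rsplit('@GRAD', 1)[0]   # e.g. 'fc_0.w_0@GRAD' -> 'fc_0.w_0'
    grad_var = var_table[grad_name]
    fwd_var = var_table.get(fwd_name)
    if fwd_var is not None:
        grad_var['dtype'] = fwd_var['dtype']
        grad_var['shape'] = fwd_var['shape']     # the shape propagation added by this PR
    else:
        grad_var['dtype'] = default_dtype        # fall back to FP32 as before

var_table = {'fc_0.w_0': {'dtype': 'float32', 'shape': [9, 10]},
             'fc_0.w_0@GRAD': {}}
infer_grad_dtype_shape('fc_0.w_0@GRAD', var_table)
assert var_table['fc_0.w_0@GRAD']['shape'] == [9, 10]
```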

This PR also does some code cleanup:
1. Print the variable name when sgd_op catches a shape mismatch so that it is easier to debug
2. Fix a typo: dicta -> dict
Parent c5aec2fe
@@ -5,6 +5,8 @@ cc_library(conditional_block_op_helper SRCS conditional_block_op_helper.cc DEPS
cc_library(recurrent_op_helper SRCS recurrent_op_helper.cc DEPS operator op_variant recurrent_op)
cc_library(while_op_helper SRCS while_op_helper.cc DEPS operator op_variant)
cc_test(conditional_block_op_test SRCS conditional_block_op_test.cc DEPS conditional_block_op executor)
target_link_libraries(conditional_block_infer_op conditional_block_op)
file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/controlflow/conditional_block_op.h"
#include "paddle/fluid/operators/assign_op.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
@@ -94,11 +96,10 @@ class ConditionalBlockGradOp : public ConditionalOp {
[](const framework::LoDTensor *t) { return t->numel() != 0; });
}
const auto &inputs = Inputs(ConditionalOp::kInputs);
const auto &outside_grads =
Outputs(framework::GradVarName(ConditionalOp::kInputs));
if (need_run) {
const auto &inputs = Inputs(ConditionalOp::kInputs);
const auto &outside_grads =
Outputs(framework::GradVarName(ConditionalOp::kInputs));
std::vector<std::string> inside_grads;
inside_grads.reserve(inputs.size());
for (auto &in : inputs) {
@@ -126,7 +127,10 @@ class ConditionalBlockGradOp : public ConditionalOp {
AssignLocalGradientToParentScope(dev_place, cur_scope, scope,
inside_grads, outside_grads);
return;
}
AssignZeroToParentScope(dev_place, scope, inputs, outside_grads);
}
private:
@@ -156,6 +160,77 @@ class ConditionalBlockGradOp : public ConditionalOp {
AssignFunctor(outside_var, *dev_ctx));
}
}
void AssignZeroToParentScope(
const platform::Place &place, const framework::Scope &scope,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outside_grads) const {
for (size_t i = 0; i < outside_grads.size(); ++i) {
const std::string &outside_grad_name = outside_grads[i];
const std::string &input_name = inputs[i];
VLOG(4) << "input_name = " << input_name
<< ", outside_grad_name = " << outside_grad_name;
framework::Variable *input_var = scope.FindVar(input_name);
if (input_var == nullptr) {
continue;
}
framework::Variable *outside_var = scope.FindVar(outside_grad_name);
if (outside_var == nullptr) {
continue;
}
if (input_var->IsType<framework::LoDTensor>()) {
PADDLE_ENFORCE_EQ(outside_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"Type of outside_var %s is NOT LoDTensor, which "
"doesn't match input_var %s",
outside_grad_name, input_name));
AssignZeroToOutsideTensor(
place, scope, input_var->Get<framework::LoDTensor>(),
outside_var->GetMutable<framework::LoDTensor>());
} else if (input_var->IsType<framework::LoDTensorArray>()) {
PADDLE_ENFORCE_EQ(outside_var->IsType<framework::LoDTensorArray>(),
true,
platform::errors::InvalidArgument(
"Type of outside_var %s is NOT LoDTensorArray, "
"which doesn't match input_var %s",
outside_grad_name, input_name));
const auto &input_tensors = input_var->Get<framework::LoDTensorArray>();
auto *outside_tensors =
outside_var->GetMutable<framework::LoDTensorArray>();
PADDLE_ENFORCE_EQ(input_tensors.size(), outside_tensors->size(),
platform::errors::InvalidArgument(
"LoDTensorArray outside_var %s doen't have same "
"size as input_var %s",
outside_grad_name, input_name));
for (size_t j = 0; j < input_tensors.size(); ++j) {
AssignZeroToOutsideTensor(place, scope, input_tensors[j],
&((*outside_tensors)[j]));
}
} else {
// TODO(huihuangzheng): add support for SelectedRows
PADDLE_THROW(platform::errors::InvalidArgument(
"Conditional block grad op doesn't support non-LoDTensor output "
"now"));
}
}
}
void AssignZeroToOutsideTensor(const platform::Place &place,
const framework::Scope &cur_scope,
const framework::LoDTensor &input_tensor,
framework::LoDTensor *outside_tensor) const {
if (!input_tensor.IsInitialized() || input_tensor.numel() == 0) {
return;
}
VLOG(4) << "Assigning zero to " << outside_tensor;
outside_tensor->Resize(input_tensor.dims());
outside_tensor->mutable_data(place, input_tensor.type());
const platform::DeviceContext *dev_ctx =
platform::DeviceContextPool::Instance().Get(place);
math::set_constant(*dev_ctx, outside_tensor, 0.0f);
outside_tensor->set_lod(input_tensor.lod());
}
};
class ConditionalBlockGradInferShape : public framework::InferShapeBase {
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/controlflow/conditional_block_op.h"
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/var_type.h"
USE_NO_KERNEL_OP(conditional_block);
USE_NO_KERNEL_OP(conditional_block_grad);
using LoDTensor = paddle::framework::LoDTensor;
using LoDTensorArray = paddle::framework::LoDTensorArray;
using Scope = paddle::framework::Scope;
using Variable = paddle::framework::Variable;
using Place = paddle::platform::Place;
TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
Place place = paddle::platform::CPUPlace();
Scope scope;
Variable* cond_var = scope.Var("condition");
LoDTensor* cond_tensor = cond_var->GetMutable<LoDTensor>();
paddle::framework::DDim cond_dims = paddle::framework::make_ddim({1});
bool* cond_data = cond_tensor->mutable_data<bool>(cond_dims, place);
cond_data[0] = false;
Variable* input_var = scope.Var("input_lod_tensor_array");
LoDTensorArray* input_tensors = input_var->GetMutable<LoDTensorArray>();
for (int i = 0; i < 5; ++i) {
paddle::framework::DDim in_dims =
paddle::framework::make_ddim({i + 1, i + 2});
LoDTensor lod_tensor;
float* in_data = lod_tensor.mutable_data<float>(in_dims, place);
for (int j = 0; j < (i + 1) * (i + 2); ++j) {
in_data[j] = static_cast<float>(j);
}
input_tensors->push_back(lod_tensor);
}
Variable* input_grad_var = scope.Var("input_lod_tensor_array@GRAD");
LoDTensorArray* grad_tensors = input_grad_var->GetMutable<LoDTensorArray>();
grad_tensors->resize(5);
paddle::framework::AttributeMap attrs;
attrs.insert({"is_scalar_condition", true});
auto conditional_grad_op = paddle::framework::OpRegistry::CreateOp(
"conditional_block_grad",
{{"Input", {"input_lod_tensor_array"}}, {"Cond", {"condition"}}},
{{"Input@GRAD", {"input_lod_tensor_array@GRAD"}}}, attrs);
conditional_grad_op->Run(scope, place);
const LoDTensorArray& out_tensors = input_grad_var->Get<LoDTensorArray>();
for (int i = 0; i < 5; ++i) {
paddle::framework::DDim out_dims = out_tensors[i].dims();
EXPECT_EQ(paddle::framework::make_ddim({i + 1, i + 2}), out_dims);
const float* out_data = out_tensors[i].data<float>();
for (int j = 0; j < (i + 1) * (i + 2); ++j) {
EXPECT_EQ(0, out_data[j]);
}
}
}
@@ -46,8 +46,9 @@ class SGDOp : public framework::OperatorWithKernel {
param_dim, ctx->GetInputDim("Grad"),
platform::errors::InvalidArgument(
"SGD Operator's input Param and Grad dimensions do not match. "
"The Param shape is [%s], but the Grad shape is [%s].",
param_dim, ctx->GetInputDim("Grad")));
"The Param %s shape is [%s], but the Grad %s shape is [%s].",
ctx->Inputs("Param")[0], param_dim, ctx->Inputs("Grad")[0],
ctx->GetInputDim("Grad")));
}
ctx->SetOutputDim("ParamOut", param_dim);
}
@@ -263,15 +263,16 @@ def _create_loss_op_desc_(loss):
return op_desc
def _infer_var_data_type_(grad_var_name, block):
def _infer_var_data_type_shape_(grad_var_name, block):
"""
Infer the data type of given grad variable
Infer the data type and shape of given grad variable
"""
grad_var = block.desc.find_var(cpt.to_bytes(grad_var_name))
fwd_name = _strip_grad_suffix_(grad_var_name)
if block.desc.has_var_recursive(cpt.to_bytes(fwd_name)):
fwd_var = block.desc.find_var_recursive(cpt.to_bytes(fwd_name))
grad_var.set_dtype(fwd_var.dtype())
grad_var.set_shape(fwd_var.shape())
else:
grad_var.set_dtype(core.VarDesc.VarType.FP32)
@@ -921,9 +922,10 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
# infer_shape and infer_type
op_desc.infer_var_type(block.desc)
op_desc.infer_shape(block.desc)
for arg in op_desc.output_arg_names():
if arg in new_vars:
_infer_var_data_type_(arg, block)
_infer_var_data_type_shape_(arg, block)
def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map):
@@ -1062,7 +1064,6 @@ def append_backward(loss,
no_grad_dict = _get_stop_gradients_(program)
no_grad_dict[0].update(list(map(_append_grad_suffix_, no_grad_set)))
grad_info_map = dict()
root_block = program.block(0)
fwd_op_num = root_block.desc.op_size()
@@ -1114,6 +1115,7 @@ def append_backward(loss,
# different names.
_rename_grad_(root_block, fwd_op_num, grad_to_var, {})
grad_info_map = dict()
_append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
program.current_block_idx = current_block_idx
@@ -80,9 +80,11 @@ def select_input(inputs, mask):
helper = LayerHelper('select_input', **locals())
if isinstance(inputs, list) or isinstance(inputs, tuple):
input_dtype = inputs[0].dtype
input_shape = inputs[0].shape
else:
input_dtype = inputs.dtype
out = helper.create_variable(dtype=input_dtype)
input_shape = inputs.shape
out = helper.create_variable(dtype=input_dtype, shape=input_shape)
helper.append_op(
type='select_input',
inputs={'X': inputs,
@@ -1742,7 +1744,8 @@ def copy_var_to_parent_block(var, layer_helper):
assert parent_idx >= 0, "Got wrong parent block index when assigning var to parent scope in control_flow"
parent_block = prog.block(parent_idx)
parent_block_var = parent_block.create_var(dtype=var.dtype, type=var.type)
parent_block_var = parent_block.create_var(
dtype=var.dtype, shape=var.shape, type=var.type)
assign(var, parent_block_var)
return parent_block_var
@@ -37,10 +37,7 @@ def simple_fc_net(use_feed=None):
return simple_fc_net_with_inputs(img, label, class_num=10)
def fc_with_batchnorm(use_feed=None):
img = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
def batchnorm_fc_with_inputs(img, label, class_num=10):
hidden = img
for _ in range(2):
hidden = fluid.layers.fc(
@@ -52,12 +49,18 @@ def fc_with_batchnorm(use_feed=None):
hidden = fluid.layers.batch_norm(input=hidden)
prediction = fluid.layers.fc(hidden, size=10, act='softmax')
prediction = fluid.layers.fc(hidden, size=class_num, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label)
loss = fluid.layers.mean(loss)
return loss
def fc_with_batchnorm(use_feed=None):
img = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
return batchnorm_fc_with_inputs(img, label, class_num=10)
def bow_net(use_feed,
dict_dim,
is_sparse=False,
@@ -24,6 +24,9 @@ import paddle.fluid.framework as framework
from paddle.fluid.backward import append_backward
from paddle.fluid.executor import Executor
from paddle.fluid.framework import Program, program_guard
from simple_nets import simple_fc_net_with_inputs, batchnorm_fc_with_inputs
np.random.seed(123)
class TestCondInputOutput(unittest.TestCase):
@@ -275,5 +278,134 @@ class TestCondNestedControlFlow(unittest.TestCase):
self.assertEqual(ret[1][0], expected_a_grad)
class TestCondBackward(unittest.TestCase):
def backward_value_helper(self, cond_func):
"""
Helper function that checks whether the computed backward gradient is close to the numerical dy/dx
"""
main_program = Program()
main_program.random_seed = 123
startup_program = Program()
startup_program.random_seed = 123
with program_guard(main_program, startup_program):
img = fluid.data(name='image', shape=[-1, 9], dtype='float32')
img.stop_gradient = False
label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
i = fluid.data(name="i", shape=[1], dtype='int32')
loss = cond_func(i, img, label)
append_backward(loss)
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
delta = 0.005
for feed_i in range(0, 10):
feed_img = np.random.random(size=[1, 9]).astype(np.float32)
feed_label = np.random.randint(
low=0, high=10, size=[1, 1], dtype=np.int64)
img_grad, loss_value = exe.run(
main_program,
feed={
'i': np.full((1), feed_i, np.int32),
'image': feed_img,
'label': feed_label
},
fetch_list=[img.grad_name, loss.name])
numerical_grad = np.zeros(shape=[1, 9], dtype=np.float32)
feed_img_delta = np.copy(feed_img)
for j in range(9):
feed_img_delta[0][j] = feed_img[0][j] + delta
loss_delta = exe.run(main_program,
feed={
'i': np.full((1), feed_i, np.int32),
'image': feed_img_delta,
'label': feed_label
},
fetch_list=[loss.name])
numerical_grad[0][j] = (loss_delta[0] - loss_value[0]) / delta
feed_img_delta[0][j] = feed_img[0][j]
self.assertTrue(
np.isclose(
img_grad, numerical_grad, atol=0.05, rtol=0.05).all())
def add_optimizer_helper(self, cond_func):
"""
Test that the program is runnable when an optimizer is added
"""
main_program = Program()
startup_program = Program()
with program_guard(main_program, startup_program):
img = fluid.data(name='image', shape=[-1, 784], dtype='float32')
label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
i = fluid.data(name="i", shape=[1], dtype='int32')
loss = cond_func(i, img, label)
optimizer = fluid.optimizer.SGD(learning_rate=0.1)
optimizer.minimize(loss)
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
for feed_i in range(0, 10):
feed_img = np.random.random(size=[16, 784]).astype(np.float32)
feed_label = np.random.randint(
low=0, high=10, size=[16, 1], dtype=np.int64)
exe.run(main_program,
feed={
'i': np.full((1), feed_i, np.int32),
'image': feed_img,
'label': feed_label
},
fetch_list=[loss])
def test_cond_backward(self):
def cond_func(i, img, label):
predicate = ((i % 2) == 0)
return layers.cond(predicate,
lambda: simple_fc_net_with_inputs(img, label, class_num=10),
lambda: batchnorm_fc_with_inputs(img, label, class_num=10))
self.backward_value_helper(cond_func)
self.add_optimizer_helper(cond_func)
def test_half_nested_cond_backward(self):
def branch(i, img, label):
return layers.cond((i % 2) == 0, lambda: simple_fc_net_with_inputs(img, label, class_num=10),
lambda: batchnorm_fc_with_inputs(img, label, class_num=10))
def cond_func_simple_net_at_true(i, img, label):
return layers.cond(i < 5, lambda: branch(i, img, label),
lambda: layers.mean(img))
def cond_func_simple_net_at_false(i, img, label):
return layers.cond(i < 5, lambda: layers.mean(img),
lambda: branch(i, img, label))
self.backward_value_helper(cond_func_simple_net_at_true)
self.add_optimizer_helper(cond_func_simple_net_at_true)
self.backward_value_helper(cond_func_simple_net_at_false)
self.add_optimizer_helper(cond_func_simple_net_at_false)
def test_nested_cond_backward(self):
def branch(i, img, label, mod_two):
if mod_two:
predicate = ((i % 2) == 0)
else:
predicate = ((i % 2) != 0)
return layers.cond(predicate, lambda: simple_fc_net_with_inputs(img, label, class_num=10),
lambda: batchnorm_fc_with_inputs(img, label, class_num=10))
def cond_func(i, img, label):
return layers.cond(i < 5, lambda: branch(i, img, label, True),
lambda: branch(i, img, label, False))
self.backward_value_helper(cond_func)
self.add_optimizer_helper(cond_func)
if __name__ == '__main__':
unittest.main()
@@ -30,7 +30,7 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase):
def compare_fuse_all_reduce_ops(self,
model,
use_cuda,
init_feed_dicta=None,
init_feed_dict=None,
get_data_from_feeder=None,
optimizer=None,
fuse_all_optimizer_ops=False):
@@ -38,8 +38,8 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase):
return
feed_dict_data = None
if init_feed_dicta is not None:
img, label = init_feed_dicta()
if init_feed_dict is not None:
img, label = init_feed_dict()
feed_dict_data = {"image": img, "label": label}
not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence(
@@ -76,7 +76,7 @@ class TestFuseAllReduceOps(TestFuseAllReduceOpsBase):
self.compare_fuse_all_reduce_ops(
model,
use_cuda,
init_feed_dicta=init_data,
init_feed_dict=init_data,
optimizer=self.optimizer,
fuse_all_optimizer_ops=True)
@@ -94,7 +94,7 @@ class TestFuseAllReduceOpsAndOptiOps(TestFuseAllReduceOps):
self.compare_fuse_all_reduce_ops(
model,
use_cuda,
init_feed_dicta=init_data,
init_feed_dict=init_data,
optimizer=self.optimizer,
fuse_all_optimizer_ops=True)
@@ -26,22 +26,52 @@ from paddle.fluid.layers.control_flow import select_input, select_output
class TestSplitMergeSelectedVarOps(unittest.TestCase):
def test_forward_backward(self):
branch_num = 9
def test_forward_backward_list_output(self):
for branch_num in range(2, 10):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[2], dtype='float32')
x.stop_gradient = False # For test gradient
mask = layers.data(name='mask', shape=[1], dtype='int32')
outputs = []
for i in range(branch_num):
out = program.current_block().create_var(
dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)
outputs.append(out)
select_output(x, outputs, mask)
y = select_input(outputs, mask)
mean = layers.mean(y)
append_backward(mean)
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace()
exe = Executor(place)
feed_x = np.asarray([1.3, -1.4]).astype(np.float32)
for i in range(branch_num):
feed_mask = np.asarray([i]).astype(np.int32)
ret = exe.run(program,
feed={'x': feed_x,
'mask': feed_mask},
fetch_list=[y.name, x.grad_name])
x_grad = np.asarray([0.5, 0.5]).astype(np.float32)
self.assertTrue(np.allclose(np.asarray(ret[0]), feed_x))
self.assertTrue(np.allclose(np.asarray(ret[1]), x_grad))
def test_forward_backward_single_tensor_output(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[2], dtype='float32')
x.stop_gradient = False # For test gradient
mask = layers.data(name='mask', shape=[1], dtype='int32')
outputs = []
for i in range(branch_num):
out = program.current_block().create_var(
dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)
outputs.append(out)
out = program.current_block().create_var(
dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)
select_output(x, outputs, mask)
y = select_input(outputs, mask)
select_output(x, out, mask)
y = select_input(out, mask)
mean = layers.mean(y)
append_backward(mean)
@@ -50,15 +80,14 @@ class TestSplitMergeSelectedVarOps(unittest.TestCase):
exe = Executor(place)
feed_x = np.asarray([1.3, -1.4]).astype(np.float32)
for i in range(branch_num):
feed_mask = np.asarray([i]).astype(np.int32)
ret = exe.run(program,
feed={'x': feed_x,
'mask': feed_mask},
fetch_list=[y.name, x.grad_name])
x_grad = np.asarray([0.5, 0.5]).astype(np.float32)
self.assertTrue(np.allclose(np.asarray(ret[0]), feed_x))
self.assertTrue(np.allclose(np.asarray(ret[1]), x_grad))
feed_mask = np.asarray([0]).astype(np.int32)
ret = exe.run(program,
feed={'x': feed_x,
'mask': feed_mask},
fetch_list=[y.name, x.grad_name])
x_grad = np.asarray([0.5, 0.5]).astype(np.float32)
self.assertTrue(np.allclose(np.asarray(ret[0]), feed_x))
self.assertTrue(np.allclose(np.asarray(ret[1]), x_grad))
if __name__ == '__main__':