From 532079a2224445edebad821c971443677ae700ee Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 13 Apr 2020 14:05:45 +0800
Subject: [PATCH] API (CompiledProgram) error message enhancement (#23559)

* api compild program error polish, test=develop

* fix coverage problem, test=develop

* fix details & add unittests, test=develop

* add test for coverage, test=develop
---
 paddle/fluid/framework/ir/graph.cc            |  29 ++--
 paddle/fluid/framework/ir/graph.h             |  41 +++--
 python/paddle/fluid/compiler.py               |  50 +++---
 .../tests/unittests/test_compiled_program.py  | 162 ++++++++++++++++++
 4 files changed, 230 insertions(+), 52 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_compiled_program.py

diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc
index 8ba0e8b80b..83b6acbf05 100644
--- a/paddle/fluid/framework/ir/graph.cc
+++ b/paddle/fluid/framework/ir/graph.cc
@@ -69,10 +69,11 @@ std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
     std::unordered_set<std::string> out_arg_set;
     for (auto &each_var_name : op->OutputArgumentNames()) {
       if (each_var_name != kEmptyVarName) {
-        PADDLE_ENFORCE(out_arg_set.count(each_var_name) == 0,
-                       "Program is wrong. %s occurs in output of %s several "
-                       "times.",
-                       each_var_name, op->Type());
+        PADDLE_ENFORCE_EQ(out_arg_set.count(each_var_name), 0,
+                          platform::errors::InvalidArgument(
+                              "The input Program is invalid. Variable %s occurs"
+                              " in output of %s multiple times.",
+                              each_var_name, op->Type()));
         out_arg_set.insert(each_var_name);
       }
 
@@ -121,10 +122,10 @@ void Graph::ResolveHazard(
           (*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
       const auto &read_ops = (*it_old)->outputs;
 
-      PADDLE_ENFORCE(
-          write_op,
-          string::Sprintf("The write_op of var %s should not be empty.",
-                          (*it_new)->Name()));
+      PADDLE_ENFORCE_NOT_NULL(
+          write_op, platform::errors::NotFound(
+                        "The generate operator of variable %s is null.",
+                        (*it_new)->Name()));
 
       // Add write after write dependence
       ir::Node *upstream_op =
@@ -174,6 +175,8 @@ std::shared_ptr<Graph> Graph::Clone() {
   cloned_graph->num_node_created_ = 0;
   std::unordered_map<ir::Node *, ir::Node *> origin_to_cloned;
   for (auto *n : this->node_set_) {
+    PADDLE_ENFORCE_NOT_NULL(n, platform::errors::InvalidArgument(
+                                   "The node to be cloned is nullptr."));
     ir::Node *cloned_node = nullptr;
     if (n->IsCtrlVar()) {
       cloned_node = cloned_graph->CreateControlDepVar();
@@ -184,11 +187,11 @@ std::shared_ptr<Graph> Graph::Clone() {
     } else if (n->IsOp()) {
       cloned_node = cloned_graph->CreateOpNode(n->Op());
     }
-    if (cloned_node) {
-      origin_to_cloned[n] = cloned_node;
-    } else {
-      PADDLE_THROW("The cloned node's type is not supported!");
-    }
+    PADDLE_ENFORCE_NOT_NULL(
+        cloned_node,
+        platform::errors::InvalidArgument(
+            "Failed to clone new node from original node in graph."));
+    origin_to_cloned[n] = cloned_node;
   }
   for (auto *n : this->node_set_) {
     for (auto it = n->inputs.begin(); it != n->inputs.end(); it++) {
diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h
index c671e191e5..137ab7a56d 100644
--- a/paddle/fluid/framework/ir/graph.h
+++ b/paddle/fluid/framework/ir/graph.h
@@ -95,15 +95,17 @@ class Graph {
 
   template <typename AttrType>
   AttrType &Get(const std::string &attr_name) const {
-    PADDLE_ENFORCE_EQ(Has(attr_name), true, "%s attr not registered for graph.",
-                      attr_name);
+    PADDLE_ENFORCE_EQ(
+        Has(attr_name), true,
+        platform::errors::PreconditionNotMet(
+            "%s attribute not registered for current graph.", attr_name));
     try {
       return *boost::any_cast<AttrType *>(attrs_.at(attr_name));
     } catch (boost::bad_any_cast &) {
-      PADDLE_THROW(
-          "Invalid attribute type of %s error, expected: %s, actual: %s",
-          attr_name, typeid(AttrType *).name(),
-          attrs_.at(attr_name).type().name());
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Invalid attribute type of %s, expected: %s, received: %s.",
+          attr_name, platform::demangle(typeid(AttrType *).name()),  // NOLINT
+          platform::demangle(attrs_.at(attr_name).type().name())));
     }
   }
 
@@ -112,7 +114,8 @@ class Graph {
     PADDLE_ENFORCE_EQ(
         attrs_.count(attr_name), 0,
         platform::errors::AlreadyExists(
-            "The attribute %s has been set in the graph.", attr_name));
+            "The attribute %s to be set already exists in the graph.",
+            attr_name));
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = [attr, attr_name]() {
       VLOG(3) << "deleting " << attr_name;
@@ -124,8 +127,9 @@ class Graph {
   void SetNotOwned(const std::string &attr_name, AttrType *attr) {
     PADDLE_ENFORCE_EQ(
         attrs_.count(attr_name), 0,
-        platform::errors::AlreadyExists(
-            "The attribute %s has been set in the graph.", attr_name));
+        platform::errors::AlreadyExists("The attribute %s to be set(not owned) "
+                                        "already exists in the graph.",
+                                        attr_name));
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = []() {};
   }
@@ -134,7 +138,8 @@ class Graph {
     PADDLE_ENFORCE_NE(
         attrs_.count(attr_name), 0,
         platform::errors::NotFound(
-            "The attribute %s has not been set in the graph.", attr_name));
+            "The attribute %s to be erased does not exist in the graph.",
+            attr_name));
     attr_dels_[attr_name]();
     attrs_.erase(attr_name);
     attr_dels_.erase(attr_name);
@@ -144,7 +149,9 @@ class Graph {
 
   // Create a normal variable with non-null VarDesc.
   ir::Node *CreateVarNode(VarDesc *var_desc) {
-    PADDLE_ENFORCE_NOT_NULL(var_desc);
+    PADDLE_ENFORCE_NOT_NULL(
+        var_desc, platform::errors::InvalidArgument(
+                      "The VarDesc used to create variable node is null."));
     auto *x = AddNode(new ir::Node(var_desc));
     x->SetId(num_node_created_++);
     return x;
@@ -152,7 +159,9 @@ class Graph {
 
   // Create a normal runnable operator with OpDesc.
   ir::Node *CreateOpNode(OpDesc *op_desc) {
-    PADDLE_ENFORCE_NOT_NULL(op_desc);
+    PADDLE_ENFORCE_NOT_NULL(
+        op_desc, platform::errors::InvalidArgument(
+                     "The OpDesc used to create operator node is null."));
     auto *x = AddNode(new ir::Node(op_desc));
     x->SetId(num_node_created_++);
     return x;
@@ -192,7 +201,9 @@ class Graph {
   }
 
   std::unique_ptr<ir::Node> RemoveNode(ir::Node *node) {
-    PADDLE_ENFORCE_EQ(node_set_.find(node) != node_set_.end(), true);
+    PADDLE_ENFORCE_EQ(node_set_.find(node) != node_set_.end(), true,
+                      platform::errors::PreconditionNotMet(
+                          "The node to be removed does not exist."));
     std::unique_ptr<ir::Node> ret;
     ret.reset(nodes_.at(node).release());
     nodes_.erase(node);
@@ -218,7 +229,9 @@ class Graph {
 
   // This method takes ownership of `node`.
   ir::Node *AddNode(ir::Node *node) {
-    PADDLE_ENFORCE_EQ(node_set_.find(node) == node_set_.end(), true);
+    PADDLE_ENFORCE_EQ(node_set_.find(node) == node_set_.end(), true,
+                      platform::errors::PreconditionNotMet(
+                          "The node to be added already exists."));
     nodes_[node].reset(node);
     node_set_.insert(node);
     return node;
diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py
index b07a514cce..4441de50be 100644
--- a/python/paddle/fluid/compiler.py
+++ b/python/paddle/fluid/compiler.py
@@ -139,8 +139,9 @@ class CompiledProgram(object):
             self._graph = core.Graph(program_or_graph.desc)
             self._program = program_or_graph
         else:
-            raise ValueError("Wrong program_to_graph type: %s" %
-                             type(program_or_graph))
+            raise TypeError(
+                "The type of program_to_graph parameter is wrong, expected Graph or Program, but received %s"
+                % type(program_or_graph))
 
         self._scope = None
         self._place = None
@@ -258,8 +259,8 @@ class CompiledProgram(object):
                                   feed={"X": test_data},
                                   fetch_list=[loss.name])
         """
-        assert not self._is_data_parallel, "Already compiled with parallel."
-        assert not self._is_inference, "Cannot compile both data parallel and inference"
+        assert not self._is_data_parallel, "Already compiled with parallel, cannot be recompiled."
+        assert not self._is_inference, "Cannot compile with both data parallel and inference."
         self._is_data_parallel = True
         # FIXME(zcd): Currently, the build_strategy can be set during creating
         # CompiledProgram or calling with_data_parallel, and it may be confusing,
@@ -272,7 +273,7 @@ class CompiledProgram(object):
         self._places = places
 
         if _has_backward_op(self._graph):
-            assert self._loss_name is not None, "The loss_name should be set here."
+            assert self._loss_name is not None, "The loss name of CompiledProgram is None. The loss name should be set if CompiledProgram contains a backward part."
 
         if self._places is not None:
             if not isinstance(self._places, (list, tuple)):
@@ -288,8 +289,8 @@ class CompiledProgram(object):
         Returns:
             self
         """
-        assert not self._is_data_parallel, "Cannot compile both data parallel and inference"
-        assert not self._is_inference, "Already compiled with inference"
+        assert not self._is_data_parallel, "Cannot compile with both data parallel and inference."
+        assert not self._is_inference, "Already compiled with inference, cannot be recompiled."
 
         assert any([
             isinstance(config, InferNativeConfig),
@@ -300,30 +301,29 @@ class CompiledProgram(object):
         return self
 
     def _with_distributed(self):
-        raise NotImplementedError()
+        raise NotImplementedError(
+            "Subclass of CompiledProgram should implement _with_distributed method."
+        )
 
     def _compile_data_parallel(self, places, use_cuda=False, scope=None):
         if self._share_vars_from:
            if scope:
                 sys.stderr.write("share_vars_from is set, scope is ignored.\n")
-            if not self._is_data_parallel:
-                raise ValueError(
-                    "Currently, only data parallel mode need share_vars_from.")
             if not self._share_vars_from._is_data_parallel:
-                raise ValueError("share_vars_from is not data parallel. Cannot "
-                                 "share vars from it.")
+                raise ValueError(
+                    "The shared Program is not data parallel, cannot "
+                    "share variables from it.")
             if self._share_vars_from._executor is None:
                 raise ValueError(
-                    "share_vars_from is not compiled and run, so there is no "
-                    "var to share.")
+                    "The shared Program is not compiled and executed, so there are no "
+                    "variables to share.")
             self._local_scopes = self._share_vars_from._executor.local_scopes()
         else:
             assert scope is not None, ""
             self._local_scopes = []
 
         assert isinstance(places, tuple) or isinstance(places, list), \
-            "Currently , The places type only should be list or tuple, \n" \
-            "but the input type is {}.".format(type(places))
+            "Currently, the places type can only be list or tuple, but the input type is {}.".format(type(places))
 
         if self._build_strategy is None:
             self._build_strategy = BuildStrategy()
@@ -354,7 +354,7 @@ class CompiledProgram(object):
 
             tps = self._program._trainers_endpoints
             assert self._build_strategy.num_trainers == len(
-                tps), "num_trainers == len(end_points)"
+                tps), "The number of trainers is not equal to the number of endpoints."
             self._build_strategy.trainers_endpoints = tps
 
         if self._program:
@@ -366,11 +366,11 @@ class CompiledProgram(object):
             self._build_strategy.enable_sequential_execution = True
 
         if self._program is not None and self._program._enable_dgc:
-            assert use_cuda, "DGC only used under cuda"
+            assert use_cuda, "DGC can only be used under CUDA environment."
             assert self._build_strategy.num_trainers * len(
-                places) > 1, "DGC is not useful for single card training"
+                places) > 1, "DGC is not available for single card training."
             assert self._build_strategy.reduce_strategy == BuildStrategy.ReduceStrategy.AllReduce, "DGC \
-                only used for AllReduce BuildStrategy"
+                can only be used for AllReduce BuildStrategy."
 
             # DGC doesn't support fuse for now, close fuse.
             self._build_strategy.fuse_all_reduce_ops = False
@@ -411,9 +411,9 @@ class CompiledProgram(object):
         """
         if self._compiled:
             if scope and self._scope != scope:
-                raise ValueError("Cannot compile with different scope")
+                raise ValueError("Cannot compile program with different scope.")
             if place and not self._place._equals(place):
-                raise ValueError("Cannot compile with different place")
+                raise ValueError("Cannot compile program with different place.")
             return self
         self._compiled = True
@@ -448,9 +448,9 @@ class CompiledProgram(object):
         if has_set_place:
             for p in place_list:
                 assert p._type() == place._type(), \
-                    "Place type not match. You may set the wrong type of places"
+                    "Place type does not match. You may have set the wrong type of places."
         else:
             place_list = cuda_places() if isinstance(
                 place, core.CUDAPlace) else cpu_places()
-        assert place_list, "no place for execution"
+        assert place_list, "No places for execution."
         return place_list
diff --git a/python/paddle/fluid/tests/unittests/test_compiled_program.py b/python/paddle/fluid/tests/unittests/test_compiled_program.py
new file mode 100644
index 0000000000..8430f39578
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_compiled_program.py
@@ -0,0 +1,162 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import paddle.fluid as fluid
+from paddle.fluid import core
+from test_imperative_base import new_program_scope
+from simple_nets import simple_fc_net
+
+
+class TestCompiledProgram(unittest.TestCase):
+    def setUp(self):
+        self.seed = 100
+        self.img = np.random.random(size=(16, 784)).astype('float32')
+        self.label = np.random.randint(
+            low=0, high=10, size=[16, 1], dtype=np.int64)
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = self.seed
+            fluid.default_main_program().random_seed = self.seed
+            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+            ) else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            loss = simple_fc_net()
+            exe.run(fluid.default_startup_program())
+
+            loss_data, = exe.run(fluid.default_main_program(),
+                                 feed={"image": self.img,
+                                       "label": self.label},
+                                 fetch_list=[loss.name])
+            self.loss = loss_data[0]
+
+    def test_compiled_program_base(self):
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = self.seed
+            fluid.default_main_program().random_seed = self.seed
+            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+            ) else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            loss = simple_fc_net()
+            exe.run(fluid.default_startup_program())
+            compiled_prog = fluid.CompiledProgram(fluid.default_main_program())
+
+            loss_data, = exe.run(compiled_prog,
+                                 feed={"image": self.img,
+                                       "label": self.label},
+                                 fetch_list=[loss.name])
+            self.assertTrue(np.array_equal(loss_data[0], self.loss))
+
+    def test_compiled_program_with_data_parallel(self):
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = self.seed
+            fluid.default_main_program().random_seed = self.seed
+            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+            ) else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            loss = simple_fc_net()
+            exe.run(fluid.default_startup_program())
+            compiled_prog = fluid.CompiledProgram(fluid.default_main_program(
+            )).with_data_parallel(
+                loss_name=loss.name, places=[place])
+
+            loss_data, = exe.run(compiled_prog,
+                                 feed={"image": self.img,
+                                       "label": self.label},
+                                 fetch_list=[loss.name])
+            self.assertTrue(np.array_equal(loss_data[0], self.loss))
+
+
+class TestCompiledProgramError(unittest.TestCase):
+    def test_program_or_graph_error(self):
+        self.assertRaises(TypeError, fluid.CompiledProgram, "program")
+
+    def build_simple_model(self):
+        img = fluid.layers.data(
+            name='image', shape=[1, 28, 28], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        prediction = fluid.layers.fc(input=img, size=10, act='softmax')
+        loss = fluid.layers.cross_entropy(input=prediction, label=label)
+        avg_loss = fluid.layers.mean(loss)
+
+    def compile_program_not_compiled(self):
+        with fluid.program_guard(fluid.Program()):
+            # build model
+            self.build_simple_model()
+            # compile program
+            program = fluid.default_main_program()
+            compiled_program = fluid.CompiledProgram(
+                program).with_data_parallel()
+            return compiled_program
+
+    def compile_program(self):
+        with fluid.program_guard(fluid.Program()):
+            # build model
+            self.build_simple_model()
+            # compile program
+            program = fluid.default_main_program()
+            compiled_program = fluid.CompiledProgram(program)
+            scope = fluid.global_scope()
+            place = fluid.CPUPlace()
+            compiled_program._compile(scope, place)
+            return compiled_program, scope, place
+
+    def test_compile_scope_error(self):
+        compiled_program, _, place = self.compile_program()
+        new_scope = core.Scope()
+        with self.assertRaises(ValueError):
+            compiled_program._compile(new_scope, place)
+
+    def test_compile_place_error(self):
+        # need create different place
+        if core.is_compiled_with_cuda():
+            compiled_program, scope, _ = self.compile_program()
+            new_place = fluid.CUDAPlace(0)
+            with self.assertRaises(ValueError):
+                compiled_program._compile(scope, new_place)
+
+    def test_share_vars_from_error_no_parallel(self):
+        with fluid.program_guard(fluid.Program()):
+            source_program, _, _ = self.compile_program()
+            self.build_simple_model()
+            # compile program
+            program = fluid.default_main_program()
+            compiled_program = fluid.CompiledProgram(
+                program).with_data_parallel(share_vars_from=source_program)
+            scope = fluid.global_scope()
+            place = fluid.CPUPlace()
+            with self.assertRaises(ValueError):
+                compiled_program._compile(scope, place)
+
+    def test_share_vars_from_error_no_executor(self):
+        with fluid.program_guard(fluid.Program()):
+            source_program = self.compile_program_not_compiled()
+            self.build_simple_model()
+            # compile program
+            program = fluid.default_main_program()
+            compiled_program = fluid.CompiledProgram(
+                program).with_data_parallel(share_vars_from=source_program)
+            scope = fluid.global_scope()
+            place = fluid.CPUPlace()
+            with self.assertRaises(ValueError):
+                compiled_program._compile(scope, place)
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
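
Illustrative usage, not part of the patch above: a minimal sketch of how the sharpened messages surface to a caller, mirroring the checks exercised by the new TestCompiledProgramError cases. It assumes a CPU-only build and the 1.x paddle.fluid API; the tiny layer below is hypothetical and only exists to give _compile a program to work on.

import paddle.fluid as fluid
from paddle.fluid import core

# Passing something that is neither a Program nor a Graph now raises TypeError
# with the clearer "The type of program_to_graph parameter is wrong, ..." message.
try:
    fluid.CompiledProgram("not_a_program_or_graph")
except TypeError as e:
    print(e)

# Recompiling the same CompiledProgram with a different scope now raises
# "Cannot compile program with different scope." (via the private _compile path,
# exactly as in test_compile_scope_error).
with fluid.program_guard(fluid.Program()):
    img = fluid.layers.data(name='image', shape=[784], dtype='float32')
    fluid.layers.fc(input=img, size=10)
    compiled = fluid.CompiledProgram(fluid.default_main_program())
    compiled._compile(fluid.global_scope(), fluid.CPUPlace())
    try:
        compiled._compile(core.Scope(), fluid.CPUPlace())
    except ValueError as e:
        print(e)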