From c957445c72fd8f2c0354d8b430ef37f47ac3bc73 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 17:51:21 +0800 Subject: [PATCH] A better error message for gradient checker * Report which parameter and which element are wrong, and what the max_diff is. --- paddle/framework/pybind.cc | 9 +++- .../v2/framework/tests/gradient_checker.py | 41 +++++++++++-------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 915ffb1c00..9139a496ec 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/operators/net_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/string/to_string.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -205,9 +206,13 @@ All parameter, weight, gradient are variables in Paddle. }); // clang-format on - py::class_(m, "GPUPlace").def(py::init()); + py::class_(m, "GPUPlace") + .def(py::init()) + .def("__str__", string::to_string); - py::class_(m, "CPUPlace").def(py::init<>()); + py::class_(m, "CPUPlace") + .def(py::init<>()) + .def("__str__", string::to_string); py::class_> operator_base( m, "Operator"); diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index b73c4869d1..7c4eda5f30 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -92,15 +92,26 @@ def get_numeric_gradient(op, class GradientChecker(unittest.TestCase): - def __is_close(self, numeric_grads, scope, max_relative_error): + def __is_close(self, numeric_grads, scope, max_relative_error, msg_prefix): for name in numeric_grads: - op_grad = numpy.array( - scope.find_var(grad_var_name(name)).get_tensor()) - is_close = numpy.allclose( - numeric_grads[name], op_grad, rtol=max_relative_error, atol=100) - if not 
is_close: - return False - return True + b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + a = numeric_grads[name] + + abs_a = numpy.abs(a) + # if abs_a is nearly zero, then use abs error for a, not relative + # error. + abs_a[abs_a < 1e-3] = 1 + + diff_mat = numpy.abs(a - b) / abs_a + max_diff = numpy.max(diff_mat) + + def err_msg(): + offset = numpy.argmax(diff_mat > max_relative_error) + return "%s Variable %s max gradient diff %f over limit %f, the first " \ + "error element is %d" % ( + msg_prefix, name, max_diff, max_relative_error, offset) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) def check_grad(self, forward_op, @@ -145,7 +156,8 @@ class GradientChecker(unittest.TestCase): # get numeric gradient for check_name in inputs_to_check: numeric_grad[check_name] = \ - get_numeric_gradient(forward_op, input_vars, output_name, check_name) + get_numeric_gradient(forward_op, input_vars, output_name, + check_name) # get operator gradient according to different device for place in places: @@ -187,15 +199,8 @@ class GradientChecker(unittest.TestCase): backward_op.infer_shape(scope) backward_op.run(scope, ctx) - if isinstance(place, core.CPUPlace): - msg = "CPU kernel gradient is not close to numeric gradient" - else: - if isinstance(place, core.GPUPlace): - msg = "GPU kernel gradient is not close to numeric gradient" - else: - raise ValueError("unknown place " + type(place)) - self.assertTrue( - self.__is_close(numeric_grad, scope, max_relative_error), msg) + self.__is_close(numeric_grad, scope, max_relative_error, + "Gradient Check On %s" % str(place)) if __name__ == '__main__': -- GitLab