提交 c957445c 编写于 作者: Y Yu Yang

A better error message for gradient checker

* Report which parameter and which element are wrong, and what the max_diff is.
上级 be9867f9
...@@ -22,6 +22,7 @@ limitations under the License. */ ...@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "paddle/string/to_string.h"
#include "pybind11/numpy.h" #include "pybind11/numpy.h"
#include "pybind11/pybind11.h" #include "pybind11/pybind11.h"
#include "pybind11/stl.h" #include "pybind11/stl.h"
...@@ -205,9 +206,13 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -205,9 +206,13 @@ All parameter, weight, gradient are variables in Paddle.
}); });
// clang-format on // clang-format on
py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>()); py::class_<platform::GPUPlace>(m, "GPUPlace")
.def(py::init<int>())
.def("__str__", string::to_string<const platform::GPUPlace &>);
py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>()); py::class_<paddle::platform::CPUPlace>(m, "CPUPlace")
.def(py::init<>())
.def("__str__", string::to_string<const platform::CPUPlace &>);
py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base( py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base(
m, "Operator"); m, "Operator");
......
...@@ -92,15 +92,26 @@ def get_numeric_gradient(op, ...@@ -92,15 +92,26 @@ def get_numeric_gradient(op,
class GradientChecker(unittest.TestCase): class GradientChecker(unittest.TestCase):
def __is_close(self, numeric_grads, scope, max_relative_error): def __is_close(self, numeric_grads, scope, max_relative_error, msg_prefix):
for name in numeric_grads: for name in numeric_grads:
op_grad = numpy.array( b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
scope.find_var(grad_var_name(name)).get_tensor()) a = numeric_grads[name]
is_close = numpy.allclose(
numeric_grads[name], op_grad, rtol=max_relative_error, atol=100) abs_a = numpy.abs(a)
if not is_close: # if abs_a is nearly zero, then use abs error for a, not relative
return False # error.
return True abs_a[abs_a < 1e-3] = 1
diff_mat = numpy.abs(a - b) / abs_a
max_diff = numpy.max(diff_mat)
def err_msg():
offset = numpy.argmax(diff_mat > max_relative_error)
return "%s Variable %s max gradient diff %f over limit %f, the first " \
"error element is %d" % (
msg_prefix, name, max_diff, max_relative_error, offset)
self.assertLessEqual(max_diff, max_relative_error, err_msg())
def check_grad(self, def check_grad(self,
forward_op, forward_op,
...@@ -145,7 +156,8 @@ class GradientChecker(unittest.TestCase): ...@@ -145,7 +156,8 @@ class GradientChecker(unittest.TestCase):
# get numeric gradient # get numeric gradient
for check_name in inputs_to_check: for check_name in inputs_to_check:
numeric_grad[check_name] = \ numeric_grad[check_name] = \
get_numeric_gradient(forward_op, input_vars, output_name, check_name) get_numeric_gradient(forward_op, input_vars, output_name,
check_name)
# get operator gradient according to different device # get operator gradient according to different device
for place in places: for place in places:
...@@ -187,15 +199,8 @@ class GradientChecker(unittest.TestCase): ...@@ -187,15 +199,8 @@ class GradientChecker(unittest.TestCase):
backward_op.infer_shape(scope) backward_op.infer_shape(scope)
backward_op.run(scope, ctx) backward_op.run(scope, ctx)
if isinstance(place, core.CPUPlace): self.__is_close(numeric_grad, scope, max_relative_error,
msg = "CPU kernel gradient is not close to numeric gradient" "Gradient Check On %s" % str(place))
else:
if isinstance(place, core.GPUPlace):
msg = "GPU kernel gradient is not close to numeric gradient"
else:
raise ValueError("unknown place " + type(place))
self.assertTrue(
self.__is_close(numeric_grad, scope, max_relative_error), msg)
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册