import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations
import numpy
import unittest

__all__ = ['get_numeric_gradient']


def get_numeric_gradient(op,
                         input_values,
                         output_name,
                         input_to_check,
                         delta=1e-5,
                         local_scope=None):
    """
    Get Numeric Gradient for an operator's input.
    
    :param op: C++ operator instance, could be an network 
    :param input_values: The input variables. Should be an dictionary, key is 
    variable name. Value is numpy array.
    :param output_name: The final output variable name. 
    :param input_to_check: The input variable need to get gradient.
    :param delta: The perturbation value for numeric gradient method. The 
    smaller delta is, the more accurate result will get. But if that delta is
     too small, it could occur numerical stability problem.
    :param local_scope: The local scope used for get_numeric_gradient.
    :return: The gradient array in numpy format.
    """
    if local_scope is None:
        local_scope = core.Scope()

    # Create all input variables in local_scope
    for var_name in input_values:
        var = local_scope.new_var(var_name)
        tensor = var.get_tensor()
        tensor.set_dims(input_values[var_name].shape)
        tensor.alloc_float()
        tensor.set(input_values[var_name])

    # Create all output variables in local_scope
    for output in op.outputs():
        if local_scope.find_var(output) is None:
            local_scope.new_var(output).get_tensor()

    op.infer_shape(local_scope)

    # allocate output memory
    for output in op.outputs():
        local_scope.find_var(output).get_tensor().alloc_float()

    # TODO(yuyang18): Only CPU is supported now.
    cpu_ctx = core.DeviceContext.cpu_context()

    def get_output():
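        # Run the operator and reduce its output to a scalar by summation,
        # so the numeric gradient below is d(sum(output)) / d(input).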
        op.run(local_scope, cpu_ctx)
        return numpy.array(local_scope.find_var(output_name).get_tensor()).sum()

    def product(dim):
        return reduce(lambda a, b: a * b, dim, 1)

    tensor_to_check = local_scope.find_var(input_to_check).get_tensor()
    tensor_size = product(tensor_to_check.get_dims())
    gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32')
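    # Perturb one element at a time, leaving the rest untouched, so each
    # iteration measures a single partial derivative.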
    for i in xrange(tensor_size):
        origin = tensor_to_check.get_float_element(i)
        x_pos = origin + delta
        tensor_to_check.set_float_element(i, x_pos)
        y_pos = get_output()

        x_neg = origin - delta
        tensor_to_check.set_float_element(i, x_neg)
        y_neg = get_output()

        tensor_to_check.set_float_element(i, origin)  # restore old value
        gradient_flat[i] = (y_pos - y_neg) / delta / 2
    return gradient_flat.reshape(tensor_to_check.get_dims())


if __name__ == '__main__':

    class GetNumericGradientTest(unittest.TestCase):
        def test_add_op(self):
            add_op = op_creations.add_two(X="X", Y="Y", Out="Z")
            x = numpy.random.random((10, 1)).astype("float32")
            y = numpy.random.random((10, 1)).astype("float32")

            arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')

            self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)
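
        def test_add_op_grad_wrt_y(self):
            # A minimal sketch of the same check for the second input
            # (test name and check chosen here for illustration): for
            # Out = X + Y, d(sum(Out))/dY is all ones, so the mean of the
            # numeric gradient should be ~1.0.
            add_op = op_creations.add_two(X="X", Y="Y", Out="Z")
            x = numpy.random.random((10, 1)).astype("float32")
            y = numpy.random.random((10, 1)).astype("float32")

            arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'Y')

            self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)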

    unittest.main()