Unverified · Commit c1c2633a authored by qingqing01, committed by GitHub

Support backward of backward for Relu and add a new gradient checker by comparing theoretical and numerical Jacobian. (#16862)

* Support backward of backward and a new gradient checker.
* Rename decorators.py to decorator_helper.py, since the Python environment on Windows CI already ships a decorators package.

1. Add ReluDoubleGradMaker when registering relu_grad.
2. Add a new gradient checker that compares the theoretical and numerical Jacobian; double gradients are verified with double_grad_check (see the usage sketch below).
Parent 63d9fe33
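For context, here is a minimal usage sketch (not part of the patch) mirroring the TestReluDoubleGradCheck test added below; the fresh main/startup programs, the [2, 8] shape, eps value, and the CPU place are illustrative assumptions, and gradient_checker is the new module under the unittests directory:

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import gradient_checker  # the new unittests/gradient_checker.py module

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    # the checker needs a fully specified input shape (no -1 batch dimension)
    x = layers.data('x', [2, 8], False, np.float64)
    x.persistable = True
    y = layers.relu(x)

    # keep samples away from relu's kink at 0 so finite differences stay stable
    x_arr = np.random.uniform(-1, 1, [2, 8]).astype(np.float64)
    x_arr[np.abs(x_arr) < 0.005] = 0.02

    # appends the first-order backward, then compares the analytical Jacobian
    # of that gradient against a central-difference estimate; raises on mismatch
    gradient_checker.double_grad_check(
        [x], y, x_init=x_arr, place=fluid.CPUPlace(), eps=0.005)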
...@@ -597,10 +597,57 @@ REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
if (ctx->HasOutput("DOut")) {
ctx->ShareDim("Out", "DOut");
ctx->ShareLoD("Out", "DOut");
}
if (ctx->HasOutput("DDOut")) {
ctx->ShareDim("Out", "DDOut");
ctx->ShareLoD("Out", "DDOut");
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "Out");
}
};
//
// ReluGrad: dx = dy if y >= 0 else 0
// ReluGradGrad: ddy = ddx if y >= 0 else 0
//
class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpDescMaker {
public:
using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {
auto* op = new ::paddle::framework::OpDesc();
op->SetType("relu_grad_grad");
// input1: Out
op->SetInput("Out", Input("Out"));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(Attrs());
// Out@GRAD@GRAD: ddy
op->SetOutput("DOut", InputGrad("Out"));
op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
return std::unique_ptr<::paddle::framework::OpDesc>(op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
#define REGISTER_ACTIVATION_OP(KERNEL_TYPE, OP_NAME, functor, grad_functor) \
  REGISTER_OPERATOR(                                                        \
...@@ -632,3 +679,23 @@ namespace ops = paddle::operators;
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
REGISTER_OPERATOR(
relu, ops::ActivationOp, ops::ReluOpMaker, ops::ActivationOpInferVarType,
ops::ActivationGradOpDescMaker<ops::ReluGradFunctor<float>::FwdDeps()>,
paddle::framework::SingleOpInplaceInToOut);
REGISTER_OPERATOR(relu_grad, ops::ActivationOpGrad,
paddle::framework::SingleOpInplaceInToOut,
ops::ReluDoubleGradMaker);
REGISTER_OPERATOR(relu_grad_grad, ops::ActivationOpDoubleGrad);
REGISTER_ACTIVATION_CPU_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);
REGISTER_OP_CPU_KERNEL(
relu_grad_grad,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<plat::float16>>);
...@@ -32,3 +32,14 @@ namespace plat = paddle::platform;
                                ops::grad_functor<plat::float16>>);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);
REGISTER_ACTIVATION_CUDA_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);
REGISTER_OP_CUDA_KERNEL(
relu_grad_grad,
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::ReluGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::ReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
ops::ReluGradGradFunctor<plat::float16>>);
...@@ -1198,6 +1198,126 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
/*
* in arguments: x, out, ddx
* out arguments: ddout, dout, dx
*/
template <ActBwdOpFwdDeps kDepValue>
inline void ExtractActivationDoubleGradTensor(
const framework::ExecutionContext& ctx, const framework::Tensor** X,
const framework::Tensor** Out, const framework::Tensor** ddX,
framework::Tensor** dX, framework::Tensor** dOut,
framework::Tensor** ddOut) {
auto out_var = ctx.InputVar("Out");
auto ddx_var = ctx.InputVar("DDX");
auto ddo_var = ctx.OutputVar("DDOut");
auto do_var = ctx.OutputVar("DOut");
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input Variable Out, variable name = %s",
ctx.op().Input("Out"));
PADDLE_ENFORCE(ddx_var != nullptr,
"Cannot get input Variable %s, variable name = %s", "DDX",
ctx.op().Input("DDX"));
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
*Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
*ddX = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*ddx_var);
if (ddo_var) {
*ddOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
ddo_var);
}
if (do_var) {
*dOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
do_var);
}
} else {
*Out = ctx.Input<framework::Tensor>("Out");
*ddX = ctx.Input<framework::Tensor>("DDX");
if (ddo_var) {
*ddOut = ctx.Output<framework::Tensor>("DDOut");
}
if (do_var) {
*dOut = ctx.Output<framework::Tensor>("DOut");
}
}
PADDLE_ENFORCE(*ddX != nullptr,
"Cannot get output tensor %s, variable name = %s", "DDX",
ctx.op().Output("DDX"));
if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) {
auto x_var = ctx.InputVar("X");
PADDLE_ENFORCE(x_var != nullptr,
"Cannot get input tensor X, variable name = %s",
ctx.op().Input("X"));
auto dx_var = ctx.OutputVar("DX");
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
*X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var);
if (dx_var) {
*dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
dx_var);
}
} else {
*X = ctx.Input<framework::Tensor>("X");
if (dx_var) {
*dX = ctx.Output<framework::Tensor>("DX");
}
}
} else {
VLOG(10) << " Inplace activation of Op : " << ctx.op().Type();
*X = *ddX;
}
}
template <typename DeviceContext, typename Functor>
class ActivationDoubleGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& ctx) const override {
const framework::Tensor *X, *Out, *ddX;
X = Out = ddX = nullptr;
framework::Tensor *ddOut, *dOut, *dX;
ddOut = dOut = dX = nullptr;
ExtractActivationDoubleGradTensor<Functor::FwdDeps()>(ctx, &X, &Out, &ddX,
&dX, &dOut, &ddOut);
if (ddOut) ddOut->mutable_data<T>(ctx.GetPlace());
if (dOut) dOut->mutable_data<T>(ctx.GetPlace());
if (dX) dX->mutable_data<T>(Out->dims(), ctx.GetPlace());
auto& place = ctx.template device_context<DeviceContext>();
Functor functor;
auto attrs = functor.GetAttrs();
for (auto& attr : attrs) {
*attr.second = ctx.Attr<float>(attr.first);
}
functor(place, X, Out, ddX, ddOut, dOut, dX);
}
};
template <typename T>
struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* X,
const framework::Tensor* Out, const framework::Tensor* ddX,
framework::Tensor* ddOut, framework::Tensor* dOut,
framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(detail::Ref(ddOut));
ddout.device(*d) = ddx * (out > static_cast<T>(0)).template cast<T>();
}
if (dOut) {
auto dout = framework::EigenVector<T>::Flatten(detail::Ref(dOut));
dout.device(*d) = dout.constant(static_cast<T>(0));
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};
} // namespace operators
} // namespace paddle
...@@ -1205,7 +1325,6 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
  __macro(sigmoid, Sigmoid, SigmoidFunctor, SigmoidGradFunctor);             \
  __macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
  __macro(exp, Exp, ExpFunctor, ExpGradFunctor);                             \
-  __macro(relu, Relu, ReluFunctor, ReluGradFunctor);                        \
  __macro(gelu, Gelu, GeluFunctor, GeluGradFunctor);                         \
  __macro(tanh, Tanh, TanhFunctor, TanhGradFunctor);                         \
  __macro(atan, Atan, AtanFunctor, AtanGradFunctor);                         \
......
...@@ -611,7 +611,7 @@ def _find_op_path_(block, outputs, inputs, no_grad_set):
    if inputs:
        for op in op_path:
            for name in op.desc.input_arg_names():
-                if name not in input_names:
+                if name not in input_names and block.vars[name].stop_gradient:
                    no_grad_set.add(name)

    return op_path
......
...@@ -29,7 +29,7 @@ list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com
list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957
list(REMOVE_ITEM TEST_OPS op_test) # op_test is a helper python file, not a test
-list(REMOVE_ITEM TEST_OPS decorators) # decorators is a helper python file, not a test
+list(REMOVE_ITEM TEST_OPS decorator_helper) # decorator_helper is a helper python file, not a test
if(APPLE)
    if(NOT WITH_DISTRIBUTE)
        list(REMOVE_ITEM TEST_OPS test_desc_clone)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import six
import collections
import numpy as np
from itertools import product
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.executor import Executor
from paddle.fluid.backward import calc_gradient
from paddle.fluid.backward import _append_grad_suffix_, _as_list
def _product(t):
if isinstance(t, int):
return t
else:
return np.product(t)
def dtype_to_np_dtype(dtype):
if dtype == core.VarDesc.VarType.FP32:
return np.float32
elif dtype == core.VarDesc.VarType.FP64:
return np.float64
elif dtype == core.VarDesc.VarType.FP16:
return np.float16
else:
raise ValueError("Not supported data type " + str(dtype))
def _get_item(t, i, np_dtype):
if np_dtype == np.float16:
np_t = np.array(t).astype(np.float16)
np_t = np_t.flatten()
return np_t[i]
elif np_dtype == np.float32:
return t._get_float_element(i)
elif np_dtype == np.float64:
return t._get_double_element(i)
else:
raise ValueError("Not supported data type " + str(np_dtype))
def _set_item(t, i, e, np_dtype):
if np_dtype == np.float16:
np_t = np.array(t).astype(np.float16)
shape = np_t.shape
np_t = np_t.flatten()
np_t[i] = e
np_t = np_t.reshape(shape).view(np.uint16)
t.set(np_t, place)
elif np_dtype == np.float32:
t._set_float_element(i, e)
elif np_dtype == np.float64:
t._set_double_element(i, e)
else:
raise ValueError("Not supported data type " + str(np_dtype))
def set_var_in_scope(scope, place, name, value, recursive_seq_len=None):
t = scope.var(name).get_tensor()
t.set(value, place)
if recursive_seq_len:
t.set_recursive_sequence_lengths(recursive_seq_len)
return t
def make_jacobian(x, y_size, np_dtype):
if isinstance(x, fluid.framework.Variable):
return np.zeros((_product(x.shape), y_size), dtype=np_dtype)
elif isinstance(x, collections.Sequence):
jacobians = list(
filter(lambda t: t is not None, (make_jacobian(
item, y_size, np_dtype) for item in x)))
return jacobians
else:
        return None
def _compute_numerical_jacobian(program, x, y, place, scope, delta):
"""Computes the numeric Jacobian for dy/dx.
Computes the numeric Jacobian by slightly perturbing the inputs and
measuring the differences on the output.
Args:
program (Program): the network program.
x (Variable): the input variables.
y (list[Variable]): the output variables.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
scope (Scope): the scope used to run program.
delta: the amount of perturbation we give to the input
Returns:
A list of 2-D numpy array, the list length is len(y).
Each 2-D numpy array represents the Jacobian for dy_i/dx.
It has "x_size" rows and "y_size" columns
where "x_size" is the number of elements in x and
"y_size" is the number of elements in each y_i.
"""
if not isinstance(x, fluid.framework.Variable):
raise TypeError('x is not Variable')
# To compute the jacobian, treat x and y as one-dimensional vectors.
y = _as_list(y)
exe = fluid.Executor(place)
def run():
y_res = exe.run(program, scope=scope, fetch_list=y)
return [yi.flatten() for yi in y_res]
x_name = x.name
x_shape = x.shape
x_size = _product(x_shape)
x_t = scope.find_var(x_name).get_tensor()
np_type = dtype_to_np_dtype(x.dtype)
jacobian = [make_jacobian(x, _product(yi.shape), np_type) for yi in y]
for i in six.moves.xrange(x_size):
orig = _get_item(x_t, i, np_type)
x_pos = orig + delta
_set_item(x_t, i, x_pos, np_type)
y_pos = run()
x_neg = orig - delta
_set_item(x_t, i, x_neg, np_type)
y_neg = run()
_set_item(x_t, i, orig, np_type)
for j in six.moves.xrange(len(y)):
jacobian[j][i, :] = (y_pos[j] - y_neg[j]) / delta / 2.
return jacobian
def _compute_analytical_jacobian(program, x, y, place, scope):
"""Computes the analytical Jacobian for dy/dx.
Args:
program (Program): a Program with forward pass.
x (Variable|list[Variable]): a variable or list of variable
y (Variable): the target variable.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
scope (Scope): the scope used to run program.
Returns:
A list of 2-D numpy array. The list length is len(x).
Each 2-D numpy array represents the Jacobian for dy/dx_i.
It has "xi_size" rows and "dy_size" columns
    where "xi_size" is the number of elements in x_i and
"dy_size" is the number of elements in y.
"""
if not isinstance(y, fluid.framework.Variable):
raise TypeError('y is not Variable')
dy_name = _append_grad_suffix_(y.name)
np_type = dtype_to_np_dtype(y.dtype)
# create dy Variable in Program
dy = program.global_block().create_var(
name=dy_name, shape=y.shape, dtype=np_type, persistable=True)
# append backward
dx = calc_gradient(y, x, dy)
# init dy tensor in scope
value = np.zeros(y.shape, dtype=np_type)
dy_t = set_var_in_scope(scope, place, dy_name, value)
exe = fluid.Executor(place)
y_size = _product(y.shape)
x = _as_list(x)
jacobian = make_jacobian(x, y_size, np_type)
dx = _as_list(dx)
for i in six.moves.xrange(y_size):
_set_item(dy_t, i, 1, np_type)
dx_res = exe.run(program, scope=scope, fetch_list=dx)
for j in six.moves.xrange(len(x)):
jacobian[j][:, i] = dx_res[j].flatten()
_set_item(dy_t, i, 0, np_type)
return jacobian
def grad_check(x,
y,
x_init=None,
place=None,
program=None,
eps=1e-6,
atol=1e-5,
rtol=1e-3,
raise_exception=True):
"""
Check numerical and analytical gradients for dy/dx.
    Each Jacobian is a 2-D array with shape [xi_size, yi_size].
Args:
x (Variable|list[Variable]): input variables to the program.
y (Variable|list[Variable]): output variables to the program.
x_init (numpy.array|list[numpy.array]|None): the init value for input x.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
program (Program|None): a Program with forward pass.
If None, use fluid.default_main_program().
eps (float): perturbation for finite differences.
atol (float): absolute tolerance.
rtol (float): relative tolerance.
raise_exception (bool): whether to raise an exception if
the check fails. Default is True.
Returns:
True if all differences satisfy numpy.allclose condition.
"""
def fail_test(msg):
if raise_exception:
raise RuntimeError(msg)
return False
# check input arguments
x = _as_list(x)
y = _as_list(y)
for v in x:
v.stop_gradient = False
v.persistable = True
if place is None:
place = fluid.CPUPlace()
if program is None:
program = fluid.default_main_program()
    # init variables in the startup program
scope = fluid.executor.global_scope()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
x_init = _as_list(x_init)
# init inputs if x_init is not None
if x_init:
if len(x_init) != len(x):
raise ValueError('len(x_init) (=%d) is not the same'
' as len(x) (= %d)' % (len(x_init), len(x)))
# init variable in main program
for var, arr in zip(x, x_init):
assert var.shape == arr.shape
feeds = {k.name: v for k, v in zip(x, x_init)}
exe.run(program, feed=feeds, scope=scope)
# [x_idx, y_idx]
numerical = [
_compute_numerical_jacobian(program, xi, y, place, scope, eps)
for xi in x
]
# [y_idx, x_idx]
analytical = [
_compute_analytical_jacobian(program, x, yi, place, scope) for yi in y
]
for i, (x_idx,
y_idx) in enumerate(product(*[range(len(x)), range(len(y))])):
a = analytical[y_idx][x_idx]
n = numerical[x_idx][y_idx]
if not np.allclose(a, n, rtol, atol):
msg = 'Jacobian mismatch for output %s ' \
'with respect to input %s on %s,\n' \
'numerical:%s\nanalytical:%s\n' \
% (y[y_idx].name, x[x_idx].name, str(place), n, a)
return fail_test(msg)
return True
def double_grad_check(x,
y,
x_init=None,
y_grads=None,
place=None,
program=None,
eps=1e-6,
atol=1e-5,
rtol=1e-3,
raise_exception=True):
"""
Check gradients of gradients. This function will append backward to the
program before second order gradient check.
Args:
x (Variable|list[Variable]): input variables to the program.
y (Variable|list[Variable]): output variables to the program.
x_init (numpy.array|list[numpy.array]|None): the init value for input x.
y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
program (Program|None): a Program with forward pass.
If None, use fluid.default_main_program().
eps (float): perturbation for finite differences.
atol (float): absolute tolerance.
rtol (float): relative tolerance.
raise_exception (bool): whether to raise an exception if
the check fails. Default is True.
Returns:
True if all differences satisfy numpy.allclose condition.
"""
# check input arguments
x = _as_list(x)
for v in x:
v.stop_gradient = False
v.persistable = True
y = _as_list(y)
if program is None:
program = fluid.default_main_program()
if y_grads is None:
scope = fluid.executor.global_scope()
y_grads = []
for yi in y:
dyi_name = _append_grad_suffix_(yi.name)
np_type = dtype_to_np_dtype(yi.dtype)
dy = program.global_block().create_var(
name=dyi_name, shape=yi.shape, dtype=np_type, persistable=True)
dy.stop_gradient = False
v = np.random.random(size=yi.shape).astype(np_type)
set_var_in_scope(scope, place, dyi_name, v)
y_grads.append(dy)
else:
y_grads = _as_list(y_grads)
# append first order grads
target_grads = calc_gradient(y, x, y_grads)
grad_check(x, target_grads, x_init, place, program, eps, atol, rtol)
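To summarize what the checker above compares (notation mine, not from the patch): for input element $x_i$ and output element $y_j$, with perturbation $\delta$ (eps),

$$J^{\mathrm{num}}_{ij} \approx \frac{y_j(x + \delta e_i) - y_j(x - \delta e_i)}{2\delta}, \qquad J^{\mathrm{ana}}_{ij} = \frac{\partial y_j}{\partial x_i} \ \text{(one backward pass per unit cotangent } dy = e_j\text{)},$$

and the check passes when $|J^{\mathrm{ana}}_{ij} - J^{\mathrm{num}}_{ij}| \le \mathrm{atol} + \mathrm{rtol}\,|J^{\mathrm{num}}_{ij}|$ for every $(i, j)$, which is the numpy.allclose criterion used in grad_check. double_grad_check reuses the same machinery on the first-order gradients returned by calc_gradient.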
...@@ -19,7 +19,7 @@ import random
import collections
import paddle.fluid as fluid
import unittest
-from decorators import *
+from decorator_helper import *

class Memory(object):
......
...@@ -16,12 +16,12 @@ from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
-import decorators
+from decorator_helper import prog_scope
import unittest

class TestGetPlaces(unittest.TestCase):
-    @decorators.prog_scope()
+    @prog_scope()
    def test_get_places(self):
        places = get_places()
        cpu = fluid.CPUPlace()
......
...@@ -17,7 +17,7 @@ import unittest
import contextlib
import numpy as np
-import decorators
+from decorator_helper import prog_scope
import inspect
from six.moves import filter
...@@ -1171,7 +1171,7 @@ class TestBook(LayerTest):
                                 fluid.default_startup_program()):
            get_places(device_count=1)

-    @decorators.prog_scope()
+    @prog_scope()
    def make_nce(self):
        window_size = 5
        words = []
......
...@@ -15,13 +15,13 @@
from __future__ import print_function
import unittest
-import decorators
+from decorator_helper import prog_scope
import paddle.fluid as fluid
import numpy

class TestMathOpPatches(unittest.TestCase):
-    @decorators.prog_scope()
+    @prog_scope()
    def test_add_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a + 10
...@@ -41,7 +41,7 @@ class TestMathOpPatches(unittest.TestCase):
        d_expected = ab_np + numpy.concatenate([a_np, a_np], axis=1)
        self.assertTrue(numpy.allclose(d_expected, d_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_radd_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 + a
...@@ -53,7 +53,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np + 10, b_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_sub_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a - 10
...@@ -65,7 +65,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np - 10, b_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_radd_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 - a
...@@ -77,7 +77,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[b])
        self.assertTrue(numpy.allclose(10 - a_np, b_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_mul_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a * 10
...@@ -89,7 +89,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np * 10, b_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_rmul_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 * a
...@@ -101,7 +101,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[b])
        self.assertTrue(numpy.allclose(10 * a_np, b_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_div_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a / 10
...@@ -113,7 +113,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np / 10, b_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_rdiv_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 / a
...@@ -126,7 +126,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[b])
        self.assertTrue(numpy.allclose(10 / a_np, b_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_div_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
...@@ -141,7 +141,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[c])
        self.assertTrue(numpy.allclose(a_np / b_np, c_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_mul_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
...@@ -156,7 +156,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[c])
        self.assertTrue(numpy.allclose(a_np * b_np, c_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_add_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
...@@ -171,7 +171,7 @@ class TestMathOpPatches(unittest.TestCase):
                              fetch_list=[c])
        self.assertTrue(numpy.allclose(a_np + b_np, c_np))

-    @decorators.prog_scope()
+    @prog_scope()
    def test_sub_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.core as core
import gradient_checker
from decorator_helper import prog_scope
class TestMulGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
prog = fluid.Program()
with fluid.program_guard(prog):
x = layers.create_parameter(dtype="float64", shape=[2, 8], name='x')
y = layers.create_parameter(dtype="float64", shape=[8, 4], name='y')
z = layers.mul(x=x, y=y)
gradient_checker.grad_check([x, y], z, place=place)
def test_grad(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)
class TestReluDoubleGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
        # the shape of the input variable should be clearly specified, and must not include -1.
shape = [2, 8]
eps = 0.005
dtype = np.float64
x = layers.data('x', shape, False, dtype)
x.persistable = True
y = layers.relu(x)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
x_arr[np.abs(x_arr) < 0.005] = 0.02
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
def test_grad(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)
if __name__ == "__main__":
unittest.main()
...@@ -17,11 +17,11 @@ import unittest
import paddle.fluid as fluid
import numpy as np
-import decorators
+from decorator_helper import prog_scope

class TestRegistry(unittest.TestCase):
-    @decorators.prog_scope()
+    @prog_scope()
    def test_registry_layer(self):
        x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32')
        output = fluid.layers.mean(x)
......