未验证 提交 8b91174c 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #7027 from reyoung/feature/rnn_gradient_check

Feature/rnn gradient check
......@@ -116,9 +116,9 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
auto height = dout_tensor.dims()[0];
auto slice = dx_tensor.Slice(0, static_cast<int>(height));
framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice);
if (dx_tensor.dims()[0] < height) {
if (dx_tensor.dims()[0] > height) {
auto rest_tensor = dx_tensor.Slice(
static_cast<int>(height), static_cast<int>(dout_tensor.dims()[0]));
static_cast<int>(height), static_cast<int>(dx_tensor.dims()[0]));
math::set_constant(dev_ctx, &rest_tensor, 0.0f);
}
}
......
......@@ -37,11 +37,11 @@ class SumKernel : public framework::OpKernel<T> {
bool in_place = out_var == in_vars[0];
if (out_var->IsType<framework::LoDTensor>()) {
auto *out = context.Output<Tensor>("Out");
auto *out = context.Output<LoDTensor>("Out");
if (!in_place) {
out->mutable_data<T>(context.GetPlace());
}
auto result = EigenVector<T>::Flatten(*out);
if (!in_place) {
math::SetConstant<DeviceContext, T> constant_functor;
constant_functor(context.template device_context<DeviceContext>(), out,
......
......@@ -130,9 +130,9 @@ class ReadFromArrayOp : public ArrayOp {
auto &x_array = x->Get<framework::LoDTensorArray>();
auto *out = scope.FindVar(Output("Out"));
PADDLE_ENFORCE(out != nullptr, "Out must be set");
auto *out_tensor = out->GetMutable<framework::LoDTensor>();
size_t offset = GetOffset(scope, place);
if (offset < x_array.size()) {
auto *out_tensor = out->GetMutable<framework::LoDTensor>();
platform::DeviceContextPool &pool =
platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
......
......@@ -77,10 +77,10 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
} else if (paddle::platform::is_cpu_place(tensor.place())) {
dst_tensor = tensor;
}
return py::buffer_info(
dst_tensor.mutable_data<CUR_TYPE>(dst_tensor.place()),
sizeof(CUR_TYPE), py::format_descriptor<CUR_TYPE>::format(),
(size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
return py::buffer_info(dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
py::format_descriptor<CUR_TYPE>::format(),
(size_t)framework::arity(dst_tensor.dims()),
dims_outside, strides);
} else {
constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
......
import numpy as np
import contextlib
from framework import Program, default_main_program
from . import core
from framework import Program, default_main_program, Parameter, Variable
__all__ = ['Executor', 'g_scope']
__all__ = ['Executor', 'global_scope', 'scope_guard', 'switch_scope']
g_scope = core.Scope()
def global_scope():
    """Return whatever object the module-level ``g_scope`` is bound to now."""
    current = g_scope
    return current
def switch_scope(scope):
    """Rebind the module-level ``g_scope`` to ``scope``.

    Returns the object ``g_scope`` referred to before the call, so a caller
    can put it back later with a second ``switch_scope`` call.
    """
    global g_scope
    # The right-hand side is evaluated first, so ``previous`` receives the
    # old binding before ``g_scope`` is overwritten.
    previous, g_scope = g_scope, scope
    return previous
@contextlib.contextmanager
def scope_guard(scope):
    """Context manager that makes ``scope`` the global scope for a ``with`` body.

    On entry the global scope is switched to ``scope`` via ``switch_scope``;
    on exit the scope that was active before entry is switched back in.

    The restore runs in a ``finally`` clause so that an exception raised
    inside the ``with`` body cannot leave the process-wide scope pointing at
    ``scope`` (the exception itself still propagates to the caller).
    """
    ex = switch_scope(scope)
    try:
        yield
    finally:
        switch_scope(ex)
def as_numpy(tensor):
if isinstance(tensor, list):
return [as_numpy(t) for t in tensor]
......@@ -117,7 +136,7 @@ class Executor(object):
raise TypeError()
if scope is None:
scope = g_scope
scope = global_scope()
program = program.clone()
global_block = program.global_block()
......
......@@ -170,7 +170,7 @@ def main():
exe.run(fluid.default_startup_program())
embedding_param = fluid.g_scope.find_var(embedding_name).get_tensor()
embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
embedding_param.set(
load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place)
......
import paddle.v2.fluid as fluid
__all__ = ['many_times', 'prog_scope']
def many_times(times):
    """Decorator factory: call the decorated function ``times`` times in a row.

    Every invocation of the wrapper forwards the same positional and keyword
    arguments to the wrapped function on each repetition. The wrapped
    function's return values are discarded; the wrapper returns ``None``.

    ``functools.wraps`` is applied so the wrapper keeps the decorated
    function's ``__name__`` and ``__doc__`` instead of exposing ``__fn__``.
    """
    # Imported locally so this block does not depend on the file's import list.
    import functools

    def __impl__(fn):
        @functools.wraps(fn)
        def __fn__(*args, **kwargs):
            for _ in range(times):
                fn(*args, **kwargs)

        return __fn__

    return __impl__
def prog_scope():
    """Decorator factory: run the decorated function in fresh fluid state.

    Each call of the wrapper creates a new main ``fluid.Program``, a new
    startup ``fluid.Program`` and a new ``fluid.core.Scope``, then invokes the
    wrapped function inside ``fluid.scope_guard(scope)`` and
    ``fluid.program_guard(prog, startup_prog)``. The wrapped function's return
    value is discarded; the wrapper returns ``None``.

    ``functools.wraps`` is applied so the wrapper keeps the decorated
    function's ``__name__`` and ``__doc__`` instead of exposing ``__fn__``.
    """
    # Imported locally so this block does not depend on the file's import list.
    import functools

    def __impl__(fn):
        @functools.wraps(fn)
        def __fn__(*args, **kwargs):
            prog = fluid.Program()
            startup_prog = fluid.Program()
            scope = fluid.core.Scope()
            with fluid.scope_guard(scope):
                with fluid.program_guard(prog, startup_prog):
                    fn(*args, **kwargs)

        return __fn__

    return __impl__
import numpy
import random
import collections
import paddle.v2.fluid as fluid
import unittest
from decorators import *
class Memory(object):
    """One recurrent state slot of the numpy reference RNN.

    ``ex`` holds the value carried over from the previous step and ``cur``
    holds the value produced during the current step (``None`` until
    ``update`` is called).

    The shape and dtype given at construction are stored separately so that
    ``update`` and ``reset`` keep working after a ``next()`` that was not
    preceded by ``update`` (which leaves ``ex`` set to ``None``).

    The former ``ex()`` method was removed: ``__init__`` always assigns an
    instance attribute named ``ex``, which shadowed the method on every
    instance, so it could never be called.
    """

    def __init__(self, shape, dtype='float32'):
        self.ex = numpy.zeros(shape=shape, dtype=dtype)
        self.cur = None
        # Layout of the slot, taken from the zero array actually created.
        self._shape = self.ex.shape
        self._dtype = self.ex.dtype

    def update(self, val):
        """Record ``val`` as this step's value; it must match shape and dtype."""
        assert val.shape == self._shape
        assert val.dtype == self._dtype
        self.cur = val

    def next(self):
        """Advance one step: ``cur`` becomes ``ex`` and ``cur`` is cleared."""
        self.ex = self.cur
        self.cur = None

    def __next__(self):
        # Lets the Python 3 builtin next(mem) behave like mem.next().
        self.next()

    def reset(self):
        """Restore the initial state: ``ex`` is all zeros and ``cur`` is ``None``."""
        self.ex = numpy.zeros(shape=self._shape, dtype=self._dtype)
        self.cur = None
class Output(object):
    """Collects emitted values, grouped into one list per sequence.

    ``outs`` is a list of lists: ``next_sequence`` opens a new inner list and
    ``out`` appends to the most recently opened one.
    """

    def __init__(self):
        self.outs = list()

    def next_sequence(self):
        """Start collecting values for a new sequence."""
        self.outs.append(list())

    def out(self, val):
        """Append ``val`` to the sequence opened most recently."""
        current = self.outs[-1]
        current.append(val)

    def last(self):
        """Return the newest value of the sequence opened most recently."""
        current = self.outs[-1]
        return current[-1]
class BaseRNN(object):
    """Numpy reference implementation of a step-wise RNN over random data.

    The constructor generates random input sequences and random parameters;
    subclasses supply the per-step computation by overriding ``step``. The
    class can run the forward pass (``exe``), package its data as a feed
    dict (``to_feed``) and estimate gradients by central differences.

    Only constructs available in both Python 2 and Python 3 are used
    (``range``, ``list(d)[0]``, ``d.values()``).
    """

    def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15):
        """Create random inputs/parameters and empty memories/outputs.

        Args:
            ins: dict mapping input name to a spec dict with optional
                ``'shape'`` (default ``None``) and ``'dtype'`` (default
                ``'float32'``) entries.
            mems: dict mapping memory name to a spec dict of the same form.
            params: dict mapping parameter name to a spec dict of the same
                form.
            outs: iterable of output names.
            num_seq: number of sequences to generate.
            max_seq_len: each sequence length is drawn with
                ``random.randint(1, max_seq_len - 1)``.
        """
        self.num_seq = num_seq
        self.inputs = collections.defaultdict(list)
        for _ in range(num_seq):
            # One length per sequence, shared by every input name.
            seq_len = random.randint(1, max_seq_len - 1)
            for iname in ins:
                ishape = ins[iname].get('shape', None)
                idtype = ins[iname].get('dtype', 'float32')
                lst = []
                for _ in range(seq_len):
                    lst.append(numpy.random.random(size=ishape).astype(idtype))
                self.inputs[iname].append(lst)

        self.mems = dict()
        for mname in mems:
            mshape = mems[mname].get('shape', None)
            mdtype = mems[mname].get('dtype', 'float32')
            self.mems[mname] = Memory(shape=mshape, dtype=mdtype)

        self.params = dict()
        for pname in params:
            pshape = params[pname].get('shape', None)
            pdtype = params[pname].get('dtype', 'float32')
            self.params[pname] = numpy.random.random(size=pshape).astype(pdtype)

        self.outputs = dict()
        for oname in outs:
            self.outputs[oname] = Output()

    def step(self, **kwargs):
        """Compute one time step; must be overridden by subclasses.

        ``exe`` calls this with one keyword argument per input name (the
        array for the current step), per memory name (the memory object),
        per parameter name (the parameter array) and per output name (the
        output collector).
        """
        raise NotImplementedError()

    def exe(self):
        """Run every sequence through ``step`` and collect the final outputs.

        Returns:
            dict mapping each output name to a numpy array built from the
            last value emitted for each sequence, in sequence order.
        """
        retv = dict()
        for out in self.outputs:
            retv[out] = []

        for seq_id in range(self.num_seq):
            # Each sequence starts from reset memories and a fresh output row.
            for mname in self.mems:
                self.mems[mname].reset()
            for out in self.outputs:
                self.outputs[out].next_sequence()

            # All inputs share the sequence length; read it from the first.
            iname0 = list(self.inputs)[0]
            seq_len = len(self.inputs[iname0][seq_id])

            for step_id in range(seq_len):
                xargs = dict()

                for iname in self.inputs:
                    xargs[iname] = self.inputs[iname][seq_id][step_id]

                for mname in self.mems:
                    xargs[mname] = self.mems[mname]

                for pname in self.params:
                    xargs[pname] = self.params[pname]

                for out in self.outputs:
                    xargs[out] = self.outputs[out]

                self.step(**xargs)

                # Advance every memory to the next step.
                for mname in self.mems:
                    next(self.mems[mname])

            for out in self.outputs:
                retv[out].append(self.outputs[out].last())

        for out in retv:
            retv[out] = numpy.array(retv[out])
        return retv

    def to_feed(self, place):
        """Build a feed dict holding the generated inputs and parameters.

        For each input, all steps of all sequences are stacked into one
        ``fluid.Tensor`` placed on ``place``, with a single-level LoD whose
        entries are the cumulative sequence lengths starting at 0.
        Parameters are added as the numpy arrays stored on this object.
        """
        feed_dict = dict()

        for iname in self.inputs:
            lod = [0]
            np_flatten = []
            for seq_id in range(len(self.inputs[iname])):
                seq_len = len(self.inputs[iname][seq_id])
                lod.append(lod[-1] + seq_len)
                np_flatten.extend(self.inputs[iname][seq_id])

            t = fluid.Tensor()
            t.set(numpy.array(np_flatten), place)
            t.set_lod([lod])
            feed_dict[iname] = t

        for pname in self.params:
            feed_dict[pname] = self.params[pname]
        return feed_dict

    def get_numeric_gradient_of_param(self, param_name, delta=0.001):
        """Estimate d(mean output)/d(parameter) by central differences.

        Each element of the 2-D parameter is perturbed in place by
        ``+delta`` and ``-delta`` and restored afterwards.

        Raises:
            ValueError: if the parameter is not two-dimensional.
        """
        p = self.params[param_name]
        if len(p.shape) != 2:
            raise ValueError("Not support get numeric gradient of an parameter,"
                             " which is not matrix")
        g = numpy.zeros(shape=p.shape, dtype=p.dtype)

        for i in range(p.shape[0]):
            for j in range(p.shape[1]):
                o = p[i][j]
                p[i][j] += delta
                pos = self._exe_mean_out_()
                p[i][j] -= 2 * delta
                neg = self._exe_mean_out_()
                p[i][j] = o
                g[i][j] = (pos - neg) / (delta * 2)
        return g

    def get_numeric_gradient_of_input(self,
                                      input_name,
                                      delta=0.001,
                                      return_one_tensor=True):
        """Estimate d(mean output)/d(input) by central differences.

        Every step array of the named input must be one-dimensional; each
        element is perturbed in place and restored afterwards.

        Returns:
            If ``return_one_tensor`` is false, a list (per sequence) of lists
            (per step) of gradient arrays. Otherwise all of them concatenated
            into one array, in sequence order and then step order.

        Raises:
            ValueError: if a step array is not one-dimensional.
        """
        ipt = self.inputs[input_name]
        grad = []

        for seq in ipt:
            seq_grad = []
            for item in seq:
                item_grad = numpy.zeros(shape=item.shape, dtype=item.dtype)
                if len(item.shape) != 1:
                    raise ValueError("Not support")

                for i in range(len(item)):
                    o = item[i]
                    item[i] += delta
                    pos = self._exe_mean_out_()
                    item[i] -= 2 * delta
                    neg = self._exe_mean_out_()
                    item[i] = o
                    item_grad[i] = (pos - neg) / (delta * 2)
                seq_grad.append(item_grad)
            grad.append(seq_grad)

        if not return_one_tensor:
            return grad

        for i in range(len(grad)):
            grad[i] = numpy.concatenate(grad[i])
        grad = numpy.concatenate(grad)
        return grad

    def _exe_mean_out_(self):
        """Run ``exe`` and return the mean of the per-output means."""
        outs = self.exe()
        return numpy.array([o.mean() for o in outs.values()]).mean()
class TestSimpleMul(unittest.TestCase):
    """Checks a memory-less DynamicRNN (one bias-free fc per step).

    The fluid forward output and the gradients w.r.t. the weight and the
    input are compared against the numpy reference ``SimpleMul`` and its
    numeric gradients.
    """
    DATA_NAME = 'X'
    DATA_WIDTH = 32
    PARAM_NAME = 'W'
    HIDDEN_WIDTH = 10
    OUT_NAME = 'Out'

    class SimpleMul(BaseRNN):
        """Numpy reference: each step emits ``matmul(X, W)``; no memory."""

        def __init__(self):
            owner = TestSimpleMul
            input_specs = {owner.DATA_NAME: {'shape': [owner.DATA_WIDTH]}}
            memory_specs = {}
            param_specs = {
                owner.PARAM_NAME: {
                    'shape': [owner.DATA_WIDTH, owner.HIDDEN_WIDTH]
                }
            }
            output_names = [owner.OUT_NAME]
            super(owner.SimpleMul, self).__init__(
                input_specs, memory_specs, param_specs, output_names)

        def step(self, X, W, Out):
            Out.out(numpy.matmul(X, W))

    # Enable the decorator below locally to repeat the test and confirm that
    # the random data cannot break CI.
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        reference = TestSimpleMul.SimpleMul()

        # Build the fluid program: one bias-free fc applied at every step.
        seq_input = fluid.layers.data(
            name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1)
        seq_input.stop_gradient = False

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_in = rnn.step_input(seq_input)
            projected = fluid.layers.fc(input=step_in,
                                        param_attr=self.PARAM_NAME,
                                        bias_attr=False,
                                        size=self.HIDDEN_WIDTH,
                                        act=None)
            rnn.output(projected)

        rnn_out = rnn()
        last_step = fluid.layers.sequence_pool(rnn_out, pool_type='last')
        loss = fluid.layers.mean(x=last_step)
        fluid.backward.append_backward(loss)

        # Run forward and backward on CPU, fetching output and both gradients.
        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        fetched = exe.run(feed=reference.to_feed(cpu),
                          fetch_list=[
                              last_step, self.PARAM_NAME + "@GRAD",
                              self.DATA_NAME + "@GRAD"
                          ],
                          return_numpy=False)
        out, w_g, i_g = [numpy.array(tensor) for tensor in fetched]

        # Forward result must match the numpy reference.
        out_by_python = reference.exe()[self.OUT_NAME]
        self.assertTrue(numpy.allclose(out, out_by_python))

        # Weight gradient versus central-difference estimate.
        w_g_num = reference.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05))

        # Input gradient versus central-difference estimate.
        i_g_num = reference.get_numeric_gradient_of_input(
            input_name=self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05))
class TestSimpleMulWithMemory(unittest.TestCase):
    """Checks a DynamicRNN whose step adds a memory to a bias-free fc output.

    The fluid forward output and the gradients w.r.t. the weight and the
    input are compared against the numpy reference ``SimpleMulWithMemory``
    and its numeric gradients.
    """
    DATA_WIDTH = 32
    HIDDEN_WIDTH = 20
    DATA_NAME = 'X'
    PARAM_NAME = 'W'

    class SimpleMulWithMemory(BaseRNN):
        """Numpy reference: ``o = matmul(X, W) + Mem.ex``; ``o`` is stored
        as the new memory value and also emitted as the step output."""

        def __init__(self):
            owner = TestSimpleMulWithMemory
            input_specs = {owner.DATA_NAME: {'shape': [owner.DATA_WIDTH]}}
            memory_specs = {'Mem': {'shape': [owner.HIDDEN_WIDTH]}}
            param_specs = {
                owner.PARAM_NAME: {
                    'shape': [owner.DATA_WIDTH, owner.HIDDEN_WIDTH]
                }
            }
            output_names = ['Out']
            super(owner.SimpleMulWithMemory, self).__init__(
                input_specs, memory_specs, param_specs, output_names)

        def step(self, X, Mem, W, Out):
            o = numpy.matmul(X, W)
            assert isinstance(Mem, Memory)
            o += Mem.ex
            Mem.update(o)
            assert isinstance(Out, Output)
            Out.out(o)

    # many_times is used locally for debugging, to make sure the calculation
    # is stable.
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        reference = TestSimpleMulWithMemory.SimpleMulWithMemory()

        # Build the fluid program: fc output plus memory, fed back as memory.
        seq_input = fluid.layers.data(
            name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1)
        seq_input.stop_gradient = False

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_in = rnn.step_input(seq_input)
            mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH])
            hidden = fluid.layers.fc(input=step_in,
                                     size=self.HIDDEN_WIDTH,
                                     param_attr=self.PARAM_NAME,
                                     bias_attr=False,
                                     act=None)
            summed = fluid.layers.elementwise_add(x=hidden, y=mem)
            rnn.update_memory(mem, summed)
            rnn.output(summed)

        rnn_out = rnn()
        last = fluid.layers.sequence_pool(input=rnn_out, pool_type='last')
        loss = fluid.layers.mean(x=last)
        fluid.backward.append_backward(loss)

        # Run forward and backward on CPU, fetching output and both gradients.
        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        feed = reference.to_feed(cpu)
        fetched = exe.run(feed=feed,
                          fetch_list=[
                              last, self.PARAM_NAME + "@GRAD",
                              self.DATA_NAME + "@GRAD"
                          ],
                          return_numpy=False)
        last_np, w_g, i_g = [numpy.array(tensor) for tensor in fetched]

        # The reference has exactly one output; unpack that single array.
        last_by_py, = reference.exe().values()
        w_g_num = reference.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(last_np, last_by_py))
        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1))

        i_g_num = reference.get_numeric_gradient_of_input(self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)

        # This RNN performs many float additions, so the numbers may not be
        # stable; a looser rtol of 0.1 is used.
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.1))
if __name__ == '__main__':
    # Executed as a script: hand control to the unittest runner.
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册