Commit bad3d4b6 authored by Yang Yu

Grad Check For RNN

Parent ea5d6eae
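
This commit wires up a numeric gradient check for DynamicRNN: the test below perturbs each parameter entry by ±delta, re-runs the pure-Python forward pass, and compares the central-difference estimate (f(w + delta) - f(w - delta)) / (2 * delta) against the analytic gradient fetched from fluid's backward pass. A minimal, self-contained sketch of that central-difference idea (illustrative only, not part of this commit; the names and shapes below are made up):

import numpy


def numeric_gradient(f, w, delta=0.01):
    # Central-difference estimate of d f / d w, one element at a time.
    g = numpy.zeros_like(w)
    it = numpy.nditer(w, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = w[idx]
        w[idx] = orig + delta
        pos = f()
        w[idx] = orig - delta
        neg = f()
        w[idx] = orig  # restore the original value
        g[idx] = (pos - neg) / (2.0 * delta)
        it.iternext()
    return g


# Toy example: loss = mean(X.W); shapes are arbitrary.
x = numpy.random.random((4, 3)).astype('float32')
w = numpy.random.random((3, 2)).astype('float32')
print(numeric_gradient(lambda: float(numpy.matmul(x, w).mean()), w))
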
@@ -136,6 +136,17 @@ class ReadFromArrayOp : public ArrayOp {
      auto &dev_ctx = *pool.Borrow(place);
      framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor);
      out_tensor->set_lod(x_array[offset].lod());
      if (Input("X") == "dynamic_rnn_0_output_array_fc_0.tmp_0_0@GRAD") {
        VLOG(10) << "Offset = " << offset;
        if (x_array[offset].numel() != 0) {
          auto d = x_array[offset].dims();
          std::ostringstream sout;
          for (int64_t i = 0; i < d[0]; ++i) {
            // Log the first element of every row of this gradient tensor.
            sout << x_array[offset].data<float>()[i * d[1]] << ", ";
          }
          VLOG(10) << "Grad = " << sout.str();
        }
      }
    } else {
      VLOG(10) << "offset " << offset << " >= " << x_array.size();
    }
...
@@ -129,6 +129,9 @@ class WhileGradOp : public framework::OperatorBase {
        auto &og_inside =
            detail::Ref(cur_scope.Var(inside_og_name),
                        "Cannot find inside gradient %s", inside_og_name);
        VLOG(10) << "OG " << outside_og_name << " Type is "
                 << og_outside.Type().name();
        if (og_outside.Type().hash_code() ==
            typeid(framework::LoDTensor).hash_code()) {
          auto &outside_tensor = og_outside.Get<framework::LoDTensor>();
@@ -145,7 +148,6 @@ class WhileGradOp : public framework::OperatorBase {
          inside_array.resize(outside_array.size());
          for (size_t j = 0; j < inside_array.size(); ++j) {
            VLOG(10) << j << " " << outside_array[j].numel();
            if (outside_array[j].numel() != 0) {
              inside_array[j].set_lod(outside_array[j].lod());
              inside_array[j].ShareDataWith(outside_array[j]);
@@ -198,6 +200,17 @@ class WhileGradOp : public framework::OperatorBase {
        auto sum_op = framework::OpRegistry::CreateOp(
            "sum", {{"X", {pg_names[param_id], new_inside_name}}},
            {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
        VLOG(10) << "Accumulate the gradient of " << pg_names[param_id];
        if (pg_names[param_id] == "W@GRAD") {
          auto &w_g = detail::Ref(cur_scope.FindVar(new_inside_name))
                          .Get<framework::LoDTensor>();
          VLOG(10) << "W_G is" << w_g.data<float>()[0];
        } else {
          VLOG(10) << pg_names[param_id];
        }
        sum_op->Run(cur_scope, dev_place);
        cur_scope.Rename(new_inside_name, inside_grad_name);
      }
...
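
For context, the sum op created in the last hunk just accumulates the parameter gradient computed inside each step's scope into the running outer gradient; numerically that is plain elementwise addition across time steps. A tiny illustrative numpy sketch (shapes made up, not part of the commit):

import numpy

# Pretend three while-loop steps each produced a gradient for the same parameter.
per_step_grads = [numpy.random.random((32, 10)).astype('float32') for _ in range(3)]

w_grad = numpy.zeros((32, 10), dtype='float32')
for g in per_step_grads:
    w_grad += g  # what the "sum" op does with X = [W@GRAD, new_inside_name]

assert numpy.allclose(w_grad, sum(per_step_grads))

The new Python test added by this commit follows.
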
import numpy
import random
import collections
import paddle.v2.fluid as fluid
import unittest
import copy


# A single RNN memory slot: `ex` is the state fed into the current step,
# `cur` is the state produced by it.
class Memory(object):
    def __init__(self, shape, dtype='float32'):
        self.ex = numpy.zeros(shape=shape, dtype=dtype)
        self.cur = None

    def update(self, val):
        assert val.shape == self.ex.shape
        assert val.dtype == self.ex.dtype
        self.cur = val

    def ex(self):
        return self.ex

    def next(self):
        self.ex = self.cur
        self.cur = None

    def __next__(self):
        self.next()

    def reset(self):
        self.ex = numpy.zeros(shape=self.ex.shape, dtype=self.ex.dtype)
        self.cur = None


class Output(object):
    def __init__(self):
        self.outs = []

    def next_sequence(self):
        self.outs.append([])

    def out(self, val):
        self.outs[-1].append(val)

    def last(self):
        return self.outs[-1][-1]


# Pure-Python reference RNN used to cross-check fluid's DynamicRNN and to
# compute numeric gradients of its parameters.
class BaseRNN(object):
    def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15):
        self.num_seq = num_seq
        self.inputs = collections.defaultdict(list)

        for _ in xrange(num_seq):
            seq_len = random.randint(1, max_seq_len - 1)
            for iname in ins:
                ishape = ins[iname].get('shape', None)
                idtype = ins[iname].get('dtype', 'float32')
                lst = []
                for _ in xrange(seq_len):
                    lst.append(numpy.random.random(size=ishape).astype(idtype))
                self.inputs[iname].append(lst)

        self.mems = dict()
        for mname in mems:
            mshape = mems[mname].get('shape', None)
            mdtype = mems[mname].get('dtype', 'float32')
            self.mems[mname] = Memory(shape=mshape, dtype=mdtype)

        self.params = dict()
        for pname in params:
            pshape = params[pname].get('shape', None)
            pdtype = params[pname].get('dtype', 'float32')
            self.params[pname] = numpy.random.random(size=pshape).astype(pdtype)

        self.outputs = dict()
        for oname in outs:
            self.outputs[oname] = Output()

    def step(self, **kwargs):
        pass

    def exe(self):
        retv = dict()
        for out in self.outputs:
            retv[out] = []

        for seq_id in xrange(self.num_seq):
            for mname in self.mems:
                self.mems[mname].reset()
            for out in self.outputs:
                self.outputs[out].next_sequence()

            iname0 = self.inputs.keys()[0]
            seq_len = len(self.inputs[iname0][seq_id])

            for step_id in xrange(seq_len):
                xargs = dict()

                for iname in self.inputs:
                    xargs[iname] = self.inputs[iname][seq_id][step_id]

                for mname in self.mems:
                    xargs[mname] = self.mems[mname]

                for pname in self.params:
                    xargs[pname] = self.params[pname]

                for out in self.outputs:
                    xargs[out] = self.outputs[out]

                self.step(**xargs)

                for mname in self.mems:
                    next(self.mems[mname])

            for out in self.outputs:
                retv[out].append(self.outputs[out].last())

        for out in retv:
            retv[out] = numpy.array(retv[out])
        return retv

    def to_feed(self, place):
        feed_dict = dict()

        for iname in self.inputs:
            lod = [0]
            np_flatten = []
            for seq_id in xrange(len(self.inputs[iname])):
                seq_len = len(self.inputs[iname][seq_id])
                lod.append(lod[-1] + seq_len)
                np_flatten.extend(self.inputs[iname][seq_id])

            t = fluid.Tensor()
            t.set(numpy.array(np_flatten), place)
            t.set_lod([lod])
            feed_dict[iname] = t

        for pname in self.params:
            feed_dict[pname] = self.params[pname]
        return feed_dict

    def get_numeric_gradient_of_param(self, param_name, delta=0.01):
        p = self.params[param_name]
        g = numpy.zeros(shape=p.shape, dtype=p.dtype)

        for p_it, g_it in numpy.nditer([p, g], op_flags=['readwrite']):
            o = float(p_it)
            # Central difference: perturb one element in both directions and
            # re-run the whole forward pass each time.
            p_it[...] = o + delta
            pos = self._exe_mean_out_()
            p_it[...] = o - delta
            neg = self._exe_mean_out_()
            p_it[...] = o
            g_it[...] = (pos - neg) / (delta * 2)

        return g

    def _exe_mean_out_(self):
        outs = self.exe()
        return numpy.array([o.mean() for o in outs.itervalues()]).mean()


class SimpleMul(BaseRNN):
    def __init__(self):
        super(SimpleMul, self).__init__({
            'X': {
                'shape': [32]
            }
        }, {}, {'W': {
            'shape': [32, 10]
        }}, ['Out'])

    def step(self, X, W, Out):
        Out.out(numpy.matmul(X, W))


class TestSimpleMul(unittest.TestCase):
    def setUp(self):
        self.python_impl = SimpleMul()

    def test_forward(self):
        program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(program, startup_program):
            dat = fluid.layers.data(name='X', shape=[32], lod_level=1)

            rnn = fluid.layers.DynamicRNN()
            with rnn.block():
                d = rnn.step_input(dat)
                o = fluid.layers.fc(input=d,
                                    param_attr='W',
                                    bias_attr=False,
                                    size=10,
                                    act=None)
                rnn.output(o)

            out = rnn()
            out = fluid.layers.sequence_pool(out, pool_type='last')
            loss = fluid.layers.mean(x=out)
            fluid.backward.append_backward_ops(loss)

        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        out, w_g = exe.run(program,
                           feed=self.python_impl.to_feed(cpu),
                           fetch_list=[out, "W@GRAD"])
        out_by_python = self.python_impl.exe()['Out']
        self.assertTrue(numpy.allclose(out, out_by_python))
        w_g_num = self.python_impl.get_numeric_gradient_of_param("W")
        print w_g_num[0][0]
        print w_g_num - w_g


if __name__ == '__main__':
    unittest.main()
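
A quick usage sketch: assuming the file above is saved as test_dynrnn_gradient_check.py (the file name is an assumption; the diff does not show paths), the pure-Python reference model and its numeric gradient can be exercised on their own, without building the fluid program:

# Illustrative only; the module name below is an assumption.
from test_dynrnn_gradient_check import SimpleMul

rnn = SimpleMul()
print(rnn.exe()['Out'].shape)                        # (num_seq, 10)
print(rnn.get_numeric_gradient_of_param('W').shape)  # (32, 10), same shape as W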